connect(REDIS_HOST, REDIS_PORT);
    }
    return $redis;
}

/**
 * Check a per-IP, per-action rate limit backed by a Redis counter.
 *
 * The counter key is REDIS_RATE_PREFIX . <ip> . ':' . <action>. It is
 * incremented on every call and receives a TTL of $window seconds when it is
 * first created.
 *
 * @param string $action Action name used in the counter key.
 * @param int    $limit  Maximum number of calls allowed per window.
 * @param int    $window Window length in seconds.
 * @return bool True if the request is allowed. Also true when the Redis call
 *              throws, i.e. the limiter deliberately fails open.
 */
function checkRateLimit($action, $limit, $window)
{
    try {
        $redis = getRedis();
        $ip = $_SERVER['REMOTE_ADDR'] ?? 'unknown';
        $key = REDIS_RATE_PREFIX . $ip . ':' . $action;

        $current = $redis->incr($key);
        if ($current === 1) {
            $redis->expire($key, $window);
        }

        return $current <= $limit;
    } catch (Exception $e) {
        return true; // Allow if Redis is down
    }
}

/**
 * Sanitize job ID to prevent path traversal attacks.
 *
 * Accepts only strings of the form "pdf_<lowercase hex>". Anything else
 * (including arrays produced by "job_id[]=" parameters) ends the request
 * through error().
 *
 * @param mixed $job_id Raw job ID taken from the request.
 * @return string The validated job ID.
 */
function sanitizeJobId($job_id)
{
    // \z anchors at the very end of the subject; "$" would also accept a
    // trailing newline ("pdf_ab\n").
    if (!is_string($job_id) || !preg_match('/^pdf_[a-f0-9]+\z/', $job_id)) {
        error('Invalid job ID format');
    }
    return $job_id;
}

/**
 * Read a JSON file and return it as an associative array.
 *
 * @param string $path File to read.
 * @return array|null Decoded array, or null when the file is unreadable or
 *                    does not contain a JSON object/array.
 */
function readJsonArray($path)
{
    if (!is_readable($path)) {
        return null;
    }
    $raw = file_get_contents($path);
    if ($raw === false) {
        return null;
    }
    $decoded = json_decode($raw, true);
    return is_array($decoded) ? $decoded : null;
}

/**
 * Build the shell-escaped "<python> <script>" prefix for an exec() call.
 *
 * Uses the virtualenv interpreter at __DIR__/venv/bin/python3 when that file
 * exists, otherwise "python3" resolved through PATH. Both parts are quoted
 * individually so paths containing spaces stay a single argument.
 *
 * @param string $script Path of the Python script to run.
 * @return string Command prefix that is safe to extend with further escaped arguments.
 */
function buildPythonCommand($script)
{
    $venv_python = __DIR__ . '/venv/bin/python3';
    $python_bin = file_exists($venv_python) ? $venv_python : 'python3';
    return escapeshellarg($python_bin) . ' ' . escapeshellarg($script);
}

/**
 * Prepend the Homebrew and /usr/local binary directories to PATH for child
 * processes started by this request.
 */
function prependToolPaths()
{
    $env_path = getenv('PATH');
    putenv("PATH=/opt/homebrew/bin:/usr/local/bin:{$env_path}");
}

/**
 * Make a user-supplied file name safe for a quoted Content-Disposition value.
 *
 * Control characters, double quotes, backslashes and slashes are replaced by
 * underscores.
 *
 * @param mixed  $name     Candidate file name (taken from job metadata).
 * @param string $fallback Name to use when nothing usable remains.
 * @return string Sanitized file name.
 */
function safeDownloadFilename($name, $fallback)
{
    $clean = is_string($name) ? preg_replace('/[\x00-\x1F\x7F"\\\\\/]+/', '_', $name) : '';
    $clean = trim((string)$clean);
    return $clean !== '' ? $clean : $fallback;
}

// CORS headers for API
$allowed_origins = [
    'https://ai-sandbox.oliver.solutions',
    'http://localhost:8888',
    'http://127.0.0.1:8888',
    'http://localhost:8000',
    'http://127.0.0.1:8000',
];

$origin = $_SERVER['HTTP_ORIGIN'] ?? '';
if (in_array($origin, $allowed_origins, true) || (function_exists('isDevelopmentMode') && isDevelopmentMode())) {
    header('Access-Control-Allow-Origin: ' . ($origin ?: '*'));
} else if (!$origin) {
    // No Origin header (same-origin or non-browser client): advertise the primary origin.
    header('Access-Control-Allow-Origin: ' . ($allowed_origins[0]));
}
// A disallowed Origin gets no Access-Control-Allow-Origin header at all.
// Answering with the literal "null" would grant access to sandboxed iframes
// and data:/file: documents, whose origin serializes as "null".
header('Vary: Origin'); // The response differs per Origin, so caches must key on it.
header('Access-Control-Allow-Methods: POST, GET, OPTIONS, DELETE');
header('Access-Control-Allow-Headers: Content-Type, X-API-Key, Authorization');
header('Content-Type: application/json');

// Handle preflight
if ($_SERVER['REQUEST_METHOD'] === 'OPTIONS') {
    exit(0);
}

// Require authentication for all API requests
require_once __DIR__ . '/auth.php';
requireAuth();

// Get action
$action = $_GET['action'] ?? $_POST['action'] ?? '';

switch ($action) {
    case 'upload':
        handleUpload();
        break;
    case 'check':
        handleCheck();
        break;
    case 'status':
        handleStatus();
        break;
    case 'result':
        handleResult();
        break;
    case 'list':
        handleList();
        break;
    case 'delete':
        handleDelete();
        break;
    case 'debug':
        handleDebug();
        break;
    case 'image':
        handleImage();
        break;
    case 'remediate':
        handleRemediate();
        break;
    case 'download':
        handleDownload();
        break;
    case 'stats':
        handleStats();
        break;
    case 'batch_upload':
        handleBatchUpload();
        break;
    case 'batch_status':
        handleBatchStatus();
        break;
    case 'export':
        handleExport();
        break;
    default:
        error('Invalid action');
}

/**
 * Handle file upload.
 *
 * Validates the "pdf" upload (upload error code, size, extension, "%PDF-"
 * header), stores it under UPLOAD_DIR with a random job ID and writes the
 * job metadata file to RESULTS_DIR.
 */
function handleUpload()
{
    // Rate limit: 10 uploads/hour per IP
    if (!checkRateLimit('upload', 10, 3600)) {
        error('Upload rate limit exceeded. Try again later.', 429);
    }

    if (!isset($_FILES['pdf'])) {
        error('No file uploaded');
    }

    $file = $_FILES['pdf'];

    // Validate file
    if ($file['error'] !== UPLOAD_ERR_OK) {
        error('Upload error: ' . $file['error']);
    }

    if ($file['size'] > MAX_FILE_SIZE) {
        error('File too large. Max size: ' . (MAX_FILE_SIZE / 1024 / 1024) . 'MB');
    }

    $ext = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION));
    if (!in_array($ext, ALLOWED_EXTENSIONS, true)) {
        error('Invalid file type. Only PDF files allowed.');
    }

    // Validate PDF magic bytes
    $header = file_get_contents($file['tmp_name'], false, null, 0, 5);
    if ($header !== '%PDF-') {
        error('File is not a valid PDF (invalid file header)');
    }

    // Generate cryptographically secure job ID
    $job_id = 'pdf_' . bin2hex(random_bytes(16));
    $filename = $job_id . '.pdf';
    $filepath = UPLOAD_DIR . '/' . $filename;

    // Move file
    if (!move_uploaded_file($file['tmp_name'], $filepath)) {
        error('Failed to save file');
    }

    // Create job metadata
    $job_data = [
        'job_id' => $job_id,
        'original_filename' => $file['name'],
        'uploaded_at' => date('Y-m-d H:i:s'),
        'file_size' => $file['size'],
        'status' => 'uploaded',
        'filepath' => $filepath
    ];

    $written = file_put_contents(
        RESULTS_DIR . '/' . $job_id . '.meta.json',
        json_encode($job_data, JSON_PRETTY_PRINT)
    );
    if ($written === false) {
        // Without metadata every later action answers "Job not found", so
        // do not keep an orphaned PDF or report success.
        @unlink($filepath);
        error('Failed to save file');
    }

    success([
        'job_id' => $job_id,
        'filename' => $file['name'],
        'message' => 'File uploaded successfully'
    ]);
}

/**
 * Handle PDF accessibility check — push job to Redis queue.
 *
 * When the Redis calls throw, the checker script is started directly as a
 * background process instead (local development without Docker).
 */
function handleCheck()
{
    $job_id = $_POST['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }

    $job_id = sanitizeJobId($job_id);

    // Rate limit: 30 checks/hour per IP
    if (!checkRateLimit('check', 30, 3600)) {
        error('Rate limit exceeded. Try again later.', 429);
    }

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = readJsonArray($meta_file);
    if ($job_data === null || empty($job_data['filepath'])) {
        // Unreadable or truncated metadata: there is nothing to process.
        error('Job not found');
    }

    // Form values arrive as strings; a plain (bool) cast would turn the
    // string "false" into true.
    $quick_mode = filter_var($_POST['quick_mode'] ?? false, FILTER_VALIDATE_BOOLEAN);

    // Push job to Redis queue for worker processing
    try {
        $redis = getRedis();

        $payload = json_encode([
            'job_id' => $job_id,
            'pdf_path' => $job_data['filepath'],
            'original_filename' => $job_data['original_filename'] ?? '',
            'options' => [
                'quick_mode' => $quick_mode,
            ],
            'queued_at' => time()
        ]);

        $redis->lPush(REDIS_QUEUE, $payload);

        // Set initial status in Redis
        $redis->setex(REDIS_STATUS_PREFIX . $job_id, 86400, json_encode([
            'status' => 'queued',
            'progress' => 0,
            'message' => 'Waiting in queue',
            'updated_at' => time()
        ]));
    } catch (Exception $e) {
        // Fallback to direct exec if Redis is unavailable (local dev without Docker)
        $pdf_path = $job_data['filepath'];
        $output_path = RESULTS_DIR . '/' . $job_id . '.result.json';

        $cmd = buildPythonCommand(PYTHON_SCRIPT) . ' ' .
               escapeshellarg($pdf_path) . ' ' .
               '--output ' . escapeshellarg($output_path);

        if ($quick_mode) {
            $cmd .= ' --quick';
        }

        // NOTE(review): keys passed as command-line arguments are visible in
        // the process list; acceptable only for this local-dev fallback.
        $anthropic_key = $_POST['anthropic_key'] ?? getenv('ANTHROPIC_API_KEY');
        $google_key = $_POST['google_key'] ?? $_POST['google_credentials'] ?? getenv('GOOGLE_API_KEY');

        if ($anthropic_key) {
            $cmd .= ' --anthropic-key ' . escapeshellarg($anthropic_key);
        }
        if ($google_key) {
            // NOTE(review): a client-supplied value that names an existing
            // server file is passed on as a credentials path — confirm this
            // is intended outside development.
            if (file_exists($google_key)) {
                $cmd .= ' --google-credentials ' . escapeshellarg($google_key);
            } else {
                $cmd .= ' --google-key ' . escapeshellarg($google_key);
            }
        }

        prependToolPaths();

        // Redirect all output and background the process so exec() returns immediately.
        $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';
        $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1 &';

        exec($cmd, $output, $return_code);
    }

    // Update meta file
    $job_data['status'] = 'queued';
    $job_data['started_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    success([
        'job_id' => $job_id,
        'status' => 'queued',
        'message' => 'Check queued for processing'
    ]);
}

/**
 * Check job status — reads from Redis (real-time) with file fallback.
 *
 * A present result file always wins and marks the job completed. Otherwise
 * a non-empty error log on a job started more than 300 seconds ago is
 * reported as failed.
 */
function handleStatus()
{
    $job_id = $_GET['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }

    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = readJsonArray($meta_file);
    if ($job_data === null) {
        error('Job not found');
    }

    // Try Redis first for real-time progress
    try {
        $redis = getRedis();
        $redis_status = $redis->get(REDIS_STATUS_PREFIX . $job_id);
        if ($redis_status) {
            $status_data = json_decode($redis_status, true);
            // Ignore payloads that are not JSON objects carrying a status.
            if (is_array($status_data) && isset($status_data['status'])) {
                $job_data['status'] = $status_data['status'];
                $job_data['progress'] = $status_data['progress'] ?? 0;
                $job_data['status_message'] = $status_data['message'] ?? '';
            }
        }
    } catch (Exception $e) {
        // Redis unavailable — fall through to file-based check
    }

    $current_status = $job_data['status'] ?? '';

    // File-based fallback: check if result exists
    if (file_exists($result_file)) {
        $job_data['status'] = 'completed';
        $job_data['completed_at'] = date('Y-m-d H:i:s', filemtime($result_file));
        file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));
    } else if (file_exists($error_log) && in_array($current_status, ['queued', 'processing'], true)) {
        // 'queued' is included because that is the only in-flight status this
        // file writes to the metadata; without it a crashed direct-exec job
        // would never be reported as failed.
        $error_content = file_get_contents($error_log);
        if (!empty($error_content)) {
            $started = strtotime($job_data['started_at'] ?? 'now');
            if (time() - $started > 300) {
                $job_data['status'] = 'failed';
                $job_data['error'] = 'Process timeout or error';
                $job_data['error_log'] = substr($error_content, -1000);
            }
        }
    }

    success($job_data);
}

/**
 * Get check results.
 */
function handleResult()
{
    $job_id = $_GET['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }

    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

    if (!file_exists($result_file)) {
        error('Results not found. Check may still be processing.');
    }

    $result = readJsonArray($result_file);
    if ($result === null) {
        // The file exists but is not valid JSON yet (for example still being written).
        error('Results not found. Check may still be processing.');
    }

    success($result);
}

/**
 * List all jobs, newest upload first.
 */
function handleList()
{
    $jobs = [];
    $files = glob(RESULTS_DIR . '/*.meta.json') ?: []; // glob() returns false on error

    foreach ($files as $file) {
        $job_data = readJsonArray($file);
        if ($job_data === null) {
            continue; // Skip unreadable/corrupt metadata instead of listing a null job.
        }

        // Check if completed
        $result_file = str_replace('.meta.json', '.result.json', $file);
        if (file_exists($result_file)) {
            $job_data['status'] = 'completed';
        }

        $jobs[] = $job_data;
    }

    // Sort by upload time (newest first)
    usort($jobs, function ($a, $b) {
        return (int)strtotime($b['uploaded_at'] ?? '') <=> (int)strtotime($a['uploaded_at'] ?? '');
    });

    success(['jobs' => $jobs]);
}

/**
 * Delete a job together with every artifact this API creates for it.
 */
function handleDelete()
{
    $job_id = $_POST['job_id'] ?? $_GET['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }

    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = readJsonArray($meta_file) ?? [];
    $base = RESULTS_DIR . '/' . $job_id;

    // Delete files
    $targets = [
        $job_data['filepath'] ?? null,
        $job_data['remediated_pdf'] ?? null,
        $meta_file,
        $base . '.result.json',
        $base . '.error.log',
        $base . '.remediation.log',
        $base . '.report.html',
    ];
    foreach ($targets as $target) {
        if (is_string($target) && $target !== '') {
            @unlink($target);
        }
    }

    // Page images served by the "image" action live in a per-job directory.
    $images_dir = $base . '.result_images';
    if (is_dir($images_dir)) {
        foreach (glob($images_dir . '/*') ?: [] as $image) {
            @unlink($image);
        }
        @rmdir($images_dir);
    }

    success(['message' => 'Job deleted']);
}

/**
 * Debug endpoint: reports which job files exist, the metadata, the error log
 * and the virtualenv Python version. Development mode only.
 */
function handleDebug()
{
    // Debug endpoint only available in development mode
    require_once __DIR__ . '/auth.php';
    if (!isDevelopmentMode()) {
        error('Debug endpoint disabled in production');
    }

    $job_id = $_GET['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }

    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';

    $debug_info = [
        'job_id' => $job_id,
        'meta_exists' => file_exists($meta_file),
        'result_exists' => file_exists($result_file),
        'error_log_exists' => file_exists($error_log),
        'files' => []
    ];

    if (file_exists($meta_file)) {
        $debug_info['meta'] = json_decode(file_get_contents($meta_file), true);
    }

    if (file_exists($error_log)) {
        $debug_info['error_log'] = file_get_contents($error_log);
    }

    if (file_exists($result_file)) {
        $debug_info['result_size'] = filesize($result_file);
    }

    // Test Python
    $venv_python = __DIR__ . '/venv/bin/python3';
    exec(escapeshellarg($venv_python) . ' --version 2>&1', $python_version);
    $debug_info['python_version'] = implode("\n", $python_version);

    success($debug_info);
}

/**
 * Serve page images.
 */
function handleImage()
{
    $job_id = $_GET['job_id'] ?? '';
    $page_num = $_GET['page'] ?? '';

    if (empty($job_id) || empty($page_num)) {
        error('Job ID and page number required');
    }

    $job_id = sanitizeJobId($job_id);

    // Find the image file
    $images_dir = RESULTS_DIR . '/' . $job_id . '.result_images';
    $image_file = $images_dir . '/page_' . intval($page_num) . '.png';

    if (!file_exists($image_file)) {
        error('Image not found', 404);
    }

    // Serve the image
    header('Content-Type: image/png');
    header('Cache-Control: public, max-age=86400'); // Cache for 1 day
    readfile($image_file);
    exit;
}

/**
 * Auto-remediate PDF accessibility issues.
 *
 * Runs pdf_remediation.py synchronously with "--all" and records the
 * remediated file in the job metadata.
 */
function handleRemediate()
{
    $job_id = $_POST['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }

    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

    if (!file_exists($meta_file) || !file_exists($result_file)) {
        error('Job not found');
    }

    $job_data = readJsonArray($meta_file);
    $result_data = readJsonArray($result_file);
    if ($job_data === null || $result_data === null || empty($job_data['filepath'])) {
        error('Job not found');
    }

    // Check if there are fixable issues
    if (!isset($result_data['auto_fixable_count']) || $result_data['auto_fixable_count'] == 0) {
        error('No auto-fixable issues found');
    }

    $original_pdf = $job_data['filepath'];
    $remediated_pdf = UPLOAD_DIR . '/' . $job_id . '_remediated.pdf';

    // Build command - apply all safe fixes
    $cmd = buildPythonCommand(__DIR__ . '/pdf_remediation.py') . ' ' .
           escapeshellarg($original_pdf) . ' ' .
           '--output ' . escapeshellarg($remediated_pdf) . ' ' .
           '--all';

    // Set PATH for poppler
    prependToolPaths();

    // Run remediation
    $error_log = RESULTS_DIR . '/' . $job_id . '.remediation.log';
    $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1';

    exec($cmd, $output, $return_code);

    // Check if remediation succeeded
    if ($return_code !== 0 || !file_exists($remediated_pdf)) {
        $log_content = file_exists($error_log) ? file_get_contents($error_log) : 'Unknown error';
        error('Remediation failed: ' . substr($log_content, -500));
    }

    // Store remediated file info
    $job_data['remediated_pdf'] = $remediated_pdf;
    $job_data['remediated_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    success([
        'job_id' => $job_id,
        'remediated_pdf' => basename($remediated_pdf),
        'original_filename' => $job_data['original_filename'] ?? '',
        'fixes_applied' => $result_data['auto_fixable_count'],
        'download_url' => 'api.php?action=download&job_id=' . $job_id . '&type=remediated',
        'message' => 'PDF remediated successfully'
    ]);
}

/**
 * Download original or remediated PDF.
 */
function handleDownload()
{
    $job_id = $_GET['job_id'] ?? '';
    $type = $_GET['type'] ?? 'original'; // 'original' or 'remediated'

    if (empty($job_id)) {
        error('Job ID required');
    }

    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = readJsonArray($meta_file);
    if ($job_data === null) {
        error('Job not found');
    }

    $original_name = $job_data['original_filename'] ?? '';

    if ($type === 'remediated') {
        if (!isset($job_data['remediated_pdf']) || !file_exists($job_data['remediated_pdf'])) {
            error('Remediated PDF not found');
        }
        $file_path = $job_data['remediated_pdf'];
        $filename = pathinfo($original_name, PATHINFO_FILENAME) . '_fixed.pdf';
    } else {
        $file_path = $job_data['filepath'] ?? '';
        $filename = $original_name;
    }

    // Fail before any PDF headers are sent; otherwise the client receives an
    // empty "PDF" with status 200.
    if (!is_string($file_path) || !is_file($file_path)) {
        error('File not found', 404);
    }

    // The original name is client-supplied: strip characters that would
    // break out of the quoted filename parameter.
    $filename = safeDownloadFilename($filename, $job_id . '.pdf');

    // Serve the file
    header('Content-Type: application/pdf');
    header('Content-Disposition: attachment; filename="' . $filename . '"');
    header('Content-Length: ' . filesize($file_path));
    readfile($file_path);
    exit;
}

/**
 * Get aggregate job statistics.
 */
function handleStats()
{
    $stats = [
        'total_jobs' => 0,
        'completed' => 0,
        'failed' => 0,
        'processing' => 0,
        'queue_length' => 0
    ];

    // Count jobs from meta files
    $files = glob(RESULTS_DIR . '/*.meta.json') ?: []; // glob() returns false on error
    foreach ($files as $file) {
        $job = readJsonArray($file) ?? [];
        $stats['total_jobs']++;

        $result_file = str_replace('.meta.json', '.result.json', $file);
        if (file_exists($result_file)) {
            $stats['completed']++;
        } else if (($job['status'] ?? '') === 'failed') {
            $stats['failed']++;
        } else {
            $stats['processing']++;
        }
    }

    // Get queue length from Redis
    try {
        $redis = getRedis();
        $stats['queue_length'] = $redis->lLen(REDIS_QUEUE);
    } catch (Exception $e) {
        // Redis unavailable
    }

    success($stats);
}

/**
 * Handle batch file upload — accepts multiple PDFs.
 *
 * Each file is validated like a single upload. Failures are collected per
 * file instead of aborting the batch, and a batch manifest listing the
 * created job IDs is written to RESULTS_DIR.
 */
function handleBatchUpload()
{
    if (!checkRateLimit('upload', 10, 3600)) {
        error('Upload rate limit exceeded.', 429);
    }

    if (!isset($_FILES['pdfs']) || !is_array($_FILES['pdfs']['name'])) {
        error('No files uploaded. Use "pdfs[]" as the file field name.');
    }

    $batch_id = 'batch_' . bin2hex(random_bytes(8));
    $file_count = count($_FILES['pdfs']['name']);
    $uploaded = [];
    $errors = [];

    for ($i = 0; $i < $file_count; $i++) {
        $name = $_FILES['pdfs']['name'][$i];
        $tmp = $_FILES['pdfs']['tmp_name'][$i];
        $size = $_FILES['pdfs']['size'][$i];
        $err = $_FILES['pdfs']['error'][$i];

        if ($err !== UPLOAD_ERR_OK) {
            $errors[] = ['filename' => $name, 'error' => "Upload error code: $err"];
            continue;
        }

        if ($size > MAX_FILE_SIZE) {
            $errors[] = ['filename' => $name, 'error' => 'File too large'];
            continue;
        }

        $ext = strtolower(pathinfo($name, PATHINFO_EXTENSION));
        if (!in_array($ext, ALLOWED_EXTENSIONS, true)) {
            $errors[] = ['filename' => $name, 'error' => 'Not a PDF file'];
            continue;
        }

        $header = file_get_contents($tmp, false, null, 0, 5);
        if ($header !== '%PDF-') {
            $errors[] = ['filename' => $name, 'error' => 'Invalid PDF header'];
            continue;
        }

        $job_id = 'pdf_' . bin2hex(random_bytes(16));
        $filename = $job_id . '.pdf';
        $filepath = UPLOAD_DIR . '/' . $filename;

        if (!move_uploaded_file($tmp, $filepath)) {
            $errors[] = ['filename' => $name, 'error' => 'Failed to save'];
            continue;
        }

        $job_data = [
            'job_id' => $job_id,
            'batch_id' => $batch_id,
            'original_filename' => $name,
            'uploaded_at' => date('Y-m-d H:i:s'),
            'file_size' => $size,
            'status' => 'uploaded',
            'filepath' => $filepath
        ];

        $written = file_put_contents(
            RESULTS_DIR . '/' . $job_id . '.meta.json',
            json_encode($job_data, JSON_PRETTY_PRINT)
        );
        if ($written === false) {
            // A job without metadata cannot be addressed later; drop the PDF
            // and report this file as failed.
            @unlink($filepath);
            $errors[] = ['filename' => $name, 'error' => 'Failed to save'];
            continue;
        }

        $uploaded[] = ['job_id' => $job_id, 'filename' => $name];
    }

    // Save batch manifest
    $batch_data = [
        'batch_id' => $batch_id,
        'created_at' => date('Y-m-d H:i:s'),
        'total_files' => $file_count,
        'jobs' => array_column($uploaded, 'job_id'),
    ];

    file_put_contents(
        RESULTS_DIR . '/' . $batch_id . '.batch.json',
        json_encode($batch_data, JSON_PRETTY_PRINT)
    );

    success([
        'batch_id' => $batch_id,
        'uploaded' => $uploaded,
        'errors' => $errors,
        'message' => count($uploaded) . ' of ' . $file_count . ' files uploaded'
    ]);
}

/**
 * Get status of a batch job.
 *
 * Overall status is 'completed' when every job has a result file,
 * 'finished' when every job is either completed or failed, otherwise
 * 'processing'.
 */
function handleBatchStatus()
{
    $batch_id = $_GET['batch_id'] ?? '';

    // \z rather than "$" so a trailing newline is rejected; is_string guards
    // against "batch_id[]=" array parameters.
    if (empty($batch_id) || !is_string($batch_id) || !preg_match('/^batch_[a-f0-9]+\z/', $batch_id)) {
        error('Invalid batch ID');
    }

    $batch_file = RESULTS_DIR . '/' . $batch_id . '.batch.json';
    if (!file_exists($batch_file)) {
        error('Batch not found');
    }

    $batch = readJsonArray($batch_file);
    if ($batch === null) {
        error('Batch not found');
    }

    $batch_jobs = (isset($batch['jobs']) && is_array($batch['jobs'])) ? $batch['jobs'] : [];
    $jobs = [];
    $completed = 0;
    $failed = 0;

    foreach ($batch_jobs as $job_id) {
        $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
        $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

        $status = 'unknown';
        $score = null;
        $filename = '';

        if (file_exists($meta_file)) {
            $meta = readJsonArray($meta_file) ?? [];
            $status = $meta['status'] ?? 'uploaded';
            $filename = $meta['original_filename'] ?? '';
        }

        if (file_exists($result_file)) {
            $status = 'completed';
            $result = readJsonArray($result_file) ?? [];
            $score = $result['accessibility_score'] ?? null;
            $completed++;
        } else if ($status === 'failed') {
            $failed++;
        }

        $jobs[] = [
            'job_id' => $job_id,
            'filename' => $filename,
            'status' => $status,
            'score' => $score
        ];
    }

    $total = count($batch_jobs);
    if ($completed === $total) {
        $overall_status = 'completed';
    } else if ($completed + $failed === $total) {
        $overall_status = 'finished';
    } else {
        $overall_status = 'processing';
    }

    success([
        'batch_id' => $batch_id,
        'status' => $overall_status,
        'total' => $total,
        'completed' => $completed,
        'failed' => $failed,
        'jobs' => $jobs
    ]);
}

/**
 * Export results as HTML or JSON.
 *
 * format=html runs report_generator.py on the result file and streams the
 * generated report; any other value sends the result JSON as a download.
 */
function handleExport()
{
    $job_id = $_GET['job_id'] ?? '';
    $format = $_GET['format'] ?? 'json';

    if (empty($job_id)) {
        error('Job ID required');
    }

    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (!file_exists($result_file)) {
        error('Results not found');
    }

    $result = readJsonArray($result_file);
    if ($result === null) {
        // Unreadable or half-written result file.
        error('Results not found');
    }

    if ($format === 'html') {
        // Generate HTML report via Python
        $html_file = RESULTS_DIR . '/' . $job_id . '.report.html';

        $cmd = buildPythonCommand(__DIR__ . '/report_generator.py') .
               ' --input ' . escapeshellarg($result_file) .
               ' --output ' . escapeshellarg($html_file);

        exec($cmd . ' 2>&1', $output, $return_code);

        if ($return_code !== 0 || !file_exists($html_file)) {
            error('Report generation failed');
        }

        header('Content-Type: text/html; charset=utf-8');
        header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.html"');
        readfile($html_file);
        exit;
    }

    // Default: JSON download
    header('Content-Type: application/json');
    header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.json"');
    echo json_encode($result, JSON_PRETTY_PRINT);
    exit;
}

/**
 * Send success response and end the request.
 *
 * @param mixed $data Payload placed under the "data" key.
 */
function success($data)
{
    echo json_encode([
        'success' => true,
        'data' => $data
    ]);
    exit;
}

/**
 * Send error response and end the request.
 *
 * @param string $message Human-readable error message.
 * @param int    $status  HTTP status code to send (defaults to 400).
 */
function error($message, $status = 400)
{
    http_response_code($status);
    echo json_encode([
        'success' => false,
        'error' => $message
    ]);
    exit;
}