= $limit) { return false; } $timestamps[] = $now; file_put_contents($file, json_encode(array_values($timestamps))); return true; }

/**
 * Validate a job ID so it can be safely embedded in file paths.
 *
 * Only IDs of the form "pdf_<lowercase hex>" are accepted. Anything else
 * terminates the request through error().
 *
 * @param mixed $job_id Raw job ID taken from the request.
 * @return string The validated job ID.
 */
function sanitizeJobId($job_id)
{
    // "\z" (not "$") is used on purpose: "$" also matches right before a
    // trailing newline, which would let "pdf_ab\n" through into file names.
    // The is_string() guard keeps array input (job_id[]=x) out of preg_match().
    if (!is_string($job_id) || !preg_match('/^pdf_[a-f0-9]+\z/', $job_id)) {
        error('Invalid job ID format');
    }

    return $job_id;
}

/**
 * Get an OIDC identity token for authenticating to Cloud Run.
 *
 * Reads the service account key at GCP_SA_KEY_PATH, builds a self-signed
 * RS256 JWT whose target_audience is CLOUD_RUN_URL, and exchanges it for an
 * identity token at Google's OAuth token endpoint. The token is memoised in
 * static variables for subsequent calls.
 *
 * @return string The identity token.
 * @throws Exception If the key is missing/invalid, signing fails, or the
 *                   token exchange does not return an id_token.
 */
function getCloudRunToken()
{
    static $cachedToken = null;
    static $cachedExpiry = 0;

    // Return cached token if still valid (with 5-min buffer)
    if ($cachedToken && time() < ($cachedExpiry - 300)) {
        return $cachedToken;
    }

    $keyPath = GCP_SA_KEY_PATH;
    if (!file_exists($keyPath)) {
        throw new Exception("GCP service account key not found: $keyPath");
    }

    $sa = json_decode(file_get_contents($keyPath), true);
    if (!$sa || !isset($sa['client_email']) || !isset($sa['private_key'])) {
        throw new Exception("Invalid service account key file");
    }

    $now = time();
    $expiry = $now + 3600;

    // Build JWT header and claims
    $header = base64url_encode(json_encode(['alg' => 'RS256', 'typ' => 'JWT']));
    $claims = base64url_encode(json_encode([
        'iss' => $sa['client_email'],
        'sub' => $sa['client_email'],
        'aud' => 'https://oauth2.googleapis.com/token',
        'iat' => $now,
        'exp' => $expiry,
        'target_audience' => CLOUD_RUN_URL,
    ]));

    // Sign with RSA-SHA256
    $signingInput = "$header.$claims";
    $signature = '';
    $privateKey = openssl_pkey_get_private($sa['private_key']);
    if (!$privateKey) {
        throw new Exception("Failed to parse service account private key");
    }
    // A failed signature would otherwise be sent as an empty string.
    if (!openssl_sign($signingInput, $signature, $privateKey, OPENSSL_ALGO_SHA256)) {
        throw new Exception("Failed to sign service account JWT");
    }

    $jwt = $signingInput . '.' . base64url_encode($signature);

    // Exchange JWT for identity token
    $ch = curl_init('https://oauth2.googleapis.com/token');
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => http_build_query([
            'grant_type' => 'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'assertion' => $jwt,
        ]),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => 10,
    ]);
    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $curlError = curl_error($ch);
    curl_close($ch);

    // Transport-level failure (DNS, TLS, timeout): there is no HTTP status.
    if ($response === false) {
        throw new Exception("Failed to get identity token: $curlError");
    }
    if ($httpCode !== 200) {
        throw new Exception("Failed to get identity token: HTTP $httpCode - $response");
    }

    $tokenData = json_decode($response, true);
    if (!isset($tokenData['id_token'])) {
        throw new Exception("No id_token in response: $response");
    }

    $cachedToken = $tokenData['id_token'];
    $cachedExpiry = $expiry;

    return $cachedToken;
}

/**
 * Base64url encode (no padding, URL-safe)
 *
 * @param string $data Raw bytes to encode.
 * @return string Base64 with "+/" replaced by "-_" and trailing "=" removed.
 */
function base64url_encode($data)
{
    return rtrim(strtr(base64_encode($data), '+/', '-_'), '=');
}

/**
 * Get PostgreSQL PDO connection (lazy singleton)
 *
 * The connection is created on first use from DB_HOST / DB_PORT / DB_NAME /
 * DB_USER / DB_PASSWORD and configured to throw exceptions on errors.
 *
 * @return PDO
 */
function getDB()
{
    static $pdo = null;

    if ($pdo === null) {
        $dsn = sprintf('pgsql:host=%s;port=%d;dbname=%s', DB_HOST, DB_PORT, DB_NAME);
        $pdo = new PDO($dsn, DB_USER, DB_PASSWORD, [
            PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
        ]);
    }

    return $pdo;
}

/**
 * Insert or update a job record in PostgreSQL
 *
 * Best-effort: any Exception is logged via error_log() and swallowed so a
 * database problem does not fail the API request.
 *
 * @param string     $job_id   Job identifier (conflict key of the upsert).
 * @param string     $filename Original file name.
 * @param string     $status   Job status; 'completed' also stamps completed_at.
 * @param array|null $results  Check result; when given, score, grade, issue
 *                             counts, processing time and the full JSON are stored.
 */
function updateJobInDatabase($job_id, $filename, $status, $results = null)
{
    try {
        $pdo = getDB();

        $score = null;
        $grade = null;
        $total_issues = null;
        $critical_count = null;
        $error_count = null;
        $warning_count = null;
        $result_json = null;
        $processing_time = null;

        if ($results) {
            $score = $results['accessibility_score'] ?? null;
            $grade = $results['grade'] ?? null;

            // A non-array "issues" value would make count()/array_filter()
            // throw a TypeError, which the catch below does not handle.
            $issues = $results['issues'] ?? [];
            if (!is_array($issues)) {
                $issues = [];
            }

            $countSeverity = static function ($severity) use ($issues) {
                return count(array_filter(
                    $issues,
                    static fn($i) => ($i['severity'] ?? '') === $severity
                ));
            };

            $total_issues = count($issues);
            $critical_count = $countSeverity('CRITICAL');
            $error_count = $countSeverity('ERROR');
            $warning_count = $countSeverity('WARNING');
            $result_json = json_encode($results);
            $processing_time = $results['stats']['processing_time'] ?? null;
        }

        // :status2 duplicates :status because the same named placeholder is
        // not reused within the statement.
        $sql = "INSERT INTO jobs (job_id, filename, status, score, grade, total_issues,
                    critical_count, error_count, warning_count, result_json, processing_time, completed_at)
                VALUES (:job_id, :filename, :status, :score, :grade, :total_issues,
                    :critical_count, :error_count, :warning_count, :result_json::jsonb, :processing_time,
                    CASE WHEN :status2 = 'completed' THEN NOW() ELSE NULL END)
                ON CONFLICT (job_id) DO UPDATE SET
                    status = EXCLUDED.status,
                    score = COALESCE(EXCLUDED.score, jobs.score),
                    grade = COALESCE(EXCLUDED.grade, jobs.grade),
                    total_issues = COALESCE(EXCLUDED.total_issues, jobs.total_issues),
                    critical_count = COALESCE(EXCLUDED.critical_count, jobs.critical_count),
                    error_count = COALESCE(EXCLUDED.error_count, jobs.error_count),
                    warning_count = COALESCE(EXCLUDED.warning_count, jobs.warning_count),
                    result_json = COALESCE(EXCLUDED.result_json, jobs.result_json),
                    processing_time = COALESCE(EXCLUDED.processing_time, jobs.processing_time),
                    completed_at = CASE WHEN EXCLUDED.status = 'completed' THEN NOW() ELSE jobs.completed_at END";

        $stmt = $pdo->prepare($sql);
        $stmt->execute([
            ':job_id' => $job_id,
            ':filename' => $filename,
            ':status' => $status,
            ':score' => $score,
            ':grade' => $grade,
            ':total_issues' => $total_issues,
            ':critical_count' => $critical_count,
            ':error_count' => $error_count,
            ':warning_count' => $warning_count,
            ':result_json' => $result_json,
            ':processing_time' => $processing_time,
            ':status2' => $status,
        ]);
    } catch (Exception $e) {
        error_log("DB update failed for $job_id: " . $e->getMessage());
    }
}

// CORS headers for API
$allowed_origins = [
    'https://ai-sandbox.oliver.solutions',
    'http://localhost:8888',
    'http://127.0.0.1:8888',
    'http://localhost:8000',
    'http://127.0.0.1:8000',
];

$origin = $_SERVER['HTTP_ORIGIN'] ?? '';

// The allow-origin value depends on the request's Origin header, so caches
// must key on it.
header('Vary: Origin');

if (in_array($origin, $allowed_origins, true) || (function_exists('isDevelopmentMode') && isDevelopmentMode())) {
    header('Access-Control-Allow-Origin: ' . ($origin ?: '*'));
} elseif (!$origin) {
    // No Origin header (non-browser client or same-origin request).
    header('Access-Control-Allow-Origin: ' . ($allowed_origins[0]));
}
// Disallowed origins get no Access-Control-Allow-Origin header at all.
// Answering "null" would grant access to documents whose origin serialises
// to "null" (sandboxed iframes, file:// pages).

header('Access-Control-Allow-Methods: POST, GET, OPTIONS, DELETE');
header('Access-Control-Allow-Headers: Content-Type, X-API-Key, Authorization');
header('Content-Type: application/json');

// Handle preflight
if ($_SERVER['REQUEST_METHOD'] === 'OPTIONS') {
    exit(0);
}

// Require authentication for all API requests
require_once __DIR__ . '/auth.php';
requireAuth();

// Get action
$action = $_GET['action'] ?? $_POST['action'] ?? '';

switch ($action) {
    case 'upload':
        handleUpload();
        break;
    case 'check':
        handleCheck();
        break;
    case 'status':
        handleStatus();
        break;
    case 'result':
        handleResult();
        break;
    case 'list':
        handleList();
        break;
    case 'delete':
        handleDelete();
        break;
    case 'debug':
        handleDebug();
        break;
    case 'image':
        handleImage();
        break;
    case 'remediate':
        handleRemediate();
        break;
    case 'download':
        handleDownload();
        break;
    case 'stats':
        handleStats();
        break;
    case 'batch_upload':
        handleBatchUpload();
        break;
    case 'batch_status':
        handleBatchStatus();
        break;
    case 'export':
        handleExport();
        break;
    case 'dismiss':
        handleDismiss();
        break;
    case 'undismiss':
        handleUndismiss();
        break;
    default:
        error('Invalid action');
}

/**
 * Handle file upload
 *
 * Validates the "pdf" upload (upload status, size, extension, "%PDF-" magic
 * bytes), stores it under a random job ID in UPLOAD_DIR and writes the job's
 * .meta.json into RESULTS_DIR. Responds with the new job ID.
 */
function handleUpload()
{
    // Rate limit: 10 uploads/hour per IP
    if (!checkRateLimit('upload', 10, 3600)) {
        error('Upload rate limit exceeded. Try again later.', 429);
    }

    if (!isset($_FILES['pdf'])) {
        error('No file uploaded');
    }

    $file = $_FILES['pdf'];

    // Validate file
    if ($file['error'] !== UPLOAD_ERR_OK) {
        error('Upload error: ' . $file['error']);
    }

    if ($file['size'] > MAX_FILE_SIZE) {
        error('File too large. Max size: ' . (MAX_FILE_SIZE / 1024 / 1024) . 'MB');
    }

    $ext = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION));
    if (!in_array($ext, ALLOWED_EXTENSIONS)) {
        error('Invalid file type. Only PDF files allowed.');
    }

    // Validate PDF magic bytes
    $header = file_get_contents($file['tmp_name'], false, null, 0, 5);
    if ($header !== '%PDF-') {
        error('File is not a valid PDF (invalid file header)');
    }

    // Generate cryptographically secure job ID
    $job_id = 'pdf_' . bin2hex(random_bytes(16));
    $filename = $job_id . '.pdf';
    $filepath = UPLOAD_DIR . '/' . $filename;

    // Move file
    if (!move_uploaded_file($file['tmp_name'], $filepath)) {
        error('Failed to save file');
    }

    // Create job metadata
    $job_data = [
        'job_id' => $job_id,
        'original_filename' => $file['name'],
        'uploaded_at' => date('Y-m-d H:i:s'),
        'file_size' => $file['size'],
        'status' => 'uploaded',
        'filepath' => $filepath
    ];

    $written = file_put_contents(
        RESULTS_DIR . '/' . $job_id . '.meta.json',
        json_encode($job_data, JSON_PRETTY_PRINT)
    );
    if ($written === false) {
        // Without metadata every later action answers "Job not found", so
        // do not keep an orphaned PDF or hand out a dead job ID.
        @unlink($filepath);
        error('Failed to save job metadata');
    }

    success([
        'job_id' => $job_id,
        'filename' => $file['name'],
        'message' => 'File uploaded successfully'
    ]);
}

/**
 * Handle PDF accessibility check — send PDF to Cloud Run synchronously
 *
 * With CLOUD_RUN_URL configured the PDF is posted to "<CLOUD_RUN_URL>/check",
 * the returned data is written to <job>.result.json and the job is marked
 * completed (or failed) in both the meta file and PostgreSQL. Without it,
 * PYTHON_SCRIPT is started in the background and the request returns
 * immediately with status "processing".
 */
function handleCheck()
{
    set_time_limit(900); // Allow up to 15 minutes

    $job_id = $_POST['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    // Rate limit: 30 checks/hour per IP
    if (!checkRateLimit('check', 30, 3600)) {
        error('Rate limit exceeded. Try again later.', 429);
    }

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        $job_data = [];
    }

    // Form fields arrive as strings, so "false" / "0" must be read as off;
    // a plain truthiness test would treat the string "false" as enabled.
    $raw_quick = $_POST['quick_mode'] ?? false;
    $quick_mode = is_scalar($raw_quick)
        ? !in_array(strtolower(trim((string) $raw_quick)), ['', '0', 'false', 'off', 'no'], true)
        : !empty($raw_quick);

    // Update meta to processing
    $job_data['status'] = 'processing';
    $job_data['started_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    $pdf_path = $job_data['filepath'] ?? '';

    // If Cloud Run URL is configured, send to Cloud Run
    if (!empty(CLOUD_RUN_URL)) {
        try {
            $token = getCloudRunToken();

            // Thrown (not error()) so the catch below marks the job failed
            // instead of leaving it in "processing" forever.
            if (!file_exists($pdf_path)) {
                throw new Exception('PDF file not found on server');
            }

            // Build multipart POST to Cloud Run
            $ch = curl_init(CLOUD_RUN_URL . '/check');
            $postFields = [
                'pdf' => new CURLFile($pdf_path, 'application/pdf', basename($pdf_path)),
                'job_id' => $job_id,
                'quick_mode' => $quick_mode ? 'true' : 'false',
                'original_filename' => $job_data['original_filename'] ?? '',
            ];

            curl_setopt_array($ch, [
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $postFields,
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_TIMEOUT => CLOUD_RUN_TIMEOUT,
                CURLOPT_HTTPHEADER => [
                    'Authorization: Bearer ' . $token,
                ],
            ]);

            $response = curl_exec($ch);
            $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $curlError = curl_error($ch);
            curl_close($ch);

            if ($curlError) {
                throw new Exception("Cloud Run request failed: $curlError");
            }

            if ($httpCode !== 200) {
                $errorBody = json_decode($response, true);
                $errorMsg = $errorBody['error'] ?? "HTTP $httpCode";
                throw new Exception("Cloud Run returned error: $errorMsg");
            }

            $result = json_decode($response, true);
            if (!$result || !isset($result['success'])) {
                throw new Exception("Invalid response from Cloud Run");
            }

            if (!$result['success']) {
                throw new Exception($result['error'] ?? 'Unknown Cloud Run error');
            }

            // A "successful" response without a payload must not be stored
            // as a completed result.
            if (!isset($result['data'])) {
                throw new Exception("Invalid response from Cloud Run");
            }
            $checkResult = $result['data'];

            // Write result JSON to disk
            $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
            file_put_contents($result_file, json_encode($checkResult, JSON_PRETTY_PRINT));

            // Update meta
            $job_data['status'] = 'completed';
            $job_data['completed_at'] = date('Y-m-d H:i:s');
            file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

            // Update PostgreSQL
            updateJobInDatabase($job_id, $job_data['original_filename'] ?? '', 'completed', $checkResult);

            success([
                'job_id' => $job_id,
                'status' => 'completed',
                'message' => 'Check completed'
            ]);
        } catch (Exception $e) {
            // Mark as failed
            $job_data['status'] = 'failed';
            $job_data['error'] = $e->getMessage();
            file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

            updateJobInDatabase($job_id, $job_data['original_filename'] ?? '', 'failed');

            error('Processing failed: ' . $e->getMessage());
        }
    } else {
        // Fallback to local exec (development without Cloud Run)
        $output_path = RESULTS_DIR . '/' . $job_id . '.result.json';

        $venv_python = __DIR__ . '/venv/bin/python3';
        $python_bin = file_exists($venv_python) ? $venv_python : 'python3';

        // The interpreter path is quoted as a single argument so an install
        // directory containing spaces still works. PYTHON_SCRIPT keeps
        // escapeshellcmd() as before because its definition is not visible here.
        $cmd = escapeshellarg($python_bin) . ' ' . escapeshellcmd(PYTHON_SCRIPT) . ' ' .
               escapeshellarg($pdf_path) . ' ' .
               '--output ' . escapeshellarg($output_path);

        if ($quick_mode) {
            $cmd .= ' --quick';
        }

        // NOTE(review): API keys passed as command-line arguments are visible
        // in the process list, and a request-supplied value is probed with
        // file_exists() below. Acceptable only for local development.
        $anthropic_key = $_POST['anthropic_key'] ?? getenv('ANTHROPIC_API_KEY');
        $google_key = $_POST['google_key'] ?? $_POST['google_credentials'] ?? getenv('GOOGLE_API_KEY');

        if ($anthropic_key) {
            $cmd .= ' --anthropic-key ' . escapeshellarg($anthropic_key);
        }
        if ($google_key) {
            if (file_exists($google_key)) {
                $cmd .= ' --google-credentials ' . escapeshellarg($google_key);
            } else {
                $cmd .= ' --google-key ' . escapeshellarg($google_key);
            }
        }

        $env_path = getenv('PATH');
        putenv("PATH=/opt/homebrew/bin:/usr/local/bin:{$env_path}");

        // Output is redirected and the command backgrounded so exec()
        // returns immediately.
        $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';
        $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1 &';

        exec($cmd, $output, $return_code);

        success([
            'job_id' => $job_id,
            'status' => 'processing',
            'message' => 'Check started (local mode)'
        ]);
    }
}

/**
 * Check job status — pure file-based
 *
 * A present <job>.result.json means "completed". A non-empty <job>.error.log
 * on a job that has been processing/queued for more than 900 seconds means
 * "failed". The response also carries the dismissed issue indices.
 */
function handleStatus()
{
    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        $job_data = [];
    }

    // Check if result file exists (definitive completion signal)
    if (file_exists($result_file)) {
        $job_data['status'] = 'completed';
        $job_data['completed_at'] = $job_data['completed_at'] ?? date('Y-m-d H:i:s', filemtime($result_file));
    } elseif (file_exists($error_log) && in_array($job_data['status'] ?? '', ['processing', 'queued'], true)) {
        $error_content = file_get_contents($error_log);
        if (!empty($error_content)) {
            $started = strtotime($job_data['started_at'] ?? 'now');
            if (time() - $started > 900) {
                $job_data['status'] = 'failed';
                $job_data['error'] = 'Process timeout or error';
                $job_data['error_log'] = substr($error_content, -1000);
            }
        }
    }

    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    $dismissed = file_exists($dismiss_file)
        ? json_decode(file_get_contents($dismiss_file), true)
        : [];
    $job_data['dismissed_indices'] = is_array($dismissed)
        ? array_map('intval', array_keys($dismissed))
        : [];

    success($job_data);
}

/**
 * Get check results
 *
 * Returns the contents of <job>.result.json plus the dismissed issue indices.
 */
function handleResult()
{
    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (!file_exists($result_file)) {
        error('Results not found. Check may still be processing.');
    }

    $result = json_decode(file_get_contents($result_file), true);
    if (!is_array($result)) {
        $result = [];
    }

    // Inject dismissed indices so frontend can restore dismiss state on reload
    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    $dismissed = file_exists($dismiss_file)
        ? json_decode(file_get_contents($dismiss_file), true)
        : [];
    $result['dismissed_indices'] = is_array($dismissed)
        ? array_map('intval', array_keys($dismissed))
        : [];

    success($result);
}

/**
 * List all jobs
 *
 * Reads every *.meta.json in RESULTS_DIR, marks jobs that have a result file
 * as completed, and returns them newest upload first.
 */
function handleList()
{
    $jobs = [];

    // glob() returns false on error; treat that as "no jobs".
    $files = glob(RESULTS_DIR . '/*.meta.json') ?: [];

    foreach ($files as $file) {
        $job_data = json_decode(file_get_contents($file), true);
        if (!is_array($job_data)) {
            // Unreadable/corrupt metadata: nothing meaningful to list.
            continue;
        }

        // Check if completed
        $result_file = str_replace('.meta.json', '.result.json', $file);
        if (file_exists($result_file)) {
            $job_data['status'] = 'completed';
        }

        $jobs[] = $job_data;
    }

    // Sort by upload time (newest first)
    usort($jobs, function ($a, $b) {
        return strtotime($b['uploaded_at'] ?? '') <=> strtotime($a['uploaded_at'] ?? '');
    });

    success(['jobs' => $jobs]);
}

/**
 * Delete a job
 *
 * Removes the uploaded PDF, the metadata and every per-job artifact the other
 * handlers in this file create (result, dismissals, logs, reports, remediated
 * PDF, page images). Deletion is best-effort; missing files are ignored.
 */
function handleDelete()
{
    $job_id = $_POST['job_id'] ?? $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        $job_data = [];
    }

    // Delete files
    $targets = [
        $job_data['filepath'] ?? null,
        $job_data['remediated_pdf'] ?? null,
        UPLOAD_DIR . '/' . $job_id . '_remediated.pdf',
        $meta_file,
        RESULTS_DIR . '/' . $job_id . '.result.json',
        RESULTS_DIR . '/' . $job_id . '.dismissed.json',
        RESULTS_DIR . '/' . $job_id . '.error.log',
        RESULTS_DIR . '/' . $job_id . '.remediation.log',
        RESULTS_DIR . '/' . $job_id . '.report.html',
        RESULTS_DIR . '/' . $job_id . '.report.pdf',
    ];
    foreach ($targets as $target) {
        if (is_string($target) && $target !== '' && is_file($target)) {
            @unlink($target);
        }
    }

    // Locally rendered page images live in a per-job directory.
    $images_dir = RESULTS_DIR . '/' . $job_id . '.result_images';
    if (is_dir($images_dir)) {
        foreach (glob($images_dir . '/*') ?: [] as $image) {
            if (is_file($image)) {
                @unlink($image);
            }
        }
        @rmdir($images_dir);
    }

    success(['message' => 'Job deleted']);
}

/**
 * Debug endpoint
 *
 * Development mode only. Reports which per-job files exist, the metadata,
 * the error log, the result size and the venv Python version.
 */
function handleDebug()
{
    // Debug endpoint only available in development mode
    require_once __DIR__ . '/auth.php';
    if (!isDevelopmentMode()) {
        error('Debug endpoint disabled in production');
    }

    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';

    $debug_info = [
        'job_id' => $job_id,
        'meta_exists' => file_exists($meta_file),
        'result_exists' => file_exists($result_file),
        'error_log_exists' => file_exists($error_log),
        'cloud_run_url' => CLOUD_RUN_URL ?: '(not configured — local mode)',
        'files' => []
    ];

    if (file_exists($meta_file)) {
        $debug_info['meta'] = json_decode(file_get_contents($meta_file), true);
    }

    if (file_exists($error_log)) {
        $debug_info['error_log'] = file_get_contents($error_log);
    }

    if (file_exists($result_file)) {
        $debug_info['result_size'] = filesize($result_file);
    }

    // Test Python (path quoted so a directory with spaces does not split it)
    $venv_python = __DIR__ . '/venv/bin/python3';
    exec(escapeshellarg($venv_python) . ' --version 2>&1', $python_version);
    $debug_info['python_version'] = implode("\n", $python_version);

    success($debug_info);
}

/**
 * Serve page images — redirect to GCS URL or serve local file
 *
 * If the result JSON lists an http(s) URL for the page, answers with a 302
 * redirect; otherwise streams <job>.result_images/page_<n>.png or a 404.
 */
function handleImage()
{
    $job_id = $_GET['job_id'] ?? '';
    $page_num = $_GET['page'] ?? '';

    if (empty($job_id) || empty($page_num)) {
        error('Job ID and page number required');
    }
    $job_id = sanitizeJobId($job_id);
    $page_num = intval($page_num);

    // Check result JSON for GCS URLs
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (file_exists($result_file)) {
        $result = json_decode(file_get_contents($result_file), true);
        $page_images = $result['page_images'] ?? [];

        // Check if the page image value is a URL (GCS)
        $image_value = $page_images[$page_num] ?? $page_images[strval($page_num)] ?? null;
        if (
            is_string($image_value)
            && (strpos($image_value, 'http://') === 0 || strpos($image_value, 'https://') === 0)
        ) {
            // Redirect to GCS URL
            header('HTTP/1.1 302 Found');
            header('Location: ' . $image_value);
            header('Cache-Control: public, max-age=86400');
            exit;
        }
    }

    // Fallback: serve local image file
    $images_dir = RESULTS_DIR . '/' . $job_id . '.result_images';
    $image_file = $images_dir . '/page_' . $page_num . '.png';

    if (!file_exists($image_file)) {
        http_response_code(404);
        header('Content-Type: application/json');
        echo json_encode(['success' => false, 'error' => 'Image not found']);
        exit;
    }

    // Serve the image
    header('Content-Type: image/png');
    header('Cache-Control: public, max-age=86400'); // Cache for 1 day
    readfile($image_file);
    exit;
}

/**
 * Auto-remediate PDF accessibility issues
 *
 * Runs pdf_remediation.py with --all on the job's original PDF, synchronously,
 * writing <job>_remediated.pdf into UPLOAD_DIR and recording it in the meta file.
 */
function handleRemediate()
{
    $job_id = $_POST['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

    if (!file_exists($meta_file) || !file_exists($result_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    $result_data = json_decode(file_get_contents($result_file), true);
    if (!is_array($job_data)) {
        $job_data = [];
    }

    // Check if there are fixable issues
    if (!isset($result_data['auto_fixable_count']) || $result_data['auto_fixable_count'] == 0) {
        error('No auto-fixable issues found');
    }

    $original_pdf = $job_data['filepath'] ?? '';
    $remediated_pdf = UPLOAD_DIR . '/' . $job_id . '_remediated.pdf';

    // Use absolute venv path
    $venv_python = __DIR__ . '/venv/bin/python3';
    $python_bin = file_exists($venv_python) ? $venv_python : 'python3';
    $remediation_script = __DIR__ . '/pdf_remediation.py';

    // Build command - apply all safe fixes.
    // Every path is quoted as its own argument so spaces in the install
    // directory cannot split the command.
    $cmd = escapeshellarg($python_bin) . ' ' . escapeshellarg($remediation_script) . ' ' .
           escapeshellarg($original_pdf) . ' ' .
           '--output ' . escapeshellarg($remediated_pdf) . ' ' .
           '--all';

    // Set PATH for poppler
    $env_path = getenv('PATH');
    $poppler_paths = '/opt/homebrew/bin:/usr/local/bin';
    putenv("PATH={$poppler_paths}:{$env_path}");

    // Run remediation
    $error_log = RESULTS_DIR . '/' . $job_id . '.remediation.log';
    $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1';

    exec($cmd, $output, $return_code);

    // Check if remediation succeeded
    if ($return_code !== 0 || !file_exists($remediated_pdf)) {
        $log_content = file_exists($error_log) ? file_get_contents($error_log) : 'Unknown error';
        $truncated = strlen($log_content) > 2000 ? '...' . substr($log_content, -2000) : $log_content;
        error('Remediation failed: ' . $truncated);
    }

    // Store remediated file info
    $job_data['remediated_pdf'] = $remediated_pdf;
    $job_data['remediated_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    success([
        'job_id' => $job_id,
        'remediated_pdf' => basename($remediated_pdf),
        'original_filename' => $job_data['original_filename'] ?? null,
        'fixes_applied' => $result_data['auto_fixable_count'],
        'download_url' => 'api.php?action=download&job_id=' . $job_id . '&type=remediated',
        'message' => 'PDF remediated successfully'
    ]);
}

/**
 * Download original or remediated PDF
 *
 * "type=remediated" serves the remediated file as "<original name>_fixed.pdf";
 * anything else serves the original upload under its original name.
 */
function handleDownload()
{
    $job_id = $_GET['job_id'] ?? '';
    $type = $_GET['type'] ?? 'original'; // 'original' or 'remediated'

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        $job_data = [];
    }
    $original_name = (string) ($job_data['original_filename'] ?? '');

    if ($type === 'remediated') {
        if (!isset($job_data['remediated_pdf']) || !file_exists($job_data['remediated_pdf'])) {
            error('Remediated PDF not found');
        }
        $file_path = $job_data['remediated_pdf'];
        $filename = pathinfo($original_name, PATHINFO_FILENAME) . '_fixed.pdf';
    } else {
        $file_path = $job_data['filepath'] ?? '';
        // Without this check a missing upload produced a 200 response with
        // PDF headers and an empty body.
        if ($file_path === '' || !is_file($file_path)) {
            error('PDF file not found');
        }
        $filename = $original_name;
    }

    // The name comes from the client's upload. Quotes, backslashes, path
    // separators and control characters would break or escape the quoted
    // Content-Disposition parameter.
    $filename = preg_replace('/[\x00-\x1F\x7F"\\\\\/]/', '_', $filename);
    if ($filename === null || $filename === '') {
        $filename = 'document.pdf';
    }

    // Serve the file
    header('Content-Type: application/pdf');
    header('Content-Disposition: attachment; filename="' . $filename . '"');
    header('Content-Length: ' . filesize($file_path));
    readfile($file_path);
    exit;
}

/**
 * Get aggregate job statistics
 *
 * Counts jobs from the meta files: a job with a result file is "completed",
 * one whose meta status is "failed" is "failed", everything else "processing".
 */
function handleStats()
{
    $stats = [
        'total_jobs' => 0,
        'completed' => 0,
        'failed' => 0,
        'processing' => 0,
    ];

    // Count jobs from meta files (glob() returns false on error)
    $files = glob(RESULTS_DIR . '/*.meta.json') ?: [];

    foreach ($files as $file) {
        $job = json_decode(file_get_contents($file), true);
        $stats['total_jobs']++;

        $result_file = str_replace('.meta.json', '.result.json', $file);
        if (file_exists($result_file)) {
            $stats['completed']++;
        } elseif (($job['status'] ?? '') === 'failed') {
            $stats['failed']++;
        } else {
            $stats['processing']++;
        }
    }

    success($stats);
}

/**
 * Handle batch file upload — accepts multiple PDFs
 *
 * Each file in "pdfs[]" is validated like a single upload. Failures are
 * collected per file instead of aborting the batch. A <batch>.batch.json
 * manifest listing the created job IDs is written to RESULTS_DIR.
 */
function handleBatchUpload()
{
    if (!checkRateLimit('upload', 10, 3600)) {
        error('Upload rate limit exceeded.', 429);
    }

    if (!isset($_FILES['pdfs']) || !is_array($_FILES['pdfs']['name'])) {
        error('No files uploaded. Use "pdfs[]" as the file field name.');
    }

    $batch_id = 'batch_' . bin2hex(random_bytes(8));
    $file_count = count($_FILES['pdfs']['name']);
    $uploaded = [];
    $errors = [];

    for ($i = 0; $i < $file_count; $i++) {
        $name = $_FILES['pdfs']['name'][$i];
        $tmp = $_FILES['pdfs']['tmp_name'][$i];
        $size = $_FILES['pdfs']['size'][$i];
        $err = $_FILES['pdfs']['error'][$i];

        if ($err !== UPLOAD_ERR_OK) {
            $errors[] = ['filename' => $name, 'error' => "Upload error code: $err"];
            continue;
        }

        if ($size > MAX_FILE_SIZE) {
            $errors[] = ['filename' => $name, 'error' => 'File too large'];
            continue;
        }

        $ext = strtolower(pathinfo($name, PATHINFO_EXTENSION));
        if (!in_array($ext, ALLOWED_EXTENSIONS)) {
            $errors[] = ['filename' => $name, 'error' => 'Not a PDF file'];
            continue;
        }

        $header = file_get_contents($tmp, false, null, 0, 5);
        if ($header !== '%PDF-') {
            $errors[] = ['filename' => $name, 'error' => 'Invalid PDF header'];
            continue;
        }

        $job_id = 'pdf_' . bin2hex(random_bytes(16));
        $filename = $job_id . '.pdf';
        $filepath = UPLOAD_DIR . '/' . $filename;

        if (!move_uploaded_file($tmp, $filepath)) {
            $errors[] = ['filename' => $name, 'error' => 'Failed to save'];
            continue;
        }

        $job_data = [
            'job_id' => $job_id,
            'batch_id' => $batch_id,
            'original_filename' => $name,
            'uploaded_at' => date('Y-m-d H:i:s'),
            'file_size' => $size,
            'status' => 'uploaded',
            'filepath' => $filepath
        ];

        $written = file_put_contents(
            RESULTS_DIR . '/' . $job_id . '.meta.json',
            json_encode($job_data, JSON_PRETTY_PRINT)
        );
        if ($written === false) {
            // A job without metadata cannot be used; report it as failed
            // rather than listing a dead job ID.
            @unlink($filepath);
            $errors[] = ['filename' => $name, 'error' => 'Failed to save'];
            continue;
        }

        $uploaded[] = ['job_id' => $job_id, 'filename' => $name];
    }

    // Save batch manifest
    $batch_data = [
        'batch_id' => $batch_id,
        'created_at' => date('Y-m-d H:i:s'),
        'total_files' => $file_count,
        'jobs' => array_column($uploaded, 'job_id'),
    ];
    file_put_contents(
        RESULTS_DIR . '/' . $batch_id . '.batch.json',
        json_encode($batch_data, JSON_PRETTY_PRINT)
    );

    success([
        'batch_id' => $batch_id,
        'uploaded' => $uploaded,
        'errors' => $errors,
        'message' => count($uploaded) . ' of ' . $file_count . ' files uploaded'
    ]);
}

/**
 * Get status of a batch job
 *
 * Overall status is "completed" when every job has a result file, "finished"
 * when all jobs are either completed or failed, otherwise "processing".
 */
function handleBatchStatus()
{
    $batch_id = $_GET['batch_id'] ?? '';
    // Same anchoring/type rules as sanitizeJobId(): "\z" rejects a trailing
    // newline and is_string() rejects array input.
    if (empty($batch_id) || !is_string($batch_id) || !preg_match('/^batch_[a-f0-9]+\z/', $batch_id)) {
        error('Invalid batch ID');
    }

    $batch_file = RESULTS_DIR . '/' . $batch_id . '.batch.json';
    if (!file_exists($batch_file)) {
        error('Batch not found');
    }

    $batch = json_decode(file_get_contents($batch_file), true);
    $batch_jobs = (is_array($batch) && is_array($batch['jobs'] ?? null)) ? $batch['jobs'] : [];

    $jobs = [];
    $completed = 0;
    $failed = 0;

    foreach ($batch_jobs as $job_id) {
        $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
        $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

        $status = 'unknown';
        $score = null;
        $filename = '';

        if (file_exists($meta_file)) {
            $meta = json_decode(file_get_contents($meta_file), true);
            $status = $meta['status'] ?? 'uploaded';
            $filename = $meta['original_filename'] ?? '';
        }

        if (file_exists($result_file)) {
            $status = 'completed';
            $result = json_decode(file_get_contents($result_file), true);
            $score = $result['accessibility_score'] ?? null;
            $completed++;
        } elseif ($status === 'failed') {
            $failed++;
        }

        $jobs[] = [
            'job_id' => $job_id,
            'filename' => $filename,
            'status' => $status,
            'score' => $score
        ];
    }

    $total = count($batch_jobs);
    $overall_status = ($completed === $total)
        ? 'completed'
        : (($completed + $failed === $total) ? 'finished' : 'processing');

    success([
        'batch_id' => $batch_id,
        'status' => $overall_status,
        'total' => $total,
        'completed' => $completed,
        'failed' => $failed,
        'jobs' => $jobs
    ]);
}

/**
 * Export results as HTML, PDF or JSON
 *
 * "format=html" and "format=pdf" run report_generator.py on the result file
 * and send the generated report as a download; any other value sends the
 * result JSON itself.
 */
function handleExport()
{
    $job_id = $_GET['job_id'] ?? '';
    $format = $_GET['format'] ?? 'json';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (!file_exists($result_file)) {
        error('Results not found');
    }

    $result = json_decode(file_get_contents($result_file), true);

    if ($format === 'html') {
        // Generate HTML report via Python
        $venv_python = __DIR__ . '/venv/bin/python3';
        $python_bin = file_exists($venv_python) ? $venv_python : 'python3';
        $report_script = __DIR__ . '/report_generator.py';
        $html_file = RESULTS_DIR . '/' . $job_id . '.report.html';

        // Paths quoted individually so spaces in the install directory are safe.
        $cmd = escapeshellarg($python_bin) . ' ' . escapeshellarg($report_script) .
               ' --input ' . escapeshellarg($result_file) .
               ' --output ' . escapeshellarg($html_file);

        exec($cmd . ' 2>&1', $output, $return_code);

        if ($return_code !== 0 || !file_exists($html_file)) {
            error('Report generation failed');
        }

        header('Content-Type: text/html; charset=utf-8');
        header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.html"');
        readfile($html_file);
        exit;
    }

    if ($format === 'pdf') {
        // Generate PDF report via Python WeasyPrint
        $venv_python = __DIR__ . '/venv/bin/python3';
        $python_bin = file_exists($venv_python) ? $venv_python : 'python3';
        $report_script = __DIR__ . '/report_generator.py';
        $pdf_file = RESULTS_DIR . '/' . $job_id . '.report.pdf';

        $cmd = escapeshellarg($python_bin) . ' ' . escapeshellarg($report_script) .
               ' --input ' . escapeshellarg($result_file) .
               ' --output ' . escapeshellarg($pdf_file) .
               ' --format pdf';

        exec($cmd . ' 2>&1', $output, $return_code);

        if ($return_code !== 0 || !file_exists($pdf_file)) {
            error('PDF report generation failed: ' . implode("\n", $output));
        }

        header('Content-Type: application/pdf');
        header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.pdf"');
        header('Content-Length: ' . filesize($pdf_file));
        readfile($pdf_file);
        exit;
    }

    // Default: JSON download
    header('Content-Type: application/json');
    header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.json"');
    echo json_encode($result, JSON_PRETTY_PRINT);
    exit;
}

/**
 * Dismiss an issue (mark as false positive)
 *
 * Expects a JSON body with job_id, issue_index and an optional reason
 * (stored truncated to 255 bytes) and records it in <job>.dismissed.json.
 */
function handleDismiss()
{
    $data = json_decode(file_get_contents('php://input'), true) ?: [];
    $job_id = $data['job_id'] ?? '';
    $issue_index = isset($data['issue_index']) ? (int)$data['issue_index'] : -1;

    // A non-string reason (array/object in the JSON body) would make
    // substr() throw; treat it as "no reason given".
    $raw_reason = $data['reason'] ?? '';
    $reason = is_scalar($raw_reason) ? substr((string) $raw_reason, 0, 255) : '';

    if (empty($job_id) || $issue_index < 0) {
        error('job_id and issue_index required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    $dismissed = file_exists($dismiss_file) ? json_decode(file_get_contents($dismiss_file), true) : [];
    if (!is_array($dismissed)) {
        // Corrupt or empty dismiss file: start over rather than fail.
        $dismissed = [];
    }

    $dismissed[$issue_index] = ['reason' => $reason, 'dismissed_at' => date('Y-m-d H:i:s')];
    file_put_contents($dismiss_file, json_encode($dismissed));

    success(['dismissed' => true, 'issue_index' => $issue_index]);
}

/**
 * Undismiss an issue
 *
 * Expects a JSON body with job_id and issue_index and removes that index
 * from <job>.dismissed.json if the file exists.
 */
function handleUndismiss()
{
    $data = json_decode(file_get_contents('php://input'), true) ?: [];
    $job_id = $data['job_id'] ?? '';
    $issue_index = isset($data['issue_index']) ? (int)$data['issue_index'] : -1;

    if (empty($job_id) || $issue_index < 0) {
        error('job_id and issue_index required');
    }
    $job_id = sanitizeJobId($job_id);

    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    if (file_exists($dismiss_file)) {
        $dismissed = json_decode(file_get_contents($dismiss_file), true);
        if (!is_array($dismissed)) {
            $dismissed = [];
        }
        unset($dismissed[$issue_index]);
        file_put_contents($dismiss_file, json_encode($dismissed));
    }

    success(['undismissed' => true, 'issue_index' => $issue_index]);
}

/**
 * Send success response
 *
 * Emits {"success": true, "data": ...} and ends the request.
 *
 * @param mixed $data Payload placed under "data".
 */
function success($data)
{
    // Invalid UTF-8 (e.g. a log tail cut in the middle of a multi-byte
    // character) would otherwise make json_encode() return false and the
    // client would receive an empty body.
    echo json_encode([
        'success' => true,
        'data' => $data
    ], JSON_INVALID_UTF8_SUBSTITUTE);
    exit;
}

/**
 * Send error response
 *
 * Emits {"success": false, "error": ...} with the given HTTP status and ends
 * the request.
 *
 * @param string $message Error message for the client.
 * @param int    $code    HTTP status code (defaults to 400).
 */
function error($message, $code = 400)
{
    http_response_code($code);
    echo json_encode([
        'success' => false,
        'error' => $message
    ], JSON_INVALID_UTF8_SUBSTITUTE);
    exit;
}