$payload['oid'] ?? null,
        'name' => $payload['name'] ?? ($payload['unique_name'] ?? ($payload['upn'] ?? null)),
        'email'=> $payload['email'] ?? ($payload['upn'] ?? null),
    ];
}

define('CLOUD_RUN_TIMEOUT', 900); // 15 minutes
define('GCP_SA_KEY_PATH', getenv('GCP_SA_KEY_PATH') ?: __DIR__ . '/pdf-api-invoker-key.json');
define('RATE_LIMIT_DIR', __DIR__ . '/rate_limits');

// Database configuration
// NOTE(review): DB_PASSWORD falls back to a hard-coded development password when the
// environment variable is unset — confirm production always sets DB_PASSWORD.
define('DB_HOST', getenv('DB_HOST') ?: 'localhost');
define('DB_PORT', intval(getenv('DB_PORT') ?: 5432));
define('DB_NAME', getenv('DB_NAME') ?: 'pdf_checker');
define('DB_USER', getenv('DB_USER') ?: 'pdf_checker');
define('DB_PASSWORD', getenv('DB_PASSWORD') ?: 'dev_password');

// Create directories if they don't exist
if (!is_dir(UPLOAD_DIR)) mkdir(UPLOAD_DIR, 0755, true);
if (!is_dir(RESULTS_DIR)) mkdir(RESULTS_DIR, 0755, true);
if (!is_dir(RATE_LIMIT_DIR)) mkdir(RATE_LIMIT_DIR, 0755, true);

/**
 * Check rate limit via filesystem. Returns true if allowed.
 * Stores timestamps in JSON files per IP+action.
 *
 * The read / filter / append / write cycle runs under an exclusive flock() so
 * that two concurrent requests from the same client cannot both read the old
 * list and then overwrite each other's timestamp.
 *
 * @param string $action Action name; combined with the client IP to form the file key.
 * @param int    $limit  Maximum number of recorded calls allowed inside the window.
 * @param int    $window Window length in seconds.
 * @return bool True when the call is allowed (and has been recorded), false when the limit is reached.
 */
function checkRateLimit($action, $limit, $window) {
    $ip = $_SERVER['REMOTE_ADDR'] ?? 'unknown';
    $key = preg_replace('/[^a-zA-Z0-9_-]/', '_', $ip . '_' . $action);
    $file = RATE_LIMIT_DIR . '/' . $key . '.json';
    $now = time();

    // 'c+' opens for read/write and creates the file if needed, without truncating it.
    $handle = @fopen($file, 'c+');
    if ($handle === false) {
        // Same outcome as before when the file could not be written: the request is allowed.
        error_log("Rate limit file not writable: $file");
        return true;
    }

    try {
        flock($handle, LOCK_EX);

        $data = json_decode((string) stream_get_contents($handle), true);

        // Keep only well-formed timestamps that are still inside the window.
        $timestamps = [];
        if (is_array($data)) {
            foreach ($data as $ts) {
                if (is_numeric($ts) && ($now - $ts) < $window) {
                    $timestamps[] = (int) $ts;
                }
            }
        }

        if (count($timestamps) >= $limit) {
            return false;
        }

        $timestamps[] = $now;
        ftruncate($handle, 0);
        rewind($handle);
        fwrite($handle, json_encode($timestamps));
        fflush($handle);
        return true;
    } finally {
        flock($handle, LOCK_UN);
        fclose($handle);
    }
}

/**
 * Sanitize job ID to prevent path traversal attacks.
 *
 * Accepts only strings of the form "pdf_" followed by lowercase hex digits.
 * The pattern is anchored with \A and \z because "$" would also accept a
 * trailing newline. Non-string input (e.g. job_id[]=x) is rejected instead of
 * being passed to preg_match().
 *
 * @param mixed $job_id Raw job ID taken from the request.
 * @return string The validated job ID.
 */
function sanitizeJobId($job_id) {
    if (!is_string($job_id) || !preg_match('/\Apdf_[a-f0-9]+\z/', $job_id)) {
        error('Invalid job ID format');
    }
    return $job_id;
}

/**
 * Get an OIDC identity token for authenticating to Cloud Run.
* Uses a GCP service account key to create a self-signed JWT,
 * then exchanges it for an identity token via Google's OAuth endpoint.
 *
 * The token is memoised in function-static variables and reused until five
 * minutes before the expiry that was requested in the JWT.
 *
 * @return string The identity token to send as a Bearer token.
 * @throws Exception When the key file is missing or invalid, signing fails,
 *                   the HTTP exchange fails, or the response has no id_token.
 */
function getCloudRunToken() {
    static $cachedToken = null;
    static $cachedExpiry = 0;

    // Return cached token if still valid (with 5-min buffer)
    if ($cachedToken && time() < ($cachedExpiry - 300)) {
        return $cachedToken;
    }

    $keyPath = GCP_SA_KEY_PATH;
    if (!file_exists($keyPath)) {
        throw new Exception("GCP service account key not found: $keyPath");
    }

    $sa = json_decode(file_get_contents($keyPath), true);
    if (!$sa || !isset($sa['client_email']) || !isset($sa['private_key'])) {
        throw new Exception("Invalid service account key file");
    }

    $now = time();
    $expiry = $now + 3600;

    // Build JWT header and claims
    $header = base64url_encode(json_encode(['alg' => 'RS256', 'typ' => 'JWT']));
    $claims = base64url_encode(json_encode([
        'iss' => $sa['client_email'],
        'sub' => $sa['client_email'],
        'aud' => 'https://oauth2.googleapis.com/token',
        'iat' => $now,
        'exp' => $expiry,
        'target_audience' => CLOUD_RUN_URL,
    ]));

    // Sign with RSA-SHA256
    $signingInput = "$header.$claims";
    $signature = '';
    $privateKey = openssl_pkey_get_private($sa['private_key']);
    if (!$privateKey) {
        throw new Exception("Failed to parse service account private key");
    }
    // openssl_sign() reports failure through its return value; an unsigned
    // assertion must not be sent to the token endpoint.
    if (!openssl_sign($signingInput, $signature, $privateKey, OPENSSL_ALGO_SHA256)) {
        throw new Exception("Failed to sign service account JWT");
    }
    $jwt = $signingInput . '.' . base64url_encode($signature);

    // Exchange JWT for identity token
    $ch = curl_init('https://oauth2.googleapis.com/token');
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => http_build_query([
            'grant_type' => 'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'assertion' => $jwt,
        ]),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => 10,
    ]);
    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $curlError = curl_error($ch);
    curl_close($ch);

    // A transport failure (DNS, TLS, timeout) yields false and HTTP code 0;
    // surface the cURL error instead of an empty "HTTP 0" message.
    if ($response === false) {
        throw new Exception("Failed to get identity token: $curlError");
    }

    if ($httpCode !== 200) {
        throw new Exception("Failed to get identity token: HTTP $httpCode - $response");
    }

    $tokenData = json_decode($response, true);
    if (!isset($tokenData['id_token'])) {
        throw new Exception("No id_token in response: $response");
    }

    $cachedToken = $tokenData['id_token'];
    $cachedExpiry = $expiry;
    return $cachedToken;
}

/**
 * Base64url encode (no padding, URL-safe)
 *
 * @param string $data Raw bytes to encode.
 * @return string Base64 text with "+/" replaced by "-_" and trailing "=" removed.
 */
function base64url_encode($data) {
    return rtrim(strtr(base64_encode($data), '+/', '-_'), '=');
}

/**
 * Get PostgreSQL PDO connection (lazy singleton)
 *
 * The connection is created on first use from the DB_* constants and configured
 * to throw exceptions on error.
 *
 * @return PDO
 */
function getDB() {
    static $pdo = null;
    if ($pdo === null) {
        $dsn = sprintf('pgsql:host=%s;port=%d;dbname=%s', DB_HOST, DB_PORT, DB_NAME);
        $pdo = new PDO($dsn, DB_USER, DB_PASSWORD, [
            PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
        ]);
    }
    return $pdo;
}

/**
 * Insert or update a job record in PostgreSQL
 *
 * Failures are logged with error_log() and never propagated to the caller.
 *
 * @param string     $job_id   Job identifier (conflict key of the upsert).
 * @param string     $filename Original filename stored with the job.
 * @param string     $status   New job status; 'completed' also stamps completed_at.
 * @param array|null $results  Check result; when given, score, grade, issue counts,
 *                             processing time and the full JSON are stored as well.
 */
function updateJobInDatabase($job_id, $filename, $status, $results = null) {
    try {
        $pdo = getDB();

        $score = null;
        $grade = null;
        $total_issues = null;
        $critical_count = null;
        $error_count = null;
        $warning_count = null;
        $result_json = null;
        $processing_time = null;

        if ($results) {
            $score = $results['accessibility_score'] ?? null;
            $grade = $results['grade'] ?? null;
            $issues = $results['issues'] ?? [];
            $total_issues = count($issues);
            $critical_count = count(array_filter($issues, fn($i) => ($i['severity'] ?? '') === 'CRITICAL'));
            $error_count = count(array_filter($issues, fn($i) => ($i['severity'] ?? '') === 'ERROR'));
            $warning_count = count(array_filter($issues, fn($i) => ($i['severity'] ??
'') === 'WARNING'));
            $result_json = json_encode($results);
            $processing_time = $results['stats']['processing_time'] ?? null;
        }

        // Upsert: NULL values in the new row never overwrite stored values (COALESCE),
        // and completed_at is only stamped when the status is 'completed'.
        $sql = "INSERT INTO jobs (job_id, filename, status, score, grade, total_issues, critical_count, error_count, warning_count, result_json, processing_time, completed_at) VALUES (:job_id, :filename, :status, :score, :grade, :total_issues, :critical_count, :error_count, :warning_count, :result_json::jsonb, :processing_time, CASE WHEN :status2 = 'completed' THEN NOW() ELSE NULL END) ON CONFLICT (job_id) DO UPDATE SET status = EXCLUDED.status, score = COALESCE(EXCLUDED.score, jobs.score), grade = COALESCE(EXCLUDED.grade, jobs.grade), total_issues = COALESCE(EXCLUDED.total_issues, jobs.total_issues), critical_count = COALESCE(EXCLUDED.critical_count, jobs.critical_count), error_count = COALESCE(EXCLUDED.error_count, jobs.error_count), warning_count = COALESCE(EXCLUDED.warning_count, jobs.warning_count), result_json = COALESCE(EXCLUDED.result_json, jobs.result_json), processing_time = COALESCE(EXCLUDED.processing_time, jobs.processing_time), completed_at = CASE WHEN EXCLUDED.status = 'completed' THEN NOW() ELSE jobs.completed_at END";

        $stmt = $pdo->prepare($sql);
        $stmt->execute([
            ':job_id' => $job_id,
            ':filename' => $filename,
            ':status' => $status,
            ':score' => $score,
            ':grade' => $grade,
            ':total_issues' => $total_issues,
            ':critical_count' => $critical_count,
            ':error_count' => $error_count,
            ':warning_count' => $warning_count,
            ':result_json' => $result_json,
            ':processing_time' => $processing_time,
            ':status2' => $status,
        ]);
    } catch (Exception $e) {
        error_log("DB update failed for $job_id: " . $e->getMessage());
    }
}

// CORS headers for API
$allowed_origins = [
    'https://ai-sandbox.oliver.solutions',
    'http://localhost:8888',
    'http://127.0.0.1:8888',
    'http://localhost:8000',
    'http://127.0.0.1:8000',
];
$origin = $_SERVER['HTTP_ORIGIN'] ?? '';

// Strict comparison: the origin must match an allow-list entry exactly.
$origin_allowed = in_array($origin, $allowed_origins, true)
    || (function_exists('isDevelopmentMode') && isDevelopmentMode());

// The allow-origin value depends on the request's Origin header, so caches must key on it.
header('Vary: Origin');

if ($origin_allowed) {
    header('Access-Control-Allow-Origin: ' . ($origin ?: '*'));
} else if (!$origin) {
    header('Access-Control-Allow-Origin: ' . ($allowed_origins[0]));
}
// A disallowed origin gets no Access-Control-Allow-Origin header at all. Answering
// with the literal value "null" would grant access to documents whose origin
// serialises to "null" (sandboxed iframes, file:// and data: pages).

header('Access-Control-Allow-Methods: POST, GET, OPTIONS, DELETE');
header('Access-Control-Allow-Headers: Content-Type, X-API-Key, Authorization');
header('Content-Type: application/json');

// Handle preflight
if ($_SERVER['REQUEST_METHOD'] === 'OPTIONS') {
    exit(0);
}

// Require authentication for all API requests
require_once __DIR__ . '/auth.php';
requireAuth();

// Get action
$action = $_GET['action'] ?? $_POST['action'] ?? '';

// Route the request to its handler; unknown actions are rejected.
switch ($action) {
    case 'upload':
        handleUpload();
        break;
    case 'check':
        handleCheck();
        break;
    case 'status':
        handleStatus();
        break;
    case 'result':
        handleResult();
        break;
    case 'list':
        handleList();
        break;
    case 'delete':
        handleDelete();
        break;
    case 'debug':
        handleDebug();
        break;
    case 'image':
        handleImage();
        break;
    case 'remediate':
        handleRemediate();
        break;
    case 'download':
        handleDownload();
        break;
    case 'stats':
        handleStats();
        break;
    case 'batch_upload':
        handleBatchUpload();
        break;
    case 'batch_status':
        handleBatchStatus();
        break;
    case 'export':
        handleExport();
        break;
    case 'save_adjusted_result':
        handleSaveAdjustedResult();
        break;
    case 'dismiss':
        handleDismiss();
        break;
    case 'undismiss':
        handleUndismiss();
        break;
    case 'override_check':
        handleOverrideCheck();
        break;
    case 'unoverride_check':
        handleUnoverrideCheck();
        break;
    default:
        error('Invalid action');
}

/**
 * Handle file upload
 */
function handleUpload() {
    // Rate limit: 10 uploads/hour per IP
    if (!checkRateLimit('upload', 10, 3600)) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Upload rate limit exceeded. 
Try again later.']);
        exit;
    }

    if (!isset($_FILES['pdf'])) {
        error('No file uploaded');
    }

    $file = $_FILES['pdf'];

    // Validate file
    if ($file['error'] !== UPLOAD_ERR_OK) {
        error('Upload error: ' . $file['error']);
    }

    if ($file['size'] > MAX_FILE_SIZE) {
        error('File too large. Max size: ' . (MAX_FILE_SIZE / 1024 / 1024) . 'MB');
    }

    $ext = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION));
    if (!in_array($ext, ALLOWED_EXTENSIONS)) {
        error('Invalid file type. Only PDF files allowed.');
    }

    // Validate PDF magic bytes
    $header = file_get_contents($file['tmp_name'], false, null, 0, 5);
    if ($header !== '%PDF-') {
        error('File is not a valid PDF (invalid file header)');
    }

    // Generate cryptographically secure job ID
    $job_id = 'pdf_' . bin2hex(random_bytes(16));
    $filename = $job_id . '.pdf';
    $filepath = UPLOAD_DIR . '/' . $filename;

    // Move file
    if (!move_uploaded_file($file['tmp_name'], $filepath)) {
        error('Failed to save file');
    }

    // Attach authenticated user to this job
    $user = extractUserFromToken();

    // Create job metadata
    $job_data = [
        'job_id' => $job_id,
        'original_filename' => $file['name'],
        'uploaded_at' => date('Y-m-d H:i:s'),
        'file_size' => $file['size'],
        'status' => 'uploaded',
        'filepath' => $filepath,
        'user_id' => $user['oid'] ?? null,
        'user_name' => $user['name'] ?? null,
        'user_email'=> $user['email'] ?? null,
    ];

    file_put_contents(
        RESULTS_DIR . '/' . $job_id . '.meta.json',
        json_encode($job_data, JSON_PRETTY_PRINT)
    );

    success([
        'job_id' => $job_id,
        'filename' => $file['name'],
        'message' => 'File uploaded successfully'
    ]);
}

/**
 * Handle PDF accessibility check — send PDF to Cloud Run synchronously
 *
 * Reads job_id and quick_mode from $_POST. When CLOUD_RUN_URL is set the PDF is
 * posted to "<CLOUD_RUN_URL>/check" and the result is written to
 * <job_id>.result.json; otherwise a local Python process is started in the
 * background and the response reports the job as still processing.
 */
function handleCheck() {
    set_time_limit(900); // Allow up to 15 minutes

    $job_id = $_POST['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    // Rate limit: 30 checks/hour per IP
    if (!checkRateLimit('check', 30, 3600)) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Rate limit exceeded. Try again later.']);
        exit;
    }

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);

    // Form fields arrive as strings, and the string "false" is truthy in PHP.
    // Parse the flag explicitly so quick_mode=false / 0 / "" really disables it.
    $quick_mode = filter_var($_POST['quick_mode'] ?? false, FILTER_VALIDATE_BOOLEAN);

    // Update meta to processing
    $job_data['status'] = 'processing';
    $job_data['started_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    // If Cloud Run URL is configured, send to Cloud Run
    if (!empty(CLOUD_RUN_URL)) {
        try {
            $token = getCloudRunToken();

            $pdf_path = $job_data['filepath'];
            if (!file_exists($pdf_path)) {
                error('PDF file not found on server');
            }

            // Build multipart POST to Cloud Run
            $ch = curl_init(CLOUD_RUN_URL . '/check');
            $postFields = [
                'pdf' => new CURLFile($pdf_path, 'application/pdf', basename($pdf_path)),
                'job_id' => $job_id,
                'quick_mode' => $quick_mode ? 'true' : 'false',
                'original_filename' => $job_data['original_filename'] ?? '',
            ];

            curl_setopt_array($ch, [
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $postFields,
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_TIMEOUT => CLOUD_RUN_TIMEOUT,
                CURLOPT_HTTPHEADER => [
                    'Authorization: Bearer ' . $token,
                ],
            ]);

            $response = curl_exec($ch);
            $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $curlError = curl_error($ch);
            curl_close($ch);

            if ($curlError) {
                throw new Exception("Cloud Run request failed: $curlError");
            }

            if ($httpCode !== 200) {
                $errorBody = json_decode($response, true);
                $errorMsg = $errorBody['error'] ?? "HTTP $httpCode";
                throw new Exception("Cloud Run returned error: $errorMsg");
            }

            $result = json_decode($response, true);
            if (!$result || !isset($result['success'])) {
                throw new Exception("Invalid response from Cloud Run");
            }

            if (!$result['success']) {
                throw new Exception($result['error'] ?? 'Unknown Cloud Run error');
            }

            $checkResult = $result['data'];

            // Write result JSON to disk
            $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
            file_put_contents($result_file, json_encode($checkResult, JSON_PRETTY_PRINT));

            // Update meta
            $job_data['status'] = 'completed';
            $job_data['completed_at'] = date('Y-m-d H:i:s');
            file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

            // Update PostgreSQL
            updateJobInDatabase($job_id, $job_data['original_filename'] ?? '', 'completed', $checkResult);

            success([
                'job_id' => $job_id,
                'status' => 'completed',
                'message' => 'Check completed'
            ]);

        } catch (Exception $e) {
            // Mark as failed
            $job_data['status'] = 'failed';
            $job_data['error'] = $e->getMessage();
            file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

            updateJobInDatabase($job_id, $job_data['original_filename'] ?? '', 'failed');

            error('Processing failed: ' . $e->getMessage());
        }
    } else {
        // Fallback to local exec (development without Cloud Run)
        $pdf_path = $job_data['filepath'];
        $output_path = RESULTS_DIR . '/' . $job_id . '.result.json';

        $venv_python = __DIR__ . '/venv/bin/python3';
        $python_bin = file_exists($venv_python) ? $venv_python : 'python3';

        $cmd = escapeshellcmd($python_bin . ' ' . PYTHON_SCRIPT) . ' ' .
               escapeshellarg($pdf_path) . ' ' .
               '--output ' . escapeshellarg($output_path);

        if ($quick_mode) {
            $cmd .= ' --quick';
        }

        // NOTE(review): these values come from the request and end up on the command
        // line (visible in the process list); $google_key is also probed with
        // file_exists(). Acceptable only while this branch stays development-only.
        $anthropic_key = $_POST['anthropic_key'] ?? getenv('ANTHROPIC_API_KEY');
        $google_key = $_POST['google_key'] ?? $_POST['google_credentials'] ?? getenv('GOOGLE_API_KEY');

        if ($anthropic_key) {
            $cmd .= ' --anthropic-key ' . escapeshellarg($anthropic_key);
        }
        if ($google_key) {
            if (file_exists($google_key)) {
                $cmd .= ' --google-credentials ' . escapeshellarg($google_key);
            } else {
                $cmd .= ' --google-key ' . escapeshellarg($google_key);
            }
        }

        $env_path = getenv('PATH');
        putenv("PATH=/opt/homebrew/bin:/usr/local/bin:{$env_path}");

        // Output goes to the error log and the trailing "&" backgrounds the process,
        // so exec() returns immediately and the exit code is not meaningful here.
        $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';
        $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1 &';

        exec($cmd, $output, $return_code);

        success([
            'job_id' => $job_id,
            'status' => 'processing',
            'message' => 'Check started (local mode)'
        ]);
    }
}

/**
 * Check job status — pure file-based
 *
 * The presence of <job_id>.result.json marks the job completed. Otherwise a
 * non-empty error log on a job that started more than 900 seconds ago marks it
 * failed. The response is the job metadata plus the dismissed issue indices.
 */
function handleStatus() {
    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);

    // Check if result file exists (definitive completion signal)
    if (file_exists($result_file)) {
        $job_data['status'] = 'completed';
        $job_data['completed_at'] = $job_data['completed_at'] ?? date('Y-m-d H:i:s', filemtime($result_file));
    } else if (file_exists($error_log) && in_array($job_data['status'], ['processing', 'queued'])) {
        $error_content = file_get_contents($error_log);
        if (!empty($error_content)) {
            $started = strtotime($job_data['started_at'] ?? 'now');
            if (time() - $started > 900) {
                $job_data['status'] = 'failed';
                $job_data['error'] = 'Process timeout or error';
                $job_data['error_log'] = substr($error_content, -1000);
            }
        }
    }

    // The dismissed file is keyed by issue index; expose the keys as integers.
    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    $job_data['dismissed_indices'] = file_exists($dismiss_file)
        ? array_map('intval', array_keys(json_decode(file_get_contents($dismiss_file), true) ?: []))
        : [];

    success($job_data);
}

/**
 * Get check results
 */
function handleResult() {
    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (!file_exists($result_file)) {
        error('Results not found. 
Check may still be processing.');
    }

    $result = json_decode(file_get_contents($result_file), true);

    // If an adjusted result exists, overlay only the score/wcag fields so the
    // frontend can display the adjusted score on reload while keeping the original
    // severity_counts and score_breakdown as the recalculation baseline.
    $adjusted_file = RESULTS_DIR . '/' . $job_id . '.adjusted.json';
    if (file_exists($adjusted_file)) {
        $adjusted = json_decode(file_get_contents($adjusted_file), true);
        $result['accessibility_score'] = $adjusted['accessibility_score'] ?? $result['accessibility_score'];
        $result['grade'] = $adjusted['grade'] ?? $result['grade'];
        $result['wcag_compliance'] = $adjusted['wcag_compliance'] ?? $result['wcag_compliance'];
        $result['score_breakdown']['adjusted'] = true;
    }

    // Inject dismissed indices so frontend can restore dismiss state on reload
    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    $result['dismissed_indices'] = file_exists($dismiss_file)
        ? array_map('intval', array_keys(json_decode(file_get_contents($dismiss_file), true) ?: []))
        : [];

    // Inject overridden check names so frontend can restore override state on reload
    $override_file = RESULTS_DIR . '/' . $job_id . '.overrides.json';
    $result['overridden_checks'] = file_exists($override_file)
        ? array_keys(json_decode(file_get_contents($override_file), true) ?: [])
        : [];

    success($result);
}

/**
 * List all jobs
 *
 * Returns the jobs visible to the current user, newest upload first, each
 * enriched with a score summary when a result (or adjusted result) exists.
 */
function handleList() {
    $user = extractUserFromToken();
    $current_user_id = $user['oid'] ?? null;

    $jobs = [];
    $files = glob(RESULTS_DIR . '/*.meta.json');

    foreach ($files as $file) {
        $job_data = json_decode(file_get_contents($file), true);

        // A truncated or corrupt meta file decodes to null; skip it rather than
        // emitting a null entry that would break the sort below.
        if (!is_array($job_data)) continue;

        // User isolation:
        // - Authenticated user: show their own jobs + legacy jobs (no user_id)
        // - Unauthenticated (dev mode): show only legacy jobs (no user_id)
        $job_user_id = $job_data['user_id'] ?? null;
        if ($current_user_id !== null) {
            // Skip jobs that belong to a DIFFERENT authenticated user
            if ($job_user_id !== null && $job_user_id !== $current_user_id) continue;
        } else {
            // Unauthenticated — skip user-owned jobs
            if ($job_user_id !== null) continue;
        }

        // Enrich with result summary — prefer adjusted result if available
        $result_file = str_replace('.meta.json', '.result.json', $file);
        $adjusted_file = str_replace('.meta.json', '.adjusted.json', $file);
        $source_file = file_exists($adjusted_file) ? $adjusted_file : $result_file;
        if (file_exists($source_file)) {
            $job_data['status'] = 'completed';
            $result = json_decode(file_get_contents($source_file), true);
            $job_data['score'] = $result['accessibility_score'] ?? ($result['score'] ?? null);
            $job_data['grade'] = $result['grade'] ?? null;
            $job_data['total_issues'] = $result['total_issues'] ?? null;
            $job_data['critical_count'] = $result['severity_counts']['critical'] ?? 0;
            $job_data['error_count'] = $result['severity_counts']['error'] ?? 0;
            $job_data['score_adjusted'] = file_exists($adjusted_file);
        }

        $jobs[] = $job_data;
    }

    // Sort by upload time (newest first). A missing or unparsable uploaded_at
    // counts as 0, which places such jobs last.
    usort($jobs, function($a, $b) {
        return (int) strtotime($b['uploaded_at'] ?? '') <=> (int) strtotime($a['uploaded_at'] ?? '');
    });

    success(['jobs' => $jobs]);
}

/**
 * Delete a job
 */
function handleDelete() {
    $job_id = $_POST['job_id'] ?? $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);

    // Delete all files associated with this job
    @unlink($job_data['filepath'] ?? '');
    @unlink($meta_file);
    @unlink(RESULTS_DIR . '/' . $job_id . '.result.json');
    @unlink(RESULTS_DIR . '/' . $job_id . '.dismissed.json');
    @unlink(RESULTS_DIR . '/' . $job_id . '.overrides.json');
    @unlink(RESULTS_DIR . '/' . $job_id .
'.error.log');

    // Also remove the derived artifacts other endpoints create for this job, so a
    // deleted job leaves no document content behind on disk.
    @unlink(RESULTS_DIR . '/' . $job_id . '.adjusted.json');
    @unlink(RESULTS_DIR . '/' . $job_id . '.remediation.log');
    @unlink(RESULTS_DIR . '/' . $job_id . '.report.html');
    @unlink(RESULTS_DIR . '/' . $job_id . '.report.pdf');
    @unlink(UPLOAD_DIR . '/' . $job_id . '_remediated.pdf');

    $images_dir = RESULTS_DIR . '/' . $job_id . '.result_images';
    foreach (glob($images_dir . '/*') ?: [] as $image_path) {
        @unlink($image_path);
    }
    @rmdir($images_dir);

    success(['message' => 'Job deleted']);
}

/**
 * Debug endpoint
 *
 * Development mode only. Reports which job files exist, the job metadata, the
 * error log contents and the version reported by the venv Python interpreter.
 */
function handleDebug() {
    // Debug endpoint only available in development mode
    require_once __DIR__ . '/auth.php';
    if (!isDevelopmentMode()) {
        error('Debug endpoint disabled in production');
    }

    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';

    $debug_info = [
        'job_id' => $job_id,
        'meta_exists' => file_exists($meta_file),
        'result_exists' => file_exists($result_file),
        'error_log_exists' => file_exists($error_log),
        'cloud_run_url' => CLOUD_RUN_URL ?: '(not configured — local mode)',
        'files' => []
    ];

    if (file_exists($meta_file)) {
        $debug_info['meta'] = json_decode(file_get_contents($meta_file), true);
    }

    if (file_exists($error_log)) {
        $debug_info['error_log'] = file_get_contents($error_log);
    }

    if (file_exists($result_file)) {
        $debug_info['result_size'] = filesize($result_file);
    }

    // Test Python. The path is quoted so an install directory containing spaces
    // does not split the command.
    $venv_python = __DIR__ . '/venv/bin/python3';
    $python_version = [];
    exec(escapeshellarg($venv_python) . ' --version 2>&1', $python_version);
    $debug_info['python_version'] = implode("\n", $python_version);

    success($debug_info);
}

/**
 * Serve page images — redirect to GCS URL or serve local file
 *
 * Reads job_id and page from $_GET. When the stored result maps the page to an
 * http(s) URL the client is redirected there; otherwise the local PNG is
 * streamed, or a 404 JSON error is returned when it does not exist.
 */
function handleImage() {
    $job_id = $_GET['job_id'] ?? '';
    $page_num = $_GET['page'] ?? '';

    if (empty($job_id) || empty($page_num)) {
        error('Job ID and page number required');
    }
    $job_id = sanitizeJobId($job_id);
    $page_num = intval($page_num);

    // Check result JSON for GCS URLs
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (file_exists($result_file)) {
        $result = json_decode(file_get_contents($result_file), true);
        $page_images = $result['page_images'] ?? [];

        // Check if the page image value is a URL (GCS)
        $image_value = $page_images[$page_num] ?? $page_images[strval($page_num)] ?? null;
        if ($image_value && (strpos($image_value, 'http://') === 0 || strpos($image_value, 'https://') === 0)) {
            // Redirect to GCS URL
            header('HTTP/1.1 302 Found');
            header('Location: ' . $image_value);
            header('Cache-Control: public, max-age=86400');
            exit;
        }
    }

    // Fallback: serve local image file
    $images_dir = RESULTS_DIR . '/' . $job_id . '.result_images';
    $image_file = $images_dir . '/page_' . $page_num . '.png';

    if (!file_exists($image_file)) {
        http_response_code(404);
        header('Content-Type: application/json');
        echo json_encode(['success' => false, 'error' => 'Image not found']);
        exit;
    }

    // Serve the image
    header('Content-Type: image/png');
    header('Cache-Control: public, max-age=86400'); // Cache for 1 day
    readfile($image_file);
    exit;
}

/**
 * Auto-remediate PDF accessibility issues
 *
 * Runs pdf_remediation.py with --all on the job's original PDF, writing
 * <job_id>_remediated.pdf into UPLOAD_DIR, and records the result in the job
 * metadata.
 */
function handleRemediate() {
    $job_id = $_POST['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

    if (!file_exists($meta_file) || !file_exists($result_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    $result_data = json_decode(file_get_contents($result_file), true);

    // Check if there are fixable issues
    if (!isset($result_data['auto_fixable_count']) || $result_data['auto_fixable_count'] == 0) {
        error('No auto-fixable issues found');
    }

    $original_pdf = $job_data['filepath'];
    $remediated_pdf = UPLOAD_DIR . '/' . $job_id . '_remediated.pdf';

    // Use absolute venv path
    $venv_python = __DIR__ . '/venv/bin/python3';
    $python_bin = file_exists($venv_python) ? $venv_python : 'python3';

    $remediation_script = __DIR__ . '/pdf_remediation.py';

    // Build command - apply all safe fixes. Every path is quoted separately so a
    // directory name containing spaces cannot split the command line.
    $cmd = escapeshellarg($python_bin) . ' ' . escapeshellarg($remediation_script) . ' ' .
           escapeshellarg($original_pdf) . ' ' .
           '--output ' . escapeshellarg($remediated_pdf) . ' ' .
'--all';

    // Set PATH for poppler
    $env_path = getenv('PATH');
    $poppler_paths = '/opt/homebrew/bin:/usr/local/bin';
    putenv("PATH={$poppler_paths}:{$env_path}");

    // Run remediation
    $error_log = RESULTS_DIR . '/' . $job_id . '.remediation.log';
    $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1';

    exec($cmd, $output, $return_code);

    // Check if remediation succeeded
    if ($return_code !== 0 || !file_exists($remediated_pdf)) {
        $log_content = file_exists($error_log) ? file_get_contents($error_log) : 'Unknown error';
        // Only the last 2000 bytes of the log are returned to the client.
        $truncated = strlen($log_content) > 2000 ? '...' . substr($log_content, -2000) : $log_content;
        error('Remediation failed: ' . $truncated);
    }

    // Store remediated file info
    $job_data['remediated_pdf'] = $remediated_pdf;
    $job_data['remediated_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    success([
        'job_id' => $job_id,
        'remediated_pdf' => basename($remediated_pdf),
        'original_filename' => $job_data['original_filename'],
        'fixes_applied' => $result_data['auto_fixable_count'],
        'download_url' => 'api.php?action=download&job_id=' . $job_id . '&type=remediated',
        'message' => 'PDF remediated successfully'
    ]);
}

/**
 * Download original or remediated PDF
 *
 * Reads job_id and type ('original' by default, or 'remediated') from $_GET and
 * streams the matching PDF as an attachment.
 */
function handleDownload() {
    $job_id = $_GET['job_id'] ?? '';
    $type = $_GET['type'] ?? 'original'; // 'original' or 'remediated'

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    $original_name = $job_data['original_filename'] ?? 'document.pdf';

    if ($type === 'remediated') {
        if (!isset($job_data['remediated_pdf']) || !file_exists($job_data['remediated_pdf'])) {
            error('Remediated PDF not found');
        }
        $file_path = $job_data['remediated_pdf'];
        $filename = pathinfo($original_name, PATHINFO_FILENAME) . '_fixed.pdf';
    } else {
        $file_path = $job_data['filepath'] ?? '';
        $filename = $original_name;

        // The remediated branch already verifies its file; do the same here so a
        // missing upload yields a JSON error instead of warnings and an empty body.
        if ($file_path === '' || !file_exists($file_path)) {
            error('PDF file not found on server');
        }
    }

    // The filename originates from the uploader. Replace quotes, backslashes,
    // slashes and control characters so it cannot break out of the quoted
    // Content-Disposition parameter.
    $safe_filename = preg_replace('/[\x00-\x1F\x7F"\\\\\/]/', '_', (string) $filename);
    if ($safe_filename === null || $safe_filename === '') {
        $safe_filename = 'document.pdf';
    }

    // Serve the file
    header('Content-Type: application/pdf');
    header('Content-Disposition: attachment; filename="' . $safe_filename . '"');
    header('Content-Length: ' . filesize($file_path));
    readfile($file_path);
    exit;
}

/**
 * Get aggregate job statistics
 *
 * Counts every *.meta.json file in RESULTS_DIR. A job with a result file counts
 * as completed, one whose status is 'failed' as failed, and everything else as
 * processing.
 */
function handleStats() {
    $stats = [
        'total_jobs' => 0,
        'completed' => 0,
        'failed' => 0,
        'processing' => 0,
    ];

    // Count jobs from meta files
    $files = glob(RESULTS_DIR . '/*.meta.json');
    foreach ($files as $file) {
        $job = json_decode(file_get_contents($file), true);
        $stats['total_jobs']++;

        $result_file = str_replace('.meta.json', '.result.json', $file);
        if (file_exists($result_file)) {
            $stats['completed']++;
        } else if (($job['status'] ?? '') === 'failed') {
            $stats['failed']++;
        } else {
            $stats['processing']++;
        }
    }

    success($stats);
}

/**
 * Handle batch file upload — accepts multiple PDFs
 */
function handleBatchUpload() {
    if (!checkRateLimit('upload', 10, 3600)) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Upload rate limit exceeded.']);
        exit;
    }

    if (!isset($_FILES['pdfs']) || !is_array($_FILES['pdfs']['name'])) {
        error('No files uploaded. Use "pdfs[]" as the file field name.');
    }

    $batch_id = 'batch_' .
bin2hex(random_bytes(8));
    $file_count = count($_FILES['pdfs']['name']);
    $uploaded = [];
    $errors = [];

    // Attach authenticated user to every job of this batch, exactly as the
    // single-file upload does. Jobs stored without a user_id are treated as
    // legacy jobs by the list endpoint and shown to every user.
    $user = extractUserFromToken();

    for ($i = 0; $i < $file_count; $i++) {
        $name = $_FILES['pdfs']['name'][$i];
        $tmp = $_FILES['pdfs']['tmp_name'][$i];
        $size = $_FILES['pdfs']['size'][$i];
        $err = $_FILES['pdfs']['error'][$i];

        // Each file is validated on its own; a rejected file is reported in
        // $errors and does not abort the rest of the batch.
        if ($err !== UPLOAD_ERR_OK) {
            $errors[] = ['filename' => $name, 'error' => "Upload error code: $err"];
            continue;
        }
        if ($size > MAX_FILE_SIZE) {
            $errors[] = ['filename' => $name, 'error' => 'File too large'];
            continue;
        }
        $ext = strtolower(pathinfo($name, PATHINFO_EXTENSION));
        if (!in_array($ext, ALLOWED_EXTENSIONS)) {
            $errors[] = ['filename' => $name, 'error' => 'Not a PDF file'];
            continue;
        }
        $header = file_get_contents($tmp, false, null, 0, 5);
        if ($header !== '%PDF-') {
            $errors[] = ['filename' => $name, 'error' => 'Invalid PDF header'];
            continue;
        }

        $job_id = 'pdf_' . bin2hex(random_bytes(16));
        $filename = $job_id . '.pdf';
        $filepath = UPLOAD_DIR . '/' . $filename;

        if (!move_uploaded_file($tmp, $filepath)) {
            $errors[] = ['filename' => $name, 'error' => 'Failed to save'];
            continue;
        }

        $job_data = [
            'job_id' => $job_id,
            'batch_id' => $batch_id,
            'original_filename' => $name,
            'uploaded_at' => date('Y-m-d H:i:s'),
            'file_size' => $size,
            'status' => 'uploaded',
            'filepath' => $filepath,
            'user_id' => $user['oid'] ?? null,
            'user_name' => $user['name'] ?? null,
            'user_email'=> $user['email'] ?? null,
        ];
        file_put_contents(
            RESULTS_DIR . '/' . $job_id . '.meta.json',
            json_encode($job_data, JSON_PRETTY_PRINT)
        );

        $uploaded[] = ['job_id' => $job_id, 'filename' => $name];
    }

    // Save batch manifest
    $batch_data = [
        'batch_id' => $batch_id,
        'created_at' => date('Y-m-d H:i:s'),
        'total_files' => $file_count,
        'jobs' => array_column($uploaded, 'job_id'),
    ];
    file_put_contents(
        RESULTS_DIR . '/' . $batch_id . '.batch.json',
        json_encode($batch_data, JSON_PRETTY_PRINT)
    );

    success([
        'batch_id' => $batch_id,
        'uploaded' => $uploaded,
        'errors' => $errors,
        'message' => count($uploaded) . ' of ' . $file_count . ' files uploaded'
    ]);
}

/**
 * Get status of a batch job
 *
 * Reads batch_id from $_GET and reports, for every job in the batch manifest,
 * its filename, status and score, plus an overall status: 'completed' when all
 * jobs have results, 'finished' when all are completed or failed, otherwise
 * 'processing'.
 */
function handleBatchStatus() {
    $batch_id = $_GET['batch_id'] ?? '';
    // \A and \z anchor the whole string; "$" would also accept a trailing newline.
    if (empty($batch_id) || !is_string($batch_id) || !preg_match('/\Abatch_[a-f0-9]+\z/', $batch_id)) {
        error('Invalid batch ID');
    }

    $batch_file = RESULTS_DIR . '/' . $batch_id . '.batch.json';
    if (!file_exists($batch_file)) {
        error('Batch not found');
    }

    $batch = json_decode(file_get_contents($batch_file), true);
    // Tolerate a corrupt or incomplete manifest instead of iterating over null.
    $batch_jobs = $batch['jobs'] ?? [];
    $jobs = [];
    $completed = 0;
    $failed = 0;

    foreach ($batch_jobs as $job_id) {
        $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
        $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

        $status = 'unknown';
        $score = null;
        $filename = '';

        if (file_exists($meta_file)) {
            $meta = json_decode(file_get_contents($meta_file), true);
            $status = $meta['status'] ?? 'uploaded';
            $filename = $meta['original_filename'] ?? '';
        }

        // A result file overrides whatever status the metadata records.
        if (file_exists($result_file)) {
            $status = 'completed';
            $result = json_decode(file_get_contents($result_file), true);
            $score = $result['accessibility_score'] ?? null;
            $completed++;
        } else if ($status === 'failed') {
            $failed++;
        }

        $jobs[] = [
            'job_id' => $job_id,
            'filename' => $filename,
            'status' => $status,
            'score' => $score
        ];
    }

    $total = count($batch_jobs);
    $overall_status = ($completed === $total) ? 'completed' :
                      (($completed + $failed === $total) ? 'finished' : 'processing');

    success([
        'batch_id' => $batch_id,
        'status' => $overall_status,
        'total' => $total,
        'completed' => $completed,
        'failed' => $failed,
        'jobs' => $jobs
    ]);
}

/**
 * Export results as HTML or JSON
 */
function handleExport() {
    $job_id = $_GET['job_id'] ?? '';
    $format = $_GET['format'] ?? 'json';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    // Prefer adjusted result if available (created by save_adjusted_result)
    $adj_file = RESULTS_DIR . '/' . $job_id . '.adjusted.json';
    $result_file = file_exists($adj_file) ? $adj_file : RESULTS_DIR . '/' . $job_id .
'.result.json';

    if (!file_exists($result_file)) {
        error('Results not found');
    }

    $result = json_decode(file_get_contents($result_file), true);

    if ($format === 'html') {
        // Generate HTML report via Python
        $venv_python = __DIR__ . '/venv/bin/python3';
        $python_bin = file_exists($venv_python) ? $venv_python : 'python3';
        $report_script = __DIR__ . '/report_generator.py';
        $html_file = RESULTS_DIR . '/' . $job_id . '.report.html';

        // Interpreter and script are quoted separately so an install path
        // containing spaces cannot split the command line.
        $cmd = escapeshellarg($python_bin) . ' ' . escapeshellarg($report_script) .
               ' --input ' . escapeshellarg($result_file) .
               ' --output ' . escapeshellarg($html_file);

        exec($cmd . ' 2>&1', $output, $return_code);

        if ($return_code !== 0 || !file_exists($html_file)) {
            error('Report generation failed');
        }

        header('Content-Type: text/html; charset=utf-8');
        header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.html"');
        readfile($html_file);
        exit;
    }

    if ($format === 'pdf') {
        // Generate PDF report via Python WeasyPrint
        $venv_python = __DIR__ . '/venv/bin/python3';
        $python_bin = file_exists($venv_python) ? $venv_python : 'python3';
        $report_script = __DIR__ . '/report_generator.py';
        $pdf_file = RESULTS_DIR . '/' . $job_id . '.report.pdf';

        $cmd = escapeshellarg($python_bin) . ' ' . escapeshellarg($report_script) .
               ' --input ' . escapeshellarg($result_file) .
               ' --output ' . escapeshellarg($pdf_file) .
               ' --format pdf';

        exec($cmd . ' 2>&1', $output, $return_code);

        if ($return_code !== 0 || !file_exists($pdf_file)) {
            error('PDF report generation failed: ' . implode("\n", $output));
        }

        header('Content-Type: application/pdf');
        header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.pdf"');
        header('Content-Length: ' . filesize($pdf_file));
        readfile($pdf_file);
        exit;
    }

    // Default: JSON download
    header('Content-Type: application/json');
    header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.json"');
    echo json_encode($result, JSON_PRETTY_PRINT);
    exit;
}

/**
 * Save an adjusted result merging dismissed issues and check overrides into a new JSON file.
 * The export endpoint will prefer this file over the original result.
 */
function handleSaveAdjustedResult() {
    $data = json_decode(file_get_contents('php://input'), true) ?: [];
    $job_id = $data['job_id'] ?? '';

    if (empty($job_id)) {
        error('job_id required');
    }
    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (!file_exists($result_file)) {
        error('Results not found');
    }

    $result = json_decode(file_get_contents($result_file), true);

    // Load dismissed and overrides
    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    $override_file = RESULTS_DIR . '/' . $job_id . '.overrides.json';
    $dismissed = file_exists($dismiss_file) ? json_decode(file_get_contents($dismiss_file), true) : [];
    $overrides = file_exists($override_file) ? json_decode(file_get_contents($override_file), true) : [];

    // 1. Mark dismissed issues in the issues array
    if (!empty($dismissed) && isset($result['issues'])) {
        foreach ($result['issues'] as $idx => &$issue) {
            if (isset($dismissed[$idx])) {
                $issue['dismissed'] = true;
            }
        }
        unset($issue);
    }

    // 2. Recalculate score (mirrors JS recalculateScore())
    $bd = $result['score_breakdown'] ?? [];
    $origSC = $result['severity_counts'] ?? [];
    $adj_crit = (int)($origSC['critical'] ?? 0);
    $adj_err = (int)($origSC['error'] ?? 0);

    // Subtract dismissed CRITICAL / ERROR issues
    foreach ($dismissed as $idx => $info) {
        $sev = strtoupper($result['issues'][$idx]['severity'] ?? '');
        if ($sev === 'CRITICAL') $adj_crit = max(0, $adj_crit - 1);
        if ($sev === 'ERROR') $adj_err = max(0, $adj_err - 1);
    }

    $new_penalty = min(20, $adj_crit * 5 + $adj_err * 2);
    $checks_total = (int)($bd['checks_total'] ?? 0);
    $checks_passed = (int)($bd['checks_passed'] ?? 0);
    $new_passed = min($checks_total, $checks_passed + count($overrides));
    $new_base = $checks_total > 0 ?
(int)round(100 * $new_passed / $checks_total) : 0; $new_score = max(0, $new_base - $new_penalty); $result['accessibility_score'] = $new_score; $result['severity_counts']['critical'] = $adj_crit; $result['severity_counts']['error'] = $adj_err; $result['score_breakdown']['final_score'] = $new_score; $result['score_breakdown']['checks_passed'] = $new_passed; $result['score_breakdown']['base_score'] = $new_base; $result['score_breakdown']['penalty'] = $new_penalty; $result['score_breakdown']['adjusted'] = true; // 3. Recompute WCAG compliance badges based on non-dismissed CRITICAL/ERROR issues $wcag_levels = [ '1.1.1'=>'A','1.2.1'=>'A','1.2.2'=>'A','1.2.3'=>'A', '1.2.4'=>'AA','1.2.5'=>'AA', '1.3.1'=>'A','1.3.2'=>'A','1.3.3'=>'A', '1.3.4'=>'AA','1.3.5'=>'AA', '1.4.1'=>'A','1.4.2'=>'A', '1.4.3'=>'AA','1.4.4'=>'AA','1.4.5'=>'AA', '1.4.10'=>'AA','1.4.11'=>'AA','1.4.12'=>'AA','1.4.13'=>'AA', '2.1.1'=>'A','2.1.2'=>'A','2.1.4'=>'A', '2.2.1'=>'A','2.2.2'=>'A', '2.3.1'=>'A', '2.4.1'=>'A','2.4.2'=>'A','2.4.3'=>'A','2.4.4'=>'A', '2.4.5'=>'AA','2.4.6'=>'AA','2.4.7'=>'AA', '2.5.1'=>'A','2.5.2'=>'A','2.5.3'=>'A','2.5.4'=>'A', '3.1.1'=>'A','3.1.2'=>'AA', '3.2.1'=>'A','3.2.2'=>'A','3.2.3'=>'AA','3.2.4'=>'AA', '3.3.1'=>'A','3.3.2'=>'A','3.3.3'=>'AA','3.3.4'=>'AA', '4.1.1'=>'A','4.1.2'=>'A','4.1.3'=>'AA', ]; $failing_a = []; $failing_aa = []; if (isset($result['issues'])) { foreach ($result['issues'] as $issue) { if (!empty($issue['dismissed'])) continue; $sev = strtoupper($issue['severity'] ?? ''); if ($sev !== 'CRITICAL' && $sev !== 'ERROR') continue; $crit = $issue['wcag_criterion'] ?? 
''; if (!$crit || !isset($wcag_levels[$crit])) continue; $lvl = $wcag_levels[$crit]; if ($lvl === 'A' && !in_array($crit, $failing_a)) $failing_a[] = $crit; if ($lvl === 'AA' && !in_array($crit, $failing_aa)) $failing_aa[] = $crit; } } $result['wcag_compliance']['level_a'] = empty($failing_a); $result['wcag_compliance']['level_aa'] = empty($failing_a) && empty($failing_aa); $result['wcag_compliance']['level_a_failures'] = $failing_a; $result['wcag_compliance']['level_aa_failures'] = $failing_aa; // 4. Mark overridden checks in checks_performed if (!empty($overrides) && isset($result['checks_performed'])) { foreach ($result['checks_performed'] as &$check) { if (isset($overrides[$check['name']])) { $check['passed'] = true; $check['manual'] = true; } } unset($check); } // 5. Update Matterhorn checkpoints for H-type CPs linked to overridden checks $check_to_cp = [ 'Color Contrast' => ['04'], 'Image Accessibility' => ['13'], 'Heading Structure' => ['14'], ]; $cp_to_check = []; foreach ($check_to_cp as $checkName => $cpIds) { foreach ($cpIds as $cpId) { $cp_to_check[$cpId] = $checkName; } } if (!empty($overrides) && isset($result['matterhorn_summary']['checkpoints'])) { foreach ($result['matterhorn_summary']['checkpoints'] as &$cp) { $cpId = $cp['id']; if (isset($cp_to_check[$cpId]) && isset($overrides[$cp_to_check[$cpId]])) { $cp['status'] = 'PASS'; $cp['manual'] = true; } } unset($cp); // Recompute overall_passed $all_pass = true; foreach ($result['matterhorn_summary']['checkpoints'] as $cp) { if ($cp['status'] === 'FAIL') { $all_pass = false; break; } } $result['matterhorn_summary']['overall_passed'] = $all_pass; } $adj_file = RESULTS_DIR . '/' . $job_id . '.adjusted.json'; file_put_contents($adj_file, json_encode($result)); success(['saved' => true, 'score' => $new_score]); } /** * Dismiss an issue (mark as false positive) */ function handleDismiss() { $data = json_decode(file_get_contents('php://input'), true) ?: []; $job_id = $data['job_id'] ?? 
''; $issue_index = isset($data['issue_index']) ? (int)$data['issue_index'] : -1; $reason = substr($data['reason'] ?? '', 0, 255); if (empty($job_id) || $issue_index < 0) { error('job_id and issue_index required'); } $job_id = sanitizeJobId($job_id); $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; if (!file_exists($meta_file)) { error('Job not found'); } $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json'; $dismissed = file_exists($dismiss_file) ? json_decode(file_get_contents($dismiss_file), true) : []; $dismissed[$issue_index] = ['reason' => $reason, 'dismissed_at' => date('Y-m-d H:i:s')]; file_put_contents($dismiss_file, json_encode($dismissed)); success(['dismissed' => true, 'issue_index' => $issue_index]); } /** * Undismiss an issue */ function handleUndismiss() { $data = json_decode(file_get_contents('php://input'), true) ?: []; $job_id = $data['job_id'] ?? ''; $issue_index = isset($data['issue_index']) ? (int)$data['issue_index'] : -1; if (empty($job_id) || $issue_index < 0) { error('job_id and issue_index required'); } $job_id = sanitizeJobId($job_id); $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json'; if (file_exists($dismiss_file)) { $dismissed = json_decode(file_get_contents($dismiss_file), true); unset($dismissed[$issue_index]); file_put_contents($dismiss_file, json_encode($dismissed)); } success(['undismissed' => true, 'issue_index' => $issue_index]); } /** * Override a check (mark as manually passed) */ function handleOverrideCheck() { $data = json_decode(file_get_contents('php://input'), true) ?: []; $job_id = $data['job_id'] ?? ''; $check_name = strip_tags(substr($data['check_name'] ?? '', 0, 200)); if (empty($job_id) || empty($check_name)) { error('job_id and check_name required'); } $job_id = sanitizeJobId($job_id); $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; if (!file_exists($meta_file)) { error('Job not found'); } $override_file = RESULTS_DIR . '/' . $job_id . 
'.overrides.json'; $overrides = file_exists($override_file) ? json_decode(file_get_contents($override_file), true) : []; $overrides[$check_name] = ['overridden_at' => date('Y-m-d H:i:s')]; file_put_contents($override_file, json_encode($overrides)); success(['overridden' => true, 'check_name' => $check_name]); } /** * Remove a check override */ function handleUnoverrideCheck() { $data = json_decode(file_get_contents('php://input'), true) ?: []; $job_id = $data['job_id'] ?? ''; $check_name = strip_tags(substr($data['check_name'] ?? '', 0, 200)); if (empty($job_id) || empty($check_name)) { error('job_id and check_name required'); } $job_id = sanitizeJobId($job_id); $override_file = RESULTS_DIR . '/' . $job_id . '.overrides.json'; if (file_exists($override_file)) { $overrides = json_decode(file_get_contents($override_file), true); unset($overrides[$check_name]); file_put_contents($override_file, json_encode($overrides)); } success(['unoverridden' => true, 'check_name' => $check_name]); } /** * Send success response */ function success($data) { echo json_encode([ 'success' => true, 'data' => $data ]); exit; } /** * Send error response */ function error($message) { http_response_code(400); echo json_encode([ 'success' => false, 'error' => $message ]); exit; }