pdf-accessibility/api.php
Vadym Samoilenko c4ffb94351 Merge Cloud Run migration; resolve handleResult() conflict
Keep dismissed_indices injection in handleResult() from our QA
fixes alongside the Cloud Run rewrite from origin/master.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 18:08:04 +00:00

1240 lines
39 KiB
PHP

<?php
/**
* Enterprise PDF Accessibility Checker - API Backend
*
* Handles file uploads, sends PDFs to Cloud Run for processing,
* and serves results. No Redis dependency — uses Cloud Run for
* processing and file-based rate limiting.
*/
// Load KEY=VALUE pairs from a local .env file into the process environment.
// Apache does not export environment variables to PHP by default, so this file
// acts as a fallback: variables that are already defined are never overridden.
$envFile = __DIR__ . '/.env';
if (is_file($envFile) && is_readable($envFile)) {
    // file() returns false on a read failure; treat that as "no lines".
    $lines = file($envFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ?: [];
    foreach ($lines as $line) {
        $line = trim($line);
        // Skip blank lines, "#" comments, and lines without an assignment.
        if ($line === '' || $line[0] === '#' || strpos($line, '=') === false) {
            continue;
        }
        list($key, $val) = explode('=', $line, 2);
        $key = trim($key);
        $val = trim($val);
        // A line such as "=value" has no usable variable name.
        if ($key === '') {
            continue;
        }
        // Compare with false explicitly: a variable deliberately set to "0" or
        // "" in the real environment is falsy but still defined, and must win
        // over the .env fallback.
        if (getenv($key) === false) {
            putenv("$key=$val");
        }
    }
}
// --- Storage locations and upload limits -----------------------------------
define('UPLOAD_DIR', __DIR__ . '/uploads');
define('RESULTS_DIR', __DIR__ . '/results');
define('PYTHON_SCRIPT', __DIR__ . '/enterprise_pdf_checker.py');
define('MAX_FILE_SIZE', 50 * 1024 * 1024); // 50MB
define('ALLOWED_EXTENSIONS', ['pdf']);

// --- Cloud Run ---------------------------------------------------------------
// An empty CLOUD_RUN_URL means the check handler uses its local fallback.
define('CLOUD_RUN_URL', getenv('CLOUD_RUN_URL') ?: '');
define('CLOUD_RUN_TIMEOUT', 900); // 15 minutes
define('GCP_SA_KEY_PATH', getenv('GCP_SA_KEY_PATH') ?: __DIR__ . '/pdf-api-invoker-key.json');
define('RATE_LIMIT_DIR', __DIR__ . '/rate_limits');

// --- PostgreSQL --------------------------------------------------------------
// Each setting falls back to a development default when the variable is unset.
define('DB_HOST', getenv('DB_HOST') ?: 'localhost');
define('DB_PORT', (int) (getenv('DB_PORT') ?: 5432));
define('DB_NAME', getenv('DB_NAME') ?: 'pdf_checker');
define('DB_USER', getenv('DB_USER') ?: 'pdf_checker');
define('DB_PASSWORD', getenv('DB_PASSWORD') ?: 'dev_password');

// --- Make sure the working directories exist ---------------------------------
foreach ([UPLOAD_DIR, RESULTS_DIR, RATE_LIMIT_DIR] as $requiredDir) {
    if (!is_dir($requiredDir)) {
        mkdir($requiredDir, 0755, true);
    }
}
/**
 * File-based sliding-window rate limiter.
 *
 * Keeps a JSON list of request timestamps per client IP and action under
 * RATE_LIMIT_DIR. The whole read-filter-append-write cycle runs under an
 * exclusive flock() so that concurrent requests from the same client cannot
 * overwrite each other's entries and slip past the limit.
 *
 * @param string $action Action name used to namespace the counter.
 * @param int    $limit  Maximum number of requests allowed per window.
 * @param int    $window Window length in seconds.
 * @return bool True when the request is allowed (and has been recorded),
 *              false when the limit has already been reached.
 */
function checkRateLimit($action, $limit, $window) {
    $ip = $_SERVER['REMOTE_ADDR'] ?? 'unknown';
    // Collapse anything that is not filename-safe (dots, colons in IPv6, ...).
    $key = preg_replace('/[^a-zA-Z0-9_-]/', '_', $ip . '_' . $action);
    $file = RATE_LIMIT_DIR . '/' . $key . '.json';
    $now = time();

    // 'c+' opens for read/write and creates the file without truncating it.
    $handle = fopen($file, 'c+');
    if ($handle === false) {
        // The counter cannot be stored; stay best-effort and let the request
        // through rather than blocking every caller on a storage problem.
        error_log("Rate limit file could not be opened: $file");
        return true;
    }

    try {
        flock($handle, LOCK_EX);

        $raw = stream_get_contents($handle);
        $data = json_decode($raw === false ? '' : $raw, true);

        // Keep only well-formed timestamps that are still inside the window.
        $timestamps = [];
        if (is_array($data)) {
            foreach ($data as $ts) {
                if (is_numeric($ts) && ($now - $ts) < $window) {
                    $timestamps[] = (int) $ts;
                }
            }
        }

        if (count($timestamps) >= $limit) {
            return false;
        }

        // Record this request and rewrite the file in place.
        $timestamps[] = $now;
        ftruncate($handle, 0);
        rewind($handle);
        fwrite($handle, json_encode($timestamps));
        fflush($handle);
        return true;
    } finally {
        flock($handle, LOCK_UN);
        fclose($handle);
    }
}
/**
 * Validate a job ID before it is used to build file paths.
 *
 * A valid ID is the literal prefix "pdf_" followed by one or more lowercase
 * hex characters, and nothing else. Anything else ends the request through
 * error().
 *
 * @param mixed $job_id Raw value taken from the request.
 * @return string The unchanged job ID when it is valid.
 */
function sanitizeJobId($job_id) {
    // - is_string(): request parameters can arrive as arrays (job_id[]=...),
    //   which preg_match() rejects with a TypeError on PHP 8.
    // - \z instead of $: "$" also matches just before a trailing newline, so
    //   "pdf_abc\n" would otherwise be accepted and end up in a file path.
    if (!is_string($job_id) || !preg_match('/^pdf_[a-f0-9]+\z/', $job_id)) {
        error('Invalid job ID format');
    }
    return $job_id;
}
/**
 * Get an OIDC identity token for authenticating to Cloud Run.
 *
 * Builds a self-signed RS256 JWT from the service account key at
 * GCP_SA_KEY_PATH (with CLOUD_RUN_URL as the target audience) and exchanges it
 * for an identity token at Google's OAuth token endpoint. The token is cached
 * in a static variable and reused until five minutes before its expiry.
 *
 * @return string The identity token to send as a Bearer credential.
 * @throws Exception When the key file is missing or invalid, signing fails,
 *                   or the token exchange does not return an id_token.
 */
function getCloudRunToken() {
    static $cachedToken = null;
    static $cachedExpiry = 0;

    // Return cached token if still valid (with 5-min buffer)
    if ($cachedToken && time() < ($cachedExpiry - 300)) {
        return $cachedToken;
    }

    $keyPath = GCP_SA_KEY_PATH;
    if (!file_exists($keyPath)) {
        throw new Exception("GCP service account key not found: $keyPath");
    }
    // An unreadable key file is reported the same way as a malformed one.
    $keyJson = file_get_contents($keyPath);
    $sa = $keyJson === false ? null : json_decode($keyJson, true);
    if (!$sa || !isset($sa['client_email']) || !isset($sa['private_key'])) {
        throw new Exception("Invalid service account key file");
    }

    $now = time();
    $expiry = $now + 3600;

    // Build JWT header and claims
    $header = base64url_encode(json_encode(['alg' => 'RS256', 'typ' => 'JWT']));
    $claims = base64url_encode(json_encode([
        'iss' => $sa['client_email'],
        'sub' => $sa['client_email'],
        'aud' => 'https://oauth2.googleapis.com/token',
        'iat' => $now,
        'exp' => $expiry,
        'target_audience' => CLOUD_RUN_URL,
    ]));

    // Sign with RSA-SHA256
    $signingInput = "$header.$claims";
    $signature = '';
    $privateKey = openssl_pkey_get_private($sa['private_key']);
    if (!$privateKey) {
        throw new Exception("Failed to parse service account private key");
    }
    // Stop here on a signing failure instead of sending an unsigned assertion
    // and surfacing it later as an opaque HTTP error from the token endpoint.
    if (!openssl_sign($signingInput, $signature, $privateKey, OPENSSL_ALGO_SHA256)) {
        throw new Exception("Failed to sign service account JWT");
    }
    $jwt = $signingInput . '.' . base64url_encode($signature);

    // Exchange JWT for identity token
    $ch = curl_init('https://oauth2.googleapis.com/token');
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => http_build_query([
            'grant_type' => 'urn:ietf:params:oauth:grant-type:jwt-bearer',
            'assertion' => $jwt,
        ]),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => 10,
    ]);
    $response = curl_exec($ch);
    $curlError = curl_error($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // Transport-level failure (DNS, TLS, timeout): there is no HTTP response.
    if ($response === false) {
        throw new Exception("Failed to get identity token: $curlError");
    }
    if ($httpCode !== 200) {
        throw new Exception("Failed to get identity token: HTTP $httpCode - $response");
    }

    $tokenData = json_decode($response, true);
    if (!is_array($tokenData) || !isset($tokenData['id_token'])) {
        throw new Exception("No id_token in response: $response");
    }

    $cachedToken = $tokenData['id_token'];
    $cachedExpiry = $expiry;
    return $cachedToken;
}
/**
 * Encode binary data as base64url: the URL-safe alphabet ("-" and "_" in
 * place of "+" and "/") with the trailing "=" padding removed.
 *
 * @param string $data Raw bytes to encode.
 * @return string The unpadded base64url representation.
 */
function base64url_encode($data) {
    $standard = base64_encode($data);
    $urlSafe = str_replace(['+', '/'], ['-', '_'], $standard);
    return rtrim($urlSafe, '=');
}
/**
 * Return the shared PostgreSQL PDO connection, opening it on first use.
 *
 * The connection is built from the DB_* constants and configured to throw
 * exceptions on errors. It is kept in a static variable, so later calls in the
 * same request reuse it.
 *
 * @return PDO The connection.
 */
function getDB() {
    static $pdo = null;

    if ($pdo !== null) {
        return $pdo;
    }

    $dsn = 'pgsql:host=' . DB_HOST . ';port=' . DB_PORT . ';dbname=' . DB_NAME;
    $options = [
        PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
    ];
    $pdo = new PDO($dsn, DB_USER, DB_PASSWORD, $options);

    return $pdo;
}
/**
 * Upsert a job row in the PostgreSQL "jobs" table.
 *
 * When $results is given, the score, grade, per-severity issue counts,
 * processing time and the full result JSON are stored with the row. On a
 * conflicting job_id the status is always replaced, while the other columns
 * keep their previous value whenever the new value is NULL. Database failures
 * are written to the error log and never propagate to the caller.
 *
 * @param string     $job_id   Job identifier (conflict key).
 * @param string     $filename Original filename of the upload.
 * @param string     $status   New job status, e.g. "completed" or "failed".
 * @param array|null $results  Decoded checker output, or null when there is none.
 * @return void
 */
function updateJobInDatabase($job_id, $filename, $status, $results = null) {
    try {
        $pdo = getDB();

        // Every result-derived column defaults to NULL.
        $params = [
            ':job_id' => $job_id,
            ':filename' => $filename,
            ':status' => $status,
            ':score' => null,
            ':grade' => null,
            ':total_issues' => null,
            ':critical_count' => null,
            ':error_count' => null,
            ':warning_count' => null,
            ':result_json' => null,
            ':processing_time' => null,
            // The status is bound a second time for the CASE expression.
            ':status2' => $status,
        ];

        if ($results) {
            $issues = $results['issues'] ?? [];
            $params[':total_issues'] = count($issues);

            // Tally the three severities in a single pass over the issues.
            $tally = ['CRITICAL' => 0, 'ERROR' => 0, 'WARNING' => 0];
            foreach ($issues as $issue) {
                $severity = $issue['severity'] ?? '';
                if (is_string($severity) && isset($tally[$severity])) {
                    $tally[$severity]++;
                }
            }

            $params[':score'] = $results['accessibility_score'] ?? null;
            $params[':grade'] = $results['grade'] ?? null;
            $params[':critical_count'] = $tally['CRITICAL'];
            $params[':error_count'] = $tally['ERROR'];
            $params[':warning_count'] = $tally['WARNING'];
            $params[':result_json'] = json_encode($results);
            $params[':processing_time'] = $results['stats']['processing_time'] ?? null;
        }

        $sql = <<<'SQL'
        INSERT INTO jobs (job_id, filename, status, score, grade, total_issues,
        critical_count, error_count, warning_count, result_json, processing_time,
        completed_at)
        VALUES (:job_id, :filename, :status, :score, :grade, :total_issues,
        :critical_count, :error_count, :warning_count, :result_json::jsonb, :processing_time,
        CASE WHEN :status2 = 'completed' THEN NOW() ELSE NULL END)
        ON CONFLICT (job_id) DO UPDATE SET
        status = EXCLUDED.status,
        score = COALESCE(EXCLUDED.score, jobs.score),
        grade = COALESCE(EXCLUDED.grade, jobs.grade),
        total_issues = COALESCE(EXCLUDED.total_issues, jobs.total_issues),
        critical_count = COALESCE(EXCLUDED.critical_count, jobs.critical_count),
        error_count = COALESCE(EXCLUDED.error_count, jobs.error_count),
        warning_count = COALESCE(EXCLUDED.warning_count, jobs.warning_count),
        result_json = COALESCE(EXCLUDED.result_json, jobs.result_json),
        processing_time = COALESCE(EXCLUDED.processing_time, jobs.processing_time),
        completed_at = CASE WHEN EXCLUDED.status = 'completed' THEN NOW() ELSE jobs.completed_at END
        SQL;

        $pdo->prepare($sql)->execute($params);
    } catch (Exception $e) {
        // Best effort: the file-based job state does not depend on this row.
        error_log("DB update failed for $job_id: " . $e->getMessage());
    }
}
// CORS headers for API.
// Only the origins listed here may read responses from a browser.
$allowed_origins = [
    'https://ai-sandbox.oliver.solutions',
    'http://localhost:8888',
    'http://127.0.0.1:8888',
    'http://localhost:8000',
    'http://127.0.0.1:8000',
];
$origin = $_SERVER['HTTP_ORIGIN'] ?? '';

// The Allow-Origin value depends on the request's Origin header, so shared
// caches must not reuse a response across origins.
header('Vary: Origin');

// NOTE(review): auth.php is only required further down in this file, so
// unless isDevelopmentMode() is defined by an earlier include the dev-mode
// part of this condition never fires — confirm whether that is intended.
if (in_array($origin, $allowed_origins, true) || (function_exists('isDevelopmentMode') && isDevelopmentMode())) {
    header('Access-Control-Allow-Origin: ' . ($origin ?: '*'));
} else if ($origin === '') {
    // No Origin header (non-browser client or same-origin request).
    header('Access-Control-Allow-Origin: ' . ($allowed_origins[0]));
}
// A disallowed origin gets no Access-Control-Allow-Origin header at all.
// Replying with the literal value "null" would grant access to documents
// whose origin serializes as "null" (for example sandboxed iframes).

header('Access-Control-Allow-Methods: POST, GET, OPTIONS, DELETE');
header('Access-Control-Allow-Headers: Content-Type, X-API-Key, Authorization');
header('Content-Type: application/json');

// Handle preflight: the headers above are the whole answer.
if ($_SERVER['REQUEST_METHOD'] === 'OPTIONS') {
    exit(0);
}
// Every request past this point must be authenticated.
require_once __DIR__ . '/auth.php';
requireAuth();

// Route the request. "action" may come from the query string or the form
// body; each known action maps to the handler function of the same purpose.
$action = $_GET['action'] ?? $_POST['action'] ?? '';
$actionHandlers = [
    'upload' => 'handleUpload',
    'check' => 'handleCheck',
    'status' => 'handleStatus',
    'result' => 'handleResult',
    'list' => 'handleList',
    'delete' => 'handleDelete',
    'debug' => 'handleDebug',
    'image' => 'handleImage',
    'remediate' => 'handleRemediate',
    'download' => 'handleDownload',
    'stats' => 'handleStats',
    'batch_upload' => 'handleBatchUpload',
    'batch_status' => 'handleBatchStatus',
    'export' => 'handleExport',
    'dismiss' => 'handleDismiss',
    'undismiss' => 'handleUndismiss',
];

// Anything that is not a known action name (including array-valued
// parameters) is rejected.
if (is_string($action) && isset($actionHandlers[$action])) {
    $actionHandlers[$action]();
} else {
    error('Invalid action');
}
/**
 * Accept a single PDF upload (form field "pdf") and register it as a job.
 *
 * The upload is rate limited, then validated by upload error code, size,
 * file extension and the "%PDF-" magic bytes. A valid file is stored in
 * UPLOAD_DIR under a random job ID and a "<job>.meta.json" record is written
 * to RESULTS_DIR. Responds with the job ID and the original filename.
 */
function handleUpload() {
    // Rate limit: 10 uploads/hour per IP
    $withinLimit = checkRateLimit('upload', 10, 3600);
    if (!$withinLimit) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Upload rate limit exceeded. Try again later.']);
        exit;
    }

    $upload = $_FILES['pdf'] ?? null;
    if ($upload === null) {
        error('No file uploaded');
    }

    // --- Validation -------------------------------------------------------
    if ($upload['error'] !== UPLOAD_ERR_OK) {
        error('Upload error: ' . $upload['error']);
    }

    if ($upload['size'] > MAX_FILE_SIZE) {
        error('File too large. Max size: ' . (MAX_FILE_SIZE / 1024 / 1024) . 'MB');
    }

    $extension = strtolower(pathinfo($upload['name'], PATHINFO_EXTENSION));
    if (!in_array($extension, ALLOWED_EXTENSIONS)) {
        error('Invalid file type. Only PDF files allowed.');
    }

    // The first five bytes of the file must be the PDF signature.
    $magic = file_get_contents($upload['tmp_name'], false, null, 0, 5);
    if ($magic !== '%PDF-') {
        error('File is not a valid PDF (invalid file header)');
    }

    // --- Storage ----------------------------------------------------------
    // The job ID comes from a cryptographically secure random source.
    $job_id = 'pdf_' . bin2hex(random_bytes(16));
    $stored_path = UPLOAD_DIR . '/' . $job_id . '.pdf';

    if (!move_uploaded_file($upload['tmp_name'], $stored_path)) {
        error('Failed to save file');
    }

    // --- Job metadata -----------------------------------------------------
    $meta = [
        'job_id' => $job_id,
        'original_filename' => $upload['name'],
        'uploaded_at' => date('Y-m-d H:i:s'),
        'file_size' => $upload['size'],
        'status' => 'uploaded',
        'filepath' => $stored_path
    ];
    $meta_path = RESULTS_DIR . '/' . $job_id . '.meta.json';
    file_put_contents($meta_path, json_encode($meta, JSON_PRETTY_PRINT));

    success([
        'job_id' => $job_id,
        'filename' => $upload['name'],
        'message' => 'File uploaded successfully'
    ]);
}
/**
 * Run the accessibility check for a previously uploaded PDF.
 *
 * With CLOUD_RUN_URL configured, the PDF is POSTed to "<CLOUD_RUN_URL>/check"
 * and the request blocks until the service answers. The returned data is
 * written to "<job>.result.json", the meta file is marked completed, and the
 * job row is upserted in PostgreSQL. Any failure on that path marks the job as
 * failed before the error response is sent.
 *
 * Without CLOUD_RUN_URL, the local Python checker is started in the
 * background and the response only confirms that processing has begun.
 *
 * Request (POST): job_id (required); quick_mode (optional, boolean-like);
 * local mode only: anthropic_key, google_key / google_credentials.
 */
function handleCheck() {
    set_time_limit(900); // Allow up to 15 minutes

    $job_id = $_POST['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    // Rate limit: 30 checks/hour per IP
    if (!checkRateLimit('check', 30, 3600)) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Rate limit exceeded. Try again later.']);
        exit;
    }

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }
    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        // A corrupt meta file must not be rewritten from a null value.
        error('Job metadata is unreadable');
    }

    // Form values are strings, and the string "false" is truthy in PHP.
    // FILTER_VALIDATE_BOOLEAN maps "1", "true", "on" and "yes" to true and
    // everything else (including "false" and "0") to false.
    $quick_mode = filter_var($_POST['quick_mode'] ?? false, FILTER_VALIDATE_BOOLEAN);

    // Update meta to processing
    $job_data['status'] = 'processing';
    $job_data['started_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    // If Cloud Run URL is configured, send to Cloud Run
    if (!empty(CLOUD_RUN_URL)) {
        try {
            $token = getCloudRunToken();

            $pdf_path = $job_data['filepath'] ?? '';
            if (!is_string($pdf_path) || !file_exists($pdf_path)) {
                // Throw instead of calling error(): error() exits immediately,
                // which would leave the job stuck in "processing".
                throw new Exception('PDF file not found on server');
            }

            // Build multipart POST to Cloud Run
            $ch = curl_init(CLOUD_RUN_URL . '/check');
            $postFields = [
                'pdf' => new CURLFile($pdf_path, 'application/pdf', basename($pdf_path)),
                'job_id' => $job_id,
                'quick_mode' => $quick_mode ? 'true' : 'false',
                'original_filename' => $job_data['original_filename'] ?? '',
            ];
            curl_setopt_array($ch, [
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $postFields,
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_TIMEOUT => CLOUD_RUN_TIMEOUT,
                CURLOPT_HTTPHEADER => [
                    'Authorization: Bearer ' . $token,
                ],
            ]);
            $response = curl_exec($ch);
            $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $curlError = curl_error($ch);
            curl_close($ch);

            if ($curlError) {
                throw new Exception("Cloud Run request failed: $curlError");
            }
            if ($httpCode !== 200) {
                $errorBody = json_decode($response, true);
                $errorMsg = $errorBody['error'] ?? "HTTP $httpCode";
                throw new Exception("Cloud Run returned error: $errorMsg");
            }

            $result = json_decode($response, true);
            if (!$result || !isset($result['success'])) {
                throw new Exception("Invalid response from Cloud Run");
            }
            if (!$result['success']) {
                throw new Exception($result['error'] ?? 'Unknown Cloud Run error');
            }
            // A "successful" response without a payload must not be stored as
            // a completed result.
            if (!isset($result['data'])) {
                throw new Exception("Invalid response from Cloud Run");
            }
            $checkResult = $result['data'];

            // Write result JSON to disk
            $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
            file_put_contents($result_file, json_encode($checkResult, JSON_PRETTY_PRINT));

            // Update meta
            $job_data['status'] = 'completed';
            $job_data['completed_at'] = date('Y-m-d H:i:s');
            file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

            // Update PostgreSQL
            updateJobInDatabase($job_id, $job_data['original_filename'] ?? '', 'completed', $checkResult);

            success([
                'job_id' => $job_id,
                'status' => 'completed',
                'message' => 'Check completed'
            ]);
        } catch (Exception $e) {
            // Mark as failed
            $job_data['status'] = 'failed';
            $job_data['error'] = $e->getMessage();
            file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));
            updateJobInDatabase($job_id, $job_data['original_filename'] ?? '', 'failed');
            error('Processing failed: ' . $e->getMessage());
        }
    } else {
        // Fallback to local exec (development without Cloud Run)
        $pdf_path = $job_data['filepath'] ?? '';
        $output_path = RESULTS_DIR . '/' . $job_id . '.result.json';

        $venv_python = __DIR__ . '/venv/bin/python3';
        $python_bin = file_exists($venv_python) ? $venv_python : 'python3';

        $cmd = escapeshellcmd($python_bin . ' ' . PYTHON_SCRIPT) . ' ' .
            escapeshellarg($pdf_path) . ' ' .
            '--output ' . escapeshellarg($output_path);
        if ($quick_mode) {
            $cmd .= ' --quick';
        }

        // NOTE(review): keys passed as command-line arguments are visible in
        // the process list, and file_exists() below runs on a client-supplied
        // value. Acceptable only because this branch is the development
        // fallback — confirm it is never reachable in production.
        $anthropic_key = $_POST['anthropic_key'] ?? getenv('ANTHROPIC_API_KEY');
        $google_key = $_POST['google_key'] ?? $_POST['google_credentials'] ?? getenv('GOOGLE_API_KEY');
        if ($anthropic_key) {
            $cmd .= ' --anthropic-key ' . escapeshellarg($anthropic_key);
        }
        if ($google_key) {
            // A value naming an existing file is passed as a credentials
            // file; anything else is passed as an API key.
            if (file_exists($google_key)) {
                $cmd .= ' --google-credentials ' . escapeshellarg($google_key);
            } else {
                $cmd .= ' --google-key ' . escapeshellarg($google_key);
            }
        }

        // Prepend the Homebrew and /usr/local binary directories to PATH.
        $env_path = getenv('PATH');
        putenv("PATH=/opt/homebrew/bin:/usr/local/bin:{$env_path}");

        // Send all output to the job's error log and detach with "&" so the
        // request does not wait for the checker to finish.
        $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';
        $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1 &';
        exec($cmd, $output, $return_code);

        success([
            'job_id' => $job_id,
            'status' => 'processing',
            'message' => 'Check started (local mode)'
        ]);
    }
}
/**
 * Report the current state of a job using only files on disk.
 *
 * The meta file is the base record. An existing result file always means
 * "completed". Otherwise, a non-empty error log on a job that has been
 * processing or queued for more than 900 seconds marks it as failed and
 * attaches the last 1000 bytes of the log. The indices of dismissed issues
 * are always included in the response.
 */
function handleStatus() {
    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $base = RESULTS_DIR . '/' . $job_id;
    $meta_file = $base . '.meta.json';
    $result_file = $base . '.result.json';
    $error_log = $base . '.error.log';
    $dismiss_file = $base . '.dismissed.json';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }
    $job_data = json_decode(file_get_contents($meta_file), true);

    if (file_exists($result_file)) {
        // The result file is the definitive completion signal.
        $job_data['status'] = 'completed';
        if (!isset($job_data['completed_at'])) {
            $job_data['completed_at'] = date('Y-m-d H:i:s', filemtime($result_file));
        }
    } elseif (file_exists($error_log) && in_array($job_data['status'], ['processing', 'queued'])) {
        $log_text = file_get_contents($error_log);
        if (!empty($log_text)) {
            $started_at = strtotime($job_data['started_at'] ?? 'now');
            $elapsed = time() - $started_at;
            if ($elapsed > 900) {
                $job_data['status'] = 'failed';
                $job_data['error'] = 'Process timeout or error';
                $job_data['error_log'] = substr($log_text, -1000);
            }
        }
    }

    // Dismissed issues are stored keyed by issue index; expose the keys.
    $dismissed_indices = [];
    if (file_exists($dismiss_file)) {
        $dismissed = json_decode(file_get_contents($dismiss_file), true) ?: [];
        $dismissed_indices = array_map('intval', array_keys($dismissed));
    }
    $job_data['dismissed_indices'] = $dismissed_indices;

    success($job_data);
}
/**
 * Return the stored check result for a job.
 *
 * The decoded "<job>.result.json" is sent back with an extra
 * "dismissed_indices" list, so the frontend can restore the dismiss state
 * after a reload.
 */
function handleResult() {
    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $base = RESULTS_DIR . '/' . $job_id;
    $result_file = $base . '.result.json';
    if (!file_exists($result_file)) {
        error('Results not found. Check may still be processing.');
    }
    $result = json_decode(file_get_contents($result_file), true);

    // The dismissed file is keyed by issue index; only the keys are exposed.
    $dismiss_file = $base . '.dismissed.json';
    $dismissed_indices = [];
    if (file_exists($dismiss_file)) {
        $dismissed = json_decode(file_get_contents($dismiss_file), true) ?: [];
        $dismissed_indices = array_map('intval', array_keys($dismissed));
    }
    $result['dismissed_indices'] = $dismissed_indices;

    success($result);
}
/**
 * List all jobs, newest upload first.
 *
 * Each job is the decoded content of its "<job>.meta.json" file. A job whose
 * result file exists is reported as "completed" regardless of the stored
 * status. Meta files that cannot be decoded are skipped.
 */
function handleList() {
    $jobs = [];
    // glob() returns false on error; treat that as an empty listing.
    $files = glob(RESULTS_DIR . '/*.meta.json') ?: [];

    foreach ($files as $file) {
        $job_data = json_decode(file_get_contents($file), true);
        if (!is_array($job_data)) {
            // Corrupt or partially written meta file: leaving it in would
            // produce a null/partial entry and break the sort below.
            continue;
        }

        // Swap only the filename suffix, so a ".meta.json" fragment elsewhere
        // in the path cannot be rewritten by accident.
        $result_file = substr($file, 0, -strlen('.meta.json')) . '.result.json';
        if (file_exists($result_file)) {
            $job_data['status'] = 'completed';
        }
        $jobs[] = $job_data;
    }

    // Sort by upload time (newest first). A missing or unparsable timestamp
    // counts as 0 and therefore sorts last.
    $uploadedAt = static function ($job) {
        $raw = $job['uploaded_at'] ?? '';
        return is_string($raw) ? (int) strtotime($raw) : 0;
    };
    usort($jobs, static function ($a, $b) use ($uploadedAt) {
        return $uploadedAt($b) <=> $uploadedAt($a);
    });

    success(['jobs' => $jobs]);
}
/**
 * Delete a job together with every file this API creates for it.
 *
 * Besides the meta file, the result file and the uploaded PDF, this removes
 * the dismissed-issues file, the checker and remediation logs, generated
 * reports, the remediated PDF and the page images of the job. Files that do
 * not exist are skipped.
 *
 * Request: job_id via POST or GET.
 */
function handleDelete() {
    $job_id = $_POST['job_id'] ?? $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $base = RESULTS_DIR . '/' . $job_id;
    $meta_file = $base . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }
    $job_data = json_decode(file_get_contents($meta_file), true);

    // Artifacts whose names are derived from the job ID by the handlers in
    // this file.
    $paths = [
        $meta_file,
        $base . '.result.json',
        $base . '.dismissed.json',
        $base . '.error.log',
        $base . '.remediation.log',
        $base . '.report.html',
        $base . '.report.pdf',
        UPLOAD_DIR . '/' . $job_id . '_remediated.pdf',
    ];

    // Paths recorded in the meta file (ignored when the meta is corrupt).
    if (is_array($job_data)) {
        foreach (['filepath', 'remediated_pdf'] as $field) {
            $recorded = $job_data[$field] ?? null;
            if (is_string($recorded) && $recorded !== '') {
                $paths[] = $recorded;
            }
        }
    }

    // Page images live in a per-job directory next to the result file.
    $images_dir = $base . '.result_images';
    if (is_dir($images_dir)) {
        foreach (glob($images_dir . '/page_*.png') ?: [] as $image) {
            $paths[] = $image;
        }
    }

    foreach (array_unique($paths) as $path) {
        if (is_file($path)) {
            unlink($path);
        }
    }

    // Remove the image directory only when nothing else is left in it.
    if (is_dir($images_dir)) {
        $remaining = array_diff(scandir($images_dir) ?: [], ['.', '..']);
        if ($remaining === []) {
            rmdir($images_dir);
        }
    }

    success(['message' => 'Job deleted']);
}
/**
 * Development-only diagnostics for a single job.
 *
 * Reports which of the job's files exist, the decoded meta data, the full
 * error log, the size of the result file, the configured Cloud Run URL and
 * the version string printed by the virtualenv Python interpreter. Refuses to
 * run outside development mode.
 */
function handleDebug() {
    // isDevelopmentMode() is provided by auth.php.
    require_once __DIR__ . '/auth.php';
    if (!isDevelopmentMode()) {
        error('Debug endpoint disabled in production');
    }

    $job_id = $_GET['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $base = RESULTS_DIR . '/' . $job_id;
    $meta_file = $base . '.meta.json';
    $result_file = $base . '.result.json';
    $error_log = $base . '.error.log';

    $has_meta = file_exists($meta_file);
    $has_result = file_exists($result_file);
    $has_error_log = file_exists($error_log);

    $debug_info = [
        'job_id' => $job_id,
        'meta_exists' => $has_meta,
        'result_exists' => $has_result,
        'error_log_exists' => $has_error_log,
        'cloud_run_url' => CLOUD_RUN_URL ?: '(not configured — local mode)',
        'files' => []
    ];

    // Optional sections are appended only for files that are present.
    if ($has_meta) {
        $debug_info['meta'] = json_decode(file_get_contents($meta_file), true);
    }
    if ($has_error_log) {
        $debug_info['error_log'] = file_get_contents($error_log);
    }
    if ($has_result) {
        $debug_info['result_size'] = filesize($result_file);
    }

    // Ask the virtualenv interpreter for its version (stderr included).
    $venv_python = __DIR__ . '/venv/bin/python3';
    exec($venv_python . ' --version 2>&1', $version_lines);
    $debug_info['python_version'] = implode("\n", $version_lines);

    success($debug_info);
}
/**
 * Serve the rendered image of one PDF page.
 *
 * If the job's result JSON maps the page to an http(s) URL, the client is
 * redirected there. Otherwise the local file
 * "<job>.result_images/page_<n>.png" is streamed, or a 404 JSON error is
 * returned when that file does not exist.
 *
 * Request (GET): job_id, page.
 */
function handleImage() {
    $job_id = $_GET['job_id'] ?? '';
    $page_num = $_GET['page'] ?? '';

    // Compare against '' instead of using empty(): empty('0') is true, which
    // made page 0 impossible to request. Array-valued parameters are rejected.
    if (empty($job_id) || !is_scalar($page_num) || $page_num === '') {
        error('Job ID and page number required');
    }
    $job_id = sanitizeJobId($job_id);
    $page_num = intval($page_num);

    // Check result JSON for GCS URLs
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (file_exists($result_file)) {
        $result = json_decode(file_get_contents($result_file), true);
        $page_images = is_array($result) ? ($result['page_images'] ?? []) : [];

        // PHP normalizes integer-like string keys to integers, so a single
        // lookup covers both "5" and 5 as stored keys.
        $image_value = is_array($page_images) ? ($page_images[$page_num] ?? null) : null;

        // Only string values can be URLs; anything else falls through to the
        // local file lookup.
        if (is_string($image_value)
            && (strpos($image_value, 'http://') === 0 || strpos($image_value, 'https://') === 0)) {
            // Redirect to GCS URL
            header('HTTP/1.1 302 Found');
            header('Location: ' . $image_value);
            header('Cache-Control: public, max-age=86400');
            exit;
        }
    }

    // Fallback: serve local image file
    $images_dir = RESULTS_DIR . '/' . $job_id . '.result_images';
    $image_file = $images_dir . '/page_' . $page_num . '.png';
    if (!file_exists($image_file)) {
        http_response_code(404);
        header('Content-Type: application/json');
        echo json_encode(['success' => false, 'error' => 'Image not found']);
        exit;
    }

    // Serve the image
    header('Content-Type: image/png');
    header('Cache-Control: public, max-age=86400'); // Cache for 1 day
    readfile($image_file);
    exit;
}
/**
 * Produce a remediated copy of a job's PDF.
 *
 * Requires an existing result with a non-zero "auto_fixable_count". Runs
 * pdf_remediation.py with "--all" on the original upload, waits for it to
 * finish, and records the remediated file in the job's meta data. On failure
 * the tail of the remediation log is returned in the error message.
 *
 * Request (POST): job_id.
 */
function handleRemediate() {
    $job_id = $_POST['job_id'] ?? '';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $base = RESULTS_DIR . '/' . $job_id;
    $meta_file = $base . '.meta.json';
    $result_file = $base . '.result.json';
    if (!(file_exists($meta_file) && file_exists($result_file))) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    $result_data = json_decode(file_get_contents($result_file), true);

    // Nothing to do unless the checker reported auto-fixable issues.
    if (!isset($result_data['auto_fixable_count']) || $result_data['auto_fixable_count'] == 0) {
        error('No auto-fixable issues found');
    }

    $source_pdf = $job_data['filepath'];
    $remediated_pdf = UPLOAD_DIR . '/' . $job_id . '_remediated.pdf';
    $log_file = $base . '.remediation.log';

    // Prefer the project virtualenv interpreter when it exists.
    $venv_python = __DIR__ . '/venv/bin/python3';
    $interpreter = file_exists($venv_python) ? $venv_python : 'python3';
    $script = __DIR__ . '/pdf_remediation.py';

    // Assemble the command; "--all" applies all safe fixes.
    $cmd = implode(' ', [
        escapeshellcmd($interpreter . ' ' . $script),
        escapeshellarg($source_pdf),
        '--output ' . escapeshellarg($remediated_pdf),
        '--all',
    ]);

    // Set PATH for poppler
    $current_path = getenv('PATH');
    putenv("PATH=/opt/homebrew/bin:/usr/local/bin:{$current_path}");

    // Run synchronously with all output captured in the remediation log.
    $cmd .= ' > ' . escapeshellarg($log_file) . ' 2>&1';
    exec($cmd, $output, $return_code);

    $succeeded = ($return_code === 0) && file_exists($remediated_pdf);
    if (!$succeeded) {
        $log_content = file_exists($log_file) ? file_get_contents($log_file) : 'Unknown error';
        // Keep only the last 2000 bytes of a long log.
        if (strlen($log_content) > 2000) {
            $log_content = '...' . substr($log_content, -2000);
        }
        error('Remediation failed: ' . $log_content);
    }

    // Remember where the remediated file lives so it can be downloaded.
    $job_data['remediated_pdf'] = $remediated_pdf;
    $job_data['remediated_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    success([
        'job_id' => $job_id,
        'remediated_pdf' => basename($remediated_pdf),
        'original_filename' => $job_data['original_filename'],
        'fixes_applied' => $result_data['auto_fixable_count'],
        'download_url' => 'api.php?action=download&job_id=' . $job_id . '&type=remediated',
        'message' => 'PDF remediated successfully'
    ]);
}
/**
 * Download the original or the remediated PDF of a job.
 *
 * Request (GET): job_id; type = "original" (default) or "remediated".
 * The download is offered under the original upload name, with a "_fixed"
 * suffix for the remediated version.
 */
function handleDownload() {
    $job_id = $_GET['job_id'] ?? '';
    $type = $_GET['type'] ?? 'original'; // 'original' or 'remediated'
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }
    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        error('Job not found');
    }

    $original_name = $job_data['original_filename'] ?? '';
    if (!is_string($original_name) || $original_name === '') {
        $original_name = 'document.pdf';
    }

    if ($type === 'remediated') {
        if (!isset($job_data['remediated_pdf']) || !file_exists($job_data['remediated_pdf'])) {
            error('Remediated PDF not found');
        }
        $file_path = $job_data['remediated_pdf'];
        $filename = pathinfo($original_name, PATHINFO_FILENAME) . '_fixed.pdf';
    } else {
        $file_path = $job_data['filepath'] ?? '';
        $filename = $original_name;
        // The upload may have been removed; fail cleanly instead of sending
        // PDF headers followed by filesize()/readfile() warnings.
        if (!is_string($file_path) || !is_file($file_path)) {
            error('PDF file not found on server');
        }
    }

    // The filename was chosen by the uploading client. Replace control
    // characters, quotes, backslashes and slashes so it cannot break out of
    // the quoted Content-Disposition parameter.
    $safe_name = preg_replace('/[\x00-\x1F\x7F"\\\\\/]/', '_', $filename);
    if (!is_string($safe_name) || $safe_name === '') {
        $safe_name = 'document.pdf';
    }

    // Serve the file
    header('Content-Type: application/pdf');
    header('Content-Disposition: attachment; filename="' . $safe_name . '"');
    header('Content-Length: ' . filesize($file_path));
    readfile($file_path);
    exit;
}
/**
 * Return aggregate job counts derived from the meta files.
 *
 * Every meta file counts as one job. A job with a result file is
 * "completed", a job whose stored status is "failed" is "failed", and
 * everything else is counted as "processing".
 */
function handleStats() {
    $stats = [
        'total_jobs' => 0,
        'completed' => 0,
        'failed' => 0,
        'processing' => 0,
    ];

    $meta_paths = glob(RESULTS_DIR . '/*.meta.json');
    foreach ($meta_paths as $meta_path) {
        $job = json_decode(file_get_contents($meta_path), true);
        $stats['total_jobs']++;

        // Decide which bucket this job belongs to, then bump it once.
        $result_path = str_replace('.meta.json', '.result.json', $meta_path);
        if (file_exists($result_path)) {
            $bucket = 'completed';
        } elseif (($job['status'] ?? '') === 'failed') {
            $bucket = 'failed';
        } else {
            $bucket = 'processing';
        }
        $stats[$bucket]++;
    }

    success($stats);
}
/**
 * Accept several PDFs in one request (form field "pdfs[]").
 *
 * Each file goes through the same checks as a single upload: error code,
 * size, extension and "%PDF-" header. Accepted files become individual jobs
 * tagged with a shared batch ID; rejected files are reported per filename.
 * A "<batch>.batch.json" manifest listing the created jobs is written to
 * RESULTS_DIR. The whole batch counts as one upload for rate limiting.
 */
function handleBatchUpload() {
    if (!checkRateLimit('upload', 10, 3600)) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Upload rate limit exceeded.']);
        exit;
    }

    if (!isset($_FILES['pdfs']) || !is_array($_FILES['pdfs']['name'])) {
        error('No files uploaded. Use "pdfs[]" as the file field name.');
    }

    $files = $_FILES['pdfs'];
    $batch_id = 'batch_' . bin2hex(random_bytes(8));
    $file_count = count($files['name']);
    $uploaded = [];
    $errors = [];

    for ($idx = 0; $idx < $file_count; $idx++) {
        $name = $files['name'][$idx];
        $tmp = $files['tmp_name'][$idx];
        $size = $files['size'][$idx];
        $err = $files['error'][$idx];

        // Work out why this file must be rejected, if at all. The checks run
        // in order and stop at the first one that fails.
        $rejection = null;
        if ($err !== UPLOAD_ERR_OK) {
            $rejection = "Upload error code: $err";
        } elseif ($size > MAX_FILE_SIZE) {
            $rejection = 'File too large';
        } elseif (!in_array(strtolower(pathinfo($name, PATHINFO_EXTENSION)), ALLOWED_EXTENSIONS)) {
            $rejection = 'Not a PDF file';
        } elseif (file_get_contents($tmp, false, null, 0, 5) !== '%PDF-') {
            $rejection = 'Invalid PDF header';
        }

        // Only a file that passed every check gets a job ID and is stored.
        if ($rejection === null) {
            $job_id = 'pdf_' . bin2hex(random_bytes(16));
            $filepath = UPLOAD_DIR . '/' . $job_id . '.pdf';
            if (!move_uploaded_file($tmp, $filepath)) {
                $rejection = 'Failed to save';
            }
        }

        if ($rejection !== null) {
            $errors[] = ['filename' => $name, 'error' => $rejection];
            continue;
        }

        $meta = [
            'job_id' => $job_id,
            'batch_id' => $batch_id,
            'original_filename' => $name,
            'uploaded_at' => date('Y-m-d H:i:s'),
            'file_size' => $size,
            'status' => 'uploaded',
            'filepath' => $filepath
        ];
        $meta_path = RESULTS_DIR . '/' . $job_id . '.meta.json';
        file_put_contents($meta_path, json_encode($meta, JSON_PRETTY_PRINT));

        $uploaded[] = ['job_id' => $job_id, 'filename' => $name];
    }

    // Save batch manifest
    $manifest = [
        'batch_id' => $batch_id,
        'created_at' => date('Y-m-d H:i:s'),
        'total_files' => $file_count,
        'jobs' => array_column($uploaded, 'job_id'),
    ];
    $manifest_path = RESULTS_DIR . '/' . $batch_id . '.batch.json';
    file_put_contents($manifest_path, json_encode($manifest, JSON_PRETTY_PRINT));

    success([
        'batch_id' => $batch_id,
        'uploaded' => $uploaded,
        'errors' => $errors,
        'message' => count($uploaded) . ' of ' . $file_count . ' files uploaded'
    ]);
}
/**
 * Report the progress of a batch and of each job in it.
 *
 * Per job: filename and status come from the meta file; an existing result
 * file forces "completed" and supplies the accessibility score. The batch is
 * "completed" when every job completed, "finished" when every job either
 * completed or failed, and "processing" otherwise.
 *
 * Request (GET): batch_id.
 */
function handleBatchStatus() {
    $batch_id = $_GET['batch_id'] ?? '';
    if (empty($batch_id) || !preg_match('/^batch_[a-f0-9]+$/', $batch_id)) {
        error('Invalid batch ID');
    }

    $batch_file = RESULTS_DIR . '/' . $batch_id . '.batch.json';
    if (!file_exists($batch_file)) {
        error('Batch not found');
    }
    $batch = json_decode(file_get_contents($batch_file), true);

    $jobs = [];
    $completed = 0;
    $failed = 0;

    foreach ($batch['jobs'] as $job_id) {
        $base = RESULTS_DIR . '/' . $job_id;
        $meta_file = $base . '.meta.json';
        $result_file = $base . '.result.json';

        // Defaults for a job whose meta file has gone missing.
        $entry = [
            'job_id' => $job_id,
            'filename' => '',
            'status' => 'unknown',
            'score' => null
        ];

        if (file_exists($meta_file)) {
            $meta = json_decode(file_get_contents($meta_file), true);
            $entry['status'] = $meta['status'] ?? 'uploaded';
            $entry['filename'] = $meta['original_filename'] ?? '';
        }

        if (file_exists($result_file)) {
            // A result on disk overrides whatever status the meta file holds.
            $result = json_decode(file_get_contents($result_file), true);
            $entry['status'] = 'completed';
            $entry['score'] = $result['accessibility_score'] ?? null;
            $completed++;
        } elseif ($entry['status'] === 'failed') {
            $failed++;
        }

        $jobs[] = $entry;
    }

    $total = count($batch['jobs']);
    if ($completed === $total) {
        $overall_status = 'completed';
    } elseif ($completed + $failed === $total) {
        $overall_status = 'finished';
    } else {
        $overall_status = 'processing';
    }

    success([
        'batch_id' => $batch_id,
        'status' => $overall_status,
        'total' => $total,
        'completed' => $completed,
        'failed' => $failed,
        'jobs' => $jobs
    ]);
}
/**
 * Export a job's results as a downloadable file.
 *
 * Request (GET): job_id; format = "json" (default), "html" or "pdf".
 * HTML and PDF reports are generated by report_generator.py from the stored
 * result file; any other format value returns the pretty-printed result JSON.
 */
function handleExport() {
    $job_id = $_GET['job_id'] ?? '';
    $format = $_GET['format'] ?? 'json';
    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (!file_exists($result_file)) {
        error('Results not found');
    }
    $result = json_decode(file_get_contents($result_file), true);

    // Builds the report generator command line for a given output file.
    // The virtualenv interpreter is preferred when it exists.
    $buildReportCommand = static function ($target_file, $extra_args = '') use ($result_file) {
        $venv_python = __DIR__ . '/venv/bin/python3';
        $interpreter = file_exists($venv_python) ? $venv_python : 'python3';
        $script = __DIR__ . '/report_generator.py';
        return escapeshellcmd($interpreter . ' ' . $script)
            . ' --input ' . escapeshellarg($result_file)
            . ' --output ' . escapeshellarg($target_file)
            . $extra_args;
    };

    if ($format === 'html') {
        // Generate HTML report via Python
        $html_file = RESULTS_DIR . '/' . $job_id . '.report.html';
        exec($buildReportCommand($html_file) . ' 2>&1', $output, $return_code);
        if ($return_code !== 0 || !file_exists($html_file)) {
            error('Report generation failed');
        }

        header('Content-Type: text/html; charset=utf-8');
        header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.html"');
        readfile($html_file);
        exit;
    }

    if ($format === 'pdf') {
        // Generate PDF report via Python WeasyPrint
        $pdf_file = RESULTS_DIR . '/' . $job_id . '.report.pdf';
        exec($buildReportCommand($pdf_file, ' --format pdf') . ' 2>&1', $output, $return_code);
        if ($return_code !== 0 || !file_exists($pdf_file)) {
            error('PDF report generation failed: ' . implode("\n", $output));
        }

        header('Content-Type: application/pdf');
        header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.pdf"');
        header('Content-Length: ' . filesize($pdf_file));
        readfile($pdf_file);
        exit;
    }

    // Default: JSON download
    header('Content-Type: application/json');
    header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.json"');
    echo json_encode($result, JSON_PRETTY_PRINT);
    exit;
}
/**
 * Dismiss an issue (mark it as a false positive).
 *
 * Reads a JSON request body with "job_id", "issue_index" and an optional
 * "reason" (truncated to 255 bytes), and records the dismissal in
 * "<job>.dismissed.json", keyed by issue index.
 */
function handleDismiss() {
    $data = json_decode(file_get_contents('php://input'), true);
    if (!is_array($data)) {
        // Missing, malformed or non-object bodies are treated as empty.
        $data = [];
    }

    $job_id = $data['job_id'] ?? '';

    // Only numeric values are valid indices; anything else is rejected below.
    $issue_index = (isset($data['issue_index']) && is_numeric($data['issue_index']))
        ? (int) $data['issue_index']
        : -1;

    // JSON bodies can carry arrays or objects here; substr() would throw a
    // TypeError on those, so non-scalar reasons are stored as empty.
    $raw_reason = $data['reason'] ?? '';
    $reason = is_scalar($raw_reason) ? substr((string) $raw_reason, 0, 255) : '';

    if (!is_string($job_id) || $job_id === '' || $issue_index < 0) {
        error('job_id and issue_index required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    $dismissed = file_exists($dismiss_file)
        ? json_decode(file_get_contents($dismiss_file), true)
        : [];
    if (!is_array($dismissed)) {
        // A corrupt or "null" file starts over as an empty map.
        $dismissed = [];
    }

    $dismissed[$issue_index] = ['reason' => $reason, 'dismissed_at' => date('Y-m-d H:i:s')];

    // LOCK_EX keeps concurrent writers from interleaving their output.
    file_put_contents($dismiss_file, json_encode($dismissed), LOCK_EX);

    success(['dismissed' => true, 'issue_index' => $issue_index]);
}
/**
 * Undo the dismissal of an issue.
 *
 * Reads a JSON request body with "job_id" and "issue_index" and removes that
 * index from "<job>.dismissed.json" when the file exists. Responds with
 * success whether or not the issue had been dismissed.
 */
function handleUndismiss() {
    $payload = json_decode(file_get_contents('php://input'), true) ?: [];

    $job_id = $payload['job_id'] ?? '';
    $issue_index = -1;
    if (isset($payload['issue_index'])) {
        $issue_index = (int) $payload['issue_index'];
    }

    if ($issue_index < 0 || empty($job_id)) {
        error('job_id and issue_index required');
    }
    $job_id = sanitizeJobId($job_id);

    $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json';
    if (file_exists($dismiss_file)) {
        // Drop the entry and write the remaining map back.
        $dismissed = json_decode(file_get_contents($dismiss_file), true);
        unset($dismissed[$issue_index]);
        file_put_contents($dismiss_file, json_encode($dismissed));
    }

    success(['undismissed' => true, 'issue_index' => $issue_index]);
}
/**
 * Emit a successful JSON response and end the request.
 *
 * The response has the shape {"success": true, "data": ...}.
 *
 * @param mixed $data Payload placed under the "data" key.
 */
function success($data) {
    $envelope = ['success' => true, 'data' => $data];
    echo json_encode($envelope);
    exit;
}
/**
 * Emit a JSON error response with HTTP status 400 and end the request.
 *
 * The response has the shape {"success": false, "error": "..."}.
 *
 * @param string $message Human-readable error description.
 */
function error($message) {
    http_response_code(400);
    $envelope = ['success' => false, 'error' => $message];
    echo json_encode($envelope);
    exit;
}