- Redesigned frontend with Outfit/Figtree typography, coral accent palette, noise texture, glassmorphism header, and staggered animations - Split monolithic index.html into modular JS (app, api, upload, batch, results, page-viewer, utils) and extracted CSS - Fixed worker.py to generate page images for Visual Page Inspector - Added Docker Compose stack (web, worker, redis, postgres) - Added batch upload, HTML report export, rate limiting, and Redis queue - Extended test suite with checker, remediation, worker, and DB tests Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
913 lines
26 KiB
PHP
913 lines
26 KiB
PHP
<?php
|
|
/**
 * Enterprise PDF Accessibility Checker - API Backend
 *
 * Handles file uploads, job processing, and result retrieval.
 */
|
|
|
|
// Load KEY=VALUE pairs from a local .env file into the process environment.
// Apache does not pass environment variables to PHP by default, so this file
// is the fallback source. Variables that already exist in the environment are
// never overwritten.
$envFile = __DIR__ . '/.env';
if (file_exists($envFile)) {
    $lines = file($envFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    // file() returns false when the file cannot be read; treat that as empty.
    foreach ($lines ?: [] as $line) {
        $line = trim($line);
        // Skip blank lines and "#" comments.
        if ($line === '' || $line[0] === '#') continue;
        // Skip lines that are not KEY=VALUE pairs.
        if (strpos($line, '=') === false) continue;
        list($key, $val) = explode('=', $line, 2);
        $key = trim($key);
        $val = trim($val);
        // A line such as "=value" has no variable name; putenv() rejects it.
        if ($key === '') continue;
        // getenv() returns false only when the variable is missing. A plain
        // truthiness test would wrongly override values such as "0" or "".
        if (getenv($key) === false) {
            putenv("$key=$val");
        }
    }
}
|
|
|
|
// ---- Storage locations and upload validation limits ----
define('UPLOAD_DIR', __DIR__ . '/uploads');
define('RESULTS_DIR', __DIR__ . '/results');
define('PYTHON_SCRIPT', __DIR__ . '/enterprise_pdf_checker.py');
define('MAX_FILE_SIZE', 50 * 1024 * 1024); // 50MB
define('ALLOWED_EXTENSIONS', ['pdf']);

// ---- Redis connection settings and key names ----
// Host and port come from the environment, with local defaults.
define('REDIS_HOST', getenv('REDIS_HOST') ?: 'localhost');
define('REDIS_PORT', intval(getenv('REDIS_PORT') ?: 6379));
define('REDIS_QUEUE', 'pdf:queue');
define('REDIS_STATUS_PREFIX', 'pdf:status:');
define('REDIS_RATE_PREFIX', 'pdf:rate:');

// ---- Make sure both storage directories exist before any handler runs ----
foreach ([UPLOAD_DIR, RESULTS_DIR] as $storageDir) {
    if (!is_dir($storageDir)) {
        mkdir($storageDir, 0755, true);
    }
}
|
|
|
|
/**
 * Get the shared Redis connection, connecting on first use.
 *
 * The client is cached in a static variable only after a successful connect,
 * so a failed attempt is retried on the next call instead of handing out a
 * disconnected client.
 *
 * @return Redis Connected client for REDIS_HOST:REDIS_PORT.
 * @throws Exception When the phpredis extension is missing or the connection
 *                   cannot be established. Callers in this file catch
 *                   Exception to fall back to non-Redis behaviour.
 */
function getRedis() {
    static $redis = null;
    if ($redis === null) {
        // Without this guard "new Redis()" raises an Error, which the
        // catch (Exception) blocks in the callers would not intercept.
        if (!class_exists('Redis')) {
            throw new RuntimeException('Redis extension is not available');
        }
        $client = new Redis();
        // Bounded connect timeout (seconds) so an unreachable host does not
        // stall the request. Some phpredis versions return false on failure
        // instead of throwing, so the return value is checked as well.
        if (!$client->connect(REDIS_HOST, REDIS_PORT, 2.0)) {
            throw new RuntimeException('Could not connect to Redis');
        }
        $redis = $client;
    }
    return $redis;
}
|
|
|
|
/**
 * Check a per-IP rate limit backed by a Redis counter.
 *
 * The counter key is REDIS_RATE_PREFIX . <client IP> . ':' . <action>.
 *
 * @param string $action Name of the limited action (part of the counter key).
 * @param int    $limit  Maximum number of requests allowed per window.
 * @param int    $window Window length in seconds (TTL of the counter key).
 * @return bool True when the request is allowed. Also true when Redis raises
 *              an exception: the limiter deliberately fails open.
 */
function checkRateLimit($action, $limit, $window) {
    try {
        $redis = getRedis();
        $ip = $_SERVER['REMOTE_ADDR'] ?? 'unknown';
        $key = REDIS_RATE_PREFIX . $ip . ':' . $action;
        $current = $redis->incr($key);
        // INCR and EXPIRE are two separate commands. If the EXPIRE after the
        // first increment was lost, the key would live forever and lock the
        // client out permanently. A TTL of -1 means "no expiry set", so the
        // window is re-armed in that case too.
        if ($current === 1 || $redis->ttl($key) === -1) {
            $redis->expire($key, $window);
        }
        return $current <= $limit;
    } catch (Exception $e) {
        return true; // Allow if Redis is down
    }
}
|
|
|
|
/**
 * Validate a job ID to prevent path traversal attacks.
 *
 * A valid ID is the literal prefix "pdf_" followed by one or more lowercase
 * hexadecimal characters. Anything else ends the request through error().
 *
 * @param mixed $job_id Raw job ID taken from the request.
 * @return string The same job ID, once validated.
 */
function sanitizeJobId($job_id) {
    // \A and \z anchor to the absolute start and end of the string. A plain
    // "$" also matches just before a trailing newline, which would let
    // "pdf_ab\n" through and into file paths. The is_string() guard rejects
    // array input such as job_id[]=x before it reaches preg_match().
    if (!is_string($job_id) || !preg_match('/\Apdf_[a-f0-9]+\z/', $job_id)) {
        error('Invalid job ID format');
    }
    return $job_id;
}
|
|
|
|
// CORS headers for API.
// Only origins on this allow-list (or any origin in development mode) get
// their origin echoed back.
$allowed_origins = [
    'https://ai-sandbox.oliver.solutions',
    'http://localhost:8888',
    'http://127.0.0.1:8888',
    'http://localhost:8000',
    'http://127.0.0.1:8000',
];
$origin = $_SERVER['HTTP_ORIGIN'] ?? '';
// NOTE(review): isDevelopmentMode() is expected to come from auth.php, which
// is loaded later in this file. Unless something else defines it earlier, the
// function_exists() guard is false here and the dev-mode branch never applies.
if (in_array($origin, $allowed_origins, true) || (function_exists('isDevelopmentMode') && isDevelopmentMode())) {
    header('Access-Control-Allow-Origin: ' . ($origin ?: '*'));
} else if ($origin === '') {
    // No Origin header (non-browser or same-origin request): advertise the
    // primary allowed origin.
    header('Access-Control-Allow-Origin: ' . ($allowed_origins[0]));
}
// A disallowed origin gets no Access-Control-Allow-Origin header at all.
// Replying with "null" would grant access to documents whose origin
// serialises to "null", such as sandboxed iframes.

// The response depends on the request Origin, so caches must key on it.
header('Vary: Origin');
header('Access-Control-Allow-Methods: POST, GET, OPTIONS, DELETE');
header('Access-Control-Allow-Headers: Content-Type, X-API-Key, Authorization');
header('Content-Type: application/json');

// Handle preflight: the headers above are the whole answer.
if ($_SERVER['REQUEST_METHOD'] === 'OPTIONS') {
    exit(0);
}
|
|
|
|
// Every request that reaches this point must be authenticated.
require_once __DIR__ . '/auth.php';
requireAuth();

// The action name is read from the query string first, then the form body.
$action = $_GET['action'] ?? $_POST['action'] ?? '';

// Routing table: action name => handler function defined in this file.
$handlers = [
    'upload'       => 'handleUpload',
    'check'        => 'handleCheck',
    'status'       => 'handleStatus',
    'result'       => 'handleResult',
    'list'         => 'handleList',
    'delete'       => 'handleDelete',
    'debug'        => 'handleDebug',
    'image'        => 'handleImage',
    'remediate'    => 'handleRemediate',
    'download'     => 'handleDownload',
    'stats'        => 'handleStats',
    'batch_upload' => 'handleBatchUpload',
    'batch_status' => 'handleBatchStatus',
    'export'       => 'handleExport',
];

// Unknown or non-string actions are rejected with the generic error.
if (is_string($action) && isset($handlers[$action])) {
    $handlers[$action]();
} else {
    error('Invalid action');
}
|
|
|
|
/**
 * Handle a single PDF upload (form field "pdf").
 *
 * Validates the upload (error code, size, extension, "%PDF-" header), stores
 * it as UPLOAD_DIR/<job_id>.pdf, writes RESULTS_DIR/<job_id>.meta.json and
 * responds with the new job ID. Every failure ends the request through
 * error(); exceeding the rate limit responds with HTTP 429.
 */
function handleUpload() {
    // Rate limit: 10 uploads/hour per IP
    if (!checkRateLimit('upload', 10, 3600)) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Upload rate limit exceeded. Try again later.']);
        exit;
    }

    if (!isset($_FILES['pdf'])) {
        error('No file uploaded');
    }

    $file = $_FILES['pdf'];

    // Validate file
    if ($file['error'] !== UPLOAD_ERR_OK) {
        error('Upload error: ' . $file['error']);
    }

    if ($file['size'] > MAX_FILE_SIZE) {
        error('File too large. Max size: ' . (MAX_FILE_SIZE / 1024 / 1024) . 'MB');
    }

    $ext = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION));
    if (!in_array($ext, ALLOWED_EXTENSIONS, true)) {
        error('Invalid file type. Only PDF files allowed.');
    }

    // Validate PDF magic bytes: only the first five bytes are read.
    $header = file_get_contents($file['tmp_name'], false, null, 0, 5);
    if ($header !== '%PDF-') {
        error('File is not a valid PDF (invalid file header)');
    }

    // Generate cryptographically secure job ID; it doubles as the stored
    // file name, so the client-supplied name never touches the filesystem.
    $job_id = 'pdf_' . bin2hex(random_bytes(16));
    $filename = $job_id . '.pdf';
    $filepath = UPLOAD_DIR . '/' . $filename;

    // Move file
    if (!move_uploaded_file($file['tmp_name'], $filepath)) {
        error('Failed to save file');
    }

    // Create job metadata
    $job_data = [
        'job_id' => $job_id,
        'original_filename' => $file['name'],
        'uploaded_at' => date('Y-m-d H:i:s'),
        'file_size' => $file['size'],
        'status' => 'uploaded',
        'filepath' => $filepath
    ];

    $meta_written = file_put_contents(
        RESULTS_DIR . '/' . $job_id . '.meta.json',
        json_encode($job_data, JSON_PRETTY_PRINT)
    );

    // Without the metadata file every later action answers "Job not found",
    // so do not report success and do not leave the PDF behind.
    if ($meta_written === false) {
        @unlink($filepath);
        error('Failed to save file');
    }

    success([
        'job_id' => $job_id,
        'filename' => $file['name'],
        'message' => 'File uploaded successfully'
    ]);
}
|
|
|
|
/**
 * Handle PDF accessibility check — push job to Redis queue.
 *
 * Reads "job_id" and the optional "quick_mode" flag from POST, enqueues the
 * job on REDIS_QUEUE and stores an initial "queued" status in Redis. If the
 * job could not be enqueued because Redis raised an exception, the checker
 * script is launched directly as a background process instead. In both cases
 * the job metadata file is updated and a "queued" response is sent.
 */
function handleCheck() {
    $job_id = $_POST['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    // Rate limit: 30 checks/hour per IP
    if (!checkRateLimit('check', 30, 3600)) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Rate limit exceeded. Try again later.']);
        exit;
    }

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    // A truncated or hand-edited metadata file decodes to null; stop before
    // an empty path is handed to the queue or the shell.
    if (!is_array($job_data) || empty($job_data['filepath'])) {
        error('Job metadata is unreadable');
    }

    // Form values arrive as strings, and (bool)"false" is true. Parse the
    // flag so that "false", "0", "off" and "" all mean "not quick".
    $quick_mode = filter_var($_POST['quick_mode'] ?? false, FILTER_VALIDATE_BOOLEAN);

    // Push job to Redis queue for worker processing
    $queued = false;
    try {
        $redis = getRedis();
        $payload = json_encode([
            'job_id' => $job_id,
            'pdf_path' => $job_data['filepath'],
            'original_filename' => $job_data['original_filename'] ?? '',
            'options' => [
                'quick_mode' => $quick_mode,
            ],
            'queued_at' => time()
        ]);
        if ($redis->lPush(REDIS_QUEUE, $payload) === false) {
            throw new RuntimeException('Failed to enqueue job');
        }
        $queued = true;

        // Set initial status in Redis
        $redis->setex(REDIS_STATUS_PREFIX . $job_id, 86400, json_encode([
            'status' => 'queued',
            'progress' => 0,
            'message' => 'Waiting in queue',
            'updated_at' => time()
        ]));
    } catch (Exception $e) {
        // Fallback to direct exec if Redis is unavailable (local dev without
        // Docker). Skipped when the job is already on the queue (only the
        // status write failed), otherwise the PDF would be processed twice.
        if (!$queued) {
            $pdf_path = $job_data['filepath'];
            $output_path = RESULTS_DIR . '/' . $job_id . '.result.json';
            $venv_python = __DIR__ . '/venv/bin/python3';
            $python_bin = file_exists($venv_python) ? $venv_python : 'python3';

            // Quote every argument on its own: escapeshellcmd() on a joined
            // string does not protect paths that contain spaces.
            $cmd = escapeshellarg($python_bin) . ' ' .
                   escapeshellarg(PYTHON_SCRIPT) . ' ' .
                   escapeshellarg($pdf_path) . ' ' .
                   '--output ' . escapeshellarg($output_path);

            if ($quick_mode) {
                $cmd .= ' --quick';
            }

            // NOTE(review): API keys passed as command-line arguments are
            // visible in the process list. Passing them through the
            // environment would be safer if the script supports it.
            $anthropic_key = $_POST['anthropic_key'] ?? getenv('ANTHROPIC_API_KEY');
            $google_key = $_POST['google_key'] ?? $_POST['google_credentials'] ?? getenv('GOOGLE_API_KEY');

            if ($anthropic_key) {
                $cmd .= ' --anthropic-key ' . escapeshellarg($anthropic_key);
            }
            if ($google_key) {
                // NOTE(review): a client-supplied value is tested as a server
                // file path here — confirm this is intended outside local dev.
                if (file_exists($google_key)) {
                    $cmd .= ' --google-credentials ' . escapeshellarg($google_key);
                } else {
                    $cmd .= ' --google-key ' . escapeshellarg($google_key);
                }
            }

            // Prepend common Homebrew/local bin directories to PATH for the
            // child process.
            $env_path = getenv('PATH');
            putenv("PATH=/opt/homebrew/bin:/usr/local/bin:{$env_path}");

            // stdout and stderr both go to the per-job log; the trailing "&"
            // backgrounds the process so this request returns immediately.
            $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';
            $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1 &';
            exec($cmd, $output, $return_code);
        }
    }

    // Update meta file
    $job_data['status'] = 'queued';
    $job_data['started_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    success([
        'job_id' => $job_id,
        'status' => 'queued',
        'message' => 'Check queued for processing'
    ]);
}
|
|
|
|
/**
 * Check job status — reads from Redis (real-time) with file fallback.
 *
 * Starts from the job metadata file, overlays status, progress and message
 * from the Redis status key when one exists, then applies file-based rules:
 * an existing result file means "completed"; a non-empty error log on a job
 * that started more than 300 seconds ago means "failed". Both outcomes are
 * written back to the metadata file.
 */
function handleStatus() {
    $job_id = $_GET['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        error('Job metadata is unreadable');
    }

    // Try Redis first for real-time progress
    try {
        $redis = getRedis();
        $redis_status = $redis->get(REDIS_STATUS_PREFIX . $job_id);
        $status_data = $redis_status ? json_decode($redis_status, true) : null;
        // Ignore a status entry that is not the expected JSON object.
        if (is_array($status_data) && isset($status_data['status'])) {
            $job_data['status'] = $status_data['status'];
            $job_data['progress'] = $status_data['progress'] ?? 0;
            $job_data['status_message'] = $status_data['message'] ?? '';
        }
    } catch (Exception $e) {
        // Redis unavailable — fall through to file-based check
    }

    // File-based fallback: check if result exists
    if (file_exists($result_file)) {
        $job_data['status'] = 'completed';
        $job_data['completed_at'] = date('Y-m-d H:i:s', filemtime($result_file));
        file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));
    } else if (
        file_exists($error_log)
        // handleCheck() stores "queued" in the metadata, so without Redis the
        // status never becomes "processing"; both count as in flight here.
        && in_array($job_data['status'] ?? '', ['queued', 'processing'], true)
    ) {
        $error_content = file_get_contents($error_log);
        if (!empty($error_content)) {
            $started = strtotime($job_data['started_at'] ?? 'now');
            if ($started !== false && time() - $started > 300) {
                $job_data['status'] = 'failed';
                $job_data['error'] = 'Process timeout or error';
                // Only the last 1000 bytes of the log are returned.
                $job_data['error_log'] = substr($error_content, -1000);
                // Persist the failure so the list, stats and batch views,
                // which read the metadata file, report it too.
                file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));
            }
        }
    }

    success($job_data);
}
|
|
|
|
/**
 * Get check results.
 *
 * Returns the decoded contents of RESULTS_DIR/<job_id>.result.json for the
 * job named by the "job_id" query parameter.
 */
function handleResult() {
    $job_id = $_GET['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

    if (!file_exists($result_file)) {
        error('Results not found. Check may still be processing.');
    }

    $result = json_decode(file_get_contents($result_file), true);

    // A file that does not parse (for example one still being written) must
    // not be reported as a successful, empty result.
    if ($result === null && json_last_error() !== JSON_ERROR_NONE) {
        error('Results not found. Check may still be processing.');
    }

    success($result);
}
|
|
|
|
/**
 * List all jobs.
 *
 * Reads every *.meta.json file in RESULTS_DIR, marks a job "completed" when
 * its result file exists, and returns the jobs sorted by upload time, newest
 * first. Metadata files that do not decode to an array are skipped.
 */
function handleList() {
    $jobs = [];

    // glob() returns false on error; treat that as "no jobs".
    $files = glob(RESULTS_DIR . '/*.meta.json') ?: [];
    $suffix_length = strlen('.meta.json');

    foreach ($files as $file) {
        $job_data = json_decode(file_get_contents($file), true);
        if (!is_array($job_data)) {
            continue;
        }

        // Check if completed. Only the trailing suffix is swapped, so a
        // directory name containing ".meta.json" cannot be rewritten.
        $result_file = substr($file, 0, -$suffix_length) . '.result.json';
        if (file_exists($result_file)) {
            $job_data['status'] = 'completed';
        }

        $jobs[] = $job_data;
    }

    // Sort by upload time (newest first). A missing or unparsable timestamp
    // sorts as 0, i.e. oldest.
    usort($jobs, function($a, $b) {
        $time_a = (int) strtotime($a['uploaded_at'] ?? '');
        $time_b = (int) strtotime($b['uploaded_at'] ?? '');
        return $time_b <=> $time_a;
    });

    success(['jobs' => $jobs]);
}
|
|
|
|
/**
 * Delete a job.
 *
 * Removes the uploaded PDF, the metadata file and every per-job artifact the
 * other handlers in this file create: result JSON, error and remediation
 * logs, the remediated PDF, the HTML report and the page-image directory.
 * Deletion is best-effort; files that are already gone are ignored.
 */
function handleDelete() {
    $job_id = $_POST['job_id'] ?? $_GET['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        // Unreadable metadata must not block cleanup; fall back to the
        // conventional path used by the upload handlers.
        $job_data = [];
    }

    $base = RESULTS_DIR . '/' . $job_id;
    $paths = [
        $job_data['filepath'] ?? (UPLOAD_DIR . '/' . $job_id . '.pdf'),
        $job_data['remediated_pdf'] ?? (UPLOAD_DIR . '/' . $job_id . '_remediated.pdf'),
        $base . '.result.json',
        $base . '.error.log',
        $base . '.remediation.log',
        $base . '.report.html',
        // Metadata goes last so an interrupted delete can be retried.
        $meta_file,
    ];

    // Page images live in their own directory, which must be emptied before
    // it can be removed.
    $images_dir = $base . '.result_images';
    if (is_dir($images_dir)) {
        foreach (glob($images_dir . '/*') ?: [] as $image) {
            if (is_file($image)) {
                @unlink($image);
            }
        }
        @rmdir($images_dir);
    }

    // Delete files
    foreach ($paths as $path) {
        if (is_string($path) && is_file($path)) {
            @unlink($path);
        }
    }

    success(['message' => 'Job deleted']);
}
|
|
|
|
/**
 * Debug endpoint.
 *
 * Development-mode only. Reports which per-job files exist, the decoded
 * metadata, the full error log, the result file size and the version string
 * printed by the virtualenv Python interpreter.
 */
function handleDebug() {
    // Debug endpoint only available in development mode
    require_once __DIR__ . '/auth.php';
    if (!isDevelopmentMode()) {
        error('Debug endpoint disabled in production');
    }

    $job_id = $_GET['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    $error_log = RESULTS_DIR . '/' . $job_id . '.error.log';

    $debug_info = [
        'job_id' => $job_id,
        'meta_exists' => file_exists($meta_file),
        'result_exists' => file_exists($result_file),
        'error_log_exists' => file_exists($error_log),
        'files' => []
    ];

    if (file_exists($meta_file)) {
        $debug_info['meta'] = json_decode(file_get_contents($meta_file), true);
    }

    if (file_exists($error_log)) {
        $debug_info['error_log'] = file_get_contents($error_log);
    }

    if (file_exists($result_file)) {
        $debug_info['result_size'] = filesize($result_file);
    }

    // Test Python. The interpreter path is quoted so an install directory
    // containing spaces or shell metacharacters cannot alter the command.
    $venv_python = __DIR__ . '/venv/bin/python3';
    exec(escapeshellarg($venv_python) . ' --version 2>&1', $python_version);
    $debug_info['python_version'] = implode("\n", $python_version);

    success($debug_info);
}
|
|
|
|
/**
 * Serve page images.
 *
 * Streams RESULTS_DIR/<job_id>.result_images/page_<n>.png as image/png for
 * the "job_id" and "page" query parameters. A missing image produces a JSON
 * 404 response.
 */
function handleImage() {
    $requested_job = $_GET['job_id'] ?? '';
    $requested_page = $_GET['page'] ?? '';

    // Both parameters are mandatory.
    if (empty($requested_job) || empty($requested_page)) {
        error('Job ID and page number required');
    }
    $requested_job = sanitizeJobId($requested_job);

    // The page value is reduced to an integer before it enters the path.
    $png_path = sprintf(
        '%s/%s.result_images/page_%d.png',
        RESULTS_DIR,
        $requested_job,
        intval($requested_page)
    );

    if (file_exists($png_path)) {
        // Serve the image
        header('Content-Type: image/png');
        header('Cache-Control: public, max-age=86400'); // Cache for 1 day
        readfile($png_path);
        exit;
    }

    // Nothing rendered for that page.
    http_response_code(404);
    header('Content-Type: application/json');
    echo json_encode(['success' => false, 'error' => 'Image not found']);
    exit;
}
|
|
|
|
/**
 * Auto-remediate PDF accessibility issues.
 *
 * Requires a finished check whose result reports a non-zero
 * "auto_fixable_count". Runs pdf_remediation.py synchronously with "--all",
 * writing UPLOAD_DIR/<job_id>_remediated.pdf, records the output path in the
 * job metadata and returns a download URL.
 */
function handleRemediate() {
    $job_id = $_POST['job_id'] ?? '';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

    if (!file_exists($meta_file) || !file_exists($result_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    $result_data = json_decode(file_get_contents($result_file), true);

    // Stop before an empty path is handed to the shell.
    if (!is_array($job_data) || empty($job_data['filepath'])) {
        error('Job metadata is unreadable');
    }

    // Check if there are fixable issues
    if (!isset($result_data['auto_fixable_count']) || $result_data['auto_fixable_count'] == 0) {
        error('No auto-fixable issues found');
    }

    $original_pdf = $job_data['filepath'];
    $remediated_pdf = UPLOAD_DIR . '/' . $job_id . '_remediated.pdf';

    // Use absolute venv path
    $venv_python = __DIR__ . '/venv/bin/python3';
    $python_bin = file_exists($venv_python) ? $venv_python : 'python3';
    $remediation_script = __DIR__ . '/pdf_remediation.py';

    // Build command - apply all safe fixes. Every argument is quoted on its
    // own: escapeshellcmd() on a joined string does not protect paths that
    // contain spaces.
    $cmd = escapeshellarg($python_bin) . ' ' .
           escapeshellarg($remediation_script) . ' ' .
           escapeshellarg($original_pdf) . ' ' .
           '--output ' . escapeshellarg($remediated_pdf) . ' ' .
           '--all';

    // Set PATH for poppler
    $env_path = getenv('PATH');
    $poppler_paths = '/opt/homebrew/bin:/usr/local/bin';
    putenv("PATH={$poppler_paths}:{$env_path}");

    // Run remediation; stdout and stderr both go to the per-job log.
    $error_log = RESULTS_DIR . '/' . $job_id . '.remediation.log';
    $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1';

    exec($cmd, $output, $return_code);

    // Check if remediation succeeded
    if ($return_code !== 0 || !file_exists($remediated_pdf)) {
        $log_content = file_exists($error_log) ? file_get_contents($error_log) : 'Unknown error';
        // Only the last 500 bytes of the log are returned to the client.
        error('Remediation failed: ' . substr($log_content, -500));
    }

    // Store remediated file info
    $job_data['remediated_pdf'] = $remediated_pdf;
    $job_data['remediated_at'] = date('Y-m-d H:i:s');
    file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT));

    success([
        'job_id' => $job_id,
        'remediated_pdf' => basename($remediated_pdf),
        'original_filename' => $job_data['original_filename'] ?? '',
        'fixes_applied' => $result_data['auto_fixable_count'],
        'download_url' => 'api.php?action=download&job_id=' . $job_id . '&type=remediated',
        'message' => 'PDF remediated successfully'
    ]);
}
|
|
|
|
/**
 * Download original or remediated PDF.
 *
 * Query parameters: "job_id" and an optional "type" ("original" by default,
 * or "remediated"). The remediated file is offered under the original base
 * name with "_fixed.pdf" appended.
 */
function handleDownload() {
    $job_id = $_GET['job_id'] ?? '';
    $type = $_GET['type'] ?? 'original'; // 'original' or 'remediated'

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';

    if (!file_exists($meta_file)) {
        error('Job not found');
    }

    $job_data = json_decode(file_get_contents($meta_file), true);
    if (!is_array($job_data)) {
        error('Job metadata is unreadable');
    }

    $original_name = $job_data['original_filename'] ?? 'document.pdf';

    if ($type === 'remediated') {
        if (!isset($job_data['remediated_pdf']) || !file_exists($job_data['remediated_pdf'])) {
            error('Remediated PDF not found');
        }
        $file_path = $job_data['remediated_pdf'];
        $filename = pathinfo($original_name, PATHINFO_FILENAME) . '_fixed.pdf';
    } else {
        $file_path = $job_data['filepath'] ?? '';
        $filename = $original_name;
    }

    // The stored file may have been removed since upload; fail cleanly
    // instead of sending headers for a body that cannot be read.
    if (!is_string($file_path) || !is_file($file_path)) {
        error('File not found');
    }

    // The display name comes from the client's upload. Quotes, backslashes,
    // control characters and non-ASCII bytes would break the quoted-string
    // form of the header, so that form gets an ASCII-safe fallback and the
    // exact name is carried percent-encoded in filename*.
    $ascii_name = preg_replace('/[^\x20-\x7E]|["\\\\]/', '_', $filename);

    // Serve the file
    header('Content-Type: application/pdf');
    header(
        'Content-Disposition: attachment; filename="' . $ascii_name . '"'
        . "; filename*=UTF-8''" . rawurlencode($filename)
    );
    header('Content-Length: ' . filesize($file_path));
    readfile($file_path);
    exit;
}
|
|
|
|
/**
 * Get aggregate job statistics.
 *
 * Counts jobs from the *.meta.json files in RESULTS_DIR: a job with a result
 * file is "completed", one whose metadata status is "failed" is "failed",
 * anything else is "processing". Adds the current length of the Redis queue,
 * or 0 when Redis raises an exception.
 */
function handleStats() {
    $done = 0;
    $broken = 0;
    $in_flight = 0;

    // Count jobs from meta files
    $meta_paths = glob(RESULTS_DIR . '/*.meta.json');
    foreach ($meta_paths as $meta_path) {
        $meta = json_decode(file_get_contents($meta_path), true);
        $has_result = file_exists(str_replace('.meta.json', '.result.json', $meta_path));

        if ($has_result) {
            $done++;
            continue;
        }
        if (($meta['status'] ?? '') === 'failed') {
            $broken++;
            continue;
        }
        $in_flight++;
    }

    // Get queue length from Redis
    $pending = 0;
    try {
        $pending = getRedis()->lLen(REDIS_QUEUE);
    } catch (Exception $e) {
        // Redis unavailable
    }

    success([
        'total_jobs' => $done + $broken + $in_flight,
        'completed' => $done,
        'failed' => $broken,
        'processing' => $in_flight,
        'queue_length' => $pending
    ]);
}
|
|
|
|
/**
 * Handle batch file upload — accepts multiple PDFs (form field "pdfs[]").
 *
 * Each file goes through the same checks as a single upload (error code,
 * size, extension, "%PDF-" header) and becomes its own job. Files that fail
 * are reported in "errors" without aborting the batch. A manifest listing the
 * created job IDs is written to RESULTS_DIR/<batch_id>.batch.json.
 */
function handleBatchUpload() {
    if (!checkRateLimit('upload', 10, 3600)) {
        http_response_code(429);
        echo json_encode(['success' => false, 'error' => 'Upload rate limit exceeded.']);
        exit;
    }

    if (!isset($_FILES['pdfs']) || !is_array($_FILES['pdfs']['name'])) {
        error('No files uploaded. Use "pdfs[]" as the file field name.');
    }

    $batch_id = 'batch_' . bin2hex(random_bytes(8));
    $file_count = count($_FILES['pdfs']['name']);
    $uploaded = [];
    $errors = [];

    // Iterate over the keys actually present: a field such as "pdfs[a]"
    // yields non-numeric keys that a 0..n-1 counter would miss.
    foreach (array_keys($_FILES['pdfs']['name']) as $i) {
        $name = $_FILES['pdfs']['name'][$i];
        $tmp = $_FILES['pdfs']['tmp_name'][$i];
        $size = $_FILES['pdfs']['size'][$i];
        $err = $_FILES['pdfs']['error'][$i];

        if ($err !== UPLOAD_ERR_OK) {
            $errors[] = ['filename' => $name, 'error' => "Upload error code: $err"];
            continue;
        }
        if ($size > MAX_FILE_SIZE) {
            $errors[] = ['filename' => $name, 'error' => 'File too large'];
            continue;
        }
        $ext = strtolower(pathinfo($name, PATHINFO_EXTENSION));
        if (!in_array($ext, ALLOWED_EXTENSIONS, true)) {
            $errors[] = ['filename' => $name, 'error' => 'Not a PDF file'];
            continue;
        }
        // Magic-byte check: only the first five bytes are read.
        $header = file_get_contents($tmp, false, null, 0, 5);
        if ($header !== '%PDF-') {
            $errors[] = ['filename' => $name, 'error' => 'Invalid PDF header'];
            continue;
        }

        $job_id = 'pdf_' . bin2hex(random_bytes(16));
        $filename = $job_id . '.pdf';
        $filepath = UPLOAD_DIR . '/' . $filename;

        if (!move_uploaded_file($tmp, $filepath)) {
            $errors[] = ['filename' => $name, 'error' => 'Failed to save'];
            continue;
        }

        $job_data = [
            'job_id' => $job_id,
            'batch_id' => $batch_id,
            'original_filename' => $name,
            'uploaded_at' => date('Y-m-d H:i:s'),
            'file_size' => $size,
            'status' => 'uploaded',
            'filepath' => $filepath
        ];
        $meta_written = file_put_contents(
            RESULTS_DIR . '/' . $job_id . '.meta.json',
            json_encode($job_data, JSON_PRETTY_PRINT)
        );
        // A job without metadata cannot be found by any other action, so
        // report it as failed and remove the stored PDF.
        if ($meta_written === false) {
            @unlink($filepath);
            $errors[] = ['filename' => $name, 'error' => 'Failed to save'];
            continue;
        }

        $uploaded[] = ['job_id' => $job_id, 'filename' => $name];
    }

    // Save batch manifest
    $batch_data = [
        'batch_id' => $batch_id,
        'created_at' => date('Y-m-d H:i:s'),
        'total_files' => $file_count,
        'jobs' => array_column($uploaded, 'job_id'),
    ];
    $manifest_written = file_put_contents(
        RESULTS_DIR . '/' . $batch_id . '.batch.json',
        json_encode($batch_data, JSON_PRETTY_PRINT)
    );
    // Without the manifest the returned batch ID could never be queried.
    if ($manifest_written === false) {
        error('Failed to save batch manifest');
    }

    success([
        'batch_id' => $batch_id,
        'uploaded' => $uploaded,
        'errors' => $errors,
        'message' => count($uploaded) . ' of ' . $file_count . ' files uploaded'
    ]);
}
|
|
|
|
/**
 * Get status of a batch job.
 *
 * Loads the manifest for the "batch_id" query parameter and reports filename,
 * status and accessibility score for every job in it. The overall status is
 * "completed" when every job has a result file, "finished" when every job
 * has either completed or failed, and "processing" otherwise.
 */
function handleBatchStatus() {
    $batch_id = $_GET['batch_id'] ?? '';
    // \A and \z anchor to the absolute start and end of the string; a plain
    // "$" would also accept an ID followed by a newline. The is_string()
    // guard rejects array input.
    if (!is_string($batch_id) || !preg_match('/\Abatch_[a-f0-9]+\z/', $batch_id)) {
        error('Invalid batch ID');
    }

    $batch_file = RESULTS_DIR . '/' . $batch_id . '.batch.json';
    if (!file_exists($batch_file)) {
        error('Batch not found');
    }

    $batch = json_decode(file_get_contents($batch_file), true);
    // A manifest that does not decode to the expected shape is treated the
    // same as a missing one.
    if (!is_array($batch) || !isset($batch['jobs']) || !is_array($batch['jobs'])) {
        error('Batch not found');
    }

    $jobs = [];
    $completed = 0;
    $failed = 0;

    foreach ($batch['jobs'] as $job_id) {
        $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json';
        $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';

        $status = 'unknown';
        $score = null;
        $filename = '';

        if (file_exists($meta_file)) {
            $meta = json_decode(file_get_contents($meta_file), true);
            $status = $meta['status'] ?? 'uploaded';
            $filename = $meta['original_filename'] ?? '';
        }
        // A result file overrides whatever status the metadata holds.
        if (file_exists($result_file)) {
            $status = 'completed';
            $result = json_decode(file_get_contents($result_file), true);
            $score = $result['accessibility_score'] ?? null;
            $completed++;
        } else if ($status === 'failed') {
            $failed++;
        }

        $jobs[] = [
            'job_id' => $job_id,
            'filename' => $filename,
            'status' => $status,
            'score' => $score
        ];
    }

    $total = count($batch['jobs']);
    $overall_status = ($completed === $total) ? 'completed' :
        (($completed + $failed === $total) ? 'finished' : 'processing');

    success([
        'batch_id' => $batch_id,
        'status' => $overall_status,
        'total' => $total,
        'completed' => $completed,
        'failed' => $failed,
        'jobs' => $jobs
    ]);
}
|
|
|
|
/**
 * Export results as HTML or JSON.
 *
 * Query parameters: "job_id" and "format". With format "html" the report is
 * generated by report_generator.py and sent as an attachment; any other
 * value sends the result JSON, pretty-printed, as an attachment.
 */
function handleExport() {
    $job_id = $_GET['job_id'] ?? '';
    $format = $_GET['format'] ?? 'json';

    if (empty($job_id)) {
        error('Job ID required');
    }
    $job_id = sanitizeJobId($job_id);

    $result_file = RESULTS_DIR . '/' . $job_id . '.result.json';
    if (!file_exists($result_file)) {
        error('Results not found');
    }

    if ($format === 'html') {
        // Generate HTML report via Python
        $venv_python = __DIR__ . '/venv/bin/python3';
        $python_bin = file_exists($venv_python) ? $venv_python : 'python3';
        $report_script = __DIR__ . '/report_generator.py';

        $html_file = RESULTS_DIR . '/' . $job_id . '.report.html';

        // Every argument is quoted on its own: escapeshellcmd() on a joined
        // string does not protect paths that contain spaces.
        $cmd = escapeshellarg($python_bin) . ' ' . escapeshellarg($report_script) .
               ' --input ' . escapeshellarg($result_file) .
               ' --output ' . escapeshellarg($html_file);

        exec($cmd . ' 2>&1', $output, $return_code);

        if ($return_code !== 0 || !file_exists($html_file)) {
            error('Report generation failed');
        }

        header('Content-Type: text/html; charset=utf-8');
        header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.html"');
        readfile($html_file);
        exit;
    }

    // Default: JSON download. The file is decoded and re-encoded so the
    // download is always pretty-printed; a file that does not parse is
    // reported instead of being sent as the literal "null".
    $result = json_decode(file_get_contents($result_file), true);
    if ($result === null && json_last_error() !== JSON_ERROR_NONE) {
        error('Results not found');
    }

    header('Content-Type: application/json');
    header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.json"');
    echo json_encode($result, JSON_PRETTY_PRINT);
    exit;
}
|
|
|
|
/**
 * Send success response and end the request.
 *
 * Emits {"success": true, "data": <data>} as JSON.
 *
 * @param mixed $data Payload placed under the "data" key.
 */
function success($data) {
    // With default flags json_encode() returns false for the whole document
    // as soon as one value cannot be encoded (for example a filename that is
    // not valid UTF-8), leaving the client with an empty body. Partial output
    // replaces only the offending value with null.
    echo json_encode([
        'success' => true,
        'data' => $data
    ], JSON_PARTIAL_OUTPUT_ON_ERROR);
    exit;
}
|
|
|
|
/**
 * Send error response and end the request.
 *
 * Emits {"success": false, "error": <message>} as JSON.
 *
 * @param string $message Human-readable error description.
 * @param int    $status  HTTP status code to send. Defaults to 400, which is
 *                        what every existing caller gets.
 */
function error($message, $status = 400) {
    http_response_code($status);
    // Partial output keeps the response well-formed even when the message
    // contains bytes that are not valid UTF-8 (for example a log excerpt).
    echo json_encode([
        'success' => false,
        'error' => $message
    ], JSON_PARTIAL_OUTPUT_ON_ERROR);
    exit;
}
|