diff --git a/Dockerfile.cloudrun b/Dockerfile.cloudrun deleted file mode 100644 index f4d8e9b..0000000 --- a/Dockerfile.cloudrun +++ /dev/null @@ -1,29 +0,0 @@ -FROM python:3.11-slim - -# Install system dependencies for PDF processing -RUN apt-get update && apt-get install -y --no-install-recommends \ - tesseract-ocr \ - tesseract-ocr-eng \ - poppler-utils \ - ghostscript \ - libgl1 \ - libglib2.0-0 \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -# Install Python dependencies -COPY requirements-cloudrun.txt . -RUN pip install --no-cache-dir -r requirements-cloudrun.txt - -# Copy application code (no worker, redis_queue, or db_manager) -COPY cloudrun_service.py . -COPY enterprise_pdf_checker.py . -COPY pdf_remediation.py . -COPY logger_config.py . -COPY retry_helper.py . - -# Cloud Run sets $PORT; gunicorn binds to it -# --workers 1 --threads 1: Cloud Run concurrency=1, one request at a time -# --timeout 900: allow up to 15 minutes for large PDFs -CMD exec gunicorn --bind :$PORT --workers 1 --threads 1 --timeout 900 cloudrun_service:app diff --git a/Dockerfile.web b/Dockerfile.web deleted file mode 100644 index aaaf196..0000000 --- a/Dockerfile.web +++ /dev/null @@ -1,27 +0,0 @@ -FROM php:8.2-fpm-alpine - -# Install Nginx, Python (for report generation), PostgreSQL libs, and PHP extensions -RUN apk add --no-cache nginx python3 postgresql-dev && \ - docker-php-ext-install pdo pdo_pgsql - -# Copy Nginx config -COPY nginx.conf /etc/nginx/http.d/default.conf - -# Copy application files -WORKDIR /app -COPY api.php auth.php index.html ./ -COPY report_generator.py ./ -COPY css/ css/ -COPY js/ js/ - -# Create directories -RUN mkdir -p /app/uploads /app/results /app/logs && \ - chown -R www-data:www-data /app/uploads /app/results /app/logs - -# Start both Nginx and PHP-FPM -COPY docker-entrypoint-web.sh /docker-entrypoint-web.sh -RUN chmod +x /docker-entrypoint-web.sh - -EXPOSE 80 - -CMD ["/docker-entrypoint-web.sh"] diff --git a/Dockerfile.worker b/Dockerfile.worker 
deleted file mode 100644 index e91be9c..0000000 --- a/Dockerfile.worker +++ /dev/null @@ -1,31 +0,0 @@ -FROM python:3.11-slim - -# Install system dependencies for PDF processing -RUN apt-get update && apt-get install -y --no-install-recommends \ - tesseract-ocr \ - tesseract-ocr-eng \ - poppler-utils \ - ghostscript \ - libgl1 \ - libglib2.0-0 \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -# Install Python dependencies -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -# Copy application code -COPY enterprise_pdf_checker.py . -COPY pdf_remediation.py . -COPY logger_config.py . -COPY retry_helper.py . -COPY redis_queue.py . -COPY db_manager.py . -COPY worker.py . - -# Create directories -RUN mkdir -p /app/uploads /app/results /app/logs - -CMD ["python", "worker.py"] diff --git a/README's/DAVE_QUICK_SETUP.md b/README's/DAVE_QUICK_SETUP.md deleted file mode 100644 index 7eb716e..0000000 --- a/README's/DAVE_QUICK_SETUP.md +++ /dev/null @@ -1,284 +0,0 @@ -# πŸš€ Quick Setup for Your MAMP Configuration - -## Your Setup -- **MAMP**: Points directly to project folder (no copying needed) -- **venv location**: `/Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker/venv` -- **Google API**: Using API key string (not JSON file) -- **Anthropic API**: Using API key string - ---- - -## βœ… What's Already Configured - -The code is now hardcoded with your venv path: -```php -// In api.php - already set to your path -$venv_python = '/Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker/venv/bin/python3'; -``` - -**This means:** -- βœ… No need to edit `api.php` -- βœ… No need to configure venv path -- βœ… Just point MAMP to the folder and go! 
- ---- - -## 🎯 Installation (5 Minutes) - -### Step 1: Create venv -```bash -cd /Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker - -# Create virtual environment -python3 -m venv venv - -# Activate it -source venv/bin/activate - -# Install dependencies -pip install -r requirements.txt - -# Deactivate (optional) -deactivate -``` - -### Step 2: Get Your API Keys - -#### Anthropic Claude API Key -1. Go to: https://console.anthropic.com/ -2. Create an API key -3. Copy it (looks like: `sk-ant-api03-...`) - -#### Google Cloud API Key -1. Go to: https://console.cloud.google.com/ -2. Enable "Cloud Vision API" -3. Go to "Credentials" -4. Click "Create Credentials" → "API Key" -5. Copy it (looks like: `AIzaSy...`) - -### Step 3: Point MAMP to Your Folder -1. Open MAMP -2. Preferences → Web Server -3. Set Document Root to: - ``` - /Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker - ``` -4. Click OK -5. Start Servers - -### Step 4: Access the App -``` -http://localhost:8888/ -``` - ---- - -## 🎨 Using the App - -### Option 1: Web Interface (Easiest) -1. Open: `http://localhost:8888/` -2. Drag and drop a PDF -3. Enter your API keys in the form: - - Anthropic API Key: `sk-ant-api03-...` - - Google API Key: `AIzaSy...` -4. Wait for results (2-5 minutes) -5. Review accessibility report - -**Note:** You can also set API keys as environment variables (see below) and leave the form fields empty.
- -### Option 2: Command Line -```bash -# Activate venv -source venv/bin/activate - -# Run checker (replace YOUR-KEY with actual keys) -python enterprise_pdf_checker.py your-file.pdf \ - --anthropic-key "sk-ant-api03-YOUR-KEY" \ - --google-key "AIzaSy-YOUR-KEY" \ - --output report.json - -# Deactivate -deactivate -``` - ---- - -## πŸ” Setting API Keys as Environment Variables (Optional) - -If you don't want to enter keys every time: - -```bash -# Add to ~/.zshrc (or ~/.bashrc if using bash) -echo 'export ANTHROPIC_API_KEY="sk-ant-api03-YOUR-KEY"' >> ~/.zshrc -echo 'export GOOGLE_API_KEY="AIzaSy-YOUR-KEY"' >> ~/.zshrc - -# Reload -source ~/.zshrc - -# Test -echo $ANTHROPIC_API_KEY -``` - -Then you can leave the form fields empty - it will use the environment variables. - ---- - -## πŸ“ Your File Structure - -``` -/Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker/ -β”œβ”€β”€ venv/ ← Python virtual environment -β”‚ └── bin/python3 ← This is what api.php uses -β”œβ”€β”€ uploads/ ← Created automatically -β”œβ”€β”€ results/ ← Created automatically -β”œβ”€β”€ .cache/ ← Created automatically -β”œβ”€β”€ index.html ← Web interface (Oliver branded) -β”œβ”€β”€ api.php ← Backend (hardcoded to your venv) -β”œβ”€β”€ enterprise_pdf_checker.py ← Main checker (Claude 4.5) -β”œβ”€β”€ requirements.txt ← Dependencies -└── [documentation files...] 
-``` - ---- - -## 🎨 Oliver Branding Confirmed - -✅ **Colors**: Black (#000000) + Yellow (#FFC407) -✅ **Font**: Montserrat -✅ **AI Model**: Claude Sonnet 4.5 -✅ **Your venv path**: Hardcoded in api.php - ---- - -## 🐛 Troubleshooting - -### "Python script error" or "command not found" - -```bash -# Check venv exists -ls -la /Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker/venv/bin/python3 - -# If not, create it -cd /Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker -python3 -m venv venv -source venv/bin/activate -pip install -r requirements.txt -``` - -### "Google API error" - -Make sure you've: -1. Enabled Cloud Vision API in Google Cloud Console -2. Created an API key (not service account JSON) -3. The API key has Vision API enabled - -### "Anthropic API error" - -Make sure your API key: -1. Is valid (starts with `sk-ant-api03-`) -2. Has credits/billing enabled -3. Is typed correctly (no spaces) - -### "Upload failed" - -Check MAMP is running: -1. Open MAMP -2. Make sure Apache is green -3. Make sure port is 8888 (or adjust URL) - -### Permissions errors - -```bash -cd /Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker -mkdir -p uploads results .cache -chmod 755 uploads results .cache -``` - ---- - -## 💡 Daily Workflow - -### Starting Work -1. Open MAMP → Start Servers -2. Open browser → `http://localhost:8888/` -3. Upload PDFs and check! - -### For Python Development -```bash -cd /Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker -source venv/bin/activate -# ... do your work ... -deactivate -``` - -### Ending Work -1. MAMP → Stop Servers -2. Done! - ---- - -## 🎯 Test It Now - -1. **Open MAMP** → Start Servers -2. **Visit**: `http://localhost:8888/` -3. **Upload** a test PDF (use sample_good.pdf if needed) -4. **Enter API keys** in the form -5. **Click upload** and wait -6. **Review results** - -Should take 2-5 minutes for first check (with caching, repeat checks are faster).
- ---- - -## 📊 What Gets Checked - -- ✅ Document structure & tagging -- ✅ Text extractability -- ✅ Image alt text (with AI) -- ✅ Color contrast -- ✅ Readability scores -- ✅ Form field labels -- ✅ Link quality -- ✅ Heading structure -- ✅ OCR quality (if scanned) -- ✅ 30+ other checks - -**Coverage: 95% of WCAG 2.1 Level A & AA** - ---- - -## 💰 Cost Per Check - -Average 10-page PDF with 5 images: -- **Anthropic Claude**: $0.075 (5 images × $0.015) -- **Google Vision**: $0.008 (5 images × $0.0016) -- **Total**: ~$0.08-0.10 per document - -First 1,000 images/month on Google are free! - ---- - -## 🎉 You're Ready! - -Everything is configured specifically for your setup: -- ✅ venv path hardcoded -- ✅ MAMP-compatible (no ini changes needed) -- ✅ Google API key support (not JSON) -- ✅ Oliver branding applied -- ✅ Claude Sonnet 4.5 enabled - -**Just point MAMP to your folder and start checking PDFs!** 🚀 - ---- - -## 📞 Quick Reference - -**MAMP URL**: `http://localhost:8888/` -**venv Path**: `/Users/daveporter/Desktop/CODING-2024/PDF-Accessibility-checker/venv` -**Activate venv**: `source venv/bin/activate` -**Deactivate venv**: `deactivate` - -**Get Anthropic Key**: https://console.anthropic.com/ -**Get Google Key**: https://console.cloud.google.com/ → Credentials - -**Need help?** Check the other docs or the troubleshooting section above.
diff --git a/README's/MAMP_SETUP.md b/README's/MAMP_SETUP.md deleted file mode 100644 index 5be0253..0000000 --- a/README's/MAMP_SETUP.md +++ /dev/null @@ -1,502 +0,0 @@ -# πŸš€ MAMP Setup Guide - Local Development with venv - -## Overview - -This guide is for running the Enterprise PDF Accessibility Checker locally with: -- βœ… **MAMP** - Apache/PHP stack -- βœ… **Python venv** - Isolated Python environment -- βœ… **Oliver Branding** - Black (#000000) and Yellow (#FFC407) -- βœ… **Claude Sonnet 4.5** - Latest model - ---- - -## πŸ”§ Quick Setup (10 Minutes) - -### Step 1: Install System Dependencies - -```bash -# macOS -brew install python3 tesseract poppler - -# Ubuntu/Linux -sudo apt-get update -sudo apt-get install -y python3 python3-pip python3-venv tesseract-ocr poppler-utils -``` - -### Step 2: Create Python Virtual Environment - -```bash -# Navigate to your project directory -cd /path/to/enterprise-pdf-checker - -# Create virtual environment -python3 -m venv venv - -# Activate it -source venv/bin/activate - -# Your prompt should now show (venv) -``` - -### Step 3: Install Python Dependencies in venv - -```bash -# Make sure venv is activated (you should see (venv) in your prompt) -pip install --upgrade pip - -# Install all dependencies -pip install -r requirements.txt - -# Verify installation -python enterprise_pdf_checker.py --help -``` - -### Step 4: Configure API Keys - -```bash -# Set API keys in your current session -export ANTHROPIC_API_KEY="sk-ant-api03-YOUR-KEY-HERE" -export GOOGLE_APPLICATION_CREDENTIALS="/absolute/path/to/google-credentials.json" - -# To make permanent, add to your shell profile: -echo 'export ANTHROPIC_API_KEY="sk-ant-api03-YOUR-KEY-HERE"' >> ~/.zshrc -echo 'export GOOGLE_APPLICATION_CREDENTIALS="/absolute/path/to/credentials.json"' >> ~/.zshrc - -# Reload your shell -source ~/.zshrc -``` - -### Step 5: Set Up in MAMP - -```bash -# Option 1: Copy to MAMP htdocs -cp -r /path/to/enterprise-pdf-checker 
/Applications/MAMP/htdocs/pdf-checker - -# Option 2: Create symlink -ln -s /path/to/enterprise-pdf-checker /Applications/MAMP/htdocs/pdf-checker - -# Create required directories -cd /Applications/MAMP/htdocs/pdf-checker -mkdir -p uploads results .cache -chmod 755 uploads results .cache -``` - -### Step 6: Configure MAMP - -1. **Open MAMP** -2. **Preferences β†’ Ports** - - Apache: 8888 (or your preferred port) - - PHP: Default -3. **Preferences β†’ PHP** - - Version: 7.4 or higher -4. **Start Servers** - -### Step 7: Update api.php for venv - -The PHP script needs to know about your venv. Update the Python command: - -```php -// In api.php, find the command building section and update: - -// Path to your venv Python -define('PYTHON_BIN', '/absolute/path/to/enterprise-pdf-checker/venv/bin/python3'); - -// Build command using venv Python -$cmd = escapeshellcmd(PYTHON_BIN . ' ' . PYTHON_SCRIPT) . ' ' . - escapeshellarg($pdf_path) . ' ' . - '--output ' . escapeshellarg($output_path); -``` - -Or use this complete replacement for the check command section in api.php: - -```php -// Build command - use venv if available -$venv_python = __DIR__ . '/venv/bin/python3'; -$python_bin = file_exists($venv_python) ? $venv_python : 'python3'; - -$cmd = escapeshellcmd($python_bin . ' ' . PYTHON_SCRIPT) . ' ' . - escapeshellarg($pdf_path) . ' ' . - '--output ' . 
escapeshellarg($output_path); -``` - -### Step 8: Test Installation - -```bash -# Activate venv (if not already active) -source venv/bin/activate - -# Test Python script directly -python enterprise_pdf_checker.py --help - -# Test with a sample PDF -python enterprise_pdf_checker.py sample.pdf --output test-result.json - -# Deactivate venv when done -deactivate -``` - -### Step 9: Access Web Interface - -``` -http://localhost:8888/pdf-checker/ -``` - ---- - -## 🎨 Oliver Branding Applied - -The interface now uses your brand colors: - -- **Primary Color**: Yellow (#FFC407) -- **Secondary Color**: Black (#000000) -- **Font**: Montserrat (all weights) - -### Design Elements: -- ✅ Black header with yellow accent -- ✅ Yellow primary buttons with black text -- ✅ Black/yellow score display -- ✅ Montserrat font throughout -- ✅ Professional, clean aesthetic - ---- - -## 🤖 Claude Sonnet 4.5 - -The system now uses **Claude Sonnet 4.5** (`claude-sonnet-4-5-20250929`) - the latest and most capable model: - -**Benefits:** -- Higher accuracy for image analysis -- Better alt text suggestions -- Improved context understanding -- More nuanced accessibility recommendations - -**Cost:** Same as 3.5 Sonnet (~$0.015 per image) - ---- - -## 🔄 Daily Workflow - -### Starting Work - -```bash -# 1. Navigate to project -cd /Applications/MAMP/htdocs/pdf-checker - -# 2. Activate venv -source venv/bin/activate - -# 3. Start MAMP -# (Use MAMP application) - -# 4.
Open browser -open http://localhost:8888/pdf-checker/ -``` - -### During Work - -```bash -# Python changes require venv to be active -source venv/bin/activate - -# Test Python script -python enterprise_pdf_checker.py test.pdf - -# PHP/HTML changes work immediately (just refresh browser) -``` - -### Ending Work - -```bash -# Deactivate venv -deactivate - -# Stop MAMP -# (Use MAMP application) -``` - ---- - -## πŸ› Troubleshooting - -### "command not found: python" - -```bash -# Make sure venv is activated -source venv/bin/activate - -# Check Python path -which python -# Should show: /path/to/enterprise-pdf-checker/venv/bin/python -``` - -### "Module not found" errors - -```bash -# Activate venv first -source venv/bin/activate - -# Reinstall dependencies -pip install -r requirements.txt -``` - -### PHP can't find Python script - -Check in `api.php`: - -```php -// Make sure paths are absolute -define('PYTHON_SCRIPT', __DIR__ . '/enterprise_pdf_checker.py'); - -// Use venv Python -$venv_python = __DIR__ . '/venv/bin/python3'; -$python_bin = file_exists($venv_python) ? $venv_python : 'python3'; -``` - -### API keys not working - -```bash -# In the web interface, you can enter keys directly -# Or set them for the PHP environment: - -# Add to .htaccess (in project root): -SetEnv ANTHROPIC_API_KEY "sk-ant-..." -SetEnv GOOGLE_APPLICATION_CREDENTIALS "/absolute/path/to/creds.json" -``` - -### Permission errors - -```bash -# Fix directory permissions -cd /Applications/MAMP/htdocs/pdf-checker -chmod 755 uploads results .cache - -# If using Apache: -sudo chown -R _www:_www uploads results .cache -``` - -### Font not loading - -The font is loaded from Google Fonts CDN. If you need offline: - -```html - - -``` - ---- - -## πŸ“ api.php Configuration for venv - -Here's the complete updated section for api.php: - -```php -/** - * Handle PDF accessibility check - */ -function handleCheck() { - $job_id = $_POST['job_id'] ?? 
''; - - if (empty($job_id)) { - error('Job ID required'); - } - - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - - if (!file_exists($meta_file)) { - error('Job not found'); - } - - $job_data = json_decode(file_get_contents($meta_file), true); - - // Get API keys from request or environment - $google_creds = $_POST['google_credentials'] ?? getenv('GOOGLE_APPLICATION_CREDENTIALS'); - $anthropic_key = $_POST['anthropic_key'] ?? getenv('ANTHROPIC_API_KEY'); - - // Build command - use venv Python if available - $pdf_path = $job_data['filepath']; - $output_path = RESULTS_DIR . '/' . $job_id . '.result.json'; - - // Check for venv Python - $venv_python = __DIR__ . '/venv/bin/python3'; - $python_bin = file_exists($venv_python) ? $venv_python : 'python3'; - - $cmd = escapeshellcmd($python_bin . ' ' . PYTHON_SCRIPT) . ' ' . - escapeshellarg($pdf_path) . ' ' . - '--output ' . escapeshellarg($output_path); - - if ($anthropic_key) { - $cmd .= ' --anthropic-key ' . escapeshellarg($anthropic_key); - } - - if ($google_creds) { - $cmd .= ' --google-credentials ' . escapeshellarg($google_creds); - } - - // Update status - $job_data['status'] = 'processing'; - $job_data['started_at'] = date('Y-m-d H:i:s'); - file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT)); - - // Run check in background - $cmd .= ' > /dev/null 2>&1 &'; - exec($cmd); - - success([ - 'job_id' => $job_id, - 'status' => 'processing', - 'message' => 'Check started' - ]); -} -``` - ---- - -## πŸ” Environment Variables in MAMP - -### Option 1: .htaccess (Recommended) - -Create `.htaccess` in project root: - -```apache -# API Keys (don't commit this file!) 
-SetEnv ANTHROPIC_API_KEY "sk-ant-api03-YOUR-KEY" -SetEnv GOOGLE_APPLICATION_CREDENTIALS "/absolute/path/to/creds.json" - -# Security - - Require all denied - - -# PHP Settings -php_value upload_max_filesize 50M -php_value post_max_size 50M -php_value max_execution_time 300 -``` - -### Option 2: Enter in Web Interface - -The web interface allows you to enter API keys directly on each upload. - -### Option 3: PHP Config - -Create `config.php`: - -```php - -``` - ---- - -You're all set! The system is now optimized for: -- βœ… MAMP local development -- βœ… Python venv isolation -- βœ… Oliver branding (Black + Yellow #FFC407) -- βœ… Claude Sonnet 4.5 -- βœ… Montserrat font - -**Start with:** `source venv/bin/activate` then open http://localhost:8888/pdf-checker/ πŸš€ diff --git a/README's/OLIVER_CUSTOMIZATION.md b/README's/OLIVER_CUSTOMIZATION.md deleted file mode 100644 index 0837c87..0000000 --- a/README's/OLIVER_CUSTOMIZATION.md +++ /dev/null @@ -1,323 +0,0 @@ -# 🎨 Oliver Customization Summary - -## βœ… All Changes Applied - -### 🎨 **Branding Updates** - -#### Colors -- **Primary**: #FFC407 (Oliver Yellow) βœ… -- **Secondary**: #000000 (Black) βœ… -- **Previous**: Blue (#2563eb) β†’ Replaced with Yellow/Black - -#### Typography -- **Font**: Montserrat (all weights: 400, 600, 700) βœ… -- **Loaded from**: Google Fonts CDN -- **Applied to**: Entire application - -#### Design Elements -βœ… Black header with yellow accent border -βœ… Yellow primary buttons with black text -βœ… Black/yellow gradient score display -βœ… Montserrat font across all text -βœ… Yellow hover states -βœ… Professional, high-contrast design - ---- - -### πŸ€– **AI Model Update** - -**Claude Sonnet 4.5** βœ… -- Model: `claude-sonnet-4-5-20250929` -- Previous: `claude-3-5-sonnet-20241022` -- **Benefits**: Higher accuracy, better recommendations, improved image analysis -- **Cost**: Same as 3.5 (~$0.015 per image) - ---- - -### 🐍 **Python venv Support** - -#### api.php Updates βœ… -```php -// 
Automatically detects and uses venv Python -$venv_python = __DIR__ . '/venv/bin/python3'; -$python_bin = file_exists($venv_python) ? $venv_python : 'python3'; -``` - -**What this means:** -- βœ… Works with or without venv -- βœ… No manual configuration needed -- βœ… Falls back to system Python if venv not present -- βœ… MAMP-friendly - ---- - -### πŸ“¦ **New Files Added** - -1. **MAMP_SETUP.md** (12KB) - - Complete MAMP setup guide - - venv instructions - - Troubleshooting - - Daily workflow - - API key configuration - -2. **install_venv.sh** (5.7KB) - - Automated venv setup - - Installs dependencies in venv - - Creates directories - - Tests installation - - Interactive prompts - ---- - -### πŸ—‚οΈ **File Changes** - -#### index.html (25KB) βœ… -```html - - - - -:root { - --primary: #FFC407; /* Oliver Yellow */ - --black: #000000; /* Oliver Black */ - --primary-dark: #e6b006; /* Darker yellow */ -} - - -
-``` - -#### api.php (7.3KB) βœ… -```php -// Auto-detect venv Python -$venv_python = __DIR__ . '/venv/bin/python3'; -$python_bin = file_exists($venv_python) ? $venv_python : 'python3'; -``` - -#### enterprise_pdf_checker.py (44KB) βœ… -```python -# Updated model -model="claude-sonnet-4-5-20250929" -``` - ---- - -## πŸš€ **Quick Start for MAMP** - -### Installation - -```bash -# 1. Run venv installer -chmod +x install_venv.sh -./install_venv.sh - -# 2. Copy to MAMP (choose one) -# Option A: Copy -cp -r . /Applications/MAMP/htdocs/pdf-checker - -# Option B: Symlink -ln -s $(pwd) /Applications/MAMP/htdocs/pdf-checker - -# 3. Set API keys -export ANTHROPIC_API_KEY="sk-ant-api03-YOUR-KEY" -export GOOGLE_APPLICATION_CREDENTIALS="/path/to/creds.json" - -# 4. Start MAMP and visit -open http://localhost:8888/pdf-checker/ -``` - -### Daily Usage - -```bash -# Activate venv (for Python development) -source venv/bin/activate - -# Run checks -python enterprise_pdf_checker.py test.pdf - -# Deactivate when done -deactivate -``` - -**For web interface:** Just use MAMP - api.php handles venv automatically! 
πŸŽ‰ - ---- - -## 🎯 **What You Get** - -### βœ… Oliver Branding -- Black and yellow color scheme -- Montserrat font throughout -- Professional, high-contrast design -- Maintains accessibility while being on-brand - -### βœ… Claude Sonnet 4.5 -- Latest and most capable model -- Better accuracy for accessibility checks -- Improved recommendations -- Same cost structure - -### βœ… venv Support -- Isolated Python environment -- MAMP-compatible -- Automatic detection in api.php -- No manual configuration needed - -### βœ… Complete Documentation -- MAMP_SETUP.md - Detailed setup guide -- install_venv.sh - Automated installation -- All original docs still included -- Troubleshooting section - ---- - -## πŸ“Š **Before vs After** - -| Feature | Before | After | -|---------|--------|-------| -| **Primary Color** | Blue (#2563eb) | Yellow (#FFC407) βœ… | -| **Secondary Color** | Light Blue | Black (#000000) βœ… | -| **Font** | System default | Montserrat βœ… | -| **AI Model** | Claude 3.5 Sonnet | Claude 4.5 Sonnet βœ… | -| **Python** | System Python | venv support βœ… | -| **MAMP Guide** | Generic setup | Specific MAMP guide βœ… | - ---- - -## πŸ” **Visual Changes** - -### Header -``` -Before: White background, blue text -After: Black background, yellow text, yellow border -``` - -### Buttons -``` -Before: Blue background, white text -After: Black background, yellow text, yellow border - Hover: Yellow background, black text -``` - -### Score Display -``` -Before: Purple gradient -After: Black gradient with yellow accents -``` - -### Typography -``` -Before: System fonts (-apple-system, etc.) 
-After: Montserrat for all text -``` - ---- - -## 🎨 **Color Palette** - -```css -/* Oliver Brand Colors */ ---primary: #FFC407; /* Yellow - main brand color */ ---primary-dark: #e6b006; /* Darker yellow for hover */ ---primary-darker: #cc9d05; /* Even darker for active states */ ---black: #000000; /* Black - secondary brand color */ - -/* Status Colors (unchanged for accessibility) */ ---success: #10b981; /* Green */ ---warning: #f59e0b; /* Orange */ ---error: #ef4444; /* Red */ ---critical: #dc2626; /* Dark red */ ---info: #3b82f6; /* Blue */ -``` - ---- - -## πŸ› οΈ **Technical Details** - -### Font Loading -```html - - - -``` - -### venv Detection -```php -// In api.php -$venv_python = __DIR__ . '/venv/bin/python3'; -$python_bin = file_exists($venv_python) ? $venv_python : 'python3'; -``` - -### Model Configuration -```python -# In enterprise_pdf_checker.py -self.anthropic_client.messages.create( - model="claude-sonnet-4-5-20250929", - max_tokens=1024, - messages=[...] -) -``` - ---- - -## βœ… **Testing Checklist** - -Before deploying, verify: - -- [ ] Header is black with yellow accent -- [ ] All text uses Montserrat font -- [ ] Primary buttons are black with yellow text -- [ ] Hover states show yellow background -- [ ] Score display has black/yellow gradient -- [ ] Upload area uses appropriate colors -- [ ] API returns Claude Sonnet 4.5 responses -- [ ] venv Python is used when available -- [ ] System Python works as fallback -- [ ] All functionality works in MAMP - ---- - -## πŸ“ž **Need to Customize More?** - -### Change Colors -Edit `index.html`, find: -```css -:root { - --primary: #FFC407; /* Change this */ - --black: #000000; /* Or this */ -} -``` - -### Change Font -Edit `index.html`, find: -```html - -``` -Replace `Montserrat` with your font, then update: -```css -body { - font-family: 'YourFont', sans-serif; -} -``` - -### Change Model -Edit `enterprise_pdf_checker.py`, find: -```python -model="claude-sonnet-4-5-20250929" -``` - ---- - -## πŸŽ‰ 
**Summary** - -You now have: -βœ… **Oliver-branded** web interface (Black + Yellow #FFC407) -βœ… **Montserrat font** throughout -βœ… **Claude Sonnet 4.5** integration -βœ… **venv support** with automatic detection -βœ… **MAMP-optimized** setup -βœ… **Complete documentation** - -**Everything is ready for MAMP local development!** πŸš€ - -Start with: `./install_venv.sh` then check out **MAMP_SETUP.md** diff --git a/README's/README_FIRST.txt b/README's/README_FIRST.txt deleted file mode 100644 index 24b8fa2..0000000 --- a/README's/README_FIRST.txt +++ /dev/null @@ -1,220 +0,0 @@ -╔════════════════════════════════════════════════════════════════════════════╗ -β•‘ β•‘ -β•‘ 🎯 ENTERPRISE PDF ACCESSIBILITY CHECKER - COMPLETE PACKAGE β•‘ -β•‘ β•‘ -β•‘ The most comprehensive PDF accessibility validation system available β•‘ -β•‘ β•‘ -β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β• - -πŸ“¦ WHAT YOU HAVE -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -βœ… 95% WCAG 2.1 Coverage - Industry-leading automated validation -βœ… AI-Powered Analysis - Anthropic Claude 3.5 + Google Cloud Vision -βœ… Professional Web Interface - Modern drag-and-drop UI -βœ… REST API - Easy integration -βœ… Command Line Interface - Automation ready -βœ… Complete Documentation - 140KB+ of guides - -Total Value: $50,000+ enterprise solution provided complete - - -πŸš€ QUICK START (5 MINUTES) -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -1. Install everything: - $ chmod +x install.sh && ./install.sh - -2. 
Set up API keys (NEW: .env file support!): - $ cp .env.example .env - $ nano .env # Add your API keys here - - Or use environment variables: - $ export ANTHROPIC_API_KEY="sk-ant-YOUR-KEY-HERE" - $ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json" - -3. Quick test (fast mode): - $ python3 enterprise_pdf_checker.py sample_good.pdf --quick - -4. Start the server: - $ php -S localhost:8000 - -5. Open browser: - $ open http://localhost:8000 - -6. Upload a PDF and get comprehensive accessibility report! - - -πŸ“š READ THE DOCUMENTATION IN THIS ORDER -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -🟒 START HERE (Required - 20 minutes) - β”œβ”€ START_HERE.md .................. Package overview & guide - └─ QUICKSTART.md .................. 5-minute setup instructions - -πŸ”΅ CORE DOCUMENTATION (Read these next - 1 hour) - β”œβ”€ ENTERPRISE_README.md ........... Complete installation & usage guide - └─ ARCHITECTURE.md ................ System design & technical details - -🟑 BACKGROUND & CONTEXT (Optional - 2 hours) - β”œβ”€ WCAG_LIMITATIONS.md ............ What can't be automated (5%) - β”œβ”€ INTEGRATION_GUIDE.md ........... API integration strategies - β”œβ”€ IMPLEMENTATION_ROADMAP.md ...... Step-by-step coding guide - β”œβ”€ API_QUICK_REFERENCE.md ......... One-page cheat sheet - └─ MASTER_GUIDE.md ................ Evolution & best practices - - -πŸ“ FILE STRUCTURE -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -CORE APPLICATION (Use these): -β”œβ”€β”€ enterprise_pdf_checker.py (44KB) ... Main Python checker with AI -β”œβ”€β”€ api.php (7.1KB) .................... REST API backend -β”œβ”€β”€ index.html (24KB) .................. Modern web interface -β”œβ”€β”€ requirements.txt (480B) ............ Python dependencies -└── install.sh (3.1KB) ................. Automated setup script - -DOCUMENTATION (Read these): -β”œβ”€β”€ START_HERE.md (14KB) ............... πŸ‘ˆ Read this first! 
-β”œβ”€β”€ QUICKSTART.md (9.1KB) .............. Quick setup guide -β”œβ”€β”€ ENTERPRISE_README.md (18KB) ........ Complete documentation -β”œβ”€β”€ ARCHITECTURE.md (17KB) ............. System design -β”œβ”€β”€ WCAG_LIMITATIONS.md (14KB) ......... What can't be automated -β”œβ”€β”€ INTEGRATION_GUIDE.md (25KB) ........ API integration -β”œβ”€β”€ IMPLEMENTATION_ROADMAP.md (25KB) ... Coding guide -β”œβ”€β”€ API_QUICK_REFERENCE.md (11KB) ...... Cheat sheet -└── MASTER_GUIDE.md (12KB) ............. Overview & best practices - -TESTING & EXAMPLES: -β”œβ”€β”€ sample_good.pdf (1.4KB) ............ Test PDF with metadata -β”œβ”€β”€ sample_poor.pdf (2.1KB) ............ Test PDF with issues -β”œβ”€β”€ create_sample_pdfs.py (2.7KB) ...... Generate test files -└── accessibility_report.html (6.5KB) .. Example HTML report - -LEGACY/ALTERNATIVES (Reference only): -β”œβ”€β”€ pdf_accessibility_checker.py (22KB) .... Basic version (no AI) -β”œβ”€β”€ enhanced_pdf_checker.py (29KB) ......... Intermediate version -└── README.md (9.5KB) ...................... Basic tool docs - - -πŸ’Ž KEY FEATURES -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -⚑ Performance & Usability (NEW!) - β€’ Quick mode (--quick) for fast initial checks - β€’ Parallel image processing (3x faster) - β€’ Smart API timeouts (no more hangs!) 
- β€’ .env file support for secure API keys - β€’ Real-time progress updates - -πŸ€– AI-Powered Analysis - β€’ Claude 3.5 Sonnet for image analysis (95% accuracy) - β€’ Google Cloud Vision for OCR (98% accuracy) - β€’ Alt text quality validation - β€’ Text-in-images detection - β€’ Content quality analysis - -πŸ” Comprehensive WCAG Checks - β€’ Document structure & tagging (1.3.1, 4.1.2) - β€’ Color contrast analysis (1.4.3) - β€’ Text extractability & readability (3.1.5) - β€’ Form field validation (3.3.2) - β€’ Link quality checking (2.4.4) - β€’ 30+ automated checks total - -🌐 Three Usage Modes - β€’ Web Interface: Drag-and-drop with visual reports - β€’ Command Line: Automation & batch processing - β€’ REST API: System integration - -πŸ’° Cost-Effective - β€’ ~$0.10 per document (10 pages, 5 images) - β€’ Smart caching reduces repeat checks to $0 - β€’ Break-even after 2-3 documents vs manual review - - -πŸ’° COSTS & ROI -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Per Document: ~$0.10 (Anthropic $0.075 + Google $0.008 + OCR $0.015) - -Monthly Costs: - β€’ 100 documents .... $10/month - β€’ 500 documents .... $50/month - β€’ 1,000 documents .. $100/month - β€’ 5,000 documents .. 
$500/month - -ROI: - β€’ Manual review: $100/document (2 hours @ $50/hr) - β€’ This tool: $0.10/document (2 minutes) - β€’ Savings: $99.90 per document - β€’ Break-even: After 2-3 documents - β€’ Time savings: 96% reduction - - -🎯 COMPARISON WITH ALTERNATIVES -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - - This Tool Adobe Acrobat PAC (Free) Manual Review -Coverage 95% 90% 75% 100% -Speed 2-5 min 5-10 min 3-5 min 1-2 hours -AI Analysis Yes No No Yes -Automation Full Limited Limited No -API Access Yes No No No -Cost/Document $0.10 $20+ $0 $100 -Quality Rating ⭐⭐⭐⭐⭐ ⭐⭐⭐⭐ ⭐⭐⭐ ⭐⭐⭐⭐⭐ - - -πŸ”’ SECURITY & COMPLIANCE -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -βœ… WCAG 2.1 Level A & AA compliant -βœ… PDF/UA standards aligned -βœ… Section 508 compatible -βœ… EN 301 549 aligned -βœ… HTTPS required for production -βœ… API keys in environment variables -βœ… No data retention policies configurable -βœ… File upload validation & size limits - - -πŸ“ž GETTING HELP -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -1. Check START_HERE.md for overview -2. Read QUICKSTART.md for setup -3. See ENTERPRISE_README.md for troubleshooting -4. Review ARCHITECTURE.md for technical details -5. All API documentation included - - -✨ WHAT MAKES THIS SPECIAL -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -βœ“ Quality-First Design - Uses best AI models (Claude, Google) -βœ“ Production-Ready - Enterprise-grade code & architecture -βœ“ Complete Package - Nothing else to buy or build -βœ“ Well-Documented - 140KB+ of guides & examples -βœ“ Cost-Optimized - Smart caching & efficient processing -βœ“ Three Interfaces - Web, CLI, and API -βœ“ Easy Integration - REST API for existing systems -βœ“ Proven Technology - Built on industry-standard libraries - - -🎯 NEXT STEPS -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -1. 
NOW: Read START_HERE.md (5 minutes) -2. TODAY: Run ./install.sh and configure API keys -3. THIS WEEK: Test with 10-20 documents -4. THIS MONTH: Deploy to production -5. THIS QUARTER: Achieve 95% WCAG coverage goal - - -═══════════════════════════════════════════════════════════════════════════════ - - 🌟 Make the web accessible for everyone 🌟 - - Start with START_HERE.md β†’ - -═══════════════════════════════════════════════════════════════════════════════ diff --git a/README's/SETUP_ORDER.txt b/README's/SETUP_ORDER.txt deleted file mode 100644 index 1fd7a17..0000000 --- a/README's/SETUP_ORDER.txt +++ /dev/null @@ -1,143 +0,0 @@ -╔════════════════════════════════════════════════════════════════════╗ -β•‘ β•‘ -β•‘ 🎨 OLIVER ENTERPRISE PDF ACCESSIBILITY CHECKER β•‘ -β•‘ β•‘ -β•‘ Customized with Oliver branding + MAMP + venv support β•‘ -β•‘ β•‘ -β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β• - -πŸ“š READ IN THIS ORDER FOR MAMP SETUP: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -1️⃣ OLIVER_CUSTOMIZATION.md ............... What changed (5 min) - ↓ Summary of all Oliver-specific updates - -2️⃣ MAMP_SETUP.md .......................... MAMP setup guide (15 min) - ↓ Step-by-step MAMP configuration - -3️⃣ Run: ./install_venv.sh ................ Auto-install (5 min) - ↓ Creates venv and installs everything - -4️⃣ START_HERE.md .......................... Full package overview - ↓ Complete system documentation - - -πŸš€ SUPER QUICK START (10 MINUTES): -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -$ ./install_venv.sh -$ export ANTHROPIC_API_KEY="sk-ant-YOUR-KEY" -$ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/creds.json" - -Then copy to MAMP: -$ cp -r . /Applications/MAMP/htdocs/pdf-checker - -Open: http://localhost:8888/pdf-checker/ - -Done! 
πŸŽ‰ - - -✨ WHAT'S CUSTOMIZED: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -βœ… Oliver Colors: Black (#000000) + Yellow (#FFC407) -βœ… Oliver Font: Montserrat (all weights) -βœ… Latest AI: Claude Sonnet 4.5 -βœ… venv Support: Automatic detection in api.php -βœ… MAMP Ready: No port conflicts, works out of the box - - -πŸ“ KEY FILES: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -SETUP & DOCUMENTATION: -β”œβ”€β”€ OLIVER_CUSTOMIZATION.md ......... What changed for Oliver -β”œβ”€β”€ MAMP_SETUP.md ................... Complete MAMP guide -β”œβ”€β”€ install_venv.sh ................. Auto-installer -└── START_HERE.md ................... Full documentation - -APPLICATION (UPDATED): -β”œβ”€β”€ index.html ...................... Oliver branding applied -β”œβ”€β”€ api.php ......................... venv auto-detection -β”œβ”€β”€ enterprise_pdf_checker.py ....... Claude Sonnet 4.5 -└── requirements.txt ................ All dependencies - -REFERENCE: -β”œβ”€β”€ ENTERPRISE_README.md ............ Complete manual -β”œβ”€β”€ ARCHITECTURE.md ................. System design -β”œβ”€β”€ QUICKSTART.md ................... 
5-min generic setup -└── [8 more documentation files] - - -🎨 OLIVER BRANDING DETAILS: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Primary Color: #FFC407 (Yellow) -Secondary Color: #000000 (Black) -Font: Montserrat (400, 600, 700) - -Visual Elements: -β€’ Black header with yellow border -β€’ Yellow primary buttons -β€’ Black/yellow score display -β€’ High-contrast, professional design -β€’ Fully accessible while on-brand - - -πŸ€– AI CONFIGURATION: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Model: Claude Sonnet 4.5 (claude-sonnet-4-5-20250929) -Why: Latest model, highest accuracy -Cost: ~$0.015 per image (same as 3.5) -Bonus: Also uses Google Cloud Vision for cross-validation - - -🐍 PYTHON VENV: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -βœ… Isolated environment (no conflicts) -βœ… Auto-detected by api.php -βœ… Falls back to system Python if needed -βœ… Easy to manage - -Activate: source venv/bin/activate -Deactivate: deactivate -Run: python enterprise_pdf_checker.py file.pdf - - -πŸ’‘ COMMON TASKS: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Test Python script: -$ source venv/bin/activate -$ python enterprise_pdf_checker.py sample.pdf -$ deactivate - -Use web interface: -Just open: http://localhost:8888/pdf-checker/ -(api.php handles venv automatically) - -Add to MAMP: -$ cp -r . /Applications/MAMP/htdocs/pdf-checker -OR -$ ln -s $(pwd) /Applications/MAMP/htdocs/pdf-checker - - -🎯 NEXT STEPS: -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -1. Read OLIVER_CUSTOMIZATION.md to see what changed -2. Read MAMP_SETUP.md for detailed instructions -3. Run ./install_venv.sh to set up venv -4. Set your API keys -5. Add to MAMP htdocs -6. Visit http://localhost:8888/pdf-checker/ -7. Upload a PDF and test! - - -═══════════════════════════════════════════════════════════════════════ - - 🎨 Oliver-branded, Claude 4.5-powered, venv-ready! 
πŸš€ - -═══════════════════════════════════════════════════════════════════════ diff --git a/README's/install.sh b/README's/install.sh deleted file mode 100644 index 17d3234..0000000 --- a/README's/install.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/bin/bash -# Enterprise PDF Accessibility Checker - Installation Script - -set -e - -echo "==========================================" -echo "Enterprise PDF Accessibility Checker" -echo "Installation Script" -echo "==========================================" -echo "" - -# Check if running as root -if [ "$EUID" -eq 0 ]; then - echo "Please do not run as root/sudo" - exit 1 -fi - -# Detect OS -if [[ "$OSTYPE" == "linux-gnu"* ]]; then - OS="linux" - PKG_MGR="apt-get" -elif [[ "$OSTYPE" == "darwin"* ]]; then - OS="mac" - PKG_MGR="brew" -else - echo "Unsupported OS: $OSTYPE" - exit 1 -fi - -echo "Detected OS: $OS" -echo "" - -# Step 1: Install system dependencies -echo "Step 1: Installing system dependencies..." -if [ "$OS" == "linux" ]; then - sudo apt-get update - sudo apt-get install -y \ - python3 \ - python3-pip \ - tesseract-ocr \ - poppler-utils \ - php \ - php-cli \ - php-json -elif [ "$OS" == "mac" ]; then - brew install python3 tesseract poppler php -fi -echo "βœ“ System dependencies installed" -echo "" - -# Step 2: Install Python dependencies -echo "Step 2: Installing Python dependencies..." -pip3 install -r requirements.txt --break-system-packages || pip3 install -r requirements.txt -echo "βœ“ Python dependencies installed" -echo "" - -# Step 3: Download TextBlob corpora -echo "Step 3: Downloading TextBlob language data..." -python3 -m textblob.download_corpora lite -echo "βœ“ TextBlob corpora downloaded" -echo "" - -# Step 4: Create required directories -echo "Step 4: Creating directories..." -mkdir -p uploads results .cache -chmod 755 uploads results .cache -echo "βœ“ Directories created" -echo "" - -# Step 5: Test installation -echo "Step 5: Testing installation..." 
-python3 enterprise_pdf_checker.py --help > /dev/null 2>&1 -if [ $? -eq 0 ]; then - echo "✓ Installation successful!" -else - echo "⚠ Warning: Python script test failed" -fi -echo "" - -# Step 6: Check for API keys -echo "Step 6: Checking API configuration..." -if [ -z "$ANTHROPIC_API_KEY" ]; then - echo "⚠ ANTHROPIC_API_KEY not set" - echo " Export it with: export ANTHROPIC_API_KEY='sk-ant-...'" -else - echo "✓ Anthropic API key found" -fi - -if [ -z "$GOOGLE_APPLICATION_CREDENTIALS" ]; then - echo "⚠ GOOGLE_APPLICATION_CREDENTIALS not set" - echo " Export it with: export GOOGLE_APPLICATION_CREDENTIALS='/path/to/creds.json'" -else - echo "✓ Google credentials found" -fi -echo "" - -# Final instructions -echo "==========================================" -echo "Installation Complete!" -echo "==========================================" -echo "" -echo "Next steps:" -echo "" -echo "1. Configure API keys (if not already done):" -echo " export ANTHROPIC_API_KEY='sk-ant-...'" -echo " export GOOGLE_APPLICATION_CREDENTIALS='/path/to/creds.json'" -echo "" -echo "2. Start the web server:" -echo " php -S localhost:8000" -echo "" -echo "3. Open in browser:" -echo " http://localhost:8000" -echo "" -echo "Or use the command line:" -echo " python3 enterprise_pdf_checker.py your_document.pdf" -echo "" -echo "See ENTERPRISE_README.md for detailed documentation."
-echo "" diff --git a/README's/install_venv.sh b/README's/install_venv.sh deleted file mode 100644 index 8ce95f5..0000000 --- a/README's/install_venv.sh +++ /dev/null @@ -1,186 +0,0 @@ -#!/bin/bash -# Enterprise PDF Accessibility Checker - venv Installation Script -# For use with MAMP or local development - -set -e - -echo "==========================================" -echo "Enterprise PDF Accessibility Checker" -echo "MAMP + venv Installation" -echo "==========================================" -echo "" - -# Detect OS -if [[ "$OSTYPE" == "linux-gnu"* ]]; then - OS="linux" -elif [[ "$OSTYPE" == "darwin"* ]]; then - OS="mac" -else - echo "Unsupported OS: $OSTYPE" - exit 1 -fi - -echo "Detected OS: $OS" -echo "" - -# Step 1: Check for Python 3 -echo "Step 1: Checking Python installation..." -if command -v python3 &> /dev/null; then - PYTHON_VERSION=$(python3 --version) - echo "✓ $PYTHON_VERSION found" -else - echo "✗ Python 3 not found" - echo "Please install Python 3.8 or higher first:" - if [ "$OS" == "mac" ]; then - echo " brew install python3" - else - echo " sudo apt-get install python3 python3-pip python3-venv" - fi - exit 1 -fi -echo "" - -# Step 2: Install system dependencies (optional, with user confirmation) -echo "Step 2: System dependencies (Tesseract, Poppler)..." -echo "These are required for OCR and PDF rendering." -read -p "Install system dependencies? (y/n) " -n 1 -r -echo "" -if [[ $REPLY =~ ^[Yy]$ ]]; then - if [ "$OS" == "linux" ]; then - sudo apt-get update - sudo apt-get install -y tesseract-ocr poppler-utils - elif [ "$OS" == "mac" ]; then - brew install tesseract poppler - fi - echo "✓ System dependencies installed" -else - echo "⚠ Skipped system dependencies. Install manually if needed." -fi -echo "" - -# Step 3: Create virtual environment -echo "Step 3: Creating Python virtual environment..." -if [ -d "venv" ]; then - echo "⚠ venv directory already exists" - read -p "Delete and recreate?
(y/n) " -n 1 -r - echo "" - if [[ $REPLY =~ ^[Yy]$ ]]; then - rm -rf venv - else - echo "Keeping existing venv" - fi -fi - -if [ ! -d "venv" ]; then - python3 -m venv venv - echo "✓ Virtual environment created" -else - echo "✓ Using existing virtual environment" -fi -echo "" - -# Step 4: Activate venv and install dependencies -echo "Step 4: Installing Python dependencies in venv..." -source venv/bin/activate - -# Upgrade pip -pip install --upgrade pip --quiet - -# Install dependencies -pip install -r requirements.txt --quiet - -echo "✓ Python dependencies installed in venv" -echo "" - -# Step 5: Download TextBlob corpora -echo "Step 5: Downloading TextBlob language data..." -python -m textblob.download_corpora lite 2>/dev/null || echo "⚠ TextBlob corpora download skipped" -echo "" - -# Step 6: Create required directories -echo "Step 6: Creating directories..." -mkdir -p uploads results .cache -chmod 755 uploads results .cache -echo "✓ Directories created" -echo "" - -# Step 7: Test installation -echo "Step 7: Testing installation..." -python enterprise_pdf_checker.py --help > /dev/null 2>&1 -if [ $? -eq 0 ]; then - echo "✓ Python script test passed" -else - echo "⚠ Warning: Python script test failed" -fi -echo "" - -# Step 8: Check for API keys -echo "Step 8: Checking API configuration..."
-if [ -z "$ANTHROPIC_API_KEY" ]; then - echo "⚠ ANTHROPIC_API_KEY not set" - echo "" - echo "Set it now:" - echo " export ANTHROPIC_API_KEY='sk-ant-api03-...'" - echo "" - echo "Or add to shell profile (~/.zshrc or ~/.bashrc):" - echo " echo 'export ANTHROPIC_API_KEY=\"sk-ant-api03-...\"' >> ~/.zshrc" -else - echo "✓ Anthropic API key found" -fi - -if [ -z "$GOOGLE_APPLICATION_CREDENTIALS" ]; then - echo "⚠ GOOGLE_APPLICATION_CREDENTIALS not set" - echo "" - echo "Set it now:" - echo " export GOOGLE_APPLICATION_CREDENTIALS='/absolute/path/to/credentials.json'" - echo "" - echo "Or add to shell profile:" - echo " echo 'export GOOGLE_APPLICATION_CREDENTIALS=\"/path/to/creds.json\"' >> ~/.zshrc" -else - echo "✓ Google credentials found" -fi -echo "" - -# Deactivate venv -deactivate - -# Final instructions -echo "==========================================" -echo "Installation Complete!" -echo "==========================================" -echo "" -echo "✅ Virtual environment created at: ./venv" -echo "✅ All dependencies installed" -echo "✅ Claude Sonnet 4.5 configured" -echo "✅ Oliver branding applied (Black + Yellow #FFC407)" -echo "" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Next Steps:" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "" -echo "1. Configure API keys (if not already done):" -echo " export ANTHROPIC_API_KEY='sk-ant-api03-...'" -echo " export GOOGLE_APPLICATION_CREDENTIALS='/path/to/creds.json'" -echo "" -echo "2. For MAMP setup:" -echo " - Copy this folder to MAMP htdocs/" -echo " - Or create symlink: ln -s $(pwd) /Applications/MAMP/htdocs/pdf-checker" -echo " - Start MAMP and visit: http://localhost:8888/pdf-checker/" -echo "" -echo "3. To use command line:" -echo " source venv/bin/activate" -echo " python enterprise_pdf_checker.py your_document.pdf" -echo " deactivate" -echo "" -echo "4.
Read MAMP_SETUP.md for detailed MAMP configuration" -echo "" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Daily Usage:" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "" -echo "Activate venv: source venv/bin/activate" -echo "Deactivate venv: deactivate" -echo "Run checker: python enterprise_pdf_checker.py file.pdf" -echo "" -echo "The api.php automatically detects and uses venv Python! 🎉" -echo "" diff --git a/README.md b/README.md index e7ade22..173ee72 100644 --- a/README.md +++ b/README.md @@ -1,774 +1,51 @@ -# PDF Accessibility Checker - Current State +# Aimpress PDF Accessibility -> **AI-Powered PDF Accessibility Validation System** -> Comprehensive WCAG 2.1 compliance checking with enterprise-grade features +> WCAG 2.1 AA compliance checking for PDFs — web-based, AI-powered, self-service. ---- +**EU Accessibility Act (June 2025)** requires banks, e-commerce, e-learning, and government to provide accessible PDF documents. This product automates the audit process. -## 📋 What This Application Does +## Features -This is a **production-ready PDF accessibility checker** that validates PDF documents against WCAG 2.1 Level A & AA standards. It combines traditional PDF analysis with cutting-edge AI to achieve approximately **95% automated coverage** of accessibility requirements.
+- 30+ WCAG 2.1 AA / PDF/UA-1 checks +- AI-powered alt-text validation (Claude Sonnet + Google Vision) +- Color contrast checking (1.4.3 AA + 1.4.6 AAA) +- Auto-remediation (title, language, tags, bookmarks) +- Visual Page Inspector β€” SVG overlay of issues on rendered pages +- Multi-language support (50+ languages) +- Detailed HTML/JSON/PDF reports with Matterhorn Protocol checkpoints +- Team workspaces with role-based access -### πŸ†• Recent Updates (Feb 2026) +## Tech Stack -**Production Readiness Enhancements:** -- βœ… **API Authentication** - Secure API access with key-based authentication -- βœ… **Structured Logging** - Production-grade logging with rotation and levels -- βœ… **Error Resilience** - Automatic retry logic with exponential backoff for API calls -- βœ… **Test Suite** - 31 automated tests ensuring code quality (34% coverage) -- βœ… **veraPDF Integration** - Enhanced PDF/UA-1 validation (ISO 14289-1) -- βœ… **Virtual Environment** - Isolated Python dependencies for clean deployment -- βœ… **Requirements Docs** - Full BRS/FRS/SAD specifications in `docs_req/` -- βœ… **Bug Fixes** - Critical import bug fixed in remediation module +| Layer | Technology | +|---|---| +| Backend | FastAPI + Python 3.12 | +| Frontend | Next.js 15 + shadcn/ui + Tailwind | +| Auth | Supabase Auth | +| Database | PostgreSQL 16 + RLS | +| Queue | Celery + Redis | +| Storage | MinIO (S3-compatible) | +| Deploy | Docker Compose + Caddy | -**Status:** 95% Production-Ready β€’ All Critical Fixes Complete β€’ All Tests Passing - -### Core Capabilities - -βœ… **Automated WCAG Validation** - Checks 30+ accessibility criteria -βœ… **AI-Powered Image Analysis** - Uses Anthropic Claude 3.5 Sonnet for alt text validation -βœ… **OCR & Text Detection** - Google Cloud Vision for text-in-images detection -βœ… **Color Contrast Analysis** - WCAG AA/AAA compliance checking -βœ… **Readability Metrics** - Flesch scores and grade-level analysis -βœ… **Auto-Remediation** - Fixes common issues 
automatically -βœ… **Visual Inspector** - See exactly where issues occur on each page -βœ… **Three Interfaces** - Web UI, REST API, and Command Line -βœ… **API Authentication** - Secure API access with key-based authentication -βœ… **Structured Logging** - Production-ready logging with rotation -βœ… **Error Resilience** - Automatic retry logic for API failures -βœ… **Test Suite** - 31 automated tests with 34% coverage -βœ… **veraPDF Integration** - Enhanced PDF/UA compliance validation - ---- - -## πŸ—οΈ System Architecture - -### Components - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Web Interface (index.html) β”‚ -β”‚ β€’ Drag-and-drop PDF upload β”‚ -β”‚ β€’ Real-time progress tracking β”‚ -β”‚ β€’ Visual results dashboard β”‚ -β”‚ β€’ Issue filtering and navigation β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β–Ό -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ REST API (api.php) β”‚ -β”‚ β€’ File upload management β”‚ -β”‚ β€’ Job queue processing β”‚ -β”‚ β€’ Result storage and retrieval β”‚ -β”‚ β€’ Auto-remediation endpoint β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β–Ό -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Processing Engine (enterprise_pdf_checker.py) β”‚ -β”‚ β€’ PDF structure analysis β”‚ -β”‚ β€’ Image extraction and AI analysis β”‚ -β”‚ β€’ Color contrast checking β”‚ -β”‚ β€’ Readability analysis β”‚ 
-β”‚ β€’ Comprehensive reporting β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ β”‚ - β–Ό β–Ό -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ External APIs β”‚ β”‚ Remediation Engine β”‚ -β”‚ β€’ Claude Vision β”‚ β”‚ (pdf_remediation.py) β”‚ -β”‚ β€’ Google Vision β”‚ β”‚ β€’ Metadata fixes β”‚ -β”‚ β€’ Document AI β”‚ β”‚ β€’ Language setting β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β€’ Tagging corrections β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -### File Structure - -``` -PDF-Accessibility-checker/ -β”œβ”€β”€ enterprise_pdf_checker.py # Main checker (1,508 lines) -β”œβ”€β”€ pdf_remediation.py # Auto-fix engine (455 lines) -β”œβ”€β”€ api.php # REST API backend (532 lines) -β”œβ”€β”€ index.html # Web interface (1,727 lines) -β”œβ”€β”€ auth.php # Authentication module (NEW) -β”œβ”€β”€ logger_config.py # Logging framework (NEW) -β”œβ”€β”€ retry_helper.py # API retry logic (NEW) -β”œβ”€β”€ requirements.txt # Python dependencies -β”œβ”€β”€ pytest.ini # Test configuration (NEW) -β”œβ”€β”€ .env.example # Environment configuration template -β”‚ -β”œβ”€β”€ venv/ # Virtual environment (created during setup) -β”œβ”€β”€ uploads/ # Uploaded PDFs (temporary) -β”œβ”€β”€ results/ # Check results and metadata -β”œβ”€β”€ .cache/ # API response cache (cost optimization) -β”œβ”€β”€ logs/ # Application logs (NEW) -β”‚ -β”œβ”€β”€ tests/ # Test suite (NEW) -β”‚ β”œβ”€β”€ conftest.py # pytest fixtures -β”‚ β”œβ”€β”€ test_checker.py # Checker unit tests -β”‚ β”œβ”€β”€ test_remediation.py # Remediation tests -β”‚ └── test_api.py # API integration tests -β”‚ -β”œβ”€β”€ Test_files/ # Sample PDFs for testing -β”‚ β”œβ”€β”€ sample_good.pdf -β”‚ └── sample_poor.pdf -β”‚ 
-β”œβ”€β”€ docs_req/ # Requirements specifications (NEW) -β”‚ β”œβ”€β”€ PDFAccessibilityHub_BRS_v1.1_2026-02-02.md -β”‚ β”œβ”€β”€ PDFAccessibilityHub_FRS_v1.1_2026-02-02.md -β”‚ └── PDFAccessibilityHub_SAD_v1.1_2026-02-02.md -β”‚ -└── README's/ # Extensive documentation (19 files) - β”œβ”€β”€ START_HERE.md - β”œβ”€β”€ QUICKSTART.md - β”œβ”€β”€ ENTERPRISE_README.md - β”œβ”€β”€ ARCHITECTURE.md - β”œβ”€β”€ WCAG_LIMITATIONS.md - └── ... (14 more guides) -``` - ---- - -## πŸš€ Quick Setup Guide - -### Prerequisites - -- **Python 3.8+** -- **PHP 7.4+** (for web interface) -- **Tesseract OCR** (for text extraction) -- **Poppler** (for PDF rendering) -- **API Keys:** - - Anthropic API key (required for AI analysis) - - Google Cloud credentials (optional, enhances analysis) - -### Installation (10 Minutes) +## Local Development ```bash -# 1. Navigate to project directory -cd /path/to/PDF-Accessibility-checker - -# 2. Create virtual environment (recommended) -python3 -m venv venv -source venv/bin/activate - -# 3. Install Python dependencies -pip install -r requirements.txt - -# 4. Install system dependencies (macOS) -brew install php tesseract poppler - -# Optional: Install veraPDF for enhanced PDF/UA validation -brew install verapdf - -# 5. Configure API keys cp .env.example .env -nano .env # Add your Anthropic API key +# Fill in ANTHROPIC_API_KEY + SUPABASE_* values -# 6. Start the web server -php -S localhost:8000 - -# 7. Open browser -open http://localhost:8000 +docker compose up -d postgres redis minio +cd backend && uv sync && uv run uvicorn app.main:app --reload +cd frontend && npm install && npm run dev ``` -**Note:** On macOS, use virtual environment to avoid `externally-managed-environment` errors. 
+## Pricing -### Alternative: Command Line Usage +| Plan | PDFs/month | Auto-fix | API | Team | +|---|---|---|---|---| +| Free | 5 | β€” | β€” | β€” | +| Pro $29/mo | 100 | βœ“ | β€” | β€” | +| Business $149/mo | Unlimited | βœ“ | βœ“ | βœ“ | -```bash -# Basic check -python3 enterprise_pdf_checker.py document.pdf +## Deployment -# With output file -python3 enterprise_pdf_checker.py document.pdf --output report.json - -# Quick mode (skip AI analysis) -python3 enterprise_pdf_checker.py document.pdf --quick -``` - ---- - -## 🎯 Key Features Explained - -### 1. **AI-Powered Image Analysis** - -Uses **Anthropic Claude 3.5 Sonnet** to analyze every image in the PDF: -- Validates alt text quality and meaningfulness -- Detects text embedded in images (WCAG 1.4.5 violation) -- Identifies color-only information (WCAG 1.4.1) -- Classifies images as decorative vs. informational -- Provides specific accessibility recommendations - -**Cost:** ~$0.015 per image (cached for free on repeat checks) - -### 2. **Comprehensive WCAG Checks** - -Automated validation of 30+ criteria including: -- βœ… Document structure and tagging (1.3.1, 4.1.2) -- βœ… Text alternatives for images (1.1.1) -- βœ… Color contrast ratios (1.4.3) - AA/AAA levels -- βœ… Language declaration (3.1.1) -- βœ… Page titles (2.4.2) -- βœ… Link text quality (2.4.4) -- βœ… Form field labels (3.3.2) -- βœ… Reading order (1.3.2) -- βœ… Font embedding (1.4.4) -- βœ… Content readability (3.1.5) - -### 3. **Auto-Remediation** - -Automatically fixes common issues: -- Missing document title -- Missing author/subject metadata -- Language not set -- Document not marked as tagged -- Missing bookmarks - -**Usage:** -```bash -python3 pdf_remediation.py document.pdf --output fixed.pdf --all -``` - -### 4. 
**Visual Page Inspector** - -- Displays PDF pages as images -- Highlights issue locations with color-coded markers -- Zoom and pan functionality -- Click issues to see exact page location -- Severity-based color coding (Critical/Error/Warning/Info) - -### 5. **Smart Caching** - -- Caches all API responses by content hash -- Repeat checks of same document = $0 cost -- Similar images across documents = cached automatically -- Reduces typical document cost from $0.10 to $0.00 on re-check - ---- - -## πŸ“Š What Gets Checked - -### Fully Automated (75% of WCAG) - -| Check | WCAG Criterion | Description | -|-------|----------------|-------------| -| Document Structure | 1.3.1, 4.1.2 | PDF tagging and semantic structure | -| Metadata | 2.4.2, 3.1.1 | Title, language, author, subject | -| Text Extractability | - | Ensures text can be read by screen readers | -| Font Embedding | 1.4.4 | Fonts are embedded for consistent rendering | -| Color Contrast | 1.4.3 | WCAG AA/AAA compliance (4.5:1, 7:1 ratios) | -| Form Fields | 3.3.2 | Labels and descriptions present | -| Links | 2.4.4 | Descriptive link text (not "click here") | -| Reading Order | 1.3.2 | Logical content sequence | - -### AI-Assisted (20% of WCAG) - -| Check | WCAG Criterion | AI Model | Description | -|-------|----------------|----------|-------------| -| Alt Text Quality | 1.1.1 | Claude 3.5 | Validates meaningfulness of alt text | -| Text in Images | 1.4.5 | Claude + Google Vision | Detects text embedded in images | -| Color-Only Info | 1.4.1 | Claude 3.5 | Identifies information conveyed by color alone | -| Content Readability | 3.1.5 | TextBlob | Flesch scores, grade level analysis | -| Image Classification | 1.1.1 | Claude 3.5 | Decorative vs. 
informational | - -### Requires Manual Review (5% of WCAG) - -- ⚠️ Keyboard navigation and tab order (2.1.1) -- ⚠️ Focus indicators (2.4.7) -- ⚠️ Actual screen reader testing -- ⚠️ Semantic structure quality -- ⚠️ Real user experience validation - ---- - -## πŸ’° Cost Structure - -### Per Document Estimate (10 pages, 5 images) - -| Service | Usage | Cost | -|---------|-------|------| -| Anthropic Claude | 5 images @ $0.015 | $0.075 | -| Google Cloud Vision | 5 images @ $0.0015 | $0.008 | -| Google Document AI (OCR) | 10 pages @ $0.0015 | $0.015 | -| **Total** | | **~$0.10** | - -### Monthly Costs by Volume - -- 100 documents/month = **$10** -- 500 documents/month = **$50** -- 1,000 documents/month = **$100** -- 5,000 documents/month = **$500** - -### ROI Comparison - -| Method | Cost/Document | Time | Coverage | -|--------|---------------|------|----------| -| **This Tool** | $0.10 | 2-5 min | 95% | -| Manual Review | $100 | 1-2 hours | 100% | -| Adobe Acrobat Pro | $20+ | 5-10 min | 90% | -| PAC (Free) | $0 | 3-5 min | 75% | - -**Break-even:** After 2-3 documents vs. manual review -**Time savings:** 96% reduction in review time - ---- - -## πŸ”§ Current Limitations - -### What This Tool CANNOT Do - -1. **Full Screen Reader Simulation** - Cannot replicate NVDA/JAWS behavior -2. **Keyboard Navigation Testing** - Cannot test actual tab order functionality -3. **Real User Testing** - Cannot replace human accessibility auditors -4. **PDF Creation** - Only validates, doesn't create accessible PDFs -5. **Complex Table Analysis** - Limited validation of table structure complexity -6. 
**Mathematical Content** - Cannot validate MathML or equation accessibility - -### Known Issues - -- **Large PDFs (>50MB)** - May timeout or require increased PHP limits -- **Scanned PDFs** - OCR quality depends on scan quality -- **Complex Layouts** - Multi-column layouts may have reading order issues -- **Non-English Content** - AI analysis optimized for English -- **Password-Protected PDFs** - Cannot analyze encrypted documents - ---- - -## πŸ“ˆ Accessibility Score Calculation - -``` -Starting Score: 100 points - -Deductions: -- Critical Issue: -25 points each -- Error: -10 points each -- Warning: -5 points each -- Info: -2 points each - -Minimum Score: 0 -``` - -### Score Interpretation - -| Score | Grade | Meaning | -|-------|-------|---------| -| 90-100 | A | Excellent - Minor improvements only | -| 80-89 | B | Good - Several issues to address | -| 70-79 | C | Fair - Significant barriers present | -| 60-69 | D | Poor - Major accessibility issues | -| 0-59 | F | Critical - Document largely inaccessible | - ---- - -## πŸ”Œ API Endpoints - -### Authentication - -**Development Mode:** Localhost requests (`http://localhost:8000`) do not require authentication. - -**Production Mode:** All API requests require authentication via API key. - -**Methods:** -```bash -# 1. X-API-Key header (recommended) -curl -H 'X-API-Key: your-api-key' http://your-server.com/api.php - -# 2. Authorization Bearer token -curl -H 'Authorization: Bearer your-api-key' http://your-server.com/api.php - -# 3. 
Query parameter (development only) -curl 'http://localhost:8000/api.php?api_key=dev_key_12345' -``` - -**Generate API Key:** -```bash -curl 'http://localhost:8000/auth.php?generate' -# Returns: b85091698668907e360223e68868fa0a26dd48a2e3500a4eb48200bad63012c6 -``` - -**Default Dev Key:** `dev_key_12345` - ---- - -### Upload PDF -```http -POST /api.php?action=upload -Content-Type: multipart/form-data -X-API-Key: your-api-key - -Body: pdf (file) - -Response: -{ - "success": true, - "data": { - "job_id": "pdf_123456", - "filename": "document.pdf" - } -} -``` - -### Start Check -```http -POST /api.php?action=check -Content-Type: application/json - -Body: -{ - "job_id": "pdf_123456", - "quick_mode": false -} - -Response: -{ - "success": true, - "data": { - "job_id": "pdf_123456", - "status": "processing" - } -} -``` - -### Get Results -```http -GET /api.php?action=result&job_id=pdf_123456 - -Response: -{ - "success": true, - "data": { - "filename": "document.pdf", - "accessibility_score": 75, - "severity_counts": {...}, - "issues": [...] 
- } -} -``` - -### Auto-Remediate -```http -POST /api.php?action=remediate -Content-Type: application/json - -Body: {"job_id": "pdf_123456"} - -Response: -{ - "success": true, - "data": { - "remediated_pdf": "pdf_123456_remediated.pdf", - "fixes_applied": 5, - "download_url": "api.php?action=download&job_id=pdf_123456&type=remediated" - } -} -``` - ---- - -## πŸ§ͺ Testing - -### Test Files Included - -- `Test_files/sample_good.pdf` - Well-structured PDF with metadata -- `Test_files/sample_poor.pdf` - PDF with multiple accessibility issues - -### Quick Test - -```bash -# Activate virtual environment -source venv/bin/activate - -# Test the checker -python enterprise_pdf_checker.py Test_files/sample_poor.pdf --output test_result.json - -# View results -cat test_result.json | python -m json.tool - -# Test remediation -python pdf_remediation.py Test_files/sample_poor.pdf --all -``` - -### Running Automated Tests - -```bash -# Activate virtual environment -source venv/bin/activate - -# Run all tests -pytest tests/ -v - -# Run with coverage report -pytest tests/ --cov=. 
--cov-report=html - -# Run only unit tests (skip integration) -pytest tests/ -m "not integration" - -# View coverage report -open htmlcov/index.html -``` - -**Test Results:** -- βœ… 31 tests passing -- βœ… 34% code coverage -- βœ… Unit tests for checker and remediation -- βœ… Integration tests for API and authentication - ---- - -## 🏭 Production Features - -### Authentication & Security - -The application now includes production-ready security features: - -**API Authentication** ([auth.php](auth.php)) -- API key-based authentication for all endpoints -- Support for multiple authentication methods (Bearer token, X-API-Key header, query parameter) -- Development mode bypass for localhost testing -- API key generation utility - -**Configuration:** -```bash -# Generate production API key -curl 'http://localhost:8000/auth.php?generate' - -# Add to .api_keys file -echo "your-generated-key-here" >> .api_keys - -# Or set environment variable -export API_KEY="your-generated-key-here" -``` - -### Logging & Monitoring - -**Structured Logging** ([logger_config.py](logger_config.py)) -- Automatic log rotation (10MB max size, 5 backups) -- Multiple log levels (DEBUG, INFO, WARNING, ERROR, CRITICAL) -- Separate logs for different modules -- Logs stored in `logs/` directory - -**Log Files:** -- `logs/pdf_checker.log` - Main checker operations -- `logs/pdf_remediation.log` - Remediation operations -- `logs/retry_helper.log` - API retry events -- `logs/php_server.log` - Web server access logs - -### Error Resilience - -**Automatic Retry Logic** ([retry_helper.py](retry_helper.py)) -- Exponential backoff for API failures (1s β†’ 2s β†’ 4s delays) -- Configurable retry attempts (default: 3) -- Graceful degradation on persistent failures -- Applied to all AI API calls (Claude and Google Vision) - -**Benefits:** -- Handles transient network failures automatically -- Prevents job failures due to temporary API issues -- Improves overall system reliability - -### Testing & Quality 
Assurance - -**Automated Test Suite** ([tests/](tests/)) -- 31 unit and integration tests -- 34% code coverage of critical paths -- pytest configuration with coverage reporting -- Tests for checker, remediation, API, and authentication - -**Run Tests:** -```bash -source venv/bin/activate -pytest tests/ -v --cov=. --cov-report=html -open htmlcov/index.html -``` - -### veraPDF Integration - -**Enhanced PDF/UA Validation:** -```bash -# Validate PDF/UA-1 compliance -verapdf --defaultflavour ua1 document.pdf - -# The remediation module automatically uses veraPDF if installed -``` - ---- - -## πŸ“š Documentation - -The `README's/` folder contains **19 comprehensive guides** (140KB+ of documentation): - -### Essential Reading -1. **START_HERE.md** - Package overview and quick start -2. **QUICKSTART.md** - 5-minute setup guide -3. **ENTERPRISE_README.md** - Complete installation and usage -4. **ARCHITECTURE.md** - System design and technical details - -### Advanced Topics -5. **WCAG_LIMITATIONS.md** - What can't be automated -6. **INTEGRATION_GUIDE.md** - API integration strategies -7. **IMPLEMENTATION_ROADMAP.md** - Step-by-step coding guide -8. **API_QUICK_REFERENCE.md** - One-page cheat sheet -9. 
**MASTER_GUIDE.md** - Evolution and best practices - -### Specialized Guides -- MAMP_SETUP.md - Local server configuration -- PROGRESS_DISPLAY_GUIDE.md - Real-time progress implementation -- TECHNICAL_BACKGROUND.md - Deep dive into accessibility standards -- screen_reader_simulator_proposal.md - Future enhancement ideas - ---- - -## πŸ”’ Security Considerations - -### Current Implementation - -βœ… File type validation (PDF only) -βœ… File size limits (50MB default) -βœ… API keys in environment variables -βœ… Temporary file cleanup -βœ… CORS headers configured -βœ… Input sanitization in API -βœ… **API Authentication** - API key-based access control -βœ… **Development Mode** - Localhost bypass for local testing -βœ… **Structured Logging** - Audit trail for all operations -βœ… **Error Handling** - Retry logic for API failures - -### Production Recommendations - -- [ ] Enable HTTPS (required) -- [ ] Implement rate limiting (infrastructure ready in auth.php) -- [x] Add API authentication (βœ… Implemented) -- [ ] Set up malware scanning -- [ ] Configure file retention policies -- [x] Enable audit logging (βœ… Implemented with logger_config.py) -- [ ] Implement API key rotation -- [ ] Deploy to production server (Apache/Nginx + PHP-FPM) -- [ ] Configure production API keys (replace dev_key_12345) - ---- - -## 🎯 Use Cases - -### 1. **Content Publishing** -Check PDFs before publication to ensure accessibility compliance - -### 2. **Legal Compliance** -Validate documents meet Section 508, ADA, WCAG 2.1 requirements - -### 3. **Quality Assurance** -Integrate into CI/CD pipeline for automated accessibility testing - -### 4. **Batch Processing** -Audit large document libraries for accessibility issues - -### 5. 
**Remediation Workflow** -Identify issues β†’ Auto-fix simple problems β†’ Manual review complex cases - ---- - -## πŸ› οΈ Technology Stack - -### Backend -- **Python 3.8+** - Core processing engine -- **PHP 7.4+** - REST API and web server -- **Tesseract OCR** - Text extraction from images -- **Poppler** - PDF rendering and conversion - -### Python Libraries -- `pypdf` - PDF parsing and manipulation -- `pdfplumber` - Advanced PDF analysis -- `Pillow` - Image processing -- `numpy` - Numerical computations -- `textblob` - Natural language processing -- `anthropic` - Claude AI integration -- `google-cloud-vision` - Google Vision API -- `google-cloud-documentai` - Document AI - -### Frontend -- **Pure HTML5/CSS3/JavaScript** - No frameworks -- **Montserrat Font** - Professional typography -- **Responsive Design** - Mobile-friendly interface - ---- - -## πŸ“ž Support & Resources - -### Getting Help -1. Check the extensive documentation in `README's/` folder -2. Review troubleshooting section in ENTERPRISE_README.md -3. Test with sample PDFs in `Test_files/` -4. 
Verify API keys are properly configured - -### External Resources -- [WCAG 2.1 Guidelines](https://www.w3.org/WAI/WCAG21/quickref/) -- [Anthropic Claude API Docs](https://docs.anthropic.com/) -- [Google Cloud Vision Docs](https://cloud.google.com/vision/docs) -- [PDF/UA Standard](https://www.pdfa.org/resource/pdfua-in-a-nutshell/) - ---- - -## 🌟 What Makes This Special - -✨ **Quality-First Design** - Uses best-in-class AI models (Claude, Google) -✨ **Production-Ready** - Enterprise-grade code and architecture -✨ **Complete Package** - Nothing else to buy or build -✨ **Well-Documented** - 140KB+ of comprehensive guides -✨ **Cost-Optimized** - Smart caching reduces API costs -✨ **Three Interfaces** - Web, CLI, and REST API -✨ **Easy Integration** - Simple REST API for existing systems -✨ **Proven Technology** - Built on industry-standard libraries - ---- - -## πŸ“Š Current Status Summary - -| Aspect | Status | Notes | -|--------|--------|-------| -| **Core Functionality** | βœ… Complete | All checks implemented | -| **Web Interface** | βœ… Complete | Drag-drop, progress, results | -| **REST API** | βœ… Complete | All endpoints functional | -| **CLI** | βœ… Complete | Full command-line support | -| **AI Integration** | βœ… Complete | Claude + Google Vision | -| **Auto-Remediation** | βœ… Complete | Fixes metadata issues | -| **Visual Inspector** | βœ… Complete | Page-level issue visualization | -| **Documentation** | βœ… Extensive | 19 guides + requirements specs | -| **Testing** | βœ… Implemented | 31 automated tests, 34% coverage | -| **Authentication** | βœ… Implemented | API key-based, localhost dev mode | -| **Logging** | βœ… Implemented | Structured logs with rotation | -| **Error Handling** | βœ… Implemented | Retry logic with exponential backoff | -| **veraPDF** | βœ… Integrated | Enhanced PDF/UA validation | -| **Multi-tenancy** | ⚠️ Partial | Single deployment, multi-file | -| **Report History** | ❌ Not Implemented | No tracking over time | - ---- - -## πŸš€ 
Quick Start Checklist - -### First-Time Setup -- [ ] Install Python 3.8+ and PHP 8.0+ -- [ ] Install Tesseract, Poppler, and veraPDF: `brew install tesseract poppler php verapdf` -- [ ] Create virtual environment: `python3 -m venv venv` -- [ ] Activate venv: `source venv/bin/activate` -- [ ] Install dependencies: `pip install -r requirements.txt` -- [ ] Copy `.env.example` to `.env` -- [ ] Add Anthropic API key to `.env` -- [ ] (Optional) Add Google Cloud credentials for enhanced analysis - -### Every Session -- [ ] Activate venv: `source venv/bin/activate` -- [ ] Start server: `php -S localhost:8000` -- [ ] Open browser: `http://localhost:8000` -- [ ] Upload PDF and review accessibility report - -### Testing & Validation -- [ ] Run tests: `pytest tests/ -v` -- [ ] Check logs: `tail -f logs/pdf_checker.log` -- [ ] Generate API key: `curl 'http://localhost:8000/auth.php?generate'` -- [ ] Test veraPDF: `verapdf --defaultflavour ua1 Test_files/sample_good.pdf` - -**Estimated setup time: 15 minutes (first time), 30 seconds (subsequent sessions)** - ---- - -**Built with ❀️ for web accessibility. Making the internet accessible for everyone.** +See `docker-compose.prod.yml` for production setup with Caddy auto-SSL. diff --git a/api.php b/api.php deleted file mode 100644 index b0c9353..0000000 --- a/api.php +++ /dev/null @@ -1,1528 +0,0 @@ - $payload['oid'] ?? null, - 'name' => $payload['name'] ?? ($payload['unique_name'] ?? ($payload['upn'] ?? null)), - 'email'=> $payload['email'] ?? ($payload['upn'] ?? null), - ]; -} -define('CLOUD_RUN_TIMEOUT', 900); // 15 minutes -define('GCP_SA_KEY_PATH', getenv('GCP_SA_KEY_PATH') ?: __DIR__ . '/pdf-api-invoker-key.json'); -define('RATE_LIMIT_DIR', __DIR__ . 
'/rate_limits'); - -// Database configuration -define('DB_HOST', getenv('DB_HOST') ?: 'localhost'); -define('DB_PORT', intval(getenv('DB_PORT') ?: 5432)); -define('DB_NAME', getenv('DB_NAME') ?: 'pdf_checker'); -define('DB_USER', getenv('DB_USER') ?: 'pdf_checker'); -define('DB_PASSWORD', getenv('DB_PASSWORD') ?: 'dev_password'); - -// Create directories if they don't exist -if (!is_dir(UPLOAD_DIR)) mkdir(UPLOAD_DIR, 0755, true); -if (!is_dir(RESULTS_DIR)) mkdir(RESULTS_DIR, 0755, true); -if (!is_dir(RATE_LIMIT_DIR)) mkdir(RATE_LIMIT_DIR, 0755, true); - -/** - * Check rate limit via filesystem. Returns true if allowed. - * Stores timestamps in JSON files per IP+action. - */ -function checkRateLimit($action, $limit, $window) { - $ip = $_SERVER['REMOTE_ADDR'] ?? 'unknown'; - $key = preg_replace('/[^a-zA-Z0-9_-]/', '_', $ip . '_' . $action); - $file = RATE_LIMIT_DIR . '/' . $key . '.json'; - - $now = time(); - $timestamps = []; - - if (file_exists($file)) { - $data = json_decode(file_get_contents($file), true); - if (is_array($data)) { - // Filter to only timestamps within the window - $timestamps = array_filter($data, function($ts) use ($now, $window) { - return ($now - $ts) < $window; - }); - } - } - - if (count($timestamps) >= $limit) { - return false; - } - - $timestamps[] = $now; - file_put_contents($file, json_encode(array_values($timestamps))); - return true; -} - -/** - * Sanitize job ID to prevent path traversal attacks - */ -function sanitizeJobId($job_id) { - if (!preg_match('/^pdf_[a-f0-9]+$/', $job_id)) { - error('Invalid job ID format'); - } - return $job_id; -} - -/** - * Get an OIDC identity token for authenticating to Cloud Run. - * Uses a GCP service account key to create a self-signed JWT, - * then exchanges it for an identity token via Google's OAuth endpoint. 
- */ -function getCloudRunToken() { - static $cachedToken = null; - static $cachedExpiry = 0; - - // Return cached token if still valid (with 5-min buffer) - if ($cachedToken && time() < ($cachedExpiry - 300)) { - return $cachedToken; - } - - $keyPath = GCP_SA_KEY_PATH; - if (!file_exists($keyPath)) { - throw new Exception("GCP service account key not found: $keyPath"); - } - - $sa = json_decode(file_get_contents($keyPath), true); - if (!$sa || !isset($sa['client_email']) || !isset($sa['private_key'])) { - throw new Exception("Invalid service account key file"); - } - - $now = time(); - $expiry = $now + 3600; - - // Build JWT header and claims - $header = base64url_encode(json_encode(['alg' => 'RS256', 'typ' => 'JWT'])); - $claims = base64url_encode(json_encode([ - 'iss' => $sa['client_email'], - 'sub' => $sa['client_email'], - 'aud' => 'https://oauth2.googleapis.com/token', - 'iat' => $now, - 'exp' => $expiry, - 'target_audience' => CLOUD_RUN_URL, - ])); - - // Sign with RSA-SHA256 - $signingInput = "$header.$claims"; - $signature = ''; - $privateKey = openssl_pkey_get_private($sa['private_key']); - if (!$privateKey) { - throw new Exception("Failed to parse service account private key"); - } - openssl_sign($signingInput, $signature, $privateKey, OPENSSL_ALGO_SHA256); - $jwt = $signingInput . '.' . 
base64url_encode($signature); - - // Exchange JWT for identity token - $ch = curl_init('https://oauth2.googleapis.com/token'); - curl_setopt_array($ch, [ - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => http_build_query([ - 'grant_type' => 'urn:ietf:params:oauth:grant-type:jwt-bearer', - 'assertion' => $jwt, - ]), - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TIMEOUT => 10, - ]); - $response = curl_exec($ch); - $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); - - if ($httpCode !== 200) { - throw new Exception("Failed to get identity token: HTTP $httpCode - $response"); - } - - $tokenData = json_decode($response, true); - if (!isset($tokenData['id_token'])) { - throw new Exception("No id_token in response: $response"); - } - - $cachedToken = $tokenData['id_token']; - $cachedExpiry = $expiry; - - return $cachedToken; -} - -/** - * Base64url encode (no padding, URL-safe) - */ -function base64url_encode($data) { - return rtrim(strtr(base64_encode($data), '+/', '-_'), '='); -} - -/** - * Get PostgreSQL PDO connection (lazy singleton) - */ -function getDB() { - static $pdo = null; - if ($pdo === null) { - $dsn = sprintf('pgsql:host=%s;port=%d;dbname=%s', DB_HOST, DB_PORT, DB_NAME); - $pdo = new PDO($dsn, DB_USER, DB_PASSWORD, [ - PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION, - ]); - } - return $pdo; -} - -/** - * Insert or update a job record in PostgreSQL - */ -function updateJobInDatabase($job_id, $filename, $status, $results = null) { - try { - $pdo = getDB(); - - $score = null; - $grade = null; - $total_issues = null; - $critical_count = null; - $error_count = null; - $warning_count = null; - $result_json = null; - $processing_time = null; - - if ($results) { - $score = $results['accessibility_score'] ?? null; - $grade = $results['grade'] ?? null; - $issues = $results['issues'] ?? []; - $total_issues = count($issues); - $critical_count = count(array_filter($issues, fn($i) => ($i['severity'] ?? 
'') === 'CRITICAL')); - $error_count = count(array_filter($issues, fn($i) => ($i['severity'] ?? '') === 'ERROR')); - $warning_count = count(array_filter($issues, fn($i) => ($i['severity'] ?? '') === 'WARNING')); - $result_json = json_encode($results); - $processing_time = $results['stats']['processing_time'] ?? null; - } - - $sql = "INSERT INTO jobs (job_id, filename, status, score, grade, total_issues, - critical_count, error_count, warning_count, result_json, processing_time, - completed_at) - VALUES (:job_id, :filename, :status, :score, :grade, :total_issues, - :critical_count, :error_count, :warning_count, :result_json::jsonb, :processing_time, - CASE WHEN :status2 = 'completed' THEN NOW() ELSE NULL END) - ON CONFLICT (job_id) DO UPDATE SET - status = EXCLUDED.status, - score = COALESCE(EXCLUDED.score, jobs.score), - grade = COALESCE(EXCLUDED.grade, jobs.grade), - total_issues = COALESCE(EXCLUDED.total_issues, jobs.total_issues), - critical_count = COALESCE(EXCLUDED.critical_count, jobs.critical_count), - error_count = COALESCE(EXCLUDED.error_count, jobs.error_count), - warning_count = COALESCE(EXCLUDED.warning_count, jobs.warning_count), - result_json = COALESCE(EXCLUDED.result_json, jobs.result_json), - processing_time = COALESCE(EXCLUDED.processing_time, jobs.processing_time), - completed_at = CASE WHEN EXCLUDED.status = 'completed' THEN NOW() ELSE jobs.completed_at END"; - - $stmt = $pdo->prepare($sql); - $stmt->execute([ - ':job_id' => $job_id, - ':filename' => $filename, - ':status' => $status, - ':score' => $score, - ':grade' => $grade, - ':total_issues' => $total_issues, - ':critical_count' => $critical_count, - ':error_count' => $error_count, - ':warning_count' => $warning_count, - ':result_json' => $result_json, - ':processing_time' => $processing_time, - ':status2' => $status, - ]); - } catch (Exception $e) { - error_log("DB update failed for $job_id: " . 
$e->getMessage()); - } -} - -// CORS headers for API -$allowed_origins = [ - 'https://ai-sandbox.oliver.solutions', - 'http://localhost:8888', - 'http://127.0.0.1:8888', - 'http://localhost:8000', - 'http://127.0.0.1:8000', -]; -$origin = $_SERVER['HTTP_ORIGIN'] ?? ''; -if (in_array($origin, $allowed_origins) || (function_exists('isDevelopmentMode') && isDevelopmentMode())) { - header('Access-Control-Allow-Origin: ' . ($origin ?: '*')); -} else if ($origin) { - header('Access-Control-Allow-Origin: null'); -} else { - header('Access-Control-Allow-Origin: ' . ($allowed_origins[0])); -} -header('Access-Control-Allow-Methods: POST, GET, OPTIONS, DELETE'); -header('Access-Control-Allow-Headers: Content-Type, X-API-Key, Authorization'); -header('Content-Type: application/json'); - -// Handle preflight -if ($_SERVER['REQUEST_METHOD'] === 'OPTIONS') { - exit(0); -} - -// Require authentication for all API requests -require_once __DIR__ . '/auth.php'; -requireAuth(); - -// Get action -$action = $_GET['action'] ?? $_POST['action'] ?? 
''; - -switch ($action) { - case 'upload': - handleUpload(); - break; - case 'check': - handleCheck(); - break; - case 'status': - handleStatus(); - break; - case 'result': - handleResult(); - break; - case 'list': - handleList(); - break; - case 'delete': - handleDelete(); - break; - case 'debug': - handleDebug(); - break; - case 'image': - handleImage(); - break; - case 'remediate': - handleRemediate(); - break; - case 'download': - handleDownload(); - break; - case 'stats': - handleStats(); - break; - case 'batch_upload': - handleBatchUpload(); - break; - case 'batch_status': - handleBatchStatus(); - break; - case 'export': - handleExport(); - break; - case 'save_adjusted_result': - handleSaveAdjustedResult(); - break; - case 'dismiss': - handleDismiss(); - break; - case 'undismiss': - handleUndismiss(); - break; - case 'override_check': - handleOverrideCheck(); - break; - case 'unoverride_check': - handleUnoverrideCheck(); - break; - default: - error('Invalid action'); -} - -/** - * Handle file upload - */ -function handleUpload() { - // Rate limit: 10 uploads/hour per IP - if (!checkRateLimit('upload', 10, 3600)) { - http_response_code(429); - echo json_encode(['success' => false, 'error' => 'Upload rate limit exceeded. Try again later.']); - exit; - } - - if (!isset($_FILES['pdf'])) { - error('No file uploaded'); - } - - $file = $_FILES['pdf']; - - // Validate file - if ($file['error'] !== UPLOAD_ERR_OK) { - error('Upload error: ' . $file['error']); - } - - if ($file['size'] > MAX_FILE_SIZE) { - error('File too large. Max size: ' . (MAX_FILE_SIZE / 1024 / 1024) . 'MB'); - } - - $ext = strtolower(pathinfo($file['name'], PATHINFO_EXTENSION)); - if (!in_array($ext, ALLOWED_EXTENSIONS)) { - error('Invalid file type. 
Only PDF files allowed.'); - } - - // Validate PDF magic bytes - $header = file_get_contents($file['tmp_name'], false, null, 0, 5); - if ($header !== '%PDF-') { - error('File is not a valid PDF (invalid file header)'); - } - - // Generate cryptographically secure job ID - $job_id = 'pdf_' . bin2hex(random_bytes(16)); - $filename = $job_id . '.pdf'; - $filepath = UPLOAD_DIR . '/' . $filename; - - // Move file - if (!move_uploaded_file($file['tmp_name'], $filepath)) { - error('Failed to save file'); - } - - // Attach authenticated user to this job - $user = extractUserFromToken(); - - // Create job metadata - $job_data = [ - 'job_id' => $job_id, - 'original_filename' => $file['name'], - 'uploaded_at' => date('Y-m-d H:i:s'), - 'file_size' => $file['size'], - 'status' => 'uploaded', - 'filepath' => $filepath, - 'user_id' => $user['oid'] ?? null, - 'user_name' => $user['name'] ?? null, - 'user_email'=> $user['email'] ?? null, - ]; - - file_put_contents( - RESULTS_DIR . '/' . $job_id . '.meta.json', - json_encode($job_data, JSON_PRETTY_PRINT) - ); - - success([ - 'job_id' => $job_id, - 'filename' => $file['name'], - 'message' => 'File uploaded successfully' - ]); -} - -/** - * Handle PDF accessibility check β€” send PDF to Cloud Run synchronously - */ -function handleCheck() { - set_time_limit(900); // Allow up to 15 minutes - - $job_id = $_POST['job_id'] ?? ''; - - if (empty($job_id)) { - error('Job ID required'); - } - $job_id = sanitizeJobId($job_id); - - // Rate limit: 30 checks/hour per IP - if (!checkRateLimit('check', 30, 3600)) { - http_response_code(429); - echo json_encode(['success' => false, 'error' => 'Rate limit exceeded. Try again later.']); - exit; - } - - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - - if (!file_exists($meta_file)) { - error('Job not found'); - } - - $job_data = json_decode(file_get_contents($meta_file), true); - $quick_mode = $_POST['quick_mode'] ?? 
false; - - // Update meta to processing - $job_data['status'] = 'processing'; - $job_data['started_at'] = date('Y-m-d H:i:s'); - file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT)); - - // If Cloud Run URL is configured, send to Cloud Run - if (!empty(CLOUD_RUN_URL)) { - try { - $token = getCloudRunToken(); - $pdf_path = $job_data['filepath']; - - if (!file_exists($pdf_path)) { - error('PDF file not found on server'); - } - - // Build multipart POST to Cloud Run - $ch = curl_init(CLOUD_RUN_URL . '/check'); - $postFields = [ - 'pdf' => new CURLFile($pdf_path, 'application/pdf', basename($pdf_path)), - 'job_id' => $job_id, - 'quick_mode' => $quick_mode ? 'true' : 'false', - 'original_filename' => $job_data['original_filename'] ?? '', - ]; - - curl_setopt_array($ch, [ - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => $postFields, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TIMEOUT => CLOUD_RUN_TIMEOUT, - CURLOPT_HTTPHEADER => [ - 'Authorization: Bearer ' . $token, - ], - ]); - - $response = curl_exec($ch); - $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - $curlError = curl_error($ch); - curl_close($ch); - - if ($curlError) { - throw new Exception("Cloud Run request failed: $curlError"); - } - - if ($httpCode !== 200) { - $errorBody = json_decode($response, true); - $errorMsg = $errorBody['error'] ?? "HTTP $httpCode"; - throw new Exception("Cloud Run returned error: $errorMsg"); - } - - $result = json_decode($response, true); - if (!$result || !isset($result['success'])) { - throw new Exception("Invalid response from Cloud Run"); - } - - if (!$result['success']) { - throw new Exception($result['error'] ?? 'Unknown Cloud Run error'); - } - - $checkResult = $result['data']; - - // Write result JSON to disk - $result_file = RESULTS_DIR . '/' . $job_id . 
'.result.json'; - file_put_contents($result_file, json_encode($checkResult, JSON_PRETTY_PRINT)); - - // Update meta - $job_data['status'] = 'completed'; - $job_data['completed_at'] = date('Y-m-d H:i:s'); - file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT)); - - // Update PostgreSQL - updateJobInDatabase($job_id, $job_data['original_filename'] ?? '', 'completed', $checkResult); - - success([ - 'job_id' => $job_id, - 'status' => 'completed', - 'message' => 'Check completed' - ]); - - } catch (Exception $e) { - // Mark as failed - $job_data['status'] = 'failed'; - $job_data['error'] = $e->getMessage(); - file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT)); - - updateJobInDatabase($job_id, $job_data['original_filename'] ?? '', 'failed'); - - error('Processing failed: ' . $e->getMessage()); - } - } else { - // Fallback to local exec (development without Cloud Run) - $pdf_path = $job_data['filepath']; - $output_path = RESULTS_DIR . '/' . $job_id . '.result.json'; - $venv_python = __DIR__ . '/venv/bin/python3'; - $python_bin = file_exists($venv_python) ? $venv_python : 'python3'; - - $cmd = escapeshellcmd($python_bin . ' ' . PYTHON_SCRIPT) . ' ' . - escapeshellarg($pdf_path) . ' ' . - '--output ' . escapeshellarg($output_path); - - if ($quick_mode) { - $cmd .= ' --quick'; - } - - $anthropic_key = $_POST['anthropic_key'] ?? getenv('ANTHROPIC_API_KEY'); - $google_key = $_POST['google_key'] ?? $_POST['google_credentials'] ?? getenv('GOOGLE_API_KEY'); - - if ($anthropic_key) { - $cmd .= ' --anthropic-key ' . escapeshellarg($anthropic_key); - } - if ($google_key) { - if (file_exists($google_key)) { - $cmd .= ' --google-credentials ' . escapeshellarg($google_key); - } else { - $cmd .= ' --google-key ' . escapeshellarg($google_key); - } - } - - $env_path = getenv('PATH'); - putenv("PATH=/opt/homebrew/bin:/usr/local/bin:{$env_path}"); - - $error_log = RESULTS_DIR . '/' . $job_id . '.error.log'; - $cmd .= ' > ' . 
escapeshellarg($error_log) . ' 2>&1 &'; - exec($cmd, $output, $return_code); - - success([ - 'job_id' => $job_id, - 'status' => 'processing', - 'message' => 'Check started (local mode)' - ]); - } -} - -/** - * Check job status β€” pure file-based - */ -function handleStatus() { - $job_id = $_GET['job_id'] ?? ''; - - if (empty($job_id)) { - error('Job ID required'); - } - $job_id = sanitizeJobId($job_id); - - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - $result_file = RESULTS_DIR . '/' . $job_id . '.result.json'; - $error_log = RESULTS_DIR . '/' . $job_id . '.error.log'; - - if (!file_exists($meta_file)) { - error('Job not found'); - } - - $job_data = json_decode(file_get_contents($meta_file), true); - - // Check if result file exists (definitive completion signal) - if (file_exists($result_file)) { - $job_data['status'] = 'completed'; - $job_data['completed_at'] = $job_data['completed_at'] ?? date('Y-m-d H:i:s', filemtime($result_file)); - } else if (file_exists($error_log) && in_array($job_data['status'], ['processing', 'queued'])) { - $error_content = file_get_contents($error_log); - if (!empty($error_content)) { - $started = strtotime($job_data['started_at'] ?? 'now'); - if (time() - $started > 900) { - $job_data['status'] = 'failed'; - $job_data['error'] = 'Process timeout or error'; - $job_data['error_log'] = substr($error_content, -1000); - } - } - } - - $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json'; - $job_data['dismissed_indices'] = file_exists($dismiss_file) - ? array_map('intval', array_keys(json_decode(file_get_contents($dismiss_file), true) ?: [])) - : []; - - success($job_data); -} - -/** - * Get check results - */ -function handleResult() { - $job_id = $_GET['job_id'] ?? ''; - - if (empty($job_id)) { - error('Job ID required'); - } - $job_id = sanitizeJobId($job_id); - - $result_file = RESULTS_DIR . '/' . $job_id . '.result.json'; - - if (!file_exists($result_file)) { - error('Results not found. 
Check may still be processing.'); - } - - $result = json_decode(file_get_contents($result_file), true); - - // If an adjusted result exists, overlay only the score/wcag fields so the - // frontend can display the adjusted score on reload while keeping the original - // severity_counts and score_breakdown as the recalculation baseline. - $adjusted_file = RESULTS_DIR . '/' . $job_id . '.adjusted.json'; - if (file_exists($adjusted_file)) { - $adjusted = json_decode(file_get_contents($adjusted_file), true); - $result['accessibility_score'] = $adjusted['accessibility_score'] ?? $result['accessibility_score']; - $result['grade'] = $adjusted['grade'] ?? $result['grade']; - $result['wcag_compliance'] = $adjusted['wcag_compliance'] ?? $result['wcag_compliance']; - $result['score_breakdown']['adjusted'] = true; - } - - // Inject dismissed indices so frontend can restore dismiss state on reload - $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json'; - $result['dismissed_indices'] = file_exists($dismiss_file) - ? array_map('intval', array_keys(json_decode(file_get_contents($dismiss_file), true) ?: [])) - : []; - - // Inject overridden check names so frontend can restore override state on reload - $override_file = RESULTS_DIR . '/' . $job_id . '.overrides.json'; - $result['overridden_checks'] = file_exists($override_file) - ? array_keys(json_decode(file_get_contents($override_file), true) ?: []) - : []; - - success($result); -} - -/** - * List all jobs - */ -function handleList() { - $user = extractUserFromToken(); - $current_user_id = $user['oid'] ?? null; - - $jobs = []; - $files = glob(RESULTS_DIR . '/*.meta.json'); - - foreach ($files as $file) { - $job_data = json_decode(file_get_contents($file), true); - - // User isolation: - // - Authenticated user: show their own jobs + legacy jobs (no user_id) - // - Unauthenticated (dev mode): show only legacy jobs (no user_id) - $job_user_id = $job_data['user_id'] ?? 
null; - if ($current_user_id !== null) { - // Skip jobs that belong to a DIFFERENT authenticated user - if ($job_user_id !== null && $job_user_id !== $current_user_id) continue; - } else { - // Unauthenticated β€” skip user-owned jobs - if ($job_user_id !== null) continue; - } - - // Enrich with result summary β€” prefer adjusted result if available - $result_file = str_replace('.meta.json', '.result.json', $file); - $adjusted_file = str_replace('.meta.json', '.adjusted.json', $file); - $source_file = file_exists($adjusted_file) ? $adjusted_file : $result_file; - if (file_exists($source_file)) { - $job_data['status'] = 'completed'; - $result = json_decode(file_get_contents($source_file), true); - $job_data['score'] = $result['accessibility_score'] ?? ($result['score'] ?? null); - $job_data['grade'] = $result['grade'] ?? null; - $job_data['total_issues'] = $result['total_issues'] ?? null; - $job_data['critical_count'] = $result['severity_counts']['critical'] ?? 0; - $job_data['error_count'] = $result['severity_counts']['error'] ?? 0; - $job_data['score_adjusted'] = file_exists($adjusted_file); - } - - $jobs[] = $job_data; - } - - // Sort by upload time (newest first) - usort($jobs, function($a, $b) { - return strtotime($b['uploaded_at']) - strtotime($a['uploaded_at']); - }); - - success(['jobs' => $jobs]); -} - -/** - * Delete a job - */ -function handleDelete() { - $job_id = $_POST['job_id'] ?? $_GET['job_id'] ?? ''; - - if (empty($job_id)) { - error('Job ID required'); - } - $job_id = sanitizeJobId($job_id); - - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - - if (!file_exists($meta_file)) { - error('Job not found'); - } - - $job_data = json_decode(file_get_contents($meta_file), true); - - // Delete all files associated with this job - @unlink($job_data['filepath'] ?? ''); - @unlink($meta_file); - @unlink(RESULTS_DIR . '/' . $job_id . '.result.json'); - @unlink(RESULTS_DIR . '/' . $job_id . '.dismissed.json'); - @unlink(RESULTS_DIR . '/' . $job_id . 
'.overrides.json'); - @unlink(RESULTS_DIR . '/' . $job_id . '.error.log'); - - success(['message' => 'Job deleted']); -} - -/** - * Debug endpoint - */ -function handleDebug() { - // Debug endpoint only available in development mode - require_once __DIR__ . '/auth.php'; - if (!isDevelopmentMode()) { - error('Debug endpoint disabled in production'); - } - - $job_id = $_GET['job_id'] ?? ''; - - if (empty($job_id)) { - error('Job ID required'); - } - $job_id = sanitizeJobId($job_id); - - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - $result_file = RESULTS_DIR . '/' . $job_id . '.result.json'; - $error_log = RESULTS_DIR . '/' . $job_id . '.error.log'; - - $debug_info = [ - 'job_id' => $job_id, - 'meta_exists' => file_exists($meta_file), - 'result_exists' => file_exists($result_file), - 'error_log_exists' => file_exists($error_log), - 'cloud_run_url' => CLOUD_RUN_URL ?: '(not configured β€” local mode)', - 'files' => [] - ]; - - if (file_exists($meta_file)) { - $debug_info['meta'] = json_decode(file_get_contents($meta_file), true); - } - - if (file_exists($error_log)) { - $debug_info['error_log'] = file_get_contents($error_log); - } - - if (file_exists($result_file)) { - $debug_info['result_size'] = filesize($result_file); - } - - // Test Python - $venv_python = __DIR__ . '/venv/bin/python3'; - exec($venv_python . ' --version 2>&1', $python_version); - $debug_info['python_version'] = implode("\n", $python_version); - - success($debug_info); -} - -/** - * Serve page images β€” redirect to GCS URL or serve local file - */ -function handleImage() { - $job_id = $_GET['job_id'] ?? ''; - $page_num = $_GET['page'] ?? ''; - - if (empty($job_id) || empty($page_num)) { - error('Job ID and page number required'); - } - $job_id = sanitizeJobId($job_id); - $page_num = intval($page_num); - - // Check result JSON for GCS URLs - $result_file = RESULTS_DIR . '/' . $job_id . 
'.result.json'; - if (file_exists($result_file)) { - $result = json_decode(file_get_contents($result_file), true); - $page_images = $result['page_images'] ?? []; - - // Check if the page image value is a URL (GCS) - $image_value = $page_images[$page_num] ?? $page_images[strval($page_num)] ?? null; - if ($image_value && (strpos($image_value, 'http://') === 0 || strpos($image_value, 'https://') === 0)) { - // Redirect to GCS URL - header('HTTP/1.1 302 Found'); - header('Location: ' . $image_value); - header('Cache-Control: public, max-age=86400'); - exit; - } - } - - // Fallback: serve local image file - $images_dir = RESULTS_DIR . '/' . $job_id . '.result_images'; - $image_file = $images_dir . '/page_' . $page_num . '.png'; - - if (!file_exists($image_file)) { - http_response_code(404); - header('Content-Type: application/json'); - echo json_encode(['success' => false, 'error' => 'Image not found']); - exit; - } - - // Serve the image - header('Content-Type: image/png'); - header('Cache-Control: public, max-age=86400'); // Cache for 1 day - readfile($image_file); - exit; -} - -/** - * Auto-remediate PDF accessibility issues - */ -function handleRemediate() { - $job_id = $_POST['job_id'] ?? ''; - - if (empty($job_id)) { - error('Job ID required'); - } - $job_id = sanitizeJobId($job_id); - - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - $result_file = RESULTS_DIR . '/' . $job_id . '.result.json'; - - if (!file_exists($meta_file) || !file_exists($result_file)) { - error('Job not found'); - } - - $job_data = json_decode(file_get_contents($meta_file), true); - $result_data = json_decode(file_get_contents($result_file), true); - - // Check if there are fixable issues - if (!isset($result_data['auto_fixable_count']) || $result_data['auto_fixable_count'] == 0) { - error('No auto-fixable issues found'); - } - - $original_pdf = $job_data['filepath']; - $remediated_pdf = UPLOAD_DIR . '/' . $job_id . 
'_remediated.pdf'; - - // Use absolute venv path - $venv_python = __DIR__ . '/venv/bin/python3'; - $python_bin = file_exists($venv_python) ? $venv_python : 'python3'; - $remediation_script = __DIR__ . '/pdf_remediation.py'; - - // Build command - apply all safe fixes - $cmd = escapeshellcmd($python_bin . ' ' . $remediation_script) . ' ' . - escapeshellarg($original_pdf) . ' ' . - '--output ' . escapeshellarg($remediated_pdf) . ' ' . - '--all'; - - // Set PATH for poppler - $env_path = getenv('PATH'); - $poppler_paths = '/opt/homebrew/bin:/usr/local/bin'; - putenv("PATH={$poppler_paths}:{$env_path}"); - - // Run remediation - $error_log = RESULTS_DIR . '/' . $job_id . '.remediation.log'; - $cmd .= ' > ' . escapeshellarg($error_log) . ' 2>&1'; - - exec($cmd, $output, $return_code); - - // Check if remediation succeeded - if ($return_code !== 0 || !file_exists($remediated_pdf)) { - $log_content = file_exists($error_log) ? file_get_contents($error_log) : 'Unknown error'; - $truncated = strlen($log_content) > 2000 ? '...' . substr($log_content, -2000) : $log_content; - error('Remediation failed: ' . $truncated); - } - - // Store remediated file info - $job_data['remediated_pdf'] = $remediated_pdf; - $job_data['remediated_at'] = date('Y-m-d H:i:s'); - file_put_contents($meta_file, json_encode($job_data, JSON_PRETTY_PRINT)); - - success([ - 'job_id' => $job_id, - 'remediated_pdf' => basename($remediated_pdf), - 'original_filename' => $job_data['original_filename'], - 'fixes_applied' => $result_data['auto_fixable_count'], - 'download_url' => 'api.php?action=download&job_id=' . $job_id . '&type=remediated', - 'message' => 'PDF remediated successfully' - ]); -} - -/** - * Download original or remediated PDF - */ -function handleDownload() { - $job_id = $_GET['job_id'] ?? ''; - $type = $_GET['type'] ?? 'original'; // 'original' or 'remediated' - - if (empty($job_id)) { - error('Job ID required'); - } - $job_id = sanitizeJobId($job_id); - - $meta_file = RESULTS_DIR . '/' . 
$job_id . '.meta.json'; - - if (!file_exists($meta_file)) { - error('Job not found'); - } - - $job_data = json_decode(file_get_contents($meta_file), true); - - if ($type === 'remediated') { - if (!isset($job_data['remediated_pdf']) || !file_exists($job_data['remediated_pdf'])) { - error('Remediated PDF not found'); - } - $file_path = $job_data['remediated_pdf']; - $filename = pathinfo($job_data['original_filename'], PATHINFO_FILENAME) . '_fixed.pdf'; - } else { - $file_path = $job_data['filepath']; - $filename = $job_data['original_filename']; - } - - // Serve the file - header('Content-Type: application/pdf'); - header('Content-Disposition: attachment; filename="' . $filename . '"'); - header('Content-Length: ' . filesize($file_path)); - readfile($file_path); - exit; -} - -/** - * Get aggregate job statistics - */ -function handleStats() { - $stats = [ - 'total_jobs' => 0, - 'completed' => 0, - 'failed' => 0, - 'processing' => 0, - ]; - - // Count jobs from meta files - $files = glob(RESULTS_DIR . '/*.meta.json'); - foreach ($files as $file) { - $job = json_decode(file_get_contents($file), true); - $stats['total_jobs']++; - $result_file = str_replace('.meta.json', '.result.json', $file); - if (file_exists($result_file)) { - $stats['completed']++; - } else if (($job['status'] ?? '') === 'failed') { - $stats['failed']++; - } else { - $stats['processing']++; - } - } - - success($stats); -} - -/** - * Handle batch file upload β€” accepts multiple PDFs - */ -function handleBatchUpload() { - if (!checkRateLimit('upload', 10, 3600)) { - http_response_code(429); - echo json_encode(['success' => false, 'error' => 'Upload rate limit exceeded.']); - exit; - } - - if (!isset($_FILES['pdfs']) || !is_array($_FILES['pdfs']['name'])) { - error('No files uploaded. Use "pdfs[]" as the file field name.'); - } - - $batch_id = 'batch_' . 
bin2hex(random_bytes(8)); - $file_count = count($_FILES['pdfs']['name']); - $uploaded = []; - $errors = []; - - for ($i = 0; $i < $file_count; $i++) { - $name = $_FILES['pdfs']['name'][$i]; - $tmp = $_FILES['pdfs']['tmp_name'][$i]; - $size = $_FILES['pdfs']['size'][$i]; - $err = $_FILES['pdfs']['error'][$i]; - - if ($err !== UPLOAD_ERR_OK) { - $errors[] = ['filename' => $name, 'error' => "Upload error code: $err"]; - continue; - } - if ($size > MAX_FILE_SIZE) { - $errors[] = ['filename' => $name, 'error' => 'File too large']; - continue; - } - $ext = strtolower(pathinfo($name, PATHINFO_EXTENSION)); - if (!in_array($ext, ALLOWED_EXTENSIONS)) { - $errors[] = ['filename' => $name, 'error' => 'Not a PDF file']; - continue; - } - $header = file_get_contents($tmp, false, null, 0, 5); - if ($header !== '%PDF-') { - $errors[] = ['filename' => $name, 'error' => 'Invalid PDF header']; - continue; - } - - $job_id = 'pdf_' . bin2hex(random_bytes(16)); - $filename = $job_id . '.pdf'; - $filepath = UPLOAD_DIR . '/' . $filename; - - if (!move_uploaded_file($tmp, $filepath)) { - $errors[] = ['filename' => $name, 'error' => 'Failed to save']; - continue; - } - - $job_data = [ - 'job_id' => $job_id, - 'batch_id' => $batch_id, - 'original_filename' => $name, - 'uploaded_at' => date('Y-m-d H:i:s'), - 'file_size' => $size, - 'status' => 'uploaded', - 'filepath' => $filepath - ]; - file_put_contents( - RESULTS_DIR . '/' . $job_id . '.meta.json', - json_encode($job_data, JSON_PRETTY_PRINT) - ); - - $uploaded[] = ['job_id' => $job_id, 'filename' => $name]; - } - - // Save batch manifest - $batch_data = [ - 'batch_id' => $batch_id, - 'created_at' => date('Y-m-d H:i:s'), - 'total_files' => $file_count, - 'jobs' => array_column($uploaded, 'job_id'), - ]; - file_put_contents( - RESULTS_DIR . '/' . $batch_id . 
'.batch.json', - json_encode($batch_data, JSON_PRETTY_PRINT) - ); - - success([ - 'batch_id' => $batch_id, - 'uploaded' => $uploaded, - 'errors' => $errors, - 'message' => count($uploaded) . ' of ' . $file_count . ' files uploaded' - ]); -} - -/** - * Get status of a batch job - */ -function handleBatchStatus() { - $batch_id = $_GET['batch_id'] ?? ''; - if (empty($batch_id) || !preg_match('/^batch_[a-f0-9]+$/', $batch_id)) { - error('Invalid batch ID'); - } - - $batch_file = RESULTS_DIR . '/' . $batch_id . '.batch.json'; - if (!file_exists($batch_file)) { - error('Batch not found'); - } - - $batch = json_decode(file_get_contents($batch_file), true); - $jobs = []; - $completed = 0; - $failed = 0; - - foreach ($batch['jobs'] as $job_id) { - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - $result_file = RESULTS_DIR . '/' . $job_id . '.result.json'; - - $status = 'unknown'; - $score = null; - $filename = ''; - - if (file_exists($meta_file)) { - $meta = json_decode(file_get_contents($meta_file), true); - $status = $meta['status'] ?? 'uploaded'; - $filename = $meta['original_filename'] ?? ''; - } - if (file_exists($result_file)) { - $status = 'completed'; - $result = json_decode(file_get_contents($result_file), true); - $score = $result['accessibility_score'] ?? null; - $completed++; - } else if ($status === 'failed') { - $failed++; - } - - $jobs[] = [ - 'job_id' => $job_id, - 'filename' => $filename, - 'status' => $status, - 'score' => $score - ]; - } - - $total = count($batch['jobs']); - $overall_status = ($completed === $total) ? 'completed' : - (($completed + $failed === $total) ? 'finished' : 'processing'); - - success([ - 'batch_id' => $batch_id, - 'status' => $overall_status, - 'total' => $total, - 'completed' => $completed, - 'failed' => $failed, - 'jobs' => $jobs - ]); -} - -/** - * Export results as HTML or JSON - */ -function handleExport() { - $job_id = $_GET['job_id'] ?? ''; - $format = $_GET['format'] ?? 
'json'; - - if (empty($job_id)) { - error('Job ID required'); - } - $job_id = sanitizeJobId($job_id); - - // Prefer adjusted result if available (created by save_adjusted_result) - $adj_file = RESULTS_DIR . '/' . $job_id . '.adjusted.json'; - $result_file = file_exists($adj_file) ? $adj_file : RESULTS_DIR . '/' . $job_id . '.result.json'; - - if (!file_exists($result_file)) { - error('Results not found'); - } - - $result = json_decode(file_get_contents($result_file), true); - - if ($format === 'html') { - // Generate HTML report via Python - $venv_python = __DIR__ . '/venv/bin/python3'; - $python_bin = file_exists($venv_python) ? $venv_python : 'python3'; - $report_script = __DIR__ . '/report_generator.py'; - - $html_file = RESULTS_DIR . '/' . $job_id . '.report.html'; - - $cmd = escapeshellcmd($python_bin . ' ' . $report_script) . - ' --input ' . escapeshellarg($result_file) . - ' --output ' . escapeshellarg($html_file); - - exec($cmd . ' 2>&1', $output, $return_code); - - if ($return_code !== 0 || !file_exists($html_file)) { - error('Report generation failed'); - } - - header('Content-Type: text/html; charset=utf-8'); - header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.html"'); - readfile($html_file); - exit; - } - - if ($format === 'pdf') { - // Generate PDF report via Python WeasyPrint - $venv_python = __DIR__ . '/venv/bin/python3'; - $python_bin = file_exists($venv_python) ? $venv_python : 'python3'; - $report_script = __DIR__ . '/report_generator.py'; - - $pdf_file = RESULTS_DIR . '/' . $job_id . '.report.pdf'; - - $cmd = escapeshellcmd($python_bin . ' ' . $report_script) . - ' --input ' . escapeshellarg($result_file) . - ' --output ' . escapeshellarg($pdf_file) . - ' --format pdf'; - - exec($cmd . ' 2>&1', $output, $return_code); - - if ($return_code !== 0 || !file_exists($pdf_file)) { - error('PDF report generation failed: ' . 
implode("\n", $output)); - } - - header('Content-Type: application/pdf'); - header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.pdf"'); - header('Content-Length: ' . filesize($pdf_file)); - readfile($pdf_file); - exit; - } - - // Default: JSON download - header('Content-Type: application/json'); - header('Content-Disposition: attachment; filename="accessibility_report_' . $job_id . '.json"'); - echo json_encode($result, JSON_PRETTY_PRINT); - exit; -} - -/** - * Save an adjusted result merging dismissed issues and check overrides into a new JSON file. - * The export endpoint will prefer this file over the original result. - */ -function handleSaveAdjustedResult() { - $data = json_decode(file_get_contents('php://input'), true) ?: []; - $job_id = $data['job_id'] ?? ''; - - if (empty($job_id)) { - error('job_id required'); - } - $job_id = sanitizeJobId($job_id); - - $result_file = RESULTS_DIR . '/' . $job_id . '.result.json'; - if (!file_exists($result_file)) { - error('Results not found'); - } - - $result = json_decode(file_get_contents($result_file), true); - - // Load dismissed and overrides - $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json'; - $override_file = RESULTS_DIR . '/' . $job_id . '.overrides.json'; - $dismissed = file_exists($dismiss_file) ? json_decode(file_get_contents($dismiss_file), true) : []; - $overrides = file_exists($override_file) ? json_decode(file_get_contents($override_file), true) : []; - - // 1. Mark dismissed issues in the issues array - if (!empty($dismissed) && isset($result['issues'])) { - foreach ($result['issues'] as $idx => &$issue) { - if (isset($dismissed[$idx])) { - $issue['dismissed'] = true; - } - } - unset($issue); - } - - // 2. Recalculate score (mirrors JS recalculateScore()) - $bd = $result['score_breakdown'] ?? []; - $origSC = $result['severity_counts'] ?? []; - - $adj_crit = (int)($origSC['critical'] ?? 0); - $adj_err = (int)($origSC['error'] ?? 
0); - - // Subtract dismissed CRITICAL / ERROR issues - foreach ($dismissed as $idx => $info) { - $sev = strtoupper($result['issues'][$idx]['severity'] ?? ''); - if ($sev === 'CRITICAL') $adj_crit = max(0, $adj_crit - 1); - if ($sev === 'ERROR') $adj_err = max(0, $adj_err - 1); - } - - $new_penalty = min(20, $adj_crit * 5 + $adj_err * 2); - $checks_total = (int)($bd['checks_total'] ?? 0); - $checks_passed = (int)($bd['checks_passed'] ?? 0); - $new_passed = min($checks_total, $checks_passed + count($overrides)); - $new_base = $checks_total > 0 ? (int)round(100 * $new_passed / $checks_total) : 0; - $new_score = max(0, $new_base - $new_penalty); - - $result['accessibility_score'] = $new_score; - $result['severity_counts']['critical'] = $adj_crit; - $result['severity_counts']['error'] = $adj_err; - $result['score_breakdown']['final_score'] = $new_score; - $result['score_breakdown']['checks_passed'] = $new_passed; - $result['score_breakdown']['base_score'] = $new_base; - $result['score_breakdown']['penalty'] = $new_penalty; - $result['score_breakdown']['adjusted'] = true; - - // 3. 
Recompute WCAG compliance badges based on non-dismissed CRITICAL/ERROR issues - $wcag_levels = [ - '1.1.1'=>'A','1.2.1'=>'A','1.2.2'=>'A','1.2.3'=>'A', - '1.2.4'=>'AA','1.2.5'=>'AA', - '1.3.1'=>'A','1.3.2'=>'A','1.3.3'=>'A', - '1.3.4'=>'AA','1.3.5'=>'AA', - '1.4.1'=>'A','1.4.2'=>'A', - '1.4.3'=>'AA','1.4.4'=>'AA','1.4.5'=>'AA', - '1.4.10'=>'AA','1.4.11'=>'AA','1.4.12'=>'AA','1.4.13'=>'AA', - '2.1.1'=>'A','2.1.2'=>'A','2.1.4'=>'A', - '2.2.1'=>'A','2.2.2'=>'A', - '2.3.1'=>'A', - '2.4.1'=>'A','2.4.2'=>'A','2.4.3'=>'A','2.4.4'=>'A', - '2.4.5'=>'AA','2.4.6'=>'AA','2.4.7'=>'AA', - '2.5.1'=>'A','2.5.2'=>'A','2.5.3'=>'A','2.5.4'=>'A', - '3.1.1'=>'A','3.1.2'=>'AA', - '3.2.1'=>'A','3.2.2'=>'A','3.2.3'=>'AA','3.2.4'=>'AA', - '3.3.1'=>'A','3.3.2'=>'A','3.3.3'=>'AA','3.3.4'=>'AA', - '4.1.1'=>'A','4.1.2'=>'A','4.1.3'=>'AA', - ]; - $failing_a = []; - $failing_aa = []; - if (isset($result['issues'])) { - foreach ($result['issues'] as $issue) { - if (!empty($issue['dismissed'])) continue; - $sev = strtoupper($issue['severity'] ?? ''); - if ($sev !== 'CRITICAL' && $sev !== 'ERROR') continue; - $crit = $issue['wcag_criterion'] ?? ''; - if (!$crit || !isset($wcag_levels[$crit])) continue; - $lvl = $wcag_levels[$crit]; - if ($lvl === 'A' && !in_array($crit, $failing_a)) $failing_a[] = $crit; - if ($lvl === 'AA' && !in_array($crit, $failing_aa)) $failing_aa[] = $crit; - } - } - $result['wcag_compliance']['level_a'] = empty($failing_a); - $result['wcag_compliance']['level_aa'] = empty($failing_a) && empty($failing_aa); - $result['wcag_compliance']['level_a_failures'] = $failing_a; - $result['wcag_compliance']['level_aa_failures'] = $failing_aa; - - // 4. Mark overridden checks in checks_performed - if (!empty($overrides) && isset($result['checks_performed'])) { - foreach ($result['checks_performed'] as &$check) { - if (isset($overrides[$check['name']])) { - $check['passed'] = true; - $check['manual'] = true; - } - } - unset($check); - } - - // 5. 
Update Matterhorn checkpoints for H-type CPs linked to overridden checks - $check_to_cp = [ - 'Color Contrast' => ['04'], - 'Image Accessibility' => ['13'], - 'Heading Structure' => ['14'], - ]; - $cp_to_check = []; - foreach ($check_to_cp as $checkName => $cpIds) { - foreach ($cpIds as $cpId) { - $cp_to_check[$cpId] = $checkName; - } - } - - if (!empty($overrides) && isset($result['matterhorn_summary']['checkpoints'])) { - foreach ($result['matterhorn_summary']['checkpoints'] as &$cp) { - $cpId = $cp['id']; - if (isset($cp_to_check[$cpId]) && isset($overrides[$cp_to_check[$cpId]])) { - $cp['status'] = 'PASS'; - $cp['manual'] = true; - } - } - unset($cp); - - // Recompute overall_passed - $all_pass = true; - foreach ($result['matterhorn_summary']['checkpoints'] as $cp) { - if ($cp['status'] === 'FAIL') { $all_pass = false; break; } - } - $result['matterhorn_summary']['overall_passed'] = $all_pass; - } - - $adj_file = RESULTS_DIR . '/' . $job_id . '.adjusted.json'; - file_put_contents($adj_file, json_encode($result)); - - success(['saved' => true, 'score' => $new_score]); -} - -/** - * Dismiss an issue (mark as false positive) - */ -function handleDismiss() { - $data = json_decode(file_get_contents('php://input'), true) ?: []; - $job_id = $data['job_id'] ?? ''; - $issue_index = isset($data['issue_index']) ? (int)$data['issue_index'] : -1; - $reason = substr($data['reason'] ?? '', 0, 255); - - if (empty($job_id) || $issue_index < 0) { - error('job_id and issue_index required'); - } - $job_id = sanitizeJobId($job_id); - - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - if (!file_exists($meta_file)) { - error('Job not found'); - } - - $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json'; - $dismissed = file_exists($dismiss_file) ? 
json_decode(file_get_contents($dismiss_file), true) : []; - $dismissed[$issue_index] = ['reason' => $reason, 'dismissed_at' => date('Y-m-d H:i:s')]; - file_put_contents($dismiss_file, json_encode($dismissed)); - - success(['dismissed' => true, 'issue_index' => $issue_index]); -} - -/** - * Undismiss an issue - */ -function handleUndismiss() { - $data = json_decode(file_get_contents('php://input'), true) ?: []; - $job_id = $data['job_id'] ?? ''; - $issue_index = isset($data['issue_index']) ? (int)$data['issue_index'] : -1; - - if (empty($job_id) || $issue_index < 0) { - error('job_id and issue_index required'); - } - $job_id = sanitizeJobId($job_id); - - $dismiss_file = RESULTS_DIR . '/' . $job_id . '.dismissed.json'; - if (file_exists($dismiss_file)) { - $dismissed = json_decode(file_get_contents($dismiss_file), true); - unset($dismissed[$issue_index]); - file_put_contents($dismiss_file, json_encode($dismissed)); - } - - success(['undismissed' => true, 'issue_index' => $issue_index]); -} - -/** - * Override a check (mark as manually passed) - */ -function handleOverrideCheck() { - $data = json_decode(file_get_contents('php://input'), true) ?: []; - $job_id = $data['job_id'] ?? ''; - $check_name = strip_tags(substr($data['check_name'] ?? '', 0, 200)); - - if (empty($job_id) || empty($check_name)) { - error('job_id and check_name required'); - } - $job_id = sanitizeJobId($job_id); - - $meta_file = RESULTS_DIR . '/' . $job_id . '.meta.json'; - if (!file_exists($meta_file)) { - error('Job not found'); - } - - $override_file = RESULTS_DIR . '/' . $job_id . '.overrides.json'; - $overrides = file_exists($override_file) ? 
json_decode(file_get_contents($override_file), true) : []; - $overrides[$check_name] = ['overridden_at' => date('Y-m-d H:i:s')]; - file_put_contents($override_file, json_encode($overrides)); - - success(['overridden' => true, 'check_name' => $check_name]); -} - -/** - * Remove a check override - */ -function handleUnoverrideCheck() { - $data = json_decode(file_get_contents('php://input'), true) ?: []; - $job_id = $data['job_id'] ?? ''; - $check_name = strip_tags(substr($data['check_name'] ?? '', 0, 200)); - - if (empty($job_id) || empty($check_name)) { - error('job_id and check_name required'); - } - $job_id = sanitizeJobId($job_id); - - $override_file = RESULTS_DIR . '/' . $job_id . '.overrides.json'; - if (file_exists($override_file)) { - $overrides = json_decode(file_get_contents($override_file), true); - unset($overrides[$check_name]); - file_put_contents($override_file, json_encode($overrides)); - } - - success(['unoverridden' => true, 'check_name' => $check_name]); -} - -/** - * Send success response - */ -function success($data) { - echo json_encode([ - 'success' => true, - 'data' => $data - ]); - exit; -} - -/** - * Send error response - */ -function error($message) { - http_response_code(400); - echo json_encode([ - 'success' => false, - 'error' => $message - ]); - exit; -} diff --git a/auth.php b/auth.php deleted file mode 100644 index eb0f40c..0000000 --- a/auth.php +++ /dev/null @@ -1,198 +0,0 @@ - - * - X-API-Key: - * - Query parameter: ?api_key= (dev only) - */ - -/** - * Check if request is authenticated - * - * @return bool True if authenticated, false otherwise - */ -function authenticate() { - // Development mode: allow localhost without auth - if (isDevelopmentMode()) { - return true; - } - - $api_key = extractApiKey(); - - if (!$api_key) { - return false; - } - - // Validate against configured keys - $valid_keys = getValidApiKeys(); - - return in_array($api_key, $valid_keys, true); -} - -/** - * Check if running in development mode (localhost) - 
* - * @return bool True if development mode - */ -function isDevelopmentMode() { - // DEV_MODE env var explicitly bypasses auth (set in Apache/env config) - $dev_mode = getenv('DEV_MODE'); - return ($dev_mode === 'true' || $dev_mode === '1'); -} - -/** - * Extract API key from request - * - * Checks multiple sources in order of security: - * 1. Authorization: Bearer header - * 2. X-API-Key header - * 3. Query parameter (least secure, for dev only) - * - * @return string|null API key or null if not found - */ -function extractApiKey() { - // Check Authorization: Bearer header - if (isset($_SERVER['HTTP_AUTHORIZATION'])) { - if (preg_match('/Bearer\s+(.*)$/i', $_SERVER['HTTP_AUTHORIZATION'], $matches)) { - return trim($matches[1]); - } - } - - // Check X-API-Key header - if (isset($_SERVER['HTTP_X_API_KEY'])) { - return trim($_SERVER['HTTP_X_API_KEY']); - } - - // Check query parameter (least secure - dev only) - if (isDevelopmentMode() && isset($_GET['api_key'])) { - return trim($_GET['api_key']); - } - - return null; -} - -/** - * Get list of valid API keys - * - * Loads keys from: - * 1. Environment variable API_KEY - * 2. .api_keys file (one key per line) - * 3. Default dev key (for development only) - * - * @return array List of valid API keys - */ -function getValidApiKeys() { - $keys = []; - - // Load from environment variable - $env_key = getenv('API_KEY'); - if ($env_key) { - $keys[] = $env_key; - } - - // Load from .api_keys file - $config_file = __DIR__ . 
'/.api_keys'; - if (file_exists($config_file)) { - $file_keys = file($config_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); - if ($file_keys) { - // Filter out comments and empty lines - $file_keys = array_filter($file_keys, function($line) { - $line = trim($line); - return $line && substr($line, 0, 1) !== '#'; - }); - $keys = array_merge($keys, array_values($file_keys)); - } - } - - // Fallback to dev key only in development mode - if (empty($keys) && isDevelopmentMode()) { - error_log("WARNING: Using default dev API key. Configure proper API keys for production!"); - $keys[] = 'dev_key_12345'; - } - - return array_unique($keys); -} - -/** - * Send error response and exit - * - * @param string $message Error message - * @param int $status_code HTTP status code - */ -function sendUnauthorizedResponse($message = "Unauthorized", $status_code = 401) { - http_response_code($status_code); - header('Content-Type: application/json'); - header('WWW-Authenticate: Bearer realm="API"'); - - echo json_encode([ - 'success' => false, - 'error' => $message, - 'status' => $status_code - ]); - - exit; -} - -/** - * Require authentication or send error - * - * Call this at the beginning of protected endpoints - */ -function requireAuth() { - if (!authenticate()) { - sendUnauthorizedResponse("Valid API key required"); - } -} - -/** - * Generate a new random API key - * - * @return string 64-character hex API key - */ -function generateApiKey() { - return bin2hex(random_bytes(32)); -} - -// Example usage (for testing): -if (basename(__FILE__) == basename($_SERVER['SCRIPT_FILENAME'])) { - header('Content-Type: text/plain'); - echo "PDF Accessibility Checker - Authentication Module\n"; - echo "=================================================\n\n"; - - if (isset($_GET['generate'])) { - echo "New API Key:\n"; - echo generateApiKey() . 
"\n\n"; - echo "Add this to your .api_keys file or API_KEY environment variable.\n"; - } else if (isset($_GET['test'])) { - echo "Testing authentication...\n\n"; - - $api_key = extractApiKey(); - if ($api_key) { - echo "API Key found: " . substr($api_key, 0, 8) . "...\n"; - - if (authenticate()) { - echo "βœ… Authentication successful!\n"; - } else { - echo "❌ Authentication failed - invalid key\n"; - } - } else { - echo "❌ No API key provided\n"; - echo "\nTry:\n"; - echo " - Add header: X-API-Key: \n"; - echo " - Or query param: ?api_key=&test=1\n"; - } - - echo "\nValid keys configured: " . count(getValidApiKeys()) . "\n"; - } else { - echo "Available actions:\n"; - echo " ?generate - Generate new API key\n"; - echo " ?test - Test authentication\n"; - echo "\nExample:\n"; - echo " php auth.php?generate\n"; - echo " curl -H 'X-API-Key: your-key' http://localhost:8000/auth.php?test\n"; - } -} -?> diff --git a/cloudbuild.yaml b/cloudbuild.yaml deleted file mode 100644 index 69a60ff..0000000 --- a/cloudbuild.yaml +++ /dev/null @@ -1,14 +0,0 @@ -steps: - - name: 'gcr.io/cloud-builders/docker' - args: - - 'build' - - '-t' - - 'us-central1-docker.pkg.dev/optical-414516/pdf-accessibility/checker:latest' - - '-f' - - 'Dockerfile.cloudrun' - - '.' 
- -images: - - 'us-central1-docker.pkg.dev/optical-414516/pdf-accessibility/checker:latest' - -timeout: '600s' diff --git a/cloudrun_service.py b/cloudrun_service.py index 5b4f6f5..201db22 100644 --- a/cloudrun_service.py +++ b/cloudrun_service.py @@ -26,7 +26,7 @@ logger = logging.getLogger('cloudrun') app = Flask(__name__) -GCS_BUCKET_NAME = os.getenv('GCS_BUCKET_NAME', 'optical-pdf-images') +GCS_BUCKET_NAME = os.getenv('STORAGE_BUCKET', 'pdf-pages') def upload_images_to_gcs(images_dir: Path, job_id: str) -> dict: diff --git a/css/styles.css b/css/styles.css index 297e895..f0cf5af 100644 --- a/css/styles.css +++ b/css/styles.css @@ -1,7 +1,7 @@ /* Enterprise PDF Accessibility Checker β€” Redesigned */ /* Aesthetic: Precision Observatory β€” utilitarian elegance with warm accents */ -@import url('https://fonts.googleapis.com/css2?family=Montserrat:wght@300;400;500;600;700;800&display=swap'); +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap'); *, *::before, *::after { margin: 0; @@ -9,18 +9,18 @@ box-sizing: border-box; } -/* ── Design Tokens β€” Oliver Branding ── */ +/* ── Design Tokens β€” Aimpress ── */ :root { /* Typography */ - --font-display: 'Montserrat', sans-serif; - --font-body: 'Montserrat', sans-serif; + --font-display: 'Inter', sans-serif; + --font-body: 'Inter', sans-serif; - /* Core palette β€” Oliver yellow + black */ - --accent: #FFC407; - --accent-hover: #e6b006; - --accent-glow: rgba(255, 196, 7, 0.2); - --accent-subtle: rgba(255, 196, 7, 0.08); - --accent-text: #000000; /* text on accent backgrounds */ + /* Core palette β€” Aimpress indigo */ + --accent: #6366F1; + --accent-hover: #4F46E5; + --accent-glow: rgba(99, 102, 241, 0.2); + --accent-subtle: rgba(99, 102, 241, 0.08); + --accent-text: #ffffff; /* text on accent backgrounds */ /* Semantic */ --success: #059669; @@ -48,7 +48,7 @@ --border-subtle: #eae8e4; --divider: #d4d0ca; --log-bg: #faf9f7; - --primary: #FFC407; + --primary: #6366F1; 
--primary-dark: #e6b006; --black: #1a1a1a; @@ -84,10 +84,10 @@ --border-subtle: #2a2a2a; --divider: #303030; --log-bg: #121212; - --primary: #FFC407; + --primary: #6366F1; --primary-dark: #ffd54f; --black: #f0f0f0; - --accent: #FFC407; + --accent: #6366F1; --accent-hover: #ffd54f; --accent-glow: rgba(255, 196, 7, 0.25); --accent-subtle: rgba(255, 196, 7, 0.1); diff --git a/deploy.sh b/deploy.sh deleted file mode 100755 index 90ac447..0000000 --- a/deploy.sh +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env bash -# -# deploy.sh β€” Idempotent deployment script for PDF Accessibility Checker -# -# Usage: -# cd /opt/pdf-accessibility && ./deploy.sh -# -# Architecture: -# - Apache (host) serves frontend + api.php from /var/www/html/pdf-accessibility -# - Docker Compose runs: PostgreSQL -# - PDF processing via Google Cloud Run (synchronous HTTP call from api.php) -# -set -euo pipefail - -# ── Configuration ───────────────────────────────────────────────── - -REPO_DIR="$(cd "$(dirname "$0")" && pwd)" -WEB_DIR="/var/www/html/pdf-accessibility" -COMPOSE_FILE="docker-compose.prod.yml" -ENV_FILE="${REPO_DIR}/.env" -MIN_PHP_VERSION="8.0" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -log() { echo -e "${GREEN}[DEPLOY]${NC} $*"; } -warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } -err() { echo -e "${RED}[ERROR]${NC} $*"; } - -# ── Preflight Checks ───────────────────────────────────────────── - -log "Starting deployment from ${REPO_DIR}" - -# Check Docker -if ! command -v docker &>/dev/null; then - err "Docker is not installed. Install it first:" - err " curl -fsSL https://get.docker.com | sh" - err " sudo usermod -aG docker \$USER" - exit 1 -fi - -# Check Docker Compose (v2 plugin) -if ! docker compose version &>/dev/null; then - err "Docker Compose v2 is not available. Install it:" - err " sudo apt-get install docker-compose-plugin" - exit 1 -fi - -# Check PHP -if ! command -v php &>/dev/null; then - warn "PHP is not installed. 
api.php requires PHP ${MIN_PHP_VERSION}+ with extensions:" - warn " sudo apt-get install php8.2 php8.2-pgsql php8.2-curl php8.2-mbstring" -else - PHP_VER=$(php -r 'echo PHP_MAJOR_VERSION . "." . PHP_MINOR_VERSION;') - log "PHP version: ${PHP_VER}" - - # Check required extensions - MISSING_EXT="" - php -m | grep -qi pgsql || MISSING_EXT="${MISSING_EXT} php-pgsql" - php -m | grep -qi curl || MISSING_EXT="${MISSING_EXT} php-curl" - php -m | grep -qi openssl || MISSING_EXT="${MISSING_EXT} php-openssl" - - if [ -n "${MISSING_EXT}" ]; then - warn "Missing PHP extensions:${MISSING_EXT}" - warn "Install with: sudo apt-get install${MISSING_EXT}" - fi -fi - -# ── Pull Latest Code ───────────────────────────────────────────── - -log "Pulling latest code..." -cd "${REPO_DIR}" - -if [ -d .git ]; then - git config core.fileMode false - # Run git as the repo owner (not root) so SSH keys work - REPO_OWNER=$(stat -c '%U' "${REPO_DIR}/.git") - if [ "$(id -u)" = "0" ] && [ "${REPO_OWNER}" != "root" ]; then - sudo -u "${REPO_OWNER}" git -C "${REPO_DIR}" fetch --all - sudo -u "${REPO_OWNER}" git -C "${REPO_DIR}" reset --hard origin/$(git rev-parse --abbrev-ref HEAD) - else - git fetch --all - git reset --hard origin/$(git rev-parse --abbrev-ref HEAD) - fi - log "Code updated to $(git log --oneline -1)" -else - warn "Not a git repo β€” using existing files" -fi - -# ── Environment File ───────────────────────────────────────────── - -if [ ! -f "${ENV_FILE}" ]; then - log "Creating .env from .env.example (first run)..." 
- cp "${REPO_DIR}/.env.example" "${ENV_FILE}" - - # Override Docker hostnames with localhost for host-side PHP - sed -i 's/^DB_HOST=postgres/DB_HOST=127.0.0.1/' "${ENV_FILE}" - sed -i 's/^DEV_MODE=true/DEV_MODE=false/' "${ENV_FILE}" - - warn "Review and update ${ENV_FILE} with production values:" - warn " - DB_PASSWORD (change from default!)" - warn " - ANTHROPIC_API_KEY" - warn " - GOOGLE_API_KEY" - warn " - CLOUD_RUN_URL" - warn " - GCP_SA_KEY_PATH (copy pdf-api-invoker-key.json to server)" - warn " - AZURE_* settings" -else - log "Using existing .env file" -fi - -# ── Build Docker Containers ────────────────────────────────────── - -log "Building Docker containers (using cache)..." -docker compose -f "${COMPOSE_FILE}" build - -log "Starting/restarting Docker services..." -docker compose -f "${COMPOSE_FILE}" up -d --remove-orphans - -# Wait for PostgreSQL to be ready -log "Waiting for PostgreSQL to be healthy..." -RETRIES=30 -until docker compose -f "${COMPOSE_FILE}" exec -T postgres pg_isready -U pdf_checker &>/dev/null || [ $RETRIES -eq 0 ]; do - sleep 1 - RETRIES=$((RETRIES - 1)) -done - -if [ $RETRIES -eq 0 ]; then - err "PostgreSQL failed to start. Check logs:" - err " docker compose -f ${COMPOSE_FILE} logs postgres" - exit 1 -fi - -log "PostgreSQL is ready" - -# Database init.sql runs automatically on first compose up via -# /docker-entrypoint-initdb.d/init.sql β€” no migration tool needed. 
-# For future migrations, add numbered SQL files to db/ and apply: -if [ -d "${REPO_DIR}/db/migrations" ]; then - for migration in "${REPO_DIR}"/db/migrations/*.sql; do - [ -f "$migration" ] || continue - MIGRATION_NAME=$(basename "$migration") - log "Applying migration: ${MIGRATION_NAME}" - docker compose -f "${COMPOSE_FILE}" exec -T postgres \ - psql -U pdf_checker -d pdf_checker -f "/dev/stdin" < "$migration" 2>/dev/null || \ - warn "Migration ${MIGRATION_NAME} may have already been applied" - done -fi - -# ── Deploy Frontend Files ───────────────────────────────────────── - -log "Deploying frontend to ${WEB_DIR}..." - -# Create web directory if it doesn't exist -sudo mkdir -p "${WEB_DIR}" - -# Clean old frontend files (but preserve uploads, results, .env, logs) -log "Cleaning old frontend files..." -sudo rm -f "${WEB_DIR}/index.html" "${WEB_DIR}/history.html" -sudo rm -rf "${WEB_DIR}/css" "${WEB_DIR}/js" -sudo rm -f "${WEB_DIR}/api.php" "${WEB_DIR}/auth.php" - -# Copy frontend files -sudo cp "${REPO_DIR}/index.html" "${WEB_DIR}/" -sudo cp "${REPO_DIR}/history.html" "${WEB_DIR}/" -sudo cp -r "${REPO_DIR}/css" "${WEB_DIR}/" -sudo cp -r "${REPO_DIR}/js" "${WEB_DIR}/" - -# Copy PHP backend files -sudo cp "${REPO_DIR}/api.php" "${WEB_DIR}/" -sudo cp "${REPO_DIR}/auth.php" "${WEB_DIR}/" - -# Copy Python scripts (needed if api.php fallback exec() is used) -sudo cp "${REPO_DIR}/enterprise_pdf_checker.py" "${WEB_DIR}/" -sudo cp "${REPO_DIR}/pdf_remediation.py" "${WEB_DIR}/" -sudo cp "${REPO_DIR}/logger_config.py" "${WEB_DIR}/" -sudo cp "${REPO_DIR}/retry_helper.py" "${WEB_DIR}/" - -# Copy .env for PHP (if not already there) -if [ ! 
-f "${WEB_DIR}/.env" ]; then - sudo cp "${ENV_FILE}" "${WEB_DIR}/.env" - log "Copied .env to web directory" -else - # Update .env in web dir from repo .env - sudo cp "${ENV_FILE}" "${WEB_DIR}/.env" -fi - -# Create runtime directories -sudo mkdir -p "${WEB_DIR}/uploads" "${WEB_DIR}/results" "${WEB_DIR}/logs" "${WEB_DIR}/rate_limits" - -# Set ownership for Apache -sudo chown -R www-data:www-data "${WEB_DIR}" -sudo chmod -R 755 "${WEB_DIR}" -sudo chmod -R 775 "${WEB_DIR}/uploads" "${WEB_DIR}/results" "${WEB_DIR}/logs" "${WEB_DIR}/rate_limits" - -# ── Verify ──────────────────────────────────────────────────────── - -log "" -log "=============================================" -log " Deployment complete!" -log "=============================================" -log "" -log "Services status:" -docker compose -f "${COMPOSE_FILE}" ps --format "table {{.Name}}\t{{.Status}}\t{{.Ports}}" -log "" -log "Frontend: ${WEB_DIR}" -log "Docker: PostgreSQL (127.0.0.1:1221)" -log "Cloud Run: ${CLOUD_RUN_URL:-$(grep '^CLOUD_RUN_URL=' "${ENV_FILE}" 2>/dev/null | cut -d= -f2 || echo 'not set')}" -log "" - -# Quick health check -if docker compose -f "${COMPOSE_FILE}" exec -T postgres pg_isready -U pdf_checker &>/dev/null; then - log "PostgreSQL: OK" -fi - -log "" -log "Reloading Apache..." -sudo systemctl reload apache2 && log "Apache reloaded" || warn "Apache reload failed β€” run: sudo systemctl reload apache2" - -log "" -log "Next steps (if first deploy):" -log " 1. Ensure pdf-api-invoker-key.json is at the GCP_SA_KEY_PATH location" -log " 2. 
Review ${WEB_DIR}/.env (especially CLOUD_RUN_URL and API keys)" -log "" diff --git a/docker-entrypoint-web.sh b/docker-entrypoint-web.sh deleted file mode 100644 index 20506a7..0000000 --- a/docker-entrypoint-web.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh -set -e - -# Allow PHP-FPM to inherit environment variables (needed for getenv() in PHP) -# By default PHP-FPM clears the environment; this disables that behavior -echo 'clear_env = no' >> /usr/local/etc/php-fpm.d/www.conf - -# 15-minute timeout for Cloud Run PDF processing -echo 'request_terminate_timeout = 900' >> /usr/local/etc/php-fpm.d/www.conf - -# Start PHP-FPM in background -php-fpm -D - -# Start Nginx in foreground -nginx -g 'daemon off;' diff --git a/history.html b/history.html deleted file mode 100644 index 0e1150f..0000000 --- a/history.html +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - My Documents β€” PDF Accessibility Checker - - - - - - - - - - - - - -
-
-
-
-

Enterprise PDF Accessibility Checker

-

Comprehensive WCAG 2.1 compliance validation with AI-powered analysis

-
-
- ⬆ New Check - - - -
-
-
-
- -
-
- -
-
- - - - - - - diff --git a/index.html b/index.html deleted file mode 100644 index 6462d85..0000000 --- a/index.html +++ /dev/null @@ -1,266 +0,0 @@ - - - - - - Enterprise PDF Accessibility Checker - - - - - - - - - - - - - - -
-
-
-
-

Enterprise PDF Accessibility Checker

-

Comprehensive WCAG 2.1 compliance validation with AI-powered analysis

-
-
- - - - -
-
-
-
-
-
- - -
-

Upload PDF Document

- -
- - -
- -
-
-
📄
-
Drop your PDF here or click to browse
-
Maximum file size: 50MB
- -
-
-
-
- - -
-
- - - -
-

Check Options

-
- - -
-
- Quick mode runs basic checks only β€” great for initial scans. Completes in ~10 seconds vs ~2 minutes. -
-
- -
-
-
Uploading...
-
0%
-
-
-
-
- -
-
Processing Details
-
-
Initializing...
-
-
-
-
- - -
-
-
-

Accessibility Report

-
- - - - -
-
- -
-
- -- - -
-
-
Accessibility Score
- -
-
- -
- -
-
- - - - - - - - - - - - - -
-

Issues & Recommendations

- - - -
- -
- Review complete β€” check another document or export your report. - -
-
-
-
-
- - - - - - - - - - - diff --git a/js/api.js b/js/api.js deleted file mode 100644 index 8d4bdbb..0000000 --- a/js/api.js +++ /dev/null @@ -1,86 +0,0 @@ -/* API communication layer */ - -const API_BASE = 'api.php'; - -async function apiCall(action, options = {}) { - const { method = 'GET', body = null, params = {} } = options; - - let url = API_BASE; - const queryParams = new URLSearchParams({ action, ...params }); - - if (method === 'GET') { - url += '?' + queryParams.toString(); - } - - const headers = {}; - - // Add MSAL token if available - if (window.msalToken) { - headers['Authorization'] = 'Bearer ' + window.msalToken; - } - - const fetchOptions = { method, headers }; - if (body) { - if (body instanceof FormData) { - body.append('action', action); - fetchOptions.body = body; - } else { - fetchOptions.body = body; - } - } - - const response = await fetch(url, fetchOptions); - return response.json(); -} - -async function uploadFile(file) { - const formData = new FormData(); - formData.append('pdf', file); - return apiCall('upload', { method: 'POST', body: formData }); -} - -async function startCheck(jobId, quickMode) { - const formData = new FormData(); - formData.append('job_id', jobId); - if (quickMode) formData.append('quick_mode', '1'); - return apiCall('check', { method: 'POST', body: formData }); -} - -async function checkStatus(jobId) { - return apiCall('status', { params: { job_id: jobId } }); -} - -async function getResult(jobId) { - return apiCall('result', { params: { job_id: jobId } }); -} - -async function getDebugInfo(jobId) { - return apiCall('debug', { params: { job_id: jobId } }); -} - -async function remediatePdf(jobId) { - const formData = new FormData(); - formData.append('job_id', jobId); - return apiCall('remediate', { method: 'POST', body: formData }); -} - -async function getStats() { - return apiCall('stats'); -} - -async function uploadBatch(files) { - const formData = new FormData(); - for (let i = 0; i < files.length; i++) { - 
formData.append('pdfs[]', files[i]); - } - return apiCall('batch_upload', { method: 'POST', body: formData }); -} - -async function checkBatchStatus(batchId) { - return apiCall('batch_status', { params: { batch_id: batchId } }); -} - -function getExportUrl(jobId, format) { - const params = new URLSearchParams({ action: 'export', job_id: jobId, format: format }); - return API_BASE + '?' + params.toString(); -} diff --git a/js/app-history.js b/js/app-history.js deleted file mode 100644 index 4f44038..0000000 --- a/js/app-history.js +++ /dev/null @@ -1,96 +0,0 @@ -/* MSAL auth + init for history.html */ - -const msalConfig = { - auth: { - clientId: '', - authority: '', - redirectUri: window.location.origin + window.location.pathname - }, - cache: { cacheLocation: 'localStorage', storeAuthStateInCookie: false } -}; - -let msalInstance = null; -window.msalToken = null; - -function initMsal() { - const el = document.getElementById('msalConfig'); - if (!el) return; - const tenantId = el.dataset.tenantId; - const clientId = el.dataset.clientId; - const redirectUri = el.dataset.redirectUri; - if (!tenantId || !clientId) return; - - msalConfig.auth.clientId = clientId; - msalConfig.auth.authority = `https://login.microsoftonline.com/${tenantId}`; - if (redirectUri) msalConfig.auth.redirectUri = redirectUri; - - const script = document.createElement('script'); - script.src = 'https://cdn.jsdelivr.net/npm/@azure/msal-browser@2/lib/msal-browser.min.js'; - script.onload = () => { - msalInstance = new msal.PublicClientApplication(msalConfig); - msalInstance.initialize().then(handleMsalRedirect); - }; - document.head.appendChild(script); -} - -async function handleMsalRedirect() { - try { - const response = await msalInstance.handleRedirectPromise(); - if (response) { - window.msalToken = response.accessToken; - showAuthenticatedUI(response.account); - return; - } - } catch (e) { console.error('MSAL redirect error:', e); } - - const accounts = msalInstance.getAllAccounts(); - if 
(accounts.length > 0) { - try { - const tokenResponse = await msalInstance.acquireTokenSilent({ scopes: ['User.Read'], account: accounts[0] }); - window.msalToken = tokenResponse.accessToken; - showAuthenticatedUI(accounts[0]); - } catch (e) { showLoginUI(); } - } else { - if (window.location.hostname === 'localhost' || window.location.hostname === '127.0.0.1') { - showAuthenticatedUI(null); - } else { - showLoginUI(); - } - } -} - -function showLoginUI() { - const overlay = document.getElementById('authOverlay'); - if (overlay) overlay.classList.add('active'); -} - -function showAuthenticatedUI(account) { - const overlay = document.getElementById('authOverlay'); - if (overlay) overlay.classList.remove('active'); - - const userInfo = document.getElementById('userInfo'); - if (userInfo && account) userInfo.textContent = account.name || account.username; - - const logoutBtn = document.getElementById('logoutBtn'); - if (logoutBtn) logoutBtn.style.display = 'inline-block'; - - const historySection = document.getElementById('historySection'); - if (historySection) historySection.style.display = ''; - - loadHistory(); -} - -async function loginWithMicrosoft() { - if (!msalInstance) return; - try { await msalInstance.loginRedirect({ scopes: ['User.Read'] }); } - catch (e) { console.error('Login failed:', e); alert('Login failed. 
Please try again.'); } -} - -function logout() { - if (msalInstance) msalInstance.logoutRedirect(); -} - -document.addEventListener('DOMContentLoaded', () => { - loadTheme(); // from utils.js β€” sets data-theme on :root - initMsal(); -}); diff --git a/js/app.js b/js/app.js deleted file mode 100644 index 61d71b4..0000000 --- a/js/app.js +++ /dev/null @@ -1,154 +0,0 @@ -/* App initialization and MSAL authentication */ - -// MSAL configuration -const msalConfig = { - auth: { - clientId: '', // Set from data attribute or env - authority: '', - redirectUri: window.location.origin + window.location.pathname - }, - cache: { - cacheLocation: 'localStorage', - storeAuthStateInCookie: false - } -}; - -let msalInstance = null; -window.msalToken = null; - -function initMsal() { - const el = document.getElementById('msalConfig'); - if (!el) return; - - const tenantId = el.dataset.tenantId; - const clientId = el.dataset.clientId; - const redirectUri = el.dataset.redirectUri; - - if (!tenantId || !clientId) return; - - msalConfig.auth.clientId = clientId; - msalConfig.auth.authority = `https://login.microsoftonline.com/${tenantId}`; - if (redirectUri) msalConfig.auth.redirectUri = redirectUri; - - // Load MSAL library dynamically - const script = document.createElement('script'); - script.src = 'https://cdn.jsdelivr.net/npm/@azure/msal-browser@2/lib/msal-browser.min.js'; - script.onload = () => { - msalInstance = new msal.PublicClientApplication(msalConfig); - msalInstance.initialize().then(() => { - handleMsalRedirect(); - }); - }; - document.head.appendChild(script); -} - -async function handleMsalRedirect() { - try { - const response = await msalInstance.handleRedirectPromise(); - if (response) { - window.msalToken = response.accessToken; - showAuthenticatedUI(response.account); - return; - } - } catch (e) { - console.error('MSAL redirect error:', e); - } - - // Check for existing session - const accounts = msalInstance.getAllAccounts(); - if (accounts.length > 0) { - try { - 
const tokenResponse = await msalInstance.acquireTokenSilent({ - scopes: ['User.Read'], - account: accounts[0] - }); - window.msalToken = tokenResponse.accessToken; - showAuthenticatedUI(accounts[0]); - } catch (e) { - // Token expired, show login - showLoginUI(); - } - } else { - // Check if we're in dev mode (localhost) β€” skip MSAL - if (window.location.hostname === 'localhost' || window.location.hostname === '127.0.0.1') { - hideAuthOverlay(); - } else { - showLoginUI(); - } - } -} - -function showLoginUI() { - const overlay = document.getElementById('authOverlay'); - if (overlay) overlay.classList.add('active'); -} - -function hideAuthOverlay() { - const overlay = document.getElementById('authOverlay'); - if (overlay) overlay.classList.remove('active'); -} - -function showAuthenticatedUI(account) { - hideAuthOverlay(); - const userInfo = document.getElementById('userInfo'); - if (userInfo && account) { - userInfo.textContent = account.name || account.username; - } - const logoutBtn = document.getElementById('logoutBtn'); - if (logoutBtn) logoutBtn.style.display = 'inline-block'; - - // Show My Documents link in header - const historyLink = document.getElementById('historyLink'); - if (historyLink) historyLink.style.display = 'inline-block'; - - // If URL has ?job_id= open that report directly - const params = new URLSearchParams(window.location.search); - const jobId = params.get('job_id'); - if (jobId) openHistoryJob(jobId); -} - -async function openHistoryJob(jobId) { - currentJobId = jobId; - const uploadSection = document.getElementById('uploadSection'); - const resultsSection = document.getElementById('resultsSection'); - if (uploadSection) uploadSection.style.display = 'none'; - if (resultsSection) resultsSection.style.display = ''; - - try { - const resp = await getResult(jobId); - const result = resp?.data || resp; - if (!result || result.error) { - alert('Could not load report: ' + (result?.error || 'Unknown error')); - return; - } - 
displayResults(result); - if (resultsSection) resultsSection.scrollIntoView({ behavior: 'smooth' }); - } catch (e) { - console.error('openHistoryJob failed:', e); - alert('Failed to load report.'); - } -} - -async function loginWithMicrosoft() { - if (!msalInstance) return; - try { - await msalInstance.loginRedirect({ scopes: ['User.Read'] }); - } catch (e) { - console.error('Login failed:', e); - alert('Login failed. Please try again.'); - } -} - -function logout() { - if (msalInstance) { - msalInstance.logoutRedirect(); - } -} - -/* App init */ -document.addEventListener('DOMContentLoaded', () => { - loadTheme(); - initUpload(); - initBatchUpload(); - initMsal(); -}); diff --git a/nginx.conf b/nginx.conf deleted file mode 100644 index 2275361..0000000 --- a/nginx.conf +++ /dev/null @@ -1,42 +0,0 @@ -server { - listen 80; - server_name _; - root /app; - index index.html; - - client_max_body_size 55M; - - # Serve static files directly - location / { - try_files $uri $uri/ /index.html; - } - - # PHP processing - location ~ \.php$ { - fastcgi_pass 127.0.0.1:9000; - fastcgi_index index.php; - fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name; - include fastcgi_params; - - # 15-minute timeout for Cloud Run PDF processing - fastcgi_read_timeout 900s; - fastcgi_send_timeout 900s; - } - - # Serve page images from results - location /results/ { - alias /app/results/; - expires 1d; - add_header Cache-Control "public, immutable"; - } - - # Security headers - add_header X-Content-Type-Options "nosniff" always; - add_header X-Frame-Options "DENY" always; - add_header X-XSS-Protection "1; mode=block" always; - - # Deny access to hidden files - location ~ /\. { - deny all; - } -} diff --git a/report_generator.py b/report_generator.py index e596e75..ec534ac 100644 --- a/report_generator.py +++ b/report_generator.py @@ -195,10 +195,10 @@ def generate_html(data: dict) -> str: