- Delete PHP API layer (api.php, auth.php) — replaced by FastAPI in Phase 2 - Delete MSAL/Azure AD JS files (app.js, app-history.js, api.js) - Delete GCP Cloud Build/Deploy infra (cloudbuild.yaml, deploy.sh, Dockerfiles) - Delete Oliver-specific docs (OLIVER_CUSTOMIZATION.md, DAVE_QUICK_SETUP.md, etc.) - Replace Oliver yellow #FFC407 with Aimpress indigo #6366F1 across CSS + reports - Replace Oliver Solutions footer in report_generator.py with Aimpress - Switch font from Montserrat to Inter in CSS - Replace GCS optical-pdf-images bucket with STORAGE_BUCKET env var - Rewrite README.md for Aimpress SaaS product Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
136 lines
4.5 KiB
Python
136 lines
4.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
PDF Accessibility Checker — Cloud Run HTTP Service
|
|
|
|
Flask app wrapping EnterprisePDFChecker for serverless execution.
|
|
Receives PDF via multipart POST, runs checks, uploads page images to GCS,
|
|
returns full result JSON.
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import tempfile
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from flask import Flask, request, jsonify
|
|
from google.cloud import storage
|
|
|
|
from enterprise_pdf_checker import EnterprisePDFChecker
|
|
|
|
# Configure root logging once at import time; every record carries a fixed
# "[cloudrun]" tag so this service's lines are easy to pick out.
logging.basicConfig(
    format='%(asctime)s [cloudrun] %(levelname)s: %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('cloudrun')

# Flask application object; the route handlers below register themselves on it.
app = Flask(__name__)

# Bucket that receives rendered page images. Read from the STORAGE_BUCKET
# environment variable, falling back to 'pdf-pages' when it is unset.
GCS_BUCKET_NAME = os.environ.get('STORAGE_BUCKET', 'pdf-pages')
|
|
|
|
|
|
def upload_images_to_gcs(images_dir: Path, job_id: str) -> dict:
    """Push a job's rendered page images to the configured GCS bucket.

    Every file matching ``page_*.png`` directly inside *images_dir* is
    uploaded as the object ``<job_id>/<filename>`` with content type
    ``image/png``.

    Args:
        images_dir: Directory holding the ``page_<n>.png`` files.
        job_id: Prefix under which the objects are stored in the bucket.

    Returns:
        A dict mapping the integer page number parsed from each filename to
        the ``https://storage.googleapis.com/...`` URL of the uploaded object.
    """
    gcs_bucket = storage.Client().bucket(GCS_BUCKET_NAME)
    urls_by_page = {}

    png_paths = sorted(images_dir.glob('page_*.png'))
    for png_path in png_paths:
        # "page_7.png" -> stem "page_7" -> second underscore-separated field.
        number_text = png_path.stem.split('_')[1]
        page_num = int(number_text)

        blob_name = f"{job_id}/{png_path.name}"
        gcs_bucket.blob(blob_name).upload_from_filename(
            str(png_path),
            content_type='image/png',
        )

        # No blob.make_public() call here: per the original author's note the
        # bucket uses uniform bucket-level access with allUsers objectViewer,
        # so uploaded objects are already publicly readable.
        public_url = f"https://storage.googleapis.com/{GCS_BUCKET_NAME}/{blob_name}"
        urls_by_page[page_num] = public_url
        logger.info("Uploaded %s -> %s", png_path.name, public_url)

    return urls_by_page
|
|
|
|
|
|
def _grade_for_score(score):
    """Map a numeric accessibility score to a letter grade ('A'..'D', else 'F')."""
    for threshold, letter in ((90, 'A'), (80, 'B'), (70, 'C'), (60, 'D')):
        if score >= threshold:
            return letter
    return 'F'


@app.route('/check', methods=['POST'])
def check_pdf():
    """Accept a multipart PDF upload, run accessibility checks, return results.

    Form fields read from the request:
        pdf: The uploaded PDF file (required).
        job_id: Identifier used in logs and as the GCS object prefix;
            defaults to 'unknown'.
        quick_mode: 'true', '1' or 'yes' (case-insensitive) enables quick
            mode on the checker; anything else disables it.
        original_filename: Name used for logging only; defaults to the
            upload's filename, or 'document.pdf'.

    Returns:
        * 200 with ``{'success': True, 'data': <results>}`` where results is
          ``checker.to_dict()`` plus a ``grade`` key and, when the checker
          produced page images, ``page_images`` replaced by GCS URLs.
        * 400 with ``{'success': False, 'error': ...}`` when no file is sent.
        * 500 with ``{'success': False, 'error': str(exc)}`` on any failure.
    """
    # shutil is not imported at module level; it is only needed for cleanup.
    import shutil

    pdf_file = request.files.get('pdf')
    if not pdf_file:
        return jsonify({'success': False, 'error': 'No PDF file provided'}), 400

    job_id = request.form.get('job_id', 'unknown')
    quick_mode = request.form.get('quick_mode', 'false').lower() in ('true', '1', 'yes')
    original_filename = request.form.get('original_filename', pdf_file.filename or 'document.pdf')

    logger.info("Received job %s: %s (quick=%s)", job_id, original_filename, quick_mode)

    tmp_pdf_path = None
    images_dir = None

    try:
        # Persist the upload to disk. The ``with`` block guarantees the handle
        # is closed even if save() raises; delete=False keeps the file around
        # for the checker, and the ``finally`` clause below removes it.
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_pdf:
            tmp_pdf_path = tmp_pdf.name
            pdf_file.save(tmp_pdf)

        # Run accessibility checks
        config = {
            'anthropic_api_key': os.getenv('ANTHROPIC_API_KEY'),
            'google_api_key': os.getenv('GOOGLE_API_KEY'),
        }

        checker = EnterprisePDFChecker(tmp_pdf_path, config, quick_mode=quick_mode)
        checker.check_all()

        # Generate page images to a temp directory
        images_dir = tempfile.mkdtemp(prefix='pdf_images_')
        images_path = Path(images_dir)
        checker._generate_page_images(images_path)

        # Snapshot results before uploading; per the original author's note,
        # page_images holds local filenames at this point.
        results = checker.to_dict()

        # Upload images to GCS and replace local filenames with public URLs
        if checker.page_images:
            results['page_images'] = upload_images_to_gcs(images_path, job_id)

        # Add grade based on score (a missing score is treated as 0 -> 'F')
        score = results.get('accessibility_score', 0)
        results['grade'] = _grade_for_score(score)

        # Log with the same tolerant lookups used above so a missing key
        # cannot turn a completed job into a 500 response.
        logger.info("Job %s completed: score=%s grade=%s issues=%s",
                    job_id, score, results['grade'], results.get('total_issues'))

        return jsonify({'success': True, 'data': results})

    except Exception as e:
        # Top-level request boundary: log with traceback and report failure.
        logger.exception("Job %s failed: %s", job_id, e)
        return jsonify({'success': False, 'error': str(e)}), 500

    finally:
        # Best-effort cleanup: an exception raised here would replace the
        # response being returned, so failures are logged instead.
        if tmp_pdf_path:
            try:
                os.unlink(tmp_pdf_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_err:
                logger.warning("Job %s: could not remove %s: %s",
                               job_id, tmp_pdf_path, cleanup_err)
        if images_dir:
            shutil.rmtree(images_dir, ignore_errors=True)
|
|
|
|
|
|
@app.route('/health', methods=['GET'])
def health():
    """Health probe endpoint: always responds with the JSON body {'status': 'ok'}."""
    payload = {'status': 'ok'}
    return jsonify(payload)
|
|
|
|
|
|
if __name__ == '__main__':
    # Direct execution: listen on all interfaces, taking the port from the
    # PORT environment variable and falling back to 8080 when it is unset.
    port = int(os.environ.get('PORT', 8080))
    app.run(debug=False, port=port, host='0.0.0.0')
|