#!/usr/bin/env python3
"""
PDF Accessibility Checker — Cloud Run HTTP Service

Flask app wrapping EnterprisePDFChecker for serverless execution.
Receives PDF via multipart POST, runs checks, uploads page images to GCS,
returns full result JSON.
"""

import os
import json
import shutil
import tempfile
import logging
from pathlib import Path

from flask import Flask, request, jsonify
from google.cloud import storage

from enterprise_pdf_checker import EnterprisePDFChecker

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [cloudrun] %(levelname)s: %(message)s'
)
logger = logging.getLogger('cloudrun')

app = Flask(__name__)

# Target bucket for rendered page images; overridable via the environment.
GCS_BUCKET_NAME = os.getenv('STORAGE_BUCKET', 'pdf-pages')

# (minimum score, grade) pairs, ordered from the highest threshold down.
# Any score below the last threshold is graded 'F'.
_GRADE_THRESHOLDS = ((90, 'A'), (80, 'B'), (70, 'C'), (60, 'D'))


def _grade_for_score(score) -> str:
    """Return the letter grade ('A'-'D', else 'F') for an accessibility score."""
    for minimum, grade in _GRADE_THRESHOLDS:
        if score >= minimum:
            return grade
    return 'F'


def upload_images_to_gcs(images_dir: Path, job_id: str) -> dict:
    """Upload page images to GCS and return {page_num: public_url} mapping.

    Args:
        images_dir: Directory searched (non-recursively) for ``page_*.png``.
        job_id: Used verbatim as the object-name prefix (``<job_id>/<file>``).

    Returns:
        Dict mapping the integer page number parsed from each filename to the
        ``https://storage.googleapis.com/...`` URL of the uploaded object.
        Files whose name carries no numeric page number are skipped.
    """
    client = storage.Client()
    bucket = client.bucket(GCS_BUCKET_NAME)

    page_images = {}
    for image_file in sorted(images_dir.glob('page_*.png')):
        # Extract page number from filename (page_1.png -> 1). The glob also
        # matches names such as page_.png or page_cover.png; skip those
        # instead of letting one stray file abort the whole upload.
        try:
            page_num = int(image_file.stem.split('_')[1])
        except ValueError:
            logger.warning(
                "Skipping %s: no numeric page number in filename",
                image_file.name,
            )
            continue

        blob_name = f"{job_id}/{image_file.name}"
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(str(image_file), content_type='image/png')

        # Bucket has uniform bucket-level access with allUsers objectViewer,
        # so objects are public by default — no need for blob.make_public()
        public_url = f"https://storage.googleapis.com/{GCS_BUCKET_NAME}/{blob_name}"
        page_images[page_num] = public_url
        logger.info("Uploaded %s -> %s", image_file.name, public_url)

    return page_images


@app.route('/check', methods=['POST'])
def check_pdf():
    """Accept multipart PDF upload, run accessibility checks, return results.

    Form fields:
        pdf: The uploaded PDF file (required).
        job_id: Identifier used in logs and as the GCS object prefix
            (default ``'unknown'``).
        quick_mode: ``'true'``, ``'1'`` or ``'yes'`` (case-insensitive)
            enables quick mode; anything else disables it.
        original_filename: Name used in the log line; falls back to the
            upload's filename, then ``'document.pdf'``.

    Returns:
        ``{'success': True, 'data': results}`` on success,
        ``({'success': False, 'error': ...}, 400)`` when no PDF is supplied,
        ``({'success': False, 'error': str(exc)}, 500)`` on any failure.
    """
    pdf_file = request.files.get('pdf')
    if not pdf_file:
        return jsonify({'success': False, 'error': 'No PDF file provided'}), 400

    # NOTE(review): job_id comes straight from the request and is used
    # verbatim as the GCS object prefix — confirm callers are trusted.
    job_id = request.form.get('job_id', 'unknown')
    quick_mode = request.form.get('quick_mode', 'false').lower() in ('true', '1', 'yes')
    original_filename = request.form.get(
        'original_filename', pdf_file.filename or 'document.pdf'
    )

    logger.info("Received job %s: %s (quick=%s)", job_id, original_filename, quick_mode)

    tmp_pdf_path = None
    images_dir = None

    try:
        # Save uploaded PDF to temp file. The context manager closes the
        # handle even if save() raises; delete=False keeps the file on disk
        # for the checker, and the finally clause below removes it.
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_pdf:
            tmp_pdf_path = tmp_pdf.name
            pdf_file.save(tmp_pdf)

        # Run accessibility checks
        config = {
            'anthropic_api_key': os.getenv('ANTHROPIC_API_KEY'),
            'google_api_key': os.getenv('GOOGLE_API_KEY'),
        }
        checker = EnterprisePDFChecker(tmp_pdf_path, config, quick_mode=quick_mode)
        checker.check_all()

        # Generate page images to a temp directory
        images_dir = tempfile.mkdtemp(prefix='pdf_images_')
        images_path = Path(images_dir)
        checker._generate_page_images(images_path)

        # Get results before uploading images (page_images has local filenames)
        results = checker.to_dict()

        # Upload images to GCS and replace local filenames with public URLs
        if checker.page_images:
            results['page_images'] = upload_images_to_gcs(images_path, job_id)

        # Add grade based on score
        score = results.get('accessibility_score', 0)
        results['grade'] = _grade_for_score(score)

        # Use tolerant lookups here: a missing key in the log line must not
        # turn an otherwise successful check into a 500 response.
        logger.info(
            "Job %s completed: score=%s grade=%s issues=%s",
            job_id, score, results['grade'], results.get('total_issues'),
        )

        return jsonify({'success': True, 'data': results})

    except Exception as e:
        # Top-level request boundary: log with traceback, report to caller.
        logger.exception("Job %s failed: %s", job_id, e)
        return jsonify({'success': False, 'error': str(e)}), 500

    finally:
        # Clean up temp files
        if tmp_pdf_path and os.path.exists(tmp_pdf_path):
            os.unlink(tmp_pdf_path)
        if images_dir:
            shutil.rmtree(images_dir, ignore_errors=True)


@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: always responds with ``{'status': 'ok'}``."""
    return jsonify({'status': 'ok'})


if __name__ == '__main__':
    # os.getenv returns a string, so keep the default a string as well.
    port = int(os.getenv('PORT', '8080'))
    app.run(host='0.0.0.0', port=port, debug=False)