diff --git a/enterprise_pdf_checker.py b/enterprise_pdf_checker.py index b8040a8..34fb475 100644 --- a/enterprise_pdf_checker.py +++ b/enterprise_pdf_checker.py @@ -21,6 +21,7 @@ import re import base64 import hashlib import time +import subprocess from pathlib import Path from typing import List, Dict, Any, Optional, Tuple from dataclasses import dataclass, field, asdict @@ -38,6 +39,14 @@ except ImportError: # dotenv not installed, that's okay - will use environment variables pass +# Import remediation module +try: + from pdf_remediation import VeraPDFValidator, PDFRemediator +except ImportError: + print("⚠️ Remediation module not found - auto-fix features disabled") + VeraPDFValidator = None + PDFRemediator = None + # Core PDF libraries try: from pypdf import PdfReader, PdfWriter @@ -319,6 +328,8 @@ class EnterprisePDFChecker: self.pdf_plumber = None self.cache = CacheManager() self.page_images: Dict[int, str] = {} # page_num -> image_path + self.verapdf_results: Optional[Dict] = None + self.remediation_suggestions: Optional[Dict] = None # API clients self.vision_client = None @@ -428,6 +439,7 @@ class EnterprisePDFChecker: (self._check_fonts, "Font Accessibility"), (self._check_security, "Security Settings"), (self._check_bookmarks, "Navigation Aids"), + (self._check_verapdf_validation, "PDF/UA Structure (veraPDF)"), ] for check_func, check_name in checks: @@ -435,7 +447,10 @@ class EnterprisePDFChecker: result = self.run_check(check_func, check_name) status = "✅" if result.passed else "❌" print(f"{status} ({result.duration:.2f}s)") - + + # Analyze remediation options + self._analyze_remediation_options() + except Exception as e: self.add_issue( Severity.CRITICAL, @@ -1202,7 +1217,7 @@ Respond in JSON format: """Check navigation bookmarks""" outlines = self.pdf_reader.outline total_pages = len(self.pdf_reader.pages) - + if not outlines and total_pages > 5: self.add_issue( Severity.INFO, @@ -1218,6 +1233,84 @@ Respond in JSON format: "Document has navigation bookmarks", wcag_criterion="2.4.5" ) + + def _check_verapdf_validation(self): + """Run veraPDF PDF/UA validation""" + if not VeraPDFValidator: + print(" ⚠️ veraPDF not available - skipping") + return + + print("\n 📋 Running veraPDF PDF/UA validation...") + + try: + validator = VeraPDFValidator() + results = validator.validate(str(self.pdf_path)) + + if 'error' in results: + print(f" ⚠️ veraPDF validation error: {results['error']}") + return + + self.verapdf_results = results + + # Report compliance status + if results['compliant']: + self.add_issue( + Severity.SUCCESS, + "PDF/UA Compliance", + f"Document passes PDF/UA-1 validation ({results['passed_rules']} rules passed)", + wcag_criterion="PDF/UA", + recommendation="Document meets PDF/UA structure requirements" + ) + else: + self.add_issue( + Severity.ERROR, + "PDF/UA Compliance", + f"Document fails PDF/UA-1 validation ({results['failed_rules']} rules failed, {results['failed_checks']} checks failed)", + wcag_criterion="PDF/UA", + recommendation="Fix structure issues reported by veraPDF" + ) + + # Add specific errors as issues + for error in results.get('errors', [])[:10]: # Limit to first 10 + self.add_issue( + Severity.WARNING, + "PDF/UA Structure", + f"Clause {error['clause']}: {error['description'][:150]}", + wcag_criterion="PDF/UA", + recommendation="Consult veraPDF documentation for this clause" + ) + + print(f" ✅ veraPDF: {results['passed_rules']} passed, {results['failed_rules']} failed") + + except Exception as e: + print(f" ⚠️ veraPDF check error: {str(e)}") + + def _analyze_remediation_options(self): + """Analyze what can be auto-fixed""" + if not PDFRemediator: + return + + print("\n🔧 Analyzing auto-remediation options...") + + try: + remediator = PDFRemediator(str(self.pdf_path)) + suggestions = remediator.analyze_and_suggest_fixes() + + self.remediation_suggestions = suggestions + + # Count fixable issues + total_fixable = sum( + len([f for f in fixes if f.get('auto_fixable')]) + for fixes in suggestions.values() + ) + + if total_fixable > 0: + print(f" ✅ {total_fixable} issues can be auto-fixed") + else: + print(f" ℹ️ No auto-fixable issues found") + + except Exception as e: + print(f" ⚠️ Remediation analysis error: {str(e)}") # ==================== HELPER METHODS ==================== @@ -1307,15 +1400,26 @@ Respond in JSON format: else: stats_serializable[key] = value + # Count auto-fixable issues + auto_fixable_count = 0 + if self.remediation_suggestions: + auto_fixable_count = sum( + len([f for f in fixes if f.get('auto_fixable')]) + for fixes in self.remediation_suggestions.values() + ) + return { 'filename': self.pdf_path.name, 'total_pages': len(self.pdf_reader.pages), 'accessibility_score': score, 'severity_counts': severity_counts, 'total_issues': len(self.issues), + 'auto_fixable_count': auto_fixable_count, 'stats': stats_serializable, 'page_images': self.page_images, # Map of page_num -> image_filename 'page_image_dpi': getattr(self, 'page_image_dpi', 150), # DPI for coordinate scaling + 'verapdf_validation': self.verapdf_results, + 'remediation_suggestions': self.remediation_suggestions, 'checks_performed': [ { 'name': cr.check_name, diff --git a/index.html b/index.html index fde25df..ae6c679 100644 --- a/index.html +++ b/index.html @@ -616,17 +616,38 @@

Accessibility Report

- +
--
Accessibility Score
- +
+ + +