diff --git a/enterprise_pdf_checker.py b/enterprise_pdf_checker.py
index b6abfe1..0399d3d 100644
--- a/enterprise_pdf_checker.py
+++ b/enterprise_pdf_checker.py
@@ -394,19 +394,57 @@ class EnterprisePDFChecker:
         )
         self.issues.append(issue)
 
+    # Per-check wall-clock timeouts (seconds). Heavy checks get more time.
+    _CHECK_TIMEOUTS = {
+        "Image Accessibility": 180,
+        "OCR Quality": 180,
+        "Color Contrast": 120,
+        "PDF/UA Structure (veraPDF)": 120,
+        "Content Readability": 60,
+    }
+    _DEFAULT_CHECK_TIMEOUT = 90
+
     def run_check(self, check_func, check_name: str) -> CheckResult:
-        """Run a check and record results"""
+        """Run a check with a per-check timeout and record results.
+
+        The check runs on a worker thread and is awaited for at most the
+        number of seconds configured in ``_CHECK_TIMEOUTS`` (falling back to
+        ``_DEFAULT_CHECK_TIMEOUT``). A timeout is recorded as a WARNING
+        issue and the check is marked as not passed.
+
+        Python cannot kill a running thread, so a timed-out check keeps
+        running in the background and may still add issues after this returns.
+        """
+        from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
         start_time = time.time()
         result = CheckResult(check_name=check_name, passed=True)
         issues_before = len(self.issues)
+        timeout = self._CHECK_TIMEOUTS.get(check_name, self._DEFAULT_CHECK_TIMEOUT)
 
         try:
-            check_func()
+            # Do not use the executor as a context manager: its __exit__
+            # calls shutdown(wait=True), which blocks until the check
+            # finishes and would make the timeout ineffective.
+            executor = ThreadPoolExecutor(max_workers=1)
+            future = executor.submit(check_func)
+            # Already-submitted work still runs; the worker thread exits
+            # on its own once the check returns.
+            executor.shutdown(wait=False)
+            future.result(timeout=timeout)
 
             # Check passed if no critical/error issues added by THIS check
             new_issues = self.issues[issues_before:]
             critical_errors = [i for i in new_issues if i.severity in [Severity.CRITICAL, Severity.ERROR]]
             result.passed = len(critical_errors) == 0
+        except FuturesTimeout:
+            logger.warning(f"{check_name} timed out after {timeout}s — skipping")
+            self.add_issue(
+                Severity.WARNING,
+                check_name,
+                f"Check timed out after {timeout}s and was skipped",
+                details={'timeout': timeout}
+            )
+            result.passed = False
         except Exception as e:
             self.add_issue(
                 Severity.CRITICAL,
@@ -415,11 +453,11 @@ class EnterprisePDFChecker:
                 details={'error': str(e), 'traceback': traceback.format_exc()}
             )
             result.passed = False
-        
+
         result.duration = time.time() - start_time
         self.check_results.append(result)
         self.stats['total_checks'] += 1
-        
+
         return result
 
     def check_all(self) -> Dict[str, Any]: