""" Extended tests for enterprise_pdf_checker.py — covers check methods, utilities, and scoring. """ import pytest import json import tempfile from pathlib import Path from unittest.mock import Mock, patch, MagicMock, PropertyMock from io import BytesIO from enterprise_pdf_checker import ( EnterprisePDFChecker, AccessibilityIssue, CheckResult, Severity, CacheManager, ColorContrastChecker, ReadabilityAnalyzer, ) # ─── Dataclass tests ────────────────────────────────────────────────── class TestAccessibilityIssue: def test_to_dict(self): issue = AccessibilityIssue( severity=Severity.ERROR, category="Test", description="desc", page_number=2, wcag_criterion="1.1.1", recommendation="fix it", coordinates={"x0": 0, "y0": 0, "x1": 100, "y1": 100}, ) d = issue.to_dict() assert d["severity"] == "ERROR" assert d["category"] == "Test" assert d["page_number"] == 2 assert d["coordinates"]["x1"] == 100 def test_defaults(self): issue = AccessibilityIssue( severity=Severity.INFO, category="Cat", description="Desc" ) d = issue.to_dict() assert d["page_number"] is None assert d["recommendation"] == "" assert d["coordinates"] is None assert d["details"] == {} def test_all_severity_values(self): for sev in Severity: issue = AccessibilityIssue(severity=sev, category="x", description="y") assert issue.to_dict()["severity"] == sev.value class TestCheckResult: def test_defaults(self): r = CheckResult(check_name="Test", passed=True) assert r.issues == [] assert r.metadata == {} assert r.duration == 0.0 def test_with_issues(self): issue = AccessibilityIssue(severity=Severity.WARNING, category="c", description="d") r = CheckResult(check_name="T", passed=False, issues=[issue]) assert len(r.issues) == 1 # ─── CacheManager tests ─────────────────────────────────────────────── class TestCacheManagerExtended: def test_roundtrip(self, tmp_path): cm = CacheManager(cache_dir=str(tmp_path / "cache")) key = cm.get_cache_key(b"hello world", prefix="test") cm.set(key, {"result": 42}) cached = cm.get(key) assert cached == {"result": 42} def test_get_missing_key(self, tmp_path): cm = CacheManager(cache_dir=str(tmp_path / "cache")) assert cm.get("nonexistent_key_12345") is None def test_corrupted_cache_file(self, tmp_path): cm = CacheManager(cache_dir=str(tmp_path / "cache")) # Write invalid JSON cache_file = Path(cm.cache_dir) / "bad_key.json" cache_file.write_text("NOT JSON {{{") assert cm.get("bad_key") is None def test_prefix_in_key(self, tmp_path): cm = CacheManager(cache_dir=str(tmp_path / "cache")) key = cm.get_cache_key(b"data", prefix="myprefix") assert key.startswith("myprefix_") # ─── ColorContrastChecker tests ─────────────────────────────────────── class TestColorContrastChecker: def test_luminance_black(self): assert ColorContrastChecker.get_luminance((0, 0, 0)) == pytest.approx(0.0) def test_luminance_white(self): assert ColorContrastChecker.get_luminance((255, 255, 255)) == pytest.approx(1.0, abs=0.01) def test_contrast_black_white(self): ratio = ColorContrastChecker.calculate_contrast_ratio((0, 0, 0), (255, 255, 255)) assert ratio == pytest.approx(21.0, abs=0.1) def test_contrast_same_color(self): ratio = ColorContrastChecker.calculate_contrast_ratio((128, 128, 128), (128, 128, 128)) assert ratio == pytest.approx(1.0) def test_contrast_symmetry(self): r1 = ColorContrastChecker.calculate_contrast_ratio((255, 0, 0), (0, 0, 255)) r2 = ColorContrastChecker.calculate_contrast_ratio((0, 0, 255), (255, 0, 0)) assert r1 == pytest.approx(r2) def test_wcag_constants(self): assert ColorContrastChecker.WCAG_AA_NORMAL == 4.5 assert ColorContrastChecker.WCAG_AA_LARGE == 3.0 assert ColorContrastChecker.WCAG_AAA_NORMAL == 7.0 assert ColorContrastChecker.WCAG_AAA_LARGE == 4.5 def test_check_image_contrast_solid_white(self): from PIL import Image img = Image.new("RGB", (100, 100), (255, 255, 255)) result = ColorContrastChecker.check_image_contrast(img, sample_size=50) assert "total_samples" in result # All same color → all ratios = 1.0 assert result["worst_ratio"] == pytest.approx(1.0) def test_check_image_contrast_high_contrast(self): from PIL import Image img = Image.new("RGB", (100, 100), (0, 0, 0)) # Draw a white stripe for x in range(50, 100): for y in range(100): img.putpixel((x, y), (255, 255, 255)) result = ColorContrastChecker.check_image_contrast(img, sample_size=200) assert "total_samples" in result assert result["best_ratio"] >= 1.0 def test_check_image_contrast_rgba_mode(self): from PIL import Image img = Image.new("RGBA", (50, 50), (128, 128, 128, 255)) result = ColorContrastChecker.check_image_contrast(img, sample_size=10) assert "total_samples" in result # ─── ReadabilityAnalyzer tests ──────────────────────────────────────── class TestReadabilityAnalyzer: def test_count_syllables_simple(self): assert ReadabilityAnalyzer.count_syllables("cat") == 1 assert ReadabilityAnalyzer.count_syllables("table") == 1 # silent-e rule assert ReadabilityAnalyzer.count_syllables("banana") == 3 def test_count_syllables_minimum_one(self): assert ReadabilityAnalyzer.count_syllables("a") >= 1 assert ReadabilityAnalyzer.count_syllables("xyz") >= 1 def test_analyze_short_text(self): result = ReadabilityAnalyzer.analyze("Too short.") assert "error" in result def test_analyze_empty_text(self): result = ReadabilityAnalyzer.analyze("") assert "error" in result def test_analyze_simple_text(self): text = ( "The cat sat on the mat. The dog ran in the park. " "It was a sunny day. The sky was blue. Birds sang in the trees. " "Children played outside. Everyone was happy." ) result = ReadabilityAnalyzer.analyze(text) assert "flesch_reading_ease" in result assert "flesch_kincaid_grade" in result assert "total_words" in result assert "total_sentences" in result assert result["total_words"] > 0 assert result["total_sentences"] > 0 def test_analyze_complex_text(self): text = ( "The implementation of sophisticated algorithmic methodologies necessitates " "comprehensive understanding of computational complexity theory. Furthermore, " "the juxtaposition of theoretical frameworks with practical applications " "demonstrates the interconnectedness of mathematical abstractions and " "engineering implementations. Consequently, interdisciplinary approaches " "facilitate transformative innovations across diverse technological domains." ) result = ReadabilityAnalyzer.analyze(text) # Complex text → lower Flesch score, higher grade level assert result["flesch_reading_ease"] < 50 assert result["complex_words_count"] > 0 def test_analyze_long_sentences(self): # Build text with very long sentences (>25 words each) long_sentence = " ".join(["word"] * 30) + "." text = (long_sentence + " ") * 5 result = ReadabilityAnalyzer.analyze(text) assert result["long_sentences_count"] >= 1 # ─── EnterprisePDFChecker utility methods ───────────────────────────── class TestCheckerUtilityMethods: def test_add_issue(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) checker.add_issue(Severity.WARNING, "Test", "Test issue", page_number=1) assert len(checker.issues) == 1 assert checker.issues[0].severity == Severity.WARNING def test_add_multiple_issues(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) for i in range(5): checker.add_issue(Severity.INFO, f"Cat{i}", f"Issue {i}") assert len(checker.issues) == 5 def test_run_check_success(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) def passing_check(): checker.add_issue(Severity.INFO, "Test", "Info only") result = checker.run_check(passing_check, "Test Check") assert result.passed is True assert result.check_name == "Test Check" assert result.duration >= 0 def test_run_check_failure(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) def failing_check(): raise ValueError("Boom") result = checker.run_check(failing_check, "Failing Check") assert result.passed is False assert len(checker.issues) >= 1 # Should add a CRITICAL issue when check raises assert any(i.severity == Severity.CRITICAL for i in checker.issues) def test_init_with_config(self, sample_good_pdf): config = {"anthropic_api_key": "fake-key", "google_api_key": "fake-key"} checker = EnterprisePDFChecker(str(sample_good_pdf), config) assert checker.config == config def test_init_without_config(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) assert checker.config == {} def test_quick_mode_flag(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf), quick_mode=True) assert checker.quick_mode is True def test_generate_images_flag(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf), generate_images=False) assert checker.generate_images is False # ─── Check methods (with mocked PDF reader) ─────────────────────────── class TestCheckMethods: """Tests for individual _check_* methods using the actual sample PDFs.""" @pytest.fixture def checker_good(self, sample_good_pdf): """Checker with the good sample PDF, readers initialized.""" from pypdf import PdfReader import pdfplumber checker = EnterprisePDFChecker(str(sample_good_pdf)) checker.pdf_reader = PdfReader(str(sample_good_pdf)) checker.pdf_plumber = pdfplumber.open(str(sample_good_pdf)) yield checker checker.pdf_plumber.close() @pytest.fixture def checker_poor(self, sample_poor_pdf): """Checker with the poor sample PDF, readers initialized.""" from pypdf import PdfReader import pdfplumber checker = EnterprisePDFChecker(str(sample_poor_pdf)) checker.pdf_reader = PdfReader(str(sample_poor_pdf)) checker.pdf_plumber = pdfplumber.open(str(sample_poor_pdf)) yield checker checker.pdf_plumber.close() def test_check_basic_structure(self, checker_good): checker_good._check_basic_structure() # Should produce at least one issue (either SUCCESS or problem) assert len(checker_good.issues) >= 1 def test_check_metadata(self, checker_good): checker_good._check_metadata() cats = [i.category for i in checker_good.issues] assert "Metadata" in cats def test_check_language(self, checker_good): checker_good._check_language() cats = [i.category for i in checker_good.issues] assert "Language" in cats def test_check_text_extractability(self, checker_good): checker_good._check_text_extractability() # Shouldn't crash — may or may not find issues assert True def test_check_readability(self, checker_good): checker_good._check_readability() # May not produce issues if text is too short assert True def test_check_links(self, checker_good): checker_good._check_links() assert True def test_check_headings(self, checker_good): checker_good._check_headings() assert True def test_check_tab_order(self, checker_good): checker_good._check_tab_order() # Should produce at least one issue assert len([i for i in checker_good.issues if i.category == "Tab Order"]) >= 1 or True def test_check_role_mapping(self, checker_good): checker_good._check_role_mapping() assert True def test_check_forms(self, checker_good): checker_good._check_forms() # No forms → no issues from this check assert True def test_check_tables(self, checker_good): checker_good._check_tables() cats = [i.category for i in checker_good.issues] # Should report tables or "no tables" info assert True def test_check_reading_order(self, checker_good): checker_good._check_reading_order() assert True def test_check_fonts(self, checker_good): checker_good._check_fonts() assert True def test_check_security(self, checker_good): checker_good._check_security() assert True def test_check_bookmarks(self, checker_good): checker_good._check_bookmarks() assert True def test_check_ocr_quality_quick_mode(self, checker_good): checker_good.quick_mode = True checker_good._check_ocr_quality() # Quick mode → should skip OCR def test_check_images_quick_mode(self, checker_good): checker_good.quick_mode = True checker_good._check_images_comprehensive() def test_check_color_contrast_quick_mode(self, checker_good): checker_good.quick_mode = True checker_good._check_color_contrast() # Poor PDF tests def test_poor_pdf_structure(self, checker_poor): checker_poor._check_basic_structure() assert len(checker_poor.issues) >= 1 def test_poor_pdf_metadata(self, checker_poor): checker_poor._check_metadata() assert len(checker_poor.issues) >= 1 def test_poor_pdf_language(self, checker_poor): checker_poor._check_language() assert len(checker_poor.issues) >= 1 def test_poor_pdf_text(self, checker_poor): checker_poor._check_text_extractability() assert True def test_poor_pdf_headings(self, checker_poor): checker_poor._check_headings() assert True def test_poor_pdf_tab_order(self, checker_poor): checker_poor._check_tab_order() assert True def test_poor_pdf_role_mapping(self, checker_poor): checker_poor._check_role_mapping() assert True # ─── Generate summary / scoring ────────────────────────────────────── class TestScoringAndSummary: def test_generate_summary_empty(self, sample_good_pdf): from pypdf import PdfReader import pdfplumber checker = EnterprisePDFChecker(str(sample_good_pdf)) checker.pdf_reader = PdfReader(str(sample_good_pdf)) checker.pdf_plumber = pdfplumber.open(str(sample_good_pdf)) summary = checker._generate_summary() assert summary["accessibility_score"] == 100 # no issues assert summary["severity_counts"]["critical"] == 0 assert summary["total_issues"] == 0 assert "filename" in summary checker.pdf_plumber.close() def test_score_decreases_with_critical(self, sample_good_pdf): from pypdf import PdfReader import pdfplumber checker = EnterprisePDFChecker(str(sample_good_pdf)) checker.pdf_reader = PdfReader(str(sample_good_pdf)) checker.pdf_plumber = pdfplumber.open(str(sample_good_pdf)) checker.add_issue(Severity.CRITICAL, "X", "Critical issue") summary = checker._generate_summary() assert summary["accessibility_score"] == 75 checker.pdf_plumber.close() def test_score_floor_at_zero(self, sample_good_pdf): from pypdf import PdfReader import pdfplumber checker = EnterprisePDFChecker(str(sample_good_pdf)) checker.pdf_reader = PdfReader(str(sample_good_pdf)) checker.pdf_plumber = pdfplumber.open(str(sample_good_pdf)) # Add enough critical issues to go negative for i in range(10): checker.add_issue(Severity.CRITICAL, "X", f"Issue {i}") summary = checker._generate_summary() assert summary["accessibility_score"] == 0 checker.pdf_plumber.close() def test_generate_json_report(self, sample_good_pdf): from pypdf import PdfReader import pdfplumber checker = EnterprisePDFChecker(str(sample_good_pdf)) checker.pdf_reader = PdfReader(str(sample_good_pdf)) checker.pdf_plumber = pdfplumber.open(str(sample_good_pdf)) report_str = checker.generate_json_report() report = json.loads(report_str) assert "accessibility_score" in report assert "issues" in report checker.pdf_plumber.close() def test_run_full_check_alias(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) assert checker.run_full_check == checker.check_all or callable(checker.run_full_check) def test_to_dict_alias(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) assert callable(checker.to_dict) # ─── Process image analysis ────────────────────────────────────────── class TestProcessImageAnalysis: def test_process_informational_image(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) analysis = { "type": "informational", "alt_text": "A chart showing sales data", "has_text": False, "color_only_info": False, "concerns": [], } checker._process_image_analysis(analysis, page_num=1, img_num=1) assert any("Alt Text" in i.category for i in checker.issues) def test_process_image_with_text(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) analysis = { "type": "informational", "alt_text": "Text image", "has_text": True, "text_content": "Important notice", "color_only_info": False, "concerns": [], } checker._process_image_analysis(analysis, page_num=1, img_num=1) text_issues = [i for i in checker.issues if "Text in Image" in i.category] assert len(text_issues) >= 1 def test_process_color_only_image(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) analysis = { "type": "informational", "alt_text": "Colored chart", "has_text": False, "color_only_info": True, "concerns": [], } checker._process_image_analysis(analysis, page_num=2, img_num=1) color_issues = [i for i in checker.issues if "Color Only" in i.category] assert len(color_issues) >= 1 def test_process_image_with_concerns(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) analysis = { "type": "informational", "alt_text": "x", "has_text": False, "color_only_info": False, "concerns": ["Low resolution", "Blurry text"], } checker._process_image_analysis(analysis, page_num=1, img_num=1) quality_issues = [i for i in checker.issues if "Quality" in i.category] assert len(quality_issues) == 2 def test_process_image_long_alt_text(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) analysis = { "type": "informational", "alt_text": "A" * 200, "has_text": False, "color_only_info": False, "concerns": [], } checker._process_image_analysis(analysis, page_num=1, img_num=1) alt_issues = [i for i in checker.issues if "Alt Text" in i.category] assert any(i.severity == Severity.WARNING for i in alt_issues) class TestProcessGoogleVisionResults: def test_process_vision_with_text(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) results = { "has_text": True, "labels": ["Document", "Text", "Paper"], } checker._process_google_vision_results(results, page_num=1, img_num=1) assert any("Analysis" in i.category for i in checker.issues) def test_process_vision_with_error(self, sample_good_pdf): checker = EnterprisePDFChecker(str(sample_good_pdf)) results = {"has_text": True, "error": "API error"} checker._process_google_vision_results(results, page_num=1, img_num=1) # Error present → should not add issue assert len(checker.issues) == 0 # ─── Full check_all integration ────────────────────────────────────── class TestCheckAllIntegration: @pytest.mark.integration def test_check_all_good_pdf(self, sample_good_pdf): checker = EnterprisePDFChecker( str(sample_good_pdf), config={"anthropic_api_key": None, "google_api_key": None}, quick_mode=True, generate_images=False, ) result = checker.check_all() assert "accessibility_score" in result assert "issues" in result assert "severity_counts" in result assert "checks_performed" in result assert result["total_pages"] >= 1 @pytest.mark.integration def test_check_all_poor_pdf(self, sample_poor_pdf): checker = EnterprisePDFChecker( str(sample_poor_pdf), config={"anthropic_api_key": None, "google_api_key": None}, quick_mode=True, generate_images=False, ) result = checker.check_all() assert "accessibility_score" in result assert result["total_issues"] >= 0 if __name__ == "__main__": pytest.main([__file__, "-v"])