pdf-accessibility/tests/test_checker_extended.py
Vadym Samoilenko 112719b2c5 Add Docker stack, frontend redesign, and visual page inspector fix
- Redesigned frontend with Outfit/Figtree typography, coral accent palette,
  noise texture, glassmorphism header, and staggered animations
- Split monolithic index.html into modular JS (app, api, upload, batch,
  results, page-viewer, utils) and extracted CSS
- Fixed worker.py to generate page images for Visual Page Inspector
- Added Docker Compose stack (web, worker, redis, postgres)
- Added batch upload, HTML report export, rate limiting, and Redis queue
- Extended test suite with checker, remediation, worker, and DB tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 18:12:44 +00:00

593 lines
23 KiB
Python

"""
Extended tests for enterprise_pdf_checker.py — covers check methods, utilities, and scoring.
"""
import pytest
import json
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock, PropertyMock
from io import BytesIO
from enterprise_pdf_checker import (
EnterprisePDFChecker,
AccessibilityIssue,
CheckResult,
Severity,
CacheManager,
ColorContrastChecker,
ReadabilityAnalyzer,
)
# ─── Dataclass tests ──────────────────────────────────────────────────
class TestAccessibilityIssue:
    """Checks how ``AccessibilityIssue.to_dict`` serialises an issue."""

    def test_to_dict(self):
        """Explicitly supplied fields show up in the serialised dict."""
        bbox = {"x0": 0, "y0": 0, "x1": 100, "y1": 100}
        serialized = AccessibilityIssue(
            severity=Severity.ERROR,
            category="Test",
            description="desc",
            page_number=2,
            wcag_criterion="1.1.1",
            recommendation="fix it",
            coordinates=bbox,
        ).to_dict()

        assert serialized["severity"] == "ERROR"
        assert serialized["category"] == "Test"
        assert serialized["page_number"] == 2
        assert serialized["coordinates"]["x1"] == 100

    def test_defaults(self):
        """Fields that are not supplied serialise to their default values."""
        minimal = AccessibilityIssue(
            severity=Severity.INFO,
            category="Cat",
            description="Desc",
        )
        serialized = minimal.to_dict()

        assert serialized["page_number"] is None
        assert serialized["recommendation"] == ""
        assert serialized["coordinates"] is None
        assert serialized["details"] == {}

    def test_all_severity_values(self):
        """Every ``Severity`` member serialises to its ``.value``."""
        for level in Severity:
            serialized = AccessibilityIssue(
                severity=level, category="x", description="y"
            ).to_dict()
            assert serialized["severity"] == level.value
class TestCheckResult:
    """Checks the default and explicit field values of ``CheckResult``."""

    def test_defaults(self):
        """A result built from name and status only has empty/zero extras."""
        outcome = CheckResult(check_name="Test", passed=True)

        assert outcome.issues == []
        assert outcome.metadata == {}
        assert outcome.duration == 0.0

    def test_with_issues(self):
        """Issues passed to the constructor are kept on the result."""
        warning = AccessibilityIssue(
            severity=Severity.WARNING, category="c", description="d"
        )
        outcome = CheckResult(check_name="T", passed=False, issues=[warning])

        assert len(outcome.issues) == 1
# ─── CacheManager tests ───────────────────────────────────────────────
class TestCacheManagerExtended:
    """Round-trip, miss, corruption and key-prefix tests for ``CacheManager``."""

    def test_roundtrip(self, tmp_path):
        """A value stored under a generated key is read back unchanged."""
        cache_root = tmp_path / "cache"
        manager = CacheManager(cache_dir=str(cache_root))
        payload = {"result": 42}

        cache_key = manager.get_cache_key(b"hello world", prefix="test")
        manager.set(cache_key, payload)

        assert manager.get(cache_key) == {"result": 42}

    def test_get_missing_key(self, tmp_path):
        """Looking up a key that was never stored yields ``None``."""
        cache_root = tmp_path / "cache"
        manager = CacheManager(cache_dir=str(cache_root))

        missing = manager.get("nonexistent_key_12345")

        assert missing is None

    def test_corrupted_cache_file(self, tmp_path):
        """A cache file that is not valid JSON is treated as a miss."""
        cache_root = tmp_path / "cache"
        manager = CacheManager(cache_dir=str(cache_root))

        # Plant a file with invalid JSON where the entry for "bad_key" is read from.
        broken_entry = Path(manager.cache_dir) / "bad_key.json"
        broken_entry.write_text("NOT JSON {{{")

        assert manager.get("bad_key") is None

    def test_prefix_in_key(self, tmp_path):
        """The requested prefix, followed by ``_``, starts the generated key."""
        cache_root = tmp_path / "cache"
        manager = CacheManager(cache_dir=str(cache_root))

        cache_key = manager.get_cache_key(b"data", prefix="myprefix")

        assert cache_key.startswith("myprefix_")
# ─── ColorContrastChecker tests ───────────────────────────────────────
class TestColorContrastChecker:
    """Luminance, contrast-ratio and image-sampling tests for ``ColorContrastChecker``."""

    def test_luminance_black(self):
        """Pure black has a luminance of approximately 0."""
        luminance = ColorContrastChecker.get_luminance((0, 0, 0))
        assert luminance == pytest.approx(0.0)

    def test_luminance_white(self):
        """Pure white has a luminance of approximately 1 (within 0.01)."""
        luminance = ColorContrastChecker.get_luminance((255, 255, 255))
        assert luminance == pytest.approx(1.0, abs=0.01)

    def test_contrast_black_white(self):
        """Black against white gives a ratio of about 21 (within 0.1)."""
        black, white = (0, 0, 0), (255, 255, 255)
        contrast = ColorContrastChecker.calculate_contrast_ratio(black, white)
        assert contrast == pytest.approx(21.0, abs=0.1)

    def test_contrast_same_color(self):
        """A colour against itself gives a ratio of about 1."""
        grey = (128, 128, 128)
        contrast = ColorContrastChecker.calculate_contrast_ratio(grey, grey)
        assert contrast == pytest.approx(1.0)

    def test_contrast_symmetry(self):
        """Swapping the two colours does not change the ratio."""
        red, blue = (255, 0, 0), (0, 0, 255)
        forward = ColorContrastChecker.calculate_contrast_ratio(red, blue)
        backward = ColorContrastChecker.calculate_contrast_ratio(blue, red)
        assert forward == pytest.approx(backward)

    def test_wcag_constants(self):
        """The WCAG threshold constants hold the expected values."""
        expected_thresholds = {
            "WCAG_AA_NORMAL": 4.5,
            "WCAG_AA_LARGE": 3.0,
            "WCAG_AAA_NORMAL": 7.0,
            "WCAG_AAA_LARGE": 4.5,
        }
        for constant_name, threshold in expected_thresholds.items():
            assert getattr(ColorContrastChecker, constant_name) == threshold

    def test_check_image_contrast_solid_white(self):
        """A uniformly white image reports a worst ratio of about 1."""
        from PIL import Image

        blank = Image.new("RGB", (100, 100), (255, 255, 255))
        report = ColorContrastChecker.check_image_contrast(blank, sample_size=50)

        assert "total_samples" in report
        # Every pixel has the same colour, so every sampled ratio is 1.0.
        assert report["worst_ratio"] == pytest.approx(1.0)

    def test_check_image_contrast_high_contrast(self):
        """A half-black, half-white image reports a best ratio of at least 1."""
        from PIL import Image

        canvas = Image.new("RGB", (100, 100), (0, 0, 0))
        # Fill columns 50..99 (all 100 rows) with white: the right half becomes a stripe.
        canvas.paste((255, 255, 255), (50, 0, 100, 100))
        report = ColorContrastChecker.check_image_contrast(canvas, sample_size=200)

        assert "total_samples" in report
        assert report["best_ratio"] >= 1.0

    def test_check_image_contrast_rgba_mode(self):
        """An RGBA image is accepted and still yields a sample count."""
        from PIL import Image

        translucent_capable = Image.new("RGBA", (50, 50), (128, 128, 128, 255))
        report = ColorContrastChecker.check_image_contrast(
            translucent_capable, sample_size=10
        )

        assert "total_samples" in report
# ─── ReadabilityAnalyzer tests ────────────────────────────────────────
class TestReadabilityAnalyzer:
    """Syllable-counting and text-analysis tests for ``ReadabilityAnalyzer``."""

    def test_count_syllables_simple(self):
        """Pins the syllable counts the analyzer returns for a few common words."""
        expected_counts = {
            "cat": 1,
            "table": 1,  # silent-e rule
            "banana": 3,
        }
        for word, syllables in expected_counts.items():
            assert ReadabilityAnalyzer.count_syllables(word) == syllables

    def test_count_syllables_minimum_one(self):
        """Even very short or vowel-less words count as at least one syllable."""
        for word in ("a", "xyz"):
            assert ReadabilityAnalyzer.count_syllables(word) >= 1

    def test_analyze_short_text(self):
        """A two-word input produces a result carrying an ``error`` key."""
        outcome = ReadabilityAnalyzer.analyze("Too short.")
        assert "error" in outcome

    def test_analyze_empty_text(self):
        """An empty string produces a result carrying an ``error`` key."""
        outcome = ReadabilityAnalyzer.analyze("")
        assert "error" in outcome

    def test_analyze_simple_text(self):
        """Plain short sentences yield all core metrics with positive counts."""
        passage = (
            "The cat sat on the mat. The dog ran in the park. "
            "It was a sunny day. The sky was blue. Birds sang in the trees. "
            "Children played outside. Everyone was happy."
        )
        metrics = ReadabilityAnalyzer.analyze(passage)

        required_keys = (
            "flesch_reading_ease",
            "flesch_kincaid_grade",
            "total_words",
            "total_sentences",
        )
        for key in required_keys:
            assert key in metrics
        assert metrics["total_words"] > 0
        assert metrics["total_sentences"] > 0

    def test_analyze_complex_text(self):
        """Dense academic prose scores below 50 and contains complex words."""
        passage = (
            "The implementation of sophisticated algorithmic methodologies necessitates "
            "comprehensive understanding of computational complexity theory. Furthermore, "
            "the juxtaposition of theoretical frameworks with practical applications "
            "demonstrates the interconnectedness of mathematical abstractions and "
            "engineering implementations. Consequently, interdisciplinary approaches "
            "facilitate transformative innovations across diverse technological domains."
        )
        metrics = ReadabilityAnalyzer.analyze(passage)

        # Complex text is expected to give a lower Flesch score.
        assert metrics["flesch_reading_ease"] < 50
        assert metrics["complex_words_count"] > 0

    def test_analyze_long_sentences(self):
        """Thirty-word sentences are reported as long sentences."""
        # Five sentences of 30 words each, i.e. more than 25 words per sentence.
        thirty_words = " ".join(["word"] * 30) + "."
        passage = (thirty_words + " ") * 5

        metrics = ReadabilityAnalyzer.analyze(passage)

        assert metrics["long_sentences_count"] >= 1
# ─── EnterprisePDFChecker utility methods ─────────────────────────────
class TestCheckerUtilityMethods:
    """Tests for ``EnterprisePDFChecker`` construction, ``add_issue`` and ``run_check``."""

    def test_add_issue(self, sample_good_pdf):
        """One ``add_issue`` call stores one issue with the given severity."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))

        pdf_checker.add_issue(Severity.WARNING, "Test", "Test issue", page_number=1)

        recorded = pdf_checker.issues
        assert len(recorded) == 1
        assert recorded[0].severity == Severity.WARNING

    def test_add_multiple_issues(self, sample_good_pdf):
        """Five ``add_issue`` calls store five issues."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))

        for index in range(5):
            pdf_checker.add_issue(Severity.INFO, f"Cat{index}", f"Issue {index}")

        assert len(pdf_checker.issues) == 5

    def test_run_check_success(self, sample_good_pdf):
        """A check that only records an INFO issue is reported as passed."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))

        def passing_check():
            pdf_checker.add_issue(Severity.INFO, "Test", "Info only")

        outcome = pdf_checker.run_check(passing_check, "Test Check")

        assert outcome.passed is True
        assert outcome.check_name == "Test Check"
        assert outcome.duration >= 0

    def test_run_check_failure(self, sample_good_pdf):
        """A check that raises is reported as failed and logged as CRITICAL."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))

        def failing_check():
            raise ValueError("Boom")

        outcome = pdf_checker.run_check(failing_check, "Failing Check")

        assert outcome.passed is False
        assert len(pdf_checker.issues) >= 1
        # The raised exception is expected to be recorded as a CRITICAL issue.
        severities = [issue.severity for issue in pdf_checker.issues]
        assert any(level == Severity.CRITICAL for level in severities)

    def test_init_with_config(self, sample_good_pdf):
        """A config dict passed to the constructor is exposed as ``config``."""
        settings = {"anthropic_api_key": "fake-key", "google_api_key": "fake-key"}
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf), settings)
        assert pdf_checker.config == settings

    def test_init_without_config(self, sample_good_pdf):
        """Without a config argument ``config`` is an empty dict."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))
        assert pdf_checker.config == {}

    def test_quick_mode_flag(self, sample_good_pdf):
        """``quick_mode=True`` is stored on the instance."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf), quick_mode=True)
        assert pdf_checker.quick_mode is True

    def test_generate_images_flag(self, sample_good_pdf):
        """``generate_images=False`` is stored on the instance."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf), generate_images=False)
        assert pdf_checker.generate_images is False
# ─── Check methods (real PDF readers opened on the sample PDFs) ────────
class TestCheckMethods:
    """Tests for individual ``_check_*`` methods using the actual sample PDFs.

    Most tests here are smoke tests: they pass as long as the check method
    returns without raising. They deliberately carry no assertion, so that
    they do not look like they verify more than they do.
    """

    @staticmethod
    def _open_checker(pdf_path):
        """Build a checker for *pdf_path* with both PDF readers attached.

        The caller is responsible for closing ``checker.pdf_plumber``.
        """
        from pypdf import PdfReader
        import pdfplumber

        checker = EnterprisePDFChecker(str(pdf_path))
        checker.pdf_reader = PdfReader(str(pdf_path))
        checker.pdf_plumber = pdfplumber.open(str(pdf_path))
        return checker

    @pytest.fixture
    def checker_good(self, sample_good_pdf):
        """Checker with the good sample PDF, readers initialized."""
        checker = self._open_checker(sample_good_pdf)
        yield checker
        checker.pdf_plumber.close()

    @pytest.fixture
    def checker_poor(self, sample_poor_pdf):
        """Checker with the poor sample PDF, readers initialized."""
        checker = self._open_checker(sample_poor_pdf)
        yield checker
        checker.pdf_plumber.close()

    # Good PDF tests

    def test_check_basic_structure(self, checker_good):
        """The structure check records at least one issue (success or problem)."""
        checker_good._check_basic_structure()
        assert len(checker_good.issues) >= 1

    def test_check_metadata(self, checker_good):
        """The metadata check records an issue in the "Metadata" category."""
        checker_good._check_metadata()
        categories = [issue.category for issue in checker_good.issues]
        assert "Metadata" in categories

    def test_check_language(self, checker_good):
        """The language check records an issue in the "Language" category."""
        checker_good._check_language()
        categories = [issue.category for issue in checker_good.issues]
        assert "Language" in categories

    def test_check_text_extractability(self, checker_good):
        """Smoke test: must not raise; it may or may not find issues."""
        checker_good._check_text_extractability()

    def test_check_readability(self, checker_good):
        """Smoke test: must not raise; short text may produce no issues."""
        checker_good._check_readability()

    def test_check_links(self, checker_good):
        """Smoke test: must not raise."""
        checker_good._check_links()

    def test_check_headings(self, checker_good):
        """Smoke test: must not raise."""
        checker_good._check_headings()

    def test_check_tab_order(self, checker_good):
        """Smoke test: must not raise.

        NOTE(review): the original asserted "at least one 'Tab Order' issue"
        but neutralised it with ``or True``, so it could never fail. Restore a
        real assertion once the expected output for the sample PDF is confirmed.
        """
        checker_good._check_tab_order()

    def test_check_role_mapping(self, checker_good):
        """Smoke test: must not raise."""
        checker_good._check_role_mapping()

    def test_check_forms(self, checker_good):
        """Smoke test: must not raise (the sample is expected to have no forms)."""
        checker_good._check_forms()

    def test_check_tables(self, checker_good):
        """Smoke test: must not raise, whether or not tables are reported."""
        checker_good._check_tables()

    def test_check_reading_order(self, checker_good):
        """Smoke test: must not raise."""
        checker_good._check_reading_order()

    def test_check_fonts(self, checker_good):
        """Smoke test: must not raise."""
        checker_good._check_fonts()

    def test_check_security(self, checker_good):
        """Smoke test: must not raise."""
        checker_good._check_security()

    def test_check_bookmarks(self, checker_good):
        """Smoke test: must not raise."""
        checker_good._check_bookmarks()

    def test_check_ocr_quality_quick_mode(self, checker_good):
        """Smoke test: with ``quick_mode`` set the OCR check must not raise."""
        checker_good.quick_mode = True
        # Per the original author's note, quick mode is expected to skip OCR.
        checker_good._check_ocr_quality()

    def test_check_images_quick_mode(self, checker_good):
        """Smoke test: with ``quick_mode`` set the image check must not raise."""
        checker_good.quick_mode = True
        checker_good._check_images_comprehensive()

    def test_check_color_contrast_quick_mode(self, checker_good):
        """Smoke test: with ``quick_mode`` set the contrast check must not raise."""
        checker_good.quick_mode = True
        checker_good._check_color_contrast()

    # Poor PDF tests

    def test_poor_pdf_structure(self, checker_poor):
        """The structure check records at least one issue for the poor PDF."""
        checker_poor._check_basic_structure()
        assert len(checker_poor.issues) >= 1

    def test_poor_pdf_metadata(self, checker_poor):
        """The metadata check records at least one issue for the poor PDF."""
        checker_poor._check_metadata()
        assert len(checker_poor.issues) >= 1

    def test_poor_pdf_language(self, checker_poor):
        """The language check records at least one issue for the poor PDF."""
        checker_poor._check_language()
        assert len(checker_poor.issues) >= 1

    def test_poor_pdf_text(self, checker_poor):
        """Smoke test: must not raise."""
        checker_poor._check_text_extractability()

    def test_poor_pdf_headings(self, checker_poor):
        """Smoke test: must not raise."""
        checker_poor._check_headings()

    def test_poor_pdf_tab_order(self, checker_poor):
        """Smoke test: must not raise."""
        checker_poor._check_tab_order()

    def test_poor_pdf_role_mapping(self, checker_poor):
        """Smoke test: must not raise."""
        checker_poor._check_role_mapping()
# ─── Generate summary / scoring ──────────────────────────────────────
class TestScoringAndSummary:
    """Tests for summary generation, score calculation and report export.

    Tests that open the PDF close ``pdf_plumber`` in a ``finally`` block so
    the file handle is released even when an assertion fails.
    """

    @staticmethod
    def _open_checker(pdf_path):
        """Build a checker for *pdf_path* with both PDF readers attached.

        The caller is responsible for closing ``checker.pdf_plumber``.
        """
        from pypdf import PdfReader
        import pdfplumber

        checker = EnterprisePDFChecker(str(pdf_path))
        checker.pdf_reader = PdfReader(str(pdf_path))
        checker.pdf_plumber = pdfplumber.open(str(pdf_path))
        return checker

    def test_generate_summary_empty(self, sample_good_pdf):
        """With no issues recorded the summary reports a score of 100."""
        checker = self._open_checker(sample_good_pdf)
        try:
            summary = checker._generate_summary()
            assert summary["accessibility_score"] == 100  # no issues
            assert summary["severity_counts"]["critical"] == 0
            assert summary["total_issues"] == 0
            assert "filename" in summary
        finally:
            checker.pdf_plumber.close()

    def test_score_decreases_with_critical(self, sample_good_pdf):
        """A single CRITICAL issue brings the score down to 75."""
        checker = self._open_checker(sample_good_pdf)
        try:
            checker.add_issue(Severity.CRITICAL, "X", "Critical issue")
            summary = checker._generate_summary()
            assert summary["accessibility_score"] == 75
        finally:
            checker.pdf_plumber.close()

    def test_score_floor_at_zero(self, sample_good_pdf):
        """Ten CRITICAL issues leave the score at 0 rather than below it."""
        checker = self._open_checker(sample_good_pdf)
        try:
            # Add enough critical issues that an unclamped score would go negative.
            for i in range(10):
                checker.add_issue(Severity.CRITICAL, "X", f"Issue {i}")
            summary = checker._generate_summary()
            assert summary["accessibility_score"] == 0
        finally:
            checker.pdf_plumber.close()

    def test_generate_json_report(self, sample_good_pdf):
        """The JSON report parses and contains the score and the issue list."""
        checker = self._open_checker(sample_good_pdf)
        try:
            report = json.loads(checker.generate_json_report())
            assert "accessibility_score" in report
            assert "issues" in report
        finally:
            checker.pdf_plumber.close()

    def test_run_full_check_alias(self, sample_good_pdf):
        """``run_full_check`` either equals ``check_all`` or is at least callable."""
        checker = EnterprisePDFChecker(str(sample_good_pdf))
        assert checker.run_full_check == checker.check_all or callable(checker.run_full_check)

    def test_to_dict_alias(self, sample_good_pdf):
        """``to_dict`` is available as a callable on the checker."""
        checker = EnterprisePDFChecker(str(sample_good_pdf))
        assert callable(checker.to_dict)
# ─── Process image analysis ──────────────────────────────────────────
class TestProcessImageAnalysis:
    """Tests for ``_process_image_analysis``, which turns an analysis dict into issues."""

    def test_process_informational_image(self, sample_good_pdf):
        """An informational image yields an issue whose category mentions "Alt Text"."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))
        payload = {
            "type": "informational",
            "alt_text": "A chart showing sales data",
            "has_text": False,
            "color_only_info": False,
            "concerns": [],
        }

        pdf_checker._process_image_analysis(payload, page_num=1, img_num=1)

        categories = [issue.category for issue in pdf_checker.issues]
        assert any("Alt Text" in category for category in categories)

    def test_process_image_with_text(self, sample_good_pdf):
        """An image flagged as containing text yields a "Text in Image" issue."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))
        payload = {
            "type": "informational",
            "alt_text": "Text image",
            "has_text": True,
            "text_content": "Important notice",
            "color_only_info": False,
            "concerns": [],
        }

        pdf_checker._process_image_analysis(payload, page_num=1, img_num=1)

        matching = [
            issue for issue in pdf_checker.issues if "Text in Image" in issue.category
        ]
        assert len(matching) >= 1

    def test_process_color_only_image(self, sample_good_pdf):
        """An image conveying information by colour alone yields a "Color Only" issue."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))
        payload = {
            "type": "informational",
            "alt_text": "Colored chart",
            "has_text": False,
            "color_only_info": True,
            "concerns": [],
        }

        pdf_checker._process_image_analysis(payload, page_num=2, img_num=1)

        matching = [
            issue for issue in pdf_checker.issues if "Color Only" in issue.category
        ]
        assert len(matching) >= 1

    def test_process_image_with_concerns(self, sample_good_pdf):
        """Each listed concern yields its own "Quality" issue (two here)."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))
        payload = {
            "type": "informational",
            "alt_text": "x",
            "has_text": False,
            "color_only_info": False,
            "concerns": ["Low resolution", "Blurry text"],
        }

        pdf_checker._process_image_analysis(payload, page_num=1, img_num=1)

        matching = [
            issue for issue in pdf_checker.issues if "Quality" in issue.category
        ]
        assert len(matching) == 2

    def test_process_image_long_alt_text(self, sample_good_pdf):
        """A 200-character alt text yields an "Alt Text" issue at WARNING severity."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))
        payload = {
            "type": "informational",
            "alt_text": "A" * 200,
            "has_text": False,
            "color_only_info": False,
            "concerns": [],
        }

        pdf_checker._process_image_analysis(payload, page_num=1, img_num=1)

        alt_text_severities = [
            issue.severity
            for issue in pdf_checker.issues
            if "Alt Text" in issue.category
        ]
        assert any(level == Severity.WARNING for level in alt_text_severities)
class TestProcessGoogleVisionResults:
    """Tests for ``_process_google_vision_results``."""

    def test_process_vision_with_text(self, sample_good_pdf):
        """A result with text and labels yields an issue whose category mentions "Analysis"."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))
        vision_payload = {
            "has_text": True,
            "labels": ["Document", "Text", "Paper"],
        }

        pdf_checker._process_google_vision_results(
            vision_payload, page_num=1, img_num=1
        )

        categories = [issue.category for issue in pdf_checker.issues]
        assert any("Analysis" in category for category in categories)

    def test_process_vision_with_error(self, sample_good_pdf):
        """A result carrying an ``error`` key adds no issue at all."""
        pdf_checker = EnterprisePDFChecker(str(sample_good_pdf))
        vision_payload = {"has_text": True, "error": "API error"}

        pdf_checker._process_google_vision_results(
            vision_payload, page_num=1, img_num=1
        )

        # With an error present, no issue should have been recorded.
        assert len(pdf_checker.issues) == 0
# ─── Full check_all integration ──────────────────────────────────────
class TestCheckAllIntegration:
    """End-to-end ``check_all`` runs in quick mode, with no API keys and no images."""

    @pytest.mark.integration
    def test_check_all_good_pdf(self, sample_good_pdf):
        """The good sample yields a report with all top-level sections and >= 1 page."""
        no_api_keys = {"anthropic_api_key": None, "google_api_key": None}
        pdf_checker = EnterprisePDFChecker(
            str(sample_good_pdf),
            config=no_api_keys,
            quick_mode=True,
            generate_images=False,
        )

        report = pdf_checker.check_all()

        expected_sections = (
            "accessibility_score",
            "issues",
            "severity_counts",
            "checks_performed",
        )
        for section in expected_sections:
            assert section in report
        assert report["total_pages"] >= 1

    @pytest.mark.integration
    def test_check_all_poor_pdf(self, sample_poor_pdf):
        """The poor sample still yields a score and a non-negative issue count."""
        no_api_keys = {"anthropic_api_key": None, "google_api_key": None}
        pdf_checker = EnterprisePDFChecker(
            str(sample_poor_pdf),
            config=no_api_keys,
            quick_mode=True,
            generate_images=False,
        )

        report = pdf_checker.check_all()

        assert "accessibility_score" in report
        assert report["total_issues"] >= 0
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; propagate it so that running
    # this file as a script exits non-zero when any test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))