ai_qc/backend/tests/test_formatting_diff.py
nickviljoen d21a8a276d refactor(formatting_diff): harden page_wide threshold + None-key handling
Three review-driven hardening tweaks:
- page_wide now requires ≥3 matched spans (PAGE_WIDE_MIN_SPANS).
  Avoids labelling section-break pages with a single flipped heading
  as page-wide.
- _collect_flips normalises bold/italic via bool() and font/color
  via "or ''" so callers passing dicts without those keys do not
  produce phantom flips against False/''.
- Adds tests for empty span lists and the missing-bold-key case.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 10:01:23 +02:00

218 lines
7.4 KiB
Python

"""Unit tests for deterministic per-page-pair formatting diff."""
import pytest
from document_mode.formatting_diff import compute_formatting_diff
def _span(text, bold=False, italic=False, font='Helvetica', size=10.0,
color='#000000', bbox=(0, 10, 100, 22)):
return {
'text': text, 'bold': bold, 'italic': italic, 'font': font,
'size': size, 'color': color, 'bbox': bbox,
}
def test_identical_spans_produce_no_findings():
    """Identical span lists on both sides yield an empty, 'none' diff."""
    texts = ("Hello world", "Second paragraph")
    before = [_span(t) for t in texts]
    after = [_span(t) for t in texts]

    outcome = compute_formatting_diff(before, after, 1, 1)

    assert outcome['finding_count'] == 0
    assert outcome['formatting_changes'] == []
    assert outcome['severity'] == 'none'
def test_bold_flip_is_detected():
    """A single span going from bold to regular is reported as one finding."""
    quote = "Theft of personal belongings"

    diff = compute_formatting_diff(
        [_span(quote, bold=True)],
        [_span(quote, bold=False)],
        18,
        18,
    )

    assert diff['finding_count'] == 1
    change = diff['formatting_changes'][0]
    assert change['attribute'] == 'bold'
    assert change['old_value'] is True
    assert change['new_value'] is False
    assert change['total_span_count'] == 1
    assert quote in change['example_quotes']
    assert diff['severity'] == 'medium'
def test_aggregates_identical_flips_into_one_finding():
    """Three spans with the same bold flip collapse into a single finding."""
    sentences = (
        "First sentence that lost bold",
        "Second sentence that lost bold",
        "Third sentence that lost bold",
    )
    before = [_span(sentence, bold=True) for sentence in sentences]
    after = [_span(sentence, bold=False) for sentence in sentences]

    diff = compute_formatting_diff(before, after, 22, 22)

    assert diff['finding_count'] == 1
    change = diff['formatting_changes'][0]
    assert change['total_span_count'] == 3
    assert len(change['example_quotes']) == 3
    # All three matched spans flipped, so the finding is page-wide.
    assert change['page_wide'] is True
def test_page_wide_flag_false_when_only_subset_flips():
    """A flip on only one of two matched spans is not labelled page-wide."""
    flipped_text = "Lost its bold"
    stable_text = "Stays regular and matches text"
    before = [_span(flipped_text, bold=True), _span(stable_text, bold=False)]
    after = [_span(flipped_text, bold=False), _span(stable_text, bold=False)]

    report = compute_formatting_diff(before, after, 5, 5)

    assert report['finding_count'] == 1
    first_change = report['formatting_changes'][0]
    assert first_change['page_wide'] is False
def test_short_text_spans_are_ignored():
    """Bold flips on the short spans "of" and "the" produce no findings."""
    words = ("of", "the")

    outcome = compute_formatting_diff(
        [_span(word, bold=True) for word in words],
        [_span(word, bold=False) for word in words],
        1,
        1,
    )

    assert outcome['finding_count'] == 0
def test_unmatched_text_is_ignored_not_flagged():
    """Spans whose text differs between sides are not reported as flips."""
    before = [_span("Original sentence that was bold", bold=True)]
    after = [_span("Completely different replacement copy", bold=False)]

    outcome = compute_formatting_diff(before, after, 7, 7)

    assert outcome['finding_count'] == 0
def test_size_tolerance_005pt_not_flagged():
    """A 0.04pt size difference on matching text is not reported."""
    body = "Stable body text under tolerance"
    before = [_span(body, size=10.00)]
    after = [_span(body, size=10.04)]

    outcome = compute_formatting_diff(before, after, 1, 1)

    assert outcome['finding_count'] == 0
def test_size_change_above_tolerance_flagged():
    """A 0.5pt size difference on matching text is reported as 'size'."""
    body = "Body text resized above tolerance"
    before = [_span(body, size=10.00)]
    after = [_span(body, size=10.50)]

    outcome = compute_formatting_diff(before, after, 1, 1)

    assert outcome['finding_count'] == 1
    change = outcome['formatting_changes'][0]
    assert change['attribute'] == 'size'
def test_font_change_detected():
    """A font swap on matching text is reported with old and new values."""
    body = "Body text in original font face"
    old_font, new_font = 'AXASans-Regular', 'Helvetica'

    outcome = compute_formatting_diff(
        [_span(body, font=old_font)],
        [_span(body, font=new_font)],
        1,
        1,
    )

    assert outcome['finding_count'] == 1
    change = outcome['formatting_changes'][0]
    assert change['attribute'] == 'font'
    assert change['old_value'] == old_font
    assert change['new_value'] == new_font
def test_color_change_detected():
    """A colour change on matching text is reported as a 'color' finding."""
    link_text = "Hyperlink-style text in blue"
    before = [_span(link_text, color='#0066cc')]
    after = [_span(link_text, color='#000000')]

    outcome = compute_formatting_diff(before, after, 1, 1)

    assert outcome['finding_count'] == 1
    change = outcome['formatting_changes'][0]
    assert change['attribute'] == 'color'
def test_italic_flip_detected():
    """Losing italic on matching text is reported as an 'italic' finding."""
    quote = "Block quote that was italicised"
    before = [_span(quote, italic=True)]
    after = [_span(quote, italic=False)]

    outcome = compute_formatting_diff(before, after, 1, 1)

    assert outcome['finding_count'] == 1
    change = outcome['formatting_changes'][0]
    assert change['attribute'] == 'italic'
def test_duplicate_text_disambiguated_by_y_position():
    """Same-text spans at different bboxes are compared pairwise.

    Both sides carry "Important note" twice, at two different bboxes.
    Only the first occurrence loses bold, so exactly one span should be
    counted in the single finding.
    """
    note = "Important note"
    first_box = (72, 100, 200, 115)
    second_box = (72, 700, 200, 715)
    before = [
        _span(note, bold=True, bbox=first_box),
        _span(note, bold=True, bbox=second_box),
    ]
    after = [
        _span(note, bold=False, bbox=first_box),
        _span(note, bold=True, bbox=second_box),
    ]

    outcome = compute_formatting_diff(before, after, 1, 1)

    assert outcome['finding_count'] == 1
    change = outcome['formatting_changes'][0]
    assert change['total_span_count'] == 1
def test_single_span_page_not_labelled_page_wide():
    """One flipped span is still a finding, but never page-wide."""
    # Every matched span flipped, yet a single span is too few to justify
    # calling the change page-wide.
    heading = "Sole heading on this section-break page"
    before = [_span(heading, bold=True)]
    after = [_span(heading, bold=False)]

    outcome = compute_formatting_diff(before, after, 1, 1)

    assert outcome['finding_count'] == 1
    change = outcome['formatting_changes'][0]
    assert change['page_wide'] is False
def test_two_span_page_not_labelled_page_wide():
    """Two flipped spans fall short of the page-wide threshold of 3."""
    headings = ("First short heading", "Second short heading")
    before = [_span(heading, bold=True) for heading in headings]
    after = [_span(heading, bold=False) for heading in headings]

    outcome = compute_formatting_diff(before, after, 1, 1)

    assert outcome['finding_count'] == 1
    change = outcome['formatting_changes'][0]
    assert change['page_wide'] is False
def test_missing_bold_key_treated_as_false_no_phantom_flip():
    """A span dict without a 'bold' key compares equal to bold=False."""
    body = "Body text from older ingest path"
    # The new side is a fully populated span with explicit bold=False.
    complete = _span(body)
    # The old side is the same span with the 'bold' key dropped entirely;
    # it must not be read as None and flagged as a flip against False.
    without_bold = {
        key: value for key, value in _span(body).items() if key != 'bold'
    }

    outcome = compute_formatting_diff([without_bold], [complete], 1, 1)

    assert outcome['finding_count'] == 0
def test_empty_old_spans_returns_no_findings():
    """An empty old-side span list yields no findings and severity 'none'."""
    no_spans = []
    new_spans = [_span("Some new text")]

    outcome = compute_formatting_diff(no_spans, new_spans, 1, 1)

    assert outcome['finding_count'] == 0
    assert outcome['severity'] == 'none'
def test_empty_new_spans_returns_no_findings():
    """An empty new-side span list yields no findings and severity 'none'."""
    old_spans = [_span("Some old text")]
    no_spans = []

    outcome = compute_formatting_diff(old_spans, no_spans, 1, 1)

    assert outcome['finding_count'] == 0
    assert outcome['severity'] == 'none'