"""Unit tests for deterministic per-page-pair formatting diff.""" import pytest from document_mode.formatting_diff import compute_formatting_diff def _span(text, bold=False, italic=False, font='Helvetica', size=10.0, color='#000000', bbox=(0, 10, 100, 22)): return { 'text': text, 'bold': bold, 'italic': italic, 'font': font, 'size': size, 'color': color, 'bbox': bbox, } def test_identical_spans_produce_no_findings(): spans_a = [_span("Hello world"), _span("Second paragraph")] spans_b = [_span("Hello world"), _span("Second paragraph")] result = compute_formatting_diff(spans_a, spans_b, 1, 1) assert result['finding_count'] == 0 assert result['formatting_changes'] == [] assert result['severity'] == 'none' def test_bold_flip_is_detected(): spans_a = [_span("Theft of personal belongings", bold=True)] spans_b = [_span("Theft of personal belongings", bold=False)] result = compute_formatting_diff(spans_a, spans_b, 18, 18) assert result['finding_count'] == 1 finding = result['formatting_changes'][0] assert finding['attribute'] == 'bold' assert finding['old_value'] is True assert finding['new_value'] is False assert finding['total_span_count'] == 1 assert "Theft of personal belongings" in finding['example_quotes'] assert result['severity'] == 'medium' def test_aggregates_identical_flips_into_one_finding(): old = [ _span("First sentence that lost bold", bold=True), _span("Second sentence that lost bold", bold=True), _span("Third sentence that lost bold", bold=True), ] new = [ _span("First sentence that lost bold", bold=False), _span("Second sentence that lost bold", bold=False), _span("Third sentence that lost bold", bold=False), ] result = compute_formatting_diff(old, new, 22, 22) assert result['finding_count'] == 1 finding = result['formatting_changes'][0] assert finding['total_span_count'] == 3 assert len(finding['example_quotes']) == 3 assert finding['page_wide'] is True def test_page_wide_flag_false_when_only_subset_flips(): old = [ _span("Lost its bold", bold=True), _span("Stays regular and matches text", bold=False), ] new = [ _span("Lost its bold", bold=False), _span("Stays regular and matches text", bold=False), ] result = compute_formatting_diff(old, new, 5, 5) assert result['finding_count'] == 1 assert result['formatting_changes'][0]['page_wide'] is False def test_short_text_spans_are_ignored(): old = [_span("of", bold=True), _span("the", bold=True)] new = [_span("of", bold=False), _span("the", bold=False)] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 0 def test_unmatched_text_is_ignored_not_flagged(): old = [_span("Original sentence that was bold", bold=True)] new = [_span("Completely different replacement copy", bold=False)] result = compute_formatting_diff(old, new, 7, 7) assert result['finding_count'] == 0 def test_size_change_not_flagged(): # Size is intentionally out of scope — rebrand re-exports often change # body-text point sizes by fractions of a point. old = [_span("Body text resized", size=10.00)] new = [_span("Body text resized", size=12.50)] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 0 def test_font_change_not_flagged(): # Font swap is intentionally out of scope — caught by the LLM narrative # diff. Reporting it here would drown out bold/italic regressions on # re-branded documents. old = [_span("Body text in original font face", font='AXASans-Regular')] new = [_span("Body text in original font face", font='Helvetica')] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 0 def test_color_change_not_flagged(): # Colour is intentionally out of scope for the same rebrand-noise reason. old = [_span("Hyperlink-style text in blue", color='#0066cc')] new = [_span("Hyperlink-style text in blue", color='#000000')] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 0 def test_italic_flip_detected(): old = [_span("Block quote that was italicised", italic=True)] new = [_span("Block quote that was italicised", italic=False)] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 1 assert result['formatting_changes'][0]['attribute'] == 'italic' def test_duplicate_text_disambiguated_by_y_position(): old = [ _span("Important note", bold=True, bbox=(72, 100, 200, 115)), _span("Important note", bold=True, bbox=(72, 700, 200, 715)), ] new = [ _span("Important note", bold=False, bbox=(72, 100, 200, 115)), _span("Important note", bold=True, bbox=(72, 700, 200, 715)), ] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 1 assert result['formatting_changes'][0]['total_span_count'] == 1 def test_single_span_page_not_labelled_page_wide(): # A page with only one matched span that flipped should NOT be page-wide, # even though "all" matched spans flipped — the count is too small. old = [_span("Sole heading on this section-break page", bold=True)] new = [_span("Sole heading on this section-break page", bold=False)] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 1 assert result['formatting_changes'][0]['page_wide'] is False def test_two_span_page_not_labelled_page_wide(): # Threshold is 3 — 2 spans flipping is not enough to call page-wide. old = [ _span("First short heading", bold=True), _span("Second short heading", bold=True), ] new = [ _span("First short heading", bold=False), _span("Second short heading", bold=False), ] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 1 assert result['formatting_changes'][0]['page_wide'] is False def test_missing_bold_key_treated_as_false_no_phantom_flip(): # A span dict that omits 'bold' entirely should be treated as bold=False # for comparison purposes — not as None, which would falsely flip vs False. old = [{'text': "Body text from older ingest path", 'italic': False, 'font': 'Helvetica', 'size': 10.0, 'color': '#000000', 'bbox': (0, 10, 100, 22)}] new = [{'text': "Body text from older ingest path", 'bold': False, 'italic': False, 'font': 'Helvetica', 'size': 10.0, 'color': '#000000', 'bbox': (0, 10, 100, 22)}] result = compute_formatting_diff(old, new, 1, 1) assert result['finding_count'] == 0 def test_empty_old_spans_returns_no_findings(): result = compute_formatting_diff([], [_span("Some new text")], 1, 1) assert result['finding_count'] == 0 assert result['severity'] == 'none' def test_empty_new_spans_returns_no_findings(): result = compute_formatting_diff([_span("Some old text")], [], 1, 1) assert result['finding_count'] == 0 assert result['severity'] == 'none'