# ai_qc/backend/tests/test_formatting_diff.py

"""Unit tests for deterministic per-page-pair formatting diff."""

import pytest

from document_mode.formatting_diff import compute_formatting_diff


def _span(text, bold=False, italic=False, font='Helvetica', size=10.0,
          color='#000000', bbox=(0, 10, 100, 22)):
    """Build a span dict fixture for the formatting-diff tests.

    Only ``text`` is required; every formatting attribute has a default so
    each test spells out just the attribute it exercises. The returned dict
    carries the keys 'text', 'bold', 'italic', 'font', 'size', 'color' and
    'bbox', in that order.
    """
    return dict(
        text=text,
        bold=bold,
        italic=italic,
        font=font,
        size=size,
        color=color,
        bbox=bbox,
    )


def test_identical_spans_produce_no_findings():
    """Identical span lists on both sides yield an empty, 'none' result."""
    texts = ("Hello world", "Second paragraph")
    before = [_span(text) for text in texts]
    after = [_span(text) for text in texts]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 0
    assert diff['formatting_changes'] == []
    assert diff['severity'] == 'none'


def test_bold_flip_is_detected():
    """A span going from bold to non-bold yields one medium 'bold' finding."""
    quote = "Theft of personal belongings"
    before = [_span(quote, bold=True)]
    after = [_span(quote, bold=False)]

    diff = compute_formatting_diff(before, after, 18, 18)

    assert diff['finding_count'] == 1
    change = diff['formatting_changes'][0]
    assert change['attribute'] == 'bold'
    # Old/new values must be real booleans, hence identity checks.
    assert change['old_value'] is True
    assert change['new_value'] is False
    assert change['total_span_count'] == 1
    assert quote in change['example_quotes']
    assert diff['severity'] == 'medium'


def test_aggregates_identical_flips_into_one_finding():
    """Three spans losing bold the same way collapse into one finding."""
    sentences = (
        "First sentence that lost bold",
        "Second sentence that lost bold",
        "Third sentence that lost bold",
    )
    before = [_span(sentence, bold=True) for sentence in sentences]
    after = [_span(sentence, bold=False) for sentence in sentences]

    diff = compute_formatting_diff(before, after, 22, 22)

    assert diff['finding_count'] == 1
    change = diff['formatting_changes'][0]
    assert change['total_span_count'] == 3
    assert len(change['example_quotes']) == 3
    assert change['page_wide'] is True


def test_page_wide_flag_false_when_only_subset_flips():
    """One flipped span next to an unchanged one is not a page-wide change."""
    flipped_text = "Lost its bold"
    stable_text = "Stays regular and matches text"
    before = [
        _span(flipped_text, bold=True),
        _span(stable_text, bold=False),
    ]
    after = [
        _span(flipped_text, bold=False),
        _span(stable_text, bold=False),
    ]

    diff = compute_formatting_diff(before, after, 5, 5)

    assert diff['finding_count'] == 1
    first_change = diff['formatting_changes'][0]
    assert first_change['page_wide'] is False


def test_short_text_spans_are_ignored():
    """Bold flips on very short spans ("of", "the") produce no findings."""
    short_words = ("of", "the")
    before = [_span(word, bold=True) for word in short_words]
    after = [_span(word, bold=False) for word in short_words]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 0


def test_unmatched_text_is_ignored_not_flagged():
    """Spans whose text differs between sides are not reported as flips."""
    before = [_span("Original sentence that was bold", bold=True)]
    after = [_span("Completely different replacement copy", bold=False)]

    diff = compute_formatting_diff(before, after, 7, 7)

    assert diff['finding_count'] == 0


def test_size_change_not_flagged():
    """A pure point-size change on matching text produces no findings.

    Size is intentionally out of scope: rebrand re-exports often change
    body-text point sizes by fractions of a point.
    """
    text = "Body text resized"
    before = [_span(text, size=10.0)]
    after = [_span(text, size=12.5)]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 0


def test_font_change_not_flagged():
    """A pure font-face swap on matching text produces no findings.

    Font swap is intentionally out of scope; it is caught by the LLM
    narrative diff. Reporting it here would drown out bold/italic
    regressions on re-branded documents.
    """
    text = "Body text in original font face"
    before = [_span(text, font='AXASans-Regular')]
    after = [_span(text, font='Helvetica')]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 0


def test_color_change_not_flagged():
    """A pure colour change on matching text produces no findings.

    Colour is intentionally out of scope, for the same rebrand-noise reason
    as size and font.
    """
    text = "Hyperlink-style text in blue"
    before = [_span(text, color='#0066cc')]
    after = [_span(text, color='#000000')]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 0


def test_italic_flip_detected():
    """A span losing italic yields exactly one 'italic' finding."""
    quote = "Block quote that was italicised"
    before = [_span(quote, italic=True)]
    after = [_span(quote, italic=False)]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 1
    first_change = diff['formatting_changes'][0]
    assert first_change['attribute'] == 'italic'


def test_duplicate_text_disambiguated_by_y_position():
    """Same-text spans at different positions are compared separately.

    Both sides hold two "Important note" spans whose bboxes differ only in
    the second and fourth coordinates (the y extent, going by the test
    name). Only the first one loses bold, so one span should be counted.
    """
    text = "Important note"
    upper_box = (72, 100, 200, 115)
    lower_box = (72, 700, 200, 715)
    before = [
        _span(text, bold=True, bbox=upper_box),
        _span(text, bold=True, bbox=lower_box),
    ]
    after = [
        _span(text, bold=False, bbox=upper_box),
        _span(text, bold=True, bbox=lower_box),
    ]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 1
    first_change = diff['formatting_changes'][0]
    assert first_change['total_span_count'] == 1


def test_single_span_page_not_labelled_page_wide():
    """A lone flipped span is reported but never flagged as page-wide.

    With a single matched span, "all" matched spans flipped, yet the count
    is too small for the change to be called page-wide.
    """
    heading = "Sole heading on this section-break page"
    before = [_span(heading, bold=True)]
    after = [_span(heading, bold=False)]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 1
    first_change = diff['formatting_changes'][0]
    assert first_change['page_wide'] is False


def test_two_span_page_not_labelled_page_wide():
    """Two flipped spans stay below the page-wide threshold.

    The threshold is 3, so 2 spans flipping is not enough to call the
    change page-wide.
    """
    headings = ("First short heading", "Second short heading")
    before = [_span(heading, bold=True) for heading in headings]
    after = [_span(heading, bold=False) for heading in headings]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 1
    first_change = diff['formatting_changes'][0]
    assert first_change['page_wide'] is False


def test_missing_bold_key_treated_as_false_no_phantom_flip():
    """A span without a 'bold' key compares equal to one with bold=False.

    The missing key must be read as bold=False rather than None, which
    would register as a false flip against an explicit False.
    """
    text = "Body text from older ingest path"
    # Attributes both spans share; unpacked last so key order is unchanged.
    shared_attrs = {
        'italic': False,
        'font': 'Helvetica',
        'size': 10.0,
        'color': '#000000',
        'bbox': (0, 10, 100, 22),
    }
    before = [{'text': text, **shared_attrs}]  # deliberately no 'bold' key
    after = [{'text': text, 'bold': False, **shared_attrs}]

    diff = compute_formatting_diff(before, after, 1, 1)

    assert diff['finding_count'] == 0


def test_empty_old_spans_returns_no_findings():
    """An empty old-span list yields zero findings and severity 'none'."""
    after = [_span("Some new text")]

    diff = compute_formatting_diff([], after, 1, 1)

    assert diff['finding_count'] == 0
    assert diff['severity'] == 'none'


def test_empty_new_spans_returns_no_findings():
    """An empty new-span list yields zero findings and severity 'none'."""
    before = [_span("Some old text")]

    diff = compute_formatting_diff(before, [], 1, 1)

    assert diff['finding_count'] == 0
    assert diff['severity'] == 'none'