ai_qc/backend/tests/test_diff_engine_formatting_integration.py
nickviljoen 2b1bb9ccf0 feat(diff_engine): merge formatting_diff findings into pair_diffs
run_page_pair_diff now invokes compute_formatting_diff alongside the
LLM call for each aligned pair. When the deterministic layer finds
typographic flips on a page the LLM saw as identical, the pair is
re-classified as having differences with medium severity. Each
aggregated finding contributes to the global medium-severity tally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 10:03:54 +02:00

62 lines
2.3 KiB
Python

"""Smoke test: run_page_pair_diff merges formatting findings into pair_diffs."""
import pytest
from document_mode.diff_engine import run_page_pair_diff
def _page(page_num, raw_text, spans, image_path='/tmp/dummy.png'):
return {
'page_num': page_num,
'raw_text': raw_text,
'spans': spans,
'image_path': image_path,
'fonts_used': [],
}
def _span(text, bold=False):
return {'text': text, 'bold': bold, 'italic': False, 'font': 'Helvetica',
'size': 10.0, 'color': '#000000', 'bbox': (0, 10, 100, 22)}
def test_formatting_findings_surface_when_llm_returns_identical(tmp_path):
    """A bold flip must be reported even when the LLM reports no differences.

    The old and new pages carry identical raw text and one span each; the
    spans differ only in their ``bold`` flag. The stubbed LLM answers
    "no differences", so any finding in the result has to come from the
    non-LLM formatting comparison inside ``run_page_pair_diff``.

    Args:
        tmp_path: pytest-provided temporary directory used for the page image.
    """
    # Create real dummy PNGs since _diff_one_pair tries to open them via PIL.
    from PIL import Image as PILImage
    img_path = tmp_path / "dummy.png"
    PILImage.new('RGB', (10, 10)).save(img_path)

    # Same text on both sides; only the span's bold attribute flips.
    old_pages = [_page(
        1,
        "Theft of personal belongings if your car is left unattended unless windows are closed.",
        [_span("Theft of personal belongings if your car is left unattended", bold=True)],
        image_path=str(img_path),
    )]
    new_pages = [_page(
        1,
        "Theft of personal belongings if your car is left unattended unless windows are closed.",
        [_span("Theft of personal belongings if your car is left unattended", bold=False)],
        image_path=str(img_path),
    )]

    # LLM says: no differences. We expect the deterministic layer to override.
    def fake_llm(prompt, old_img, new_img, model_version=None):
        """Stand-in for the vision call: returns (JSON text, token usage)."""
        return (
            '{"differences_found": false, "added": [], "removed": [], '
            '"modified": [], "moved": [], "style_changes": [], '
            '"severity": "none", "summary": "Identical."}',
            {'prompt_tokens': 100, 'completion_tokens': 20, 'total_tokens': 120},
        )

    result = run_page_pair_diff(
        old_ingest={'pages': old_pages},
        new_ingest={'pages': new_pages},
        call_gemini_vision_fn=fake_llm,
    )

    # Page 1 aligned to page 1 is looked up under the '1->1' key.
    pair_diff = result['pair_diffs']['1->1']['diff']
    assert pair_diff['differences_found'] is True
    assert pair_diff['severity'] == 'medium'
    assert len(pair_diff['formatting_changes']) == 1
    assert pair_diff['formatting_changes'][0]['attribute'] == 'bold'
    assert result['totals']['severity_counts']['medium'] >= 1