"""Smoke test: run_page_pair_diff merges formatting findings into pair_diffs.""" import pytest from document_mode.diff_engine import run_page_pair_diff def _page(page_num, raw_text, spans, image_path='/tmp/dummy.png'): return { 'page_num': page_num, 'raw_text': raw_text, 'spans': spans, 'image_path': image_path, 'fonts_used': [], } def _span(text, bold=False): return {'text': text, 'bold': bold, 'italic': False, 'font': 'Helvetica', 'size': 10.0, 'color': '#000000', 'bbox': (0, 10, 100, 22)} def test_formatting_findings_surface_when_llm_returns_identical(tmp_path): # Create real dummy PNGs since _diff_one_pair tries to open them via PIL. from PIL import Image as PILImage img_path = tmp_path / "dummy.png" PILImage.new('RGB', (10, 10)).save(img_path) old_pages = [_page( 1, "Theft of personal belongings if your car is left unattended unless windows are closed.", [_span("Theft of personal belongings if your car is left unattended", bold=True)], image_path=str(img_path), )] new_pages = [_page( 1, "Theft of personal belongings if your car is left unattended unless windows are closed.", [_span("Theft of personal belongings if your car is left unattended", bold=False)], image_path=str(img_path), )] # LLM says: no differences. We expect the deterministic layer to override. def fake_llm(prompt, old_img, new_img, model_version=None): return ( '{"differences_found": false, "added": [], "removed": [], ' '"modified": [], "moved": [], "style_changes": [], ' '"severity": "none", "summary": "Identical."}', {'prompt_tokens': 100, 'completion_tokens': 20, 'total_tokens': 120}, ) result = run_page_pair_diff( old_ingest={'pages': old_pages}, new_ingest={'pages': new_pages}, call_gemini_vision_fn=fake_llm, ) pair_diff = result['pair_diffs']['1->1']['diff'] assert pair_diff['differences_found'] is True assert pair_diff['severity'] == 'medium' assert len(pair_diff['formatting_changes']) == 1 assert pair_diff['formatting_changes'][0]['attribute'] == 'bold' assert result['totals']['severity_counts']['medium'] >= 1