run_page_pair_diff now invokes compute_formatting_diff alongside the LLM call for each aligned pair. When the deterministic layer finds typographic flips on a page the LLM saw as identical, the pair is re-classified as having differences with medium severity. Each aggregated finding contributes to the global medium-severity tally. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
62 lines
2.3 KiB
Python
62 lines
2.3 KiB
Python
"""Smoke test: run_page_pair_diff merges formatting findings into pair_diffs."""
|
|
|
|
import pytest
|
|
|
|
from document_mode.diff_engine import run_page_pair_diff
|
|
|
|
|
|
def _page(page_num, raw_text, spans, image_path='/tmp/dummy.png'):
|
|
return {
|
|
'page_num': page_num,
|
|
'raw_text': raw_text,
|
|
'spans': spans,
|
|
'image_path': image_path,
|
|
'fonts_used': [],
|
|
}
|
|
|
|
|
|
def _span(text, bold=False):
|
|
return {'text': text, 'bold': bold, 'italic': False, 'font': 'Helvetica',
|
|
'size': 10.0, 'color': '#000000', 'bbox': (0, 10, 100, 22)}
|
|
|
|
|
|
def test_formatting_findings_surface_when_llm_returns_identical(tmp_path):
    """Formatting findings must surface even when the LLM reports no diff.

    The old and new pages carry identical raw text and a single span that
    differs only in its ``bold`` flag. The stubbed LLM callback answers
    "identical", so any reported difference has to come from the
    deterministic formatting layer of ``run_page_pair_diff``.
    """
    # Create a real dummy PNG since _diff_one_pair tries to open the page
    # images via PIL.
    from PIL import Image as PILImage

    img_path = tmp_path / "dummy.png"
    PILImage.new('RGB', (10, 10)).save(img_path)

    # Shared by both pages so the bold flag is the only thing that changes.
    raw_text = (
        "Theft of personal belongings if your car is left unattended "
        "unless windows are closed."
    )
    span_text = "Theft of personal belongings if your car is left unattended"

    old_pages = [_page(
        1,
        raw_text,
        [_span(span_text, bold=True)],
        image_path=str(img_path),
    )]
    new_pages = [_page(
        1,
        raw_text,
        [_span(span_text, bold=False)],
        image_path=str(img_path),
    )]

    # LLM says: no differences. We expect the deterministic layer to override.
    def fake_llm(prompt, old_img, new_img, model_version=None):
        return (
            '{"differences_found": false, "added": [], "removed": [], '
            '"modified": [], "moved": [], "style_changes": [], '
            '"severity": "none", "summary": "Identical."}',
            {'prompt_tokens': 100, 'completion_tokens': 20, 'total_tokens': 120},
        )

    result = run_page_pair_diff(
        old_ingest={'pages': old_pages},
        new_ingest={'pages': new_pages},
        call_gemini_vision_fn=fake_llm,
    )

    pair_diff = result['pair_diffs']['1->1']['diff']
    assert pair_diff['differences_found'] is True
    assert pair_diff['severity'] == 'medium'
    assert len(pair_diff['formatting_changes']) == 1
    assert pair_diff['formatting_changes'][0]['attribute'] == 'bold'
    assert result['totals']['severity_counts']['medium'] >= 1
|