feat(diff_engine): merge formatting_diff findings into pair_diffs

run_page_pair_diff now invokes compute_formatting_diff alongside the
LLM call for each aligned pair. When the deterministic layer finds
typographic flips on a page the LLM saw as identical, the pair is
re-classified as having differences with medium severity. Each
aggregated finding contributes to the global medium-severity tally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
nickviljoen 2026-05-19 10:03:54 +02:00
parent d21a8a276d
commit 2b1bb9ccf0
2 changed files with 94 additions and 0 deletions

View file

@ -26,6 +26,8 @@ from typing import Dict, List, Optional, Tuple
from PIL import Image
from document_mode.formatting_diff import compute_formatting_diff
# Similarity threshold for considering two pages "the same page modified"
# vs "an inserted/removed page". Tuned for policy docs where page-level text
@ -311,6 +313,26 @@ def run_page_pair_diff(
if not old_p or not new_p or not old_p.get('image_path') or not new_p.get('image_path'):
return entry, None
result = _diff_one_pair(old_p, new_p, call_gemini_vision_fn, model_version)
# Deterministic formatting diff — runs alongside the LLM diff.
fmt = compute_formatting_diff(
old_p.get('spans') or [],
new_p.get('spans') or [],
old_p['page_num'],
new_p['page_num'],
)
diff = result.setdefault('diff', {})
diff['formatting_changes'] = fmt['formatting_changes']
if fmt['finding_count'] > 0:
# If the LLM saw the page as identical but the deterministic
# layer found typographic flips, we still need the report to
# render the pair as "has changes".
diff['differences_found'] = True
# Each aggregated finding contributes one medium severity entry.
# Bump the pair's overall severity to at least 'medium' so the
# pair-card pill reflects the finding count.
if diff.get('severity') in (None, 'none'):
diff['severity'] = 'medium'
return entry, result
with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_pairs) as pool:
@ -345,6 +367,16 @@ def run_page_pair_diff(
sev = d['diff'].get('severity') or 'none'
if sev in severity_counts:
severity_counts[sev] += 1
# Each formatting-change finding counts as an additional medium entry,
# so a page with N findings contributes N+1 mediums (the +1 from the
# base severity already counted above, N more from the findings).
fmt_findings = d['diff'].get('formatting_changes') or []
if fmt_findings:
# The base severity was already bumped to >= medium in _run when
# findings exist; here we add the additional findings minus the
# one already accounted for.
extra = max(0, len(fmt_findings) - 1)
severity_counts['medium'] += extra
return {
'alignment': alignment,

View file

@ -0,0 +1,62 @@
"""Smoke test: run_page_pair_diff merges formatting findings into pair_diffs."""
import pytest
from document_mode.diff_engine import run_page_pair_diff
def _page(page_num, raw_text, spans, image_path='/tmp/dummy.png'):
return {
'page_num': page_num,
'raw_text': raw_text,
'spans': spans,
'image_path': image_path,
'fonts_used': [],
}
def _span(text, bold=False):
return {'text': text, 'bold': bold, 'italic': False, 'font': 'Helvetica',
'size': 10.0, 'color': '#000000', 'bbox': (0, 10, 100, 22)}
def test_formatting_findings_surface_when_llm_returns_identical(tmp_path):
# Create real dummy PNGs since _diff_one_pair tries to open them via PIL.
from PIL import Image as PILImage
img_path = tmp_path / "dummy.png"
PILImage.new('RGB', (10, 10)).save(img_path)
old_pages = [_page(
1,
"Theft of personal belongings if your car is left unattended unless windows are closed.",
[_span("Theft of personal belongings if your car is left unattended", bold=True)],
image_path=str(img_path),
)]
new_pages = [_page(
1,
"Theft of personal belongings if your car is left unattended unless windows are closed.",
[_span("Theft of personal belongings if your car is left unattended", bold=False)],
image_path=str(img_path),
)]
# LLM says: no differences. We expect the deterministic layer to override.
def fake_llm(prompt, old_img, new_img, model_version=None):
return (
'{"differences_found": false, "added": [], "removed": [], '
'"modified": [], "moved": [], "style_changes": [], '
'"severity": "none", "summary": "Identical."}',
{'prompt_tokens': 100, 'completion_tokens': 20, 'total_tokens': 120},
)
result = run_page_pair_diff(
old_ingest={'pages': old_pages},
new_ingest={'pages': new_pages},
call_gemini_vision_fn=fake_llm,
)
pair_diff = result['pair_diffs']['1->1']['diff']
assert pair_diff['differences_found'] is True
assert pair_diff['severity'] == 'medium'
assert len(pair_diff['formatting_changes']) == 1
assert pair_diff['formatting_changes'][0]['attribute'] == 'bold'
assert result['totals']['severity_counts']['medium'] >= 1