feat(diff_engine): merge formatting_diff findings into pair_diffs
run_page_pair_diff now invokes compute_formatting_diff alongside the LLM call for each aligned pair. When the deterministic layer finds typographic flips on a page the LLM saw as identical, the pair is re-classified as having differences with medium severity. Each aggregated finding contributes to the global medium-severity tally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d21a8a276d
commit
2b1bb9ccf0
2 changed files with 94 additions and 0 deletions
|
|
@ -26,6 +26,8 @@ from typing import Dict, List, Optional, Tuple
|
|||
|
||||
from PIL import Image
|
||||
|
||||
from document_mode.formatting_diff import compute_formatting_diff
|
||||
|
||||
|
||||
# Similarity threshold for considering two pages "the same page modified"
|
||||
# vs "an inserted/removed page". Tuned for policy docs where page-level text
|
||||
|
|
@ -311,6 +313,26 @@ def run_page_pair_diff(
|
|||
if not old_p or not new_p or not old_p.get('image_path') or not new_p.get('image_path'):
|
||||
return entry, None
|
||||
result = _diff_one_pair(old_p, new_p, call_gemini_vision_fn, model_version)
|
||||
|
||||
# Deterministic formatting diff — runs alongside the LLM diff.
|
||||
fmt = compute_formatting_diff(
|
||||
old_p.get('spans') or [],
|
||||
new_p.get('spans') or [],
|
||||
old_p['page_num'],
|
||||
new_p['page_num'],
|
||||
)
|
||||
diff = result.setdefault('diff', {})
|
||||
diff['formatting_changes'] = fmt['formatting_changes']
|
||||
if fmt['finding_count'] > 0:
|
||||
# If the LLM saw the page as identical but the deterministic
|
||||
# layer found typographic flips, we still need the report to
|
||||
# render the pair as "has changes".
|
||||
diff['differences_found'] = True
|
||||
# Each aggregated finding contributes one medium severity entry.
|
||||
# Bump the pair's overall severity to at least 'medium' so the
|
||||
# pair-card pill reflects the finding count.
|
||||
if diff.get('severity') in (None, 'none'):
|
||||
diff['severity'] = 'medium'
|
||||
return entry, result
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_pairs) as pool:
|
||||
|
|
@ -345,6 +367,16 @@ def run_page_pair_diff(
|
|||
sev = d['diff'].get('severity') or 'none'
|
||||
if sev in severity_counts:
|
||||
severity_counts[sev] += 1
|
||||
# Each formatting-change finding counts as one medium entry. The pair's
|
||||
# base severity was already counted above and stands in for the first
|
||||
# finding, so a page with N findings adds only N-1 more mediums here.
|
||||
fmt_findings = d['diff'].get('formatting_changes') or []
|
||||
if fmt_findings:
|
||||
# In _run the base severity is raised to 'medium' only when the LLM
|
||||
# reported none; that base entry is treated as covering one finding,
|
||||
# so only the remaining len(fmt_findings) - 1 are added to 'medium'.
|
||||
extra = max(0, len(fmt_findings) - 1)
|
||||
severity_counts['medium'] += extra
|
||||
|
||||
return {
|
||||
'alignment': alignment,
|
||||
|
|
|
|||
62
backend/tests/test_diff_engine_formatting_integration.py
Normal file
62
backend/tests/test_diff_engine_formatting_integration.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
"""Smoke test: run_page_pair_diff merges formatting findings into pair_diffs."""
|
||||
|
||||
import pytest
|
||||
|
||||
from document_mode.diff_engine import run_page_pair_diff
|
||||
|
||||
|
||||
def _page(page_num, raw_text, spans, image_path='/tmp/dummy.png'):
|
||||
return {
|
||||
'page_num': page_num,
|
||||
'raw_text': raw_text,
|
||||
'spans': spans,
|
||||
'image_path': image_path,
|
||||
'fonts_used': [],
|
||||
}
|
||||
|
||||
|
||||
def _span(text, bold=False):
|
||||
return {'text': text, 'bold': bold, 'italic': False, 'font': 'Helvetica',
|
||||
'size': 10.0, 'color': '#000000', 'bbox': (0, 10, 100, 22)}
|
||||
|
||||
|
||||
def test_formatting_findings_surface_when_llm_returns_identical(tmp_path):
    """A bold flip must surface even when the LLM reports no differences.

    Both pages share the same raw text and image; only the ``bold`` flag
    of their single span differs. The stubbed LLM answers "Identical.",
    and the test checks that the pair diff is still marked as changed
    with medium severity and one ``bold`` formatting change.
    """
    # A real PNG is written because _diff_one_pair tries to open it via PIL.
    from PIL import Image as PILImage
    png_path = tmp_path / "dummy.png"
    PILImage.new('RGB', (10, 10)).save(png_path)

    def make_pages(is_bold):
        # Old and new pages are identical apart from the span's bold flag.
        return [_page(
            1,
            "Theft of personal belongings if your car is left unattended unless windows are closed.",
            [_span("Theft of personal belongings if your car is left unattended", bold=is_bold)],
            image_path=str(png_path),
        )]

    old_pages = make_pages(True)
    new_pages = make_pages(False)

    # LLM says: no differences. We expect the deterministic layer to override.
    def fake_llm(prompt, old_img, new_img, model_version=None):
        reply = (
            '{"differences_found": false, "added": [], "removed": [], '
            '"modified": [], "moved": [], "style_changes": [], '
            '"severity": "none", "summary": "Identical."}'
        )
        usage = {'prompt_tokens': 100, 'completion_tokens': 20, 'total_tokens': 120}
        return reply, usage

    result = run_page_pair_diff(
        old_ingest={'pages': old_pages},
        new_ingest={'pages': new_pages},
        call_gemini_vision_fn=fake_llm,
    )

    pair_diff = result['pair_diffs']['1->1']['diff']
    assert pair_diff['differences_found'] is True
    assert pair_diff['severity'] == 'medium'
    changes = pair_diff['formatting_changes']
    assert len(changes) == 1
    assert changes[0]['attribute'] == 'bold'
    assert result['totals']['severity_counts']['medium'] >= 1
|
||||
Loading…
Add table
Reference in a new issue