Previously every boolean attribute rendered as "Bold → Regular", producing "Italic: Bold → Regular" for italic flips. Now the helper takes the attribute name and emits "Italic → Regular" or "Bold → Regular" depending on which boolean attribute is being shown. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
419 lines
18 KiB
Python
419 lines
18 KiB
Python
"""Diff report writer for AXA Old-vs-New document mode.
|
||
|
||
Distinct from result_writer.py — the diff report's shape is fundamentally
different (alignment table, page-pair cards, severity breakdowns) so it
gets its own module rather than overloading the single-doc writer.

Outputs:
    <output_dir>/<session_id>_<old_basename>_vs_<new_basename>_diff_data.json
    <output_dir>/<session_id>_<old_basename>_vs_<new_basename>_diff_report.html
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import html
|
||
import json
|
||
import os
|
||
from typing import Dict, List, Optional
|
||
|
||
|
||
def _slug(name: str) -> str:
|
||
base = os.path.splitext(os.path.basename(name))[0]
|
||
return base.replace(' ', '_').replace('/', '_')[:60]
|
||
|
||
|
||
def _severity_class(sev: str) -> str:
|
||
return {
|
||
'high': 'sev-high',
|
||
'medium': 'sev-medium',
|
||
'low': 'sev-low',
|
||
'none': 'sev-none',
|
||
}.get(sev, 'sev-none')
|
||
|
||
|
||
def _status_class(status: str) -> str:
|
||
return {
|
||
'matched': 'status-matched',
|
||
'added': 'status-added',
|
||
'removed': 'status-removed',
|
||
}.get(status, '')
|
||
|
||
|
||
def _render_alignment_table(alignment: List[Dict]) -> str:
|
||
rows = []
|
||
for entry in alignment:
|
||
old = entry.get('old_page')
|
||
new = entry.get('new_page')
|
||
sim = entry.get('similarity')
|
||
sim_str = f"{sim:.2f}" if isinstance(sim, (int, float)) else '—'
|
||
status = entry.get('status', '')
|
||
rows.append(f"""
|
||
<tr class='{_status_class(status)}'>
|
||
<td>{old if old is not None else '—'}</td>
|
||
<td>{new if new is not None else '—'}</td>
|
||
<td>{sim_str}</td>
|
||
<td><span class='status-pill {_status_class(status)}'>{html.escape(status)}</span></td>
|
||
</tr>
|
||
""")
|
||
return f"""
|
||
<table class='alignment-table'>
|
||
<thead><tr><th>Old page</th><th>New page</th><th>Similarity</th><th>Status</th></tr></thead>
|
||
<tbody>{''.join(rows)}</tbody>
|
||
</table>
|
||
"""
|
||
|
||
|
||
def _render_diff_list(items: List[str], css_class: str, label: str, icon: str) -> str:
|
||
if not items:
|
||
return ''
|
||
bullets = ''.join(f"<li>{html.escape(it)}</li>" for it in items)
|
||
return f"""
|
||
<div class='diff-block {css_class}'>
|
||
<div class='diff-label'>{icon} {label}</div>
|
||
<ul>{bullets}</ul>
|
||
</div>
|
||
"""
|
||
|
||
|
||
def _render_formatting_block(findings: List[Dict]) -> str:
|
||
if not findings:
|
||
return ''
|
||
|
||
def _fmt_value(v, attribute):
|
||
if isinstance(v, bool):
|
||
if attribute == 'italic':
|
||
return 'Italic' if v else 'Regular'
|
||
return 'Bold' if v else 'Regular'
|
||
return str(v)
|
||
|
||
items = []
|
||
for f in findings:
|
||
attr = f.get('attribute', '')
|
||
old_v = _fmt_value(f.get('old_value'), attr)
|
||
new_v = _fmt_value(f.get('new_value'), attr)
|
||
total = f.get('total_span_count', 0)
|
||
page_wide = f.get('page_wide', False)
|
||
quotes = f.get('example_quotes', []) or []
|
||
|
||
if page_wide:
|
||
prefix = f"<strong>Page-wide {html.escape(attr)} change</strong>: {html.escape(old_v)} → {html.escape(new_v)}"
|
||
else:
|
||
prefix = f"<strong>{html.escape(attr).capitalize()}: {html.escape(old_v)} → {html.escape(new_v)}</strong>"
|
||
|
||
quote_html = ''
|
||
if quotes:
|
||
quoted = ', '.join(f'“{html.escape(q)}”' for q in quotes[:3])
|
||
extra = total - len(quotes[:3])
|
||
extra_html = f" <span class='muted'>…and {extra} more</span>" if extra > 0 else ''
|
||
quote_html = f" ({total} span{'s' if total != 1 else ''}): {quoted}{extra_html}"
|
||
|
||
items.append(f"<li>{prefix}{quote_html}</li>")
|
||
|
||
return f"""
|
||
<div class='diff-block block-style'>
|
||
<div class='diff-label'>🎨 Formatting changes</div>
|
||
<ul>{''.join(items)}</ul>
|
||
</div>
|
||
"""
|
||
|
||
|
||
def _render_pair_card(entry: Dict, pair_diffs: Dict) -> str:
|
||
old = entry['old_page']
|
||
new = entry['new_page']
|
||
status = entry['status']
|
||
sim = entry.get('similarity')
|
||
|
||
# Added or removed entire pages — different shape
|
||
if status == 'added':
|
||
return f"""
|
||
<div class='pair-card status-added'>
|
||
<div class='pair-header'>
|
||
<span class='page-label'>+ Page added in new version</span>
|
||
<span class='page-coords'>new page <strong>{new}</strong></span>
|
||
</div>
|
||
<div class='pair-body'>
|
||
<em class='muted'>This page exists in the new version but had no counterpart in the old version.</em>
|
||
</div>
|
||
</div>
|
||
"""
|
||
if status == 'removed':
|
||
return f"""
|
||
<div class='pair-card status-removed'>
|
||
<div class='pair-header'>
|
||
<span class='page-label'>− Page removed in new version</span>
|
||
<span class='page-coords'>old page <strong>{old}</strong></span>
|
||
</div>
|
||
<div class='pair-body'>
|
||
<em class='muted'>This page was in the old version but is not in the new version.</em>
|
||
</div>
|
||
</div>
|
||
"""
|
||
|
||
# Matched pair — render diff result
|
||
key = f"{old}->{new}"
|
||
pair = pair_diffs.get(key, {}).get('diff') or {}
|
||
sev = pair.get('severity', 'none')
|
||
summary = pair.get('summary', '')
|
||
differences_found = pair.get('differences_found', False)
|
||
|
||
if not differences_found and not pair.get('error'):
|
||
return f"""
|
||
<details class='pair-card status-matched-clean'>
|
||
<summary>
|
||
<span class='page-label'>= No differences detected</span>
|
||
<span class='page-coords'>old <strong>{old}</strong> ↔ new <strong>{new}</strong> · sim {sim:.2f}</span>
|
||
<span class='sev-pill sev-none'>identical</span>
|
||
</summary>
|
||
<div class='pair-body'><em class='muted'>{html.escape(summary or "Pages compared as visually identical.")}</em></div>
|
||
</details>
|
||
"""
|
||
|
||
blocks = []
|
||
blocks.append(_render_diff_list(pair.get('added') or [], 'block-added', 'Added', '➕'))
|
||
blocks.append(_render_diff_list(pair.get('removed') or [], 'block-removed', 'Removed', '➖'))
|
||
blocks.append(_render_diff_list(pair.get('modified') or [], 'block-modified', 'Modified', '✎'))
|
||
blocks.append(_render_diff_list(pair.get('moved') or [], 'block-moved', 'Moved', '↔'))
|
||
blocks.append(_render_diff_list(pair.get('style_changes') or [], 'block-style', 'Style changes', '🎨'))
|
||
blocks.append(_render_formatting_block(pair.get('formatting_changes') or []))
|
||
|
||
error_block = ''
|
||
if pair.get('error'):
|
||
error_block = f"<div class='diff-error'>⚠️ {html.escape(pair['error'])}</div>"
|
||
|
||
return f"""
|
||
<details class='pair-card' open>
|
||
<summary>
|
||
<span class='page-label'>old <strong>{old}</strong> ↔ new <strong>{new}</strong></span>
|
||
<span class='page-coords'>sim {sim:.2f}</span>
|
||
<span class='sev-pill {_severity_class(sev)}'>{html.escape(sev)}</span>
|
||
</summary>
|
||
<div class='pair-body'>
|
||
<p class='pair-summary'>{html.escape(summary or '')}</p>
|
||
{error_block}
|
||
{''.join(blocks)}
|
||
</div>
|
||
</details>
|
||
"""
|
||
|
||
|
||
def _render_at_a_glance(totals: Dict, doc_summary: Dict) -> str:
|
||
sev = totals.get('severity_counts', {})
|
||
return f"""
|
||
<div class='glance-grid'>
|
||
<div class='glance-card'>
|
||
<div class='glance-num'>{totals.get('old_page_count', 0)} → {totals.get('new_page_count', 0)}</div>
|
||
<div class='glance-label'>Page count</div>
|
||
</div>
|
||
<div class='glance-card status-added'>
|
||
<div class='glance-num'>{totals.get('pages_added', 0)}</div>
|
||
<div class='glance-label'>Pages added</div>
|
||
</div>
|
||
<div class='glance-card status-removed'>
|
||
<div class='glance-num'>{totals.get('pages_removed', 0)}</div>
|
||
<div class='glance-label'>Pages removed</div>
|
||
</div>
|
||
<div class='glance-card'>
|
||
<div class='glance-num'>{totals.get('pages_modified', 0)}</div>
|
||
<div class='glance-label'>Pages modified</div>
|
||
</div>
|
||
<div class='glance-card'>
|
||
<div class='glance-num'>{totals.get('pages_unchanged', 0)}</div>
|
||
<div class='glance-label'>Pages unchanged</div>
|
||
</div>
|
||
<div class='glance-card sev-high'>
|
||
<div class='glance-num'>{sev.get('high', 0)}</div>
|
||
<div class='glance-label'>High severity</div>
|
||
</div>
|
||
<div class='glance-card sev-medium'>
|
||
<div class='glance-num'>{sev.get('medium', 0)}</div>
|
||
<div class='glance-label'>Medium severity</div>
|
||
</div>
|
||
<div class='glance-card sev-low'>
|
||
<div class='glance-num'>{sev.get('low', 0)}</div>
|
||
<div class='glance-label'>Low severity</div>
|
||
</div>
|
||
</div>
|
||
"""
|
||
|
||
|
||
def _report_stylesheet() -> str:
    """Return the static CSS embedded in the report's ``<style>`` element."""
    return """
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; margin: 0; background: #f5f6f8; color: #222; }
.wrap { max-width: 1200px; margin: 0 auto; padding: 24px; }
h1 { margin: 0 0 4px; font-size: 22px; }
h2 { margin: 24px 0 8px; font-size: 16px; color: #111; }
.meta { color: #666; font-size: 13px; margin-bottom: 20px; }
.muted { color: #888; }

.versions-card { background: white; border-radius: 8px; padding: 18px 20px; box-shadow: 0 1px 3px rgba(0,0,0,0.06); margin-bottom: 16px; }
.versions-card .vs-row { display: flex; align-items: center; gap: 16px; font-size: 14px; }
.versions-card .vs-old, .versions-card .vs-new { flex: 1; padding: 10px 14px; border-radius: 6px; }
.versions-card .vs-old { background: #fceac0; }
.versions-card .vs-new { background: #d6f0d8; }
.versions-card .vs-arrow { font-size: 24px; color: #888; }

.overall-card { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 1px 3px rgba(0,0,0,0.06); margin-bottom: 16px; display: flex; align-items: center; gap: 24px; }
.overall-score { font-size: 48px; font-weight: 600; color: #111; }
.grade-badge { padding: 4px 12px; border-radius: 999px; font-size: 12px; font-weight: 700; letter-spacing: 0.5px; background: #eef2f7; color: #2a4060; }

.glance-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 8px; margin-bottom: 16px; }
.glance-card { background: white; border-radius: 8px; padding: 14px; box-shadow: 0 1px 3px rgba(0,0,0,0.06); text-align: center; }
.glance-card.status-added { background: #d6f0d8; }
.glance-card.status-removed { background: #f4d4d4; }
.glance-card.sev-high { background: #f4d4d4; }
.glance-card.sev-medium { background: #fceac0; }
.glance-card.sev-low { background: #eef2f7; }
.glance-num { font-size: 26px; font-weight: 700; color: #222; }
.glance-label { font-size: 12px; color: #555; margin-top: 4px; }

.alignment-table { width: 100%; border-collapse: collapse; background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.06); margin-bottom: 12px; font-size: 13px; }
.alignment-table th, .alignment-table td { text-align: left; padding: 6px 12px; border-bottom: 1px solid #eee; }
.alignment-table th { background: #fafafa; font-weight: 600; color: #555; }
.alignment-table tr:last-child td { border-bottom: none; }
.alignment-table tr.status-added td { background: #f4fcf5; }
.alignment-table tr.status-removed td { background: #fdf3f3; }

.status-pill { font-size: 11px; padding: 2px 8px; border-radius: 999px; font-weight: 600; background: #eef2f7; color: #4a5a72; }
.status-pill.status-matched { background: #eef5ff; color: #2c4f8c; }
.status-pill.status-added { background: #d6f0d8; color: #1f6a2a; }
.status-pill.status-removed { background: #f4d4d4; color: #8a1f1f; }

.pair-card { background: white; border-radius: 8px; padding: 14px 18px; margin-bottom: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.06); border-left: 3px solid transparent; }
.pair-card[open] { padding-bottom: 18px; }
.pair-card.status-matched-clean { border-left-color: #ccc; opacity: 0.85; }
.pair-card.status-added { border-left-color: #2a8a3a; background: #f4fcf5; }
.pair-card.status-removed { border-left-color: #b53030; background: #fdf3f3; }
.pair-card summary { cursor: pointer; display: flex; align-items: center; justify-content: space-between; gap: 12px; font-size: 14px; list-style: none; }
.pair-card summary::-webkit-details-marker { display: none; }
.pair-card summary::before { content: '▸'; display: inline-block; transition: transform .15s; margin-right: 6px; color: #888; }
.pair-card[open] summary::before { transform: rotate(90deg); }
.pair-header { display: flex; align-items: center; gap: 12px; flex: 1; }
.page-label { font-weight: 600; }
.page-coords { color: #888; font-size: 12px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
.pair-body { padding-left: 18px; padding-top: 8px; }
.pair-summary { color: #444; font-size: 13px; margin: 4px 0 12px; }

.sev-pill { font-size: 11px; padding: 2px 10px; border-radius: 999px; font-weight: 600; }
.sev-pill.sev-high { background: #f4d4d4; color: #8a1f1f; }
.sev-pill.sev-medium { background: #fceac0; color: #7a5a00; }
.sev-pill.sev-low { background: #eef2f7; color: #4a5a72; }
.sev-pill.sev-none { background: #e8efe8; color: #4a6a4a; }

.diff-block { background: #fafbfc; border-left: 3px solid #ccc; padding: 8px 14px; margin: 8px 0; border-radius: 4px; }
.diff-block.block-added { border-left-color: #2a8a3a; }
.diff-block.block-removed { border-left-color: #b53030; }
.diff-block.block-modified { border-left-color: #b58a00; }
.diff-block.block-moved { border-left-color: #2c4f8c; }
.diff-block.block-style { border-left-color: #8a4ab8; }
.diff-label { font-weight: 600; font-size: 13px; margin-bottom: 4px; }
.diff-block ul { margin: 4px 0; padding-left: 22px; }
.diff-block li { font-size: 13px; line-height: 1.45; margin: 2px 0; }
.diff-error { background: #fdf3f3; color: #8a1f1f; padding: 8px 12px; border-radius: 4px; font-size: 13px; margin: 8px 0; }

.filter-bar { background: white; border-radius: 6px; padding: 10px 14px; margin-bottom: 8px; box-shadow: 0 1px 2px rgba(0,0,0,0.04); font-size: 13px; }
.filter-bar label { cursor: pointer; margin-right: 12px; }

.cost-line { font-size: 12px; color: #666; margin-top: 4px; }
"""


def _render_html(result: Dict) -> str:
    """Build the complete standalone HTML diff report.

    Reads these keys from ``result``: ``old_pdf``, ``new_pdf``, ``totals``,
    ``document_summary``, ``alignment``, ``pair_diffs``, ``profile_name``,
    ``timestamp`` and ``token_usage``. Every key is optional, and a key that
    is present with a ``None`` value is treated the same as a missing one, so
    partially filled results still render.

    Args:
        result: The diff result dictionary.

    Returns:
        The full HTML document as a string.
    """
    def _text(value, missing: str = '') -> str:
        # html.escape only accepts str; metadata may be None or numeric.
        return missing if value is None else html.escape(str(value))

    old_pdf = result.get('old_pdf') or {}
    new_pdf = result.get('new_pdf') or {}
    totals = result.get('totals') or {}
    doc_summary = result.get('document_summary') or {}
    alignment = result.get('alignment') or []
    pair_diffs = result.get('pair_diffs') or {}

    score_text = _text(doc_summary.get('overall_score', 0), missing='—')
    grade_text = _text(doc_summary.get('grade'))

    # The thousands separator only applies to real numbers; anything else is
    # shown verbatim rather than raising on the ``:,`` format spec.
    tokens = (result.get('token_usage') or {}).get('total_tokens')
    if tokens is None:
        tokens = 0
    if isinstance(tokens, (int, float)) and not isinstance(tokens, bool):
        tokens_text = f"{tokens:,}"
    else:
        tokens_text = _text(tokens)

    glance = _render_at_a_glance(totals, doc_summary)
    alignment_table = _render_alignment_table(alignment)
    pair_cards = '\n'.join(_render_pair_card(entry, pair_diffs) for entry in alignment)

    old_name = old_pdf.get('filename')
    new_name = new_pdf.get('filename')
    title = (
        f"Diff Report — {'old' if old_name is None else old_name}"
        f" vs {'new' if new_name is None else new_name}"
    )

    return f"""<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>{html.escape(title)}</title>
<style>{_report_stylesheet()}</style>
</head>
<body>
<div class='wrap'>
  <h1>Old vs New Diff — {_text(result.get('profile_name'))}</h1>
  <div class='meta'>{_text(result.get('timestamp'))}</div>

  <div class='versions-card'>
    <div class='vs-row'>
      <div class='vs-old'>
        <strong>OLD:</strong> {_text(old_name)}<br>
        <span class='muted'>{_text(old_pdf.get('pages_processed', 0), missing='0')} pages</span>
      </div>
      <div class='vs-arrow'>→</div>
      <div class='vs-new'>
        <strong>NEW:</strong> {_text(new_name)}<br>
        <span class='muted'>{_text(new_pdf.get('pages_processed', 0), missing='0')} pages</span>
      </div>
    </div>
  </div>

  <div class='overall-card'>
    <div>
      <div class='overall-score'>{score_text}</div>
      <div style='font-size:12px;color:#666;'>Diff score (100 = identical)</div>
    </div>
    <div>
      <span class='grade-badge'>{grade_text}</span>
    </div>
    <div class='cost-line muted'>
      Tokens: {tokens_text}
    </div>
  </div>

  <h2>At a glance</h2>
  {glance}

  <h2>Page alignment map</h2>
  {alignment_table}

  <h2>Page-by-page differences</h2>
  {pair_cards}

</div>
</body>
</html>
"""
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Public entrypoint
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
|
||
def write_diff_report(
    result: Dict,
    old_filename: str,
    new_filename: str,
    session_id: str,
    output_dir: str,
    output_mode: str = 'both',
) -> Dict[str, Optional[str]]:
    """Write JSON + HTML diff reports.

    Files are named
    ``<session_id>_<old_slug>_vs_<new_slug>_diff_data.json`` and
    ``..._diff_report.html`` inside ``output_dir``, which is created if it
    does not exist.

    Args:
        result: The diff result; dumped as JSON and rendered to HTML.
        old_filename: Name or path of the old document (used for the slug).
        new_filename: Name or path of the new document (used for the slug).
        session_id: Prefix for both output filenames.
        output_dir: Directory to write into.
        output_mode: ``'json'``, ``'html'`` or ``'both'``. Any other value
            writes nothing.

    Returns: {'json': path or None, 'html': path or None}
    """
    os.makedirs(output_dir, exist_ok=True)
    base = f"{session_id}_{_slug(old_filename)}_vs_{_slug(new_filename)}_diff"
    paths: Dict[str, Optional[str]] = {'json': None, 'html': None}

    # Each payload is built fully in memory before its file is opened, so a
    # serialisation or rendering error cannot leave an empty or truncated
    # report on disk. JSON goes first so the raw data survives an HTML failure.
    if output_mode in ('json', 'both'):
        json_text = json.dumps(result, indent=2, default=str)
        json_path = os.path.join(output_dir, f"{base}_data.json")
        with open(json_path, 'w', encoding='utf-8') as handle:
            handle.write(json_text)
        paths['json'] = json_path

    if output_mode in ('html', 'both'):
        html_text = _render_html(result)
        html_path = os.path.join(output_dir, f"{base}_report.html")
        with open(html_path, 'w', encoding='utf-8') as handle:
            handle.write(html_text)
        paths['html'] = html_path

    return paths
|