AXA's accessibility QC team uses axes4 PAC (PDF/UA-1 / Matterhorn Protocol) as their compliance gate, but our existing 9-criterion deterministic check runs surface-level only and would pass documents PAC fails. Wired up the existing _run_verapdf() stub so veraPDF — the open-source Matterhorn implementation — runs as a subprocess and drives the score when available. Verified locally: veraPDF on EAA_v1.pdf reports the exact same Content (86) and Metadata (1) failure counts as PAC's report on the same document family, confirming protocol parity. Falls back cleanly to the deterministic layer when veraPDF isn't installed, so deploys are safe before the binary lands on dev/prod servers. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
718 lines
31 KiB
Python
718 lines
31 KiB
Python
"""Result serialisation for document-mode QC.
|
||
|
||
Writes both a structured JSON file (full per-check + per-page drilldown) and
a self-contained HTML report optimised for fast triage:

  • Top "Findings at a glance" panel — one line per check
  • Per-check sections with structured findings tables
  • Per-page accordion (collapsed by default; "show only failing" filter)

Filename convention mirrors single-asset mode:
  <output_dir>/<session_id>_<filename>_data.json
  <output_dir>/<session_id>_<filename>_report.html
|
||
"""
|
||
|
||
import html
|
||
import json
|
||
import os
|
||
from typing import Dict, List, Optional
|
||
|
||
|
||
def _slugify_filename(name: str) -> str:
    """Reduce a source filename or path to a stem usable inside report filenames.

    The directory part and the final extension are dropped, then every space
    and forward slash left in the stem is replaced with an underscore.
    """
    stem, _extension = os.path.splitext(os.path.basename(name))
    return stem.translate(str.maketrans({' ': '_', '/': '_'}))
|
||
|
||
|
||
def _score_class(score: float) -> str:
    """Pick the CSS class used to colour a score.

    Scores of 8 or more map to 'score-good', scores of 6 or more (but below 8)
    map to 'score-ok', and anything lower maps to 'score-bad'.
    """
    for floor, css_class in ((8, 'score-good'), (6, 'score-ok')):
        if score >= floor:
            return css_class
    return 'score-bad'
|
||
|
||
|
||
def _pill(text: str, kind: str = 'neutral') -> str:
    """Wrap *text* in a pill-styled <span>.

    *kind* may be 'good', 'ok', 'bad' or 'neutral'; any other value is
    rendered with the neutral style. The text is converted with str() and
    HTML-escaped before being embedded.
    """
    modifier = {'good': ' pill-good', 'ok': ' pill-ok', 'bad': ' pill-bad'}.get(kind, '')
    label = html.escape(str(text))
    return f'<span class="pill{modifier}">{label}</span>'
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Findings renderers — each understands a specific check's structured payload
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
|
||
def _render_font_inventory(findings: Dict) -> str:
    """Render the font-inventory findings as an HTML table.

    Reads ``findings['distribution']``, a list of entries with the keys
    ``font``, ``page_count`` and ``pages``. Each entry becomes one row; only
    the first 30 page numbers are listed, followed by an ellipsis when more
    exist. Returns a short placeholder paragraph when there are no entries.
    """
    entries = findings.get('distribution') or []
    if not entries:
        return '<p>No fonts captured.</p>'

    body_rows = []
    for entry in entries:
        font_name = html.escape(entry['font'])
        page_count = entry['page_count']
        page_numbers = entry['pages']
        listed = html.escape(', '.join(map(str, page_numbers[:30])))
        ellipsis = '…' if len(page_numbers) > 30 else ''
        body_rows.append(
            f"<tr><td>{font_name}</td><td>{page_count}</td>"
            f"<td>{listed}{ellipsis}</td></tr>"
        )
    rows_html = ''.join(body_rows)

    return f"""
    <table class='findings-table'>
      <thead><tr><th>Font</th><th>Pages with this font</th><th>Page list (first 30)</th></tr></thead>
      <tbody>{rows_html}</tbody>
    </table>
    """
|
||
|
||
|
||
def _render_phone_inventory(findings: Dict) -> str:
    """Render the phone-number inventory findings as an HTML table.

    Reads ``findings['distribution']``, a list of entries with the keys
    ``number``, ``occurrences`` and ``pages``. Each entry becomes one row;
    only the first 30 page numbers are listed, followed by an ellipsis when
    more exist. Returns a short placeholder paragraph when there are no
    entries.
    """
    entries = findings.get('distribution') or []
    if not entries:
        return '<p>No phone numbers detected.</p>'

    body_rows = []
    for entry in entries:
        number = html.escape(entry['number'])
        occurrences = entry['occurrences']
        page_numbers = entry['pages']
        listed = html.escape(', '.join(map(str, page_numbers[:30])))
        ellipsis = '…' if len(page_numbers) > 30 else ''
        body_rows.append(
            f"<tr><td>{number}</td><td>{occurrences}</td>"
            f"<td>{listed}{ellipsis}</td></tr>"
        )
    rows_html = ''.join(body_rows)

    return f"""
    <table class='findings-table'>
      <thead><tr><th>Number</th><th>Occurrences</th><th>Pages</th></tr></thead>
      <tbody>{rows_html}</tbody>
    </table>
    """
|
||
|
||
|
||
def _render_bold_words_violations(findings: Dict) -> str:
    """Render the bold-words check: a counts summary plus per-term violations.

    When ``findings['error']`` is 'seed_missing' only a notice is returned.
    Otherwise a summary paragraph is built from the count fields and, if
    ``findings['violations']`` is non-empty, one collapsible block per term
    follows. Terms are ordered by number of violations, most first; each
    block lists at most 25 occurrences with the context cut to 200 characters.
    """
    if findings.get('error') == 'seed_missing':
        return '<p class="muted">No bold-words seed dictionary present.</p>'

    hits = findings.get('violations') or []
    bold_count = findings.get('bold_occurrences', 0)
    plain_count = findings.get('non_bold_occurrences', 0)
    # An empty or missing exclusion list is displayed as the text "(none)".
    skipped_pages = findings.get('definitions_pages_excluded') or '(none)'
    term_count = findings.get('dictionary_size', 0)

    intro = f"""
    <p>
      <strong>{term_count}</strong> defined terms scanned ·
      <span class='ok'>{bold_count}</span> correctly bold ·
      <span class='bad'>{plain_count}</span> non-bold ·
      excluding definitions pages {skipped_pages}.
    </p>
    """

    if not hits:
        return intro + '<p class="ok">No non-bold occurrences detected.</p>'

    # Bucket the violations per term so each term gets a single block.
    grouped: Dict[str, List[Dict]] = {}
    for hit in hits:
        grouped.setdefault(hit['term'], []).append(hit)

    # sorted() stays stable with reverse=True, so terms with equal counts
    # keep their first-seen order.
    ranked = sorted(grouped.items(), key=lambda pair: len(pair[1]), reverse=True)

    blocks = []
    for term, occurrences in ranked:
        count = len(occurrences)
        row_parts = []
        for hit in occurrences[:25]:
            page = hit['page']
            snippet = html.escape((hit['context'] or '')[:200])
            font_name = html.escape(hit.get('font') or '?')
            row_parts.append(
                f"<tr><td>{page}</td><td>{snippet}</td>"
                f"<td><code>{font_name}</code></td></tr>"
            )
        rows_html = ''.join(row_parts)
        overflow = ''
        if count > 25:
            overflow = f"<tr><td colspan=3 class='muted'>…and {count - 25} more occurrences.</td></tr>"
        plural = '' if count == 1 else 's'
        blocks.append(f"""
        <details class='violation-block'>
          <summary><strong>{html.escape(term)}</strong> — {count} non-bold occurrence{plural}</summary>
          <table class='findings-table'>
            <thead><tr><th>Page</th><th>Context (truncated)</th><th>Font</th></tr></thead>
            <tbody>{rows_html}{overflow}</tbody>
          </table>
        </details>
        """)

    return intro + '\n'.join(blocks)
|
||
|
||
|
||
def _render_page_numbering(findings: Dict) -> str:
    """Render the page-numbering check as a summary line plus a table.

    Reads ``pages_with_detected_number`` and ``pages_total`` for the summary
    and ``discontinuities`` (entries with ``page_index``, ``expected`` and
    ``detected``) for the table. When there are no discontinuities only the
    summary and an "all clear" paragraph are returned.

    Every cell value is stringified and HTML-escaped before insertion, so a
    non-numeric value cannot inject markup into the report.
    # NOTE(review): the types of expected/detected are not visible here;
    # plain numbers render exactly as before.
    """
    issues = findings.get('discontinuities') or []
    detected = findings.get('pages_with_detected_number', 0)
    total = findings.get('pages_total', 0)
    head = f"<p>Detected page numbers on {detected}/{total} pages.</p>"
    if not issues:
        return head + '<p class="ok">No discontinuities detected.</p>'

    def cell(value) -> str:
        return f"<td>{html.escape(str(value))}</td>"

    rows = ''.join(
        f"<tr>{cell(issue['page_index'])}{cell(issue['expected'])}{cell(issue['detected'])}</tr>"
        for issue in issues
    )
    return head + f"""
    <table class='findings-table'>
      <thead><tr><th>Page index</th><th>Expected</th><th>Detected</th></tr></thead>
      <tbody>{rows}</tbody>
    </table>
    <p class="muted">Heuristic — TOC pages and section dividers can produce false positives.</p>
    """
|
||
|
||
|
||
def _render_print_code(findings: Dict) -> str:
    """Render the print-code check: inspected pages plus candidate lists.

    Reads ``pages_inspected``, ``code_candidates``, ``doc_refs``,
    ``date_candidates`` and ``version_candidates`` from *findings*. Each list
    is shown comma-separated, or as an italic "none" when it is empty.

    Candidate values are stringified and HTML-escaped before being embedded,
    so text such as ``<`` or ``&`` in a candidate cannot break or inject
    markup in the report.
    """
    def joined(values) -> str:
        # Escape first, then fall back: the "none" marker is trusted markup.
        return html.escape(', '.join(str(v) for v in values)) or '<em>none</em>'

    pages = html.escape(str(findings.get('pages_inspected') or []))
    code = joined(findings.get('code_candidates') or [])
    refs = joined(findings.get('doc_refs') or [])
    dates = joined(findings.get('date_candidates') or [])
    versions = joined(findings.get('version_candidates') or [])
    return f"""
    <p>Inspected page(s): {pages}</p>
    <p>
      <strong>Code candidates:</strong> {code}<br>
      <strong>Document refs:</strong> {refs}<br>
      <strong>Date candidates:</strong> {dates}<br>
      <strong>Version candidates:</strong> {versions}
    </p>
    """
|
||
|
||
|
||
def _render_omg_versioning(findings: Dict) -> str:
    """Render the OMG-versioning check: inspected pages plus match lists.

    Reads ``pages_inspected``, ``omg_matches`` and ``date_matches`` from
    *findings*. Each list is shown comma-separated, or as an italic "none"
    when it is empty.

    Match values are stringified and HTML-escaped before being embedded, so
    text such as ``<`` or ``&`` in a match cannot break or inject markup in
    the report.
    """
    def joined(values) -> str:
        # Escape first, then fall back: the "none" marker is trusted markup.
        return html.escape(', '.join(str(v) for v in values)) or '<em>none</em>'

    pages = html.escape(str(findings.get('pages_inspected') or []))
    omg = joined(findings.get('omg_matches') or [])
    dates = joined(findings.get('date_matches') or [])
    return f"""
    <p>Inspected page(s): {pages}</p>
    <p>
      <strong>OMG codes:</strong> {omg}<br>
      <strong>Date formats:</strong> {dates}
    </p>
    """
|
||
|
||
|
||
def _preflight_detail_html(detail: Dict) -> str:
    """Build the optional extra markup shown after one criterion's note.

    Only the first populated key is rendered, checked in this order:
    ``low_dpi_images``, ``colorspace_counts``, ``spot_spaces``,
    ``distinct_sizes_pts``. Returns an empty string when none is populated.
    """
    low_dpi = detail.get('low_dpi_images')
    if low_dpi:
        bullets = []
        for image in low_dpi[:10]:
            bullets.append(
                f"<li>Page {image['page']}, xref {image['xref']}: {image['effective_dpi']} DPI "
                f"(rendered {image['rendered_inches'][0]} × {image['rendered_inches'][1]} in)</li>"
            )
        overflow = ''
        if len(low_dpi) > 10:
            overflow = f"<li class='muted'>…and {len(low_dpi) - 10} more.</li>"
        listing = ''.join(bullets)
        return f"<ul style='margin:4px 0 0;padding-left:18px;font-size:12px'>{listing}{overflow}</ul>"

    colorspaces = detail.get('colorspace_counts')
    if colorspaces:
        summary = ', '.join(f"{name}: {count}" for name, count in sorted(colorspaces.items()))
        return f"<br><code>{html.escape(summary)}</code>"

    spot_spaces = detail.get('spot_spaces')
    if spot_spaces:
        return f"<br><code>{html.escape(', '.join(spot_spaces))}</code>"

    sizes_pts = detail.get('distinct_sizes_pts')
    if sizes_pts:
        # Sizes are multiplied by 0.3528 and labelled "mm" for display.
        formatted = '; '.join(
            f"{round(size[0]*0.3528,1)}×{round(size[1]*0.3528,1)}mm" for size in sizes_pts
        )
        return f"<br><code>{html.escape(formatted)}</code>"

    return ''


def _render_print_preflight(findings: Dict) -> str:
    """Render the print-preflight check as a pass count plus a criteria table.

    When ``findings['error']`` is set, only an error paragraph is returned.
    Otherwise each entry of ``findings['criteria']`` (keys ``passed``,
    ``code``, ``title``, ``note`` and optional ``detail``) becomes one table
    row with a tick or cross marker.
    """
    if findings.get('error'):
        return f"<p class='bad'>Error: {html.escape(str(findings['error']))}</p>"

    criteria = findings.get('criteria') or []
    passed = findings.get('criteria_passed', 0)
    total = findings.get('criteria_total', 0)
    head = f"<p><strong>{passed} / {total}</strong> print preflight criteria passed.</p>"

    table_rows = []
    for criterion in criteria:
        if criterion['passed']:
            marker = '<span class="ok">✓</span>'
        else:
            marker = '<span class="bad">✗</span>'
        extra = _preflight_detail_html(criterion.get('detail') or {})
        table_rows.append(f"""
        <tr>
          <td class='center' style='width:50px'>{marker}</td>
          <td><code>{html.escape(criterion['code'])}</code></td>
          <td>{html.escape(criterion['title'])}</td>
          <td>{html.escape(criterion['note'])}{extra}</td>
        </tr>
        """)

    body = ''.join(table_rows)
    return head + f"""
    <table class='findings-table'>
      <thead><tr><th></th><th>Code</th><th>Criterion</th><th>Observation</th></tr></thead>
      <tbody>{body}</tbody>
    </table>
    """
|
||
|
||
|
||
def _render_pdf_accessibility(findings: Dict) -> str:
    """Render the PDF-accessibility check: veraPDF status, failed rules, criteria.

    When ``findings['error']`` is set, only an error paragraph is returned.
    Otherwise the output has three parts:

    * a headline with the fast-criteria pass count and the veraPDF status
      ("enabled" when ``verapdf_run`` is truthy, the veraPDF error text when
      one was recorded, otherwise "not installed on host");
    * when veraPDF ran, its verdict, rule/check counts and a table built from
      ``verapdf['failed_rule_details']`` (first sample error only);
    * a table with one row per entry of ``findings['criteria']``.

    All values are passed through ``str()`` before ``html.escape`` so that a
    non-string payload (for example a numeric error code or a ``None``
    description) is rendered instead of raising ``AttributeError``.
    """
    def esc(value) -> str:
        return html.escape(str(value))

    if findings.get('error'):
        return f"<p class='bad'>Error: {esc(findings['error'])}</p>"

    criteria = findings.get('criteria') or []
    passed = findings.get('criteria_passed', 0)
    total = findings.get('criteria_total', 0)
    verapdf_run = findings.get('verapdf_run', False)
    verapdf = findings.get('verapdf') or {}

    if verapdf_run:
        verapdf_label = '<span class="ok">enabled</span>'
    elif verapdf.get('error'):
        verapdf_label = f'<span class="bad">error: {esc(verapdf["error"])}</span>'
    else:
        verapdf_label = '<span class="muted">not installed on host</span>'

    head = f"""
    <p>
      <strong>{passed} / {total}</strong> fast criteria passed
      · veraPDF PDF/UA-1: {verapdf_label}
    </p>
    """

    verapdf_block = ''
    if verapdf_run:
        # A missing or falsy 'compliant' value is reported as NOT COMPLIANT.
        if verapdf.get('compliant'):
            verdict_html = "<span class='ok'>COMPLIANT</span>"
        else:
            verdict_html = "<span class='bad'>NOT COMPLIANT</span>"

        rule_rows = []
        for rule in verapdf.get('failed_rule_details') or []:
            tags = ', '.join(str(tag) for tag in (rule.get('tags') or [])) or '—'
            samples = rule.get('sample_errors') or []
            sample_html = ''
            if samples:
                # Only the first sample is shown to keep the row compact.
                sample_html = "<br><code>e.g. " + esc(samples[0]) + "</code>"
            clause = esc(rule.get('clause', ''))
            test_number = esc(rule.get('test_number', ''))
            description = esc(rule.get('description') or '')
            rule_rows.append(f"""
            <tr>
              <td><code>{clause}-{test_number}</code></td>
              <td class='center'>{rule.get('failed_checks', 0)}</td>
              <td><code>{esc(tags)}</code></td>
              <td>{description}{sample_html}</td>
            </tr>
            """)

        verapdf_block = f"""
        <p><strong>veraPDF verdict:</strong> {verdict_html} ·
        {verapdf.get('passed_rules', 0)} rules passed / {verapdf.get('failed_rules', 0)} failed ·
        {verapdf.get('passed_checks', 0)} checks passed / {verapdf.get('failed_checks', 0)} failed</p>
        """
        if rule_rows:
            verapdf_block += f"""
            <table class='findings-table'>
              <thead><tr><th>Clause</th><th>Failures</th><th>Tags</th><th>Description</th></tr></thead>
              <tbody>{''.join(rule_rows)}</tbody>
            </table>
            """

    rows = []
    for criterion in criteria:
        if criterion['passed']:
            marker = '<span class="ok">✓</span>'
        else:
            marker = '<span class="bad">✗</span>'
        detail_extra = ''
        detail = criterion.get('detail') or {}
        if detail.get('not_embedded'):
            names = ', '.join(str(name) for name in detail['not_embedded'])
            detail_extra = f"<br><code>Non-embedded: {esc(names)}</code>"
        elif detail.get('image_count') is not None:
            detail_extra = (
                f"<br><code>{detail.get('image_count', 0)} images on "
                f"{detail.get('pages_with_images', 0)} pages (first 30)</code>"
            )
        rows.append(f"""
        <tr>
          <td class='center' style='width:50px'>{marker}</td>
          <td><code>{esc(criterion['code'])}</code></td>
          <td>{esc(criterion['title'])}</td>
          <td>{esc(criterion['note'])}{detail_extra}</td>
        </tr>
        """)

    return head + verapdf_block + f"""
    <table class='findings-table'>
      <thead><tr><th></th><th>Code</th><th>Criterion</th><th>Observation</th></tr></thead>
      <tbody>{''.join(rows)}</tbody>
    </table>
    """
|
||
|
||
|
||
def _render_generic(findings: Dict, response: str) -> str:
    """Fallback view for checks that have no dedicated renderer.

    Shows *response* as escaped preformatted text when it is non-empty;
    otherwise dumps *findings* as indented JSON (non-serialisable values are
    stringified) in the same preformatted block.
    """
    if response:
        text = response
    else:
        text = json.dumps(findings, indent=2, default=str)
    return f"<pre class='response-block'>{html.escape(text)}</pre>"
|
||
|
||
|
||
# Human-readable labels for page_type tags. Kept in sync with
# document_mode.page_classifier.PAGE_TYPE_LABELS.
_PAGE_TYPE_LABELS = {
    'cover': 'Cover',
    'checklist': 'Asset Checklist',
    'palette': 'Creative Guidance',
    'notes': 'Yellow Notes',
    'artwork': 'Artwork',
}


def _page_type_pill(page_type: str) -> str:
    """Render a small coloured pill naming a page's type.

    A missing or empty *page_type* is treated as 'artwork'. Known types are
    shown with their label from ``_PAGE_TYPE_LABELS``; unknown types are shown
    verbatim (HTML-escaped). Artwork pages get the 'page-type-artwork' class,
    every other type gets 'page-type-info'.
    """
    # Normalise once so the label and the CSS class are derived from the same
    # value; previously a falsy page_type produced the raw lowercase label
    # "artwork" combined with the informational class.
    kind = page_type or 'artwork'
    label = _PAGE_TYPE_LABELS.get(kind, kind)
    cls = 'page-type-artwork' if kind == 'artwork' else 'page-type-info'
    return f"<span class='page-type-pill {cls}'>{html.escape(str(label))}</span>"
|
||
|
||
|
||
def _render_page_each(findings: Dict, response: str) -> str:
    """Render the per-page breakdown for a check that ran on every page.

    Produces a headline with the page counts followed by a table with one row
    per entry of ``findings['page_scores']``: page number, page-type pill,
    score, status pill and a collapsible block holding that page's response
    text (when there is one). Pages whose type is not 'artwork' are marked
    "Informational"; artwork pages listed in ``failing_artwork_pages`` are
    marked "Below threshold", the rest "OK".

    Falls back to ``_render_generic`` when no per-page scores are present.
    """
    scores_by_page = findings.get('page_scores') or {}
    types_by_page = findings.get('page_types') or {}
    responses_by_page = findings.get('page_responses') or {}
    artwork_scores = findings.get('artwork_page_scores') or {}
    informational_scores = findings.get('informational_page_scores') or {}
    below_threshold = set(findings.get('failing_artwork_pages') or [])

    if not scores_by_page:
        # No per-page data was recorded, so show the raw response instead.
        return _render_generic(findings, response)

    artwork_count = len(artwork_scores)
    informational_count = len(informational_scores)
    intro = f"""
    <p>
      Ran on <strong>{len(scores_by_page)}</strong> pages —
      <span class='ok'>{artwork_count}</span> artwork,
      <span class='muted'>{informational_count}</span> informational
      (informational pages don't affect Pass/Fail).
    </p>
    """

    table_rows = []
    for page_num, score in sorted(scores_by_page.items()):
        page_type = types_by_page.get(page_num, 'artwork')
        score_css = _score_class(score)

        if page_type != 'artwork':
            status = '<span class="pass-pill" style="background:#eef2f7;color:#4a5a72;">Informational</span>'
        elif page_num in below_threshold:
            status = '<span class="pass-pill fail">Below threshold</span>'
        else:
            status = '<span class="pass-pill pass">OK</span>'

        page_response = responses_by_page.get(page_num, '')
        details = ''
        if page_response:
            details = (
                "<details class='page-response'><summary class='muted'>Show details</summary>"
                f"<pre class='response-block'>{html.escape(page_response)}</pre>"
                "</details>"
            )

        table_rows.append(f"""
        <tr>
          <td>Page {page_num}</td>
          <td>{_page_type_pill(page_type)}</td>
          <td class='{score_css}'><strong>{score}</strong></td>
          <td>{status}</td>
          <td>{details}</td>
        </tr>
        """)

    body = ''.join(table_rows)
    return intro + f"""
    <table class='findings-table'>
      <thead>
        <tr>
          <th>Page</th><th>Page type</th><th>Score</th><th>Status</th><th>Detail</th>
        </tr>
      </thead>
      <tbody>{body}</tbody>
    </table>
    """
|
||
|
||
|
||
# Check name -> dedicated findings renderer. Each renderer is called with the
# check's findings dict and returns an HTML fragment. _render_check_section
# looks a check up here first; checks without an entry fall back to the
# per-page or generic renderer.
_FINDINGS_RENDERERS = {
    'axa_font_inventory': _render_font_inventory,
    'axa_phone_inventory': _render_phone_inventory,
    'axa_bold_words_definitions': _render_bold_words_violations,
    'axa_page_numbering': _render_page_numbering,
    'axa_print_code': _render_print_code,
    'axa_omg_versioning': _render_omg_versioning,
    'axa_pdf_accessibility': _render_pdf_accessibility,
    'axa_print_preflight': _render_print_preflight,
}
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Main HTML report
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
|
||
def _render_check_section(check_name: str, check_result: Dict) -> str:
    """Render one check as a collapsible section (open by default).

    The summary bar shows the check name, its scope, score and a Pass/Fail
    pill; below it come the check's summary text and a body produced by:

    * the renderer registered for *check_name* in ``_FINDINGS_RENDERERS``;
    * otherwise ``_render_page_each`` when the scope is 'page_each';
    * otherwise ``_render_generic``.

    Missing or ``None`` fields are tolerated: score defaults to 0, summary to
    an empty string and scope to '?'. Summary and scope are stringified before
    escaping so a ``None`` or non-string value cannot raise.
    """
    score = check_result.get('score', 0) or 0
    summary = str(check_result.get('summary') or '')
    findings = check_result.get('findings', {}) or {}
    response = check_result.get('response', '') or ''
    scope = str(check_result.get('scope') or '?')
    pass_flag = check_result.get('pass', False)

    renderer = _FINDINGS_RENDERERS.get(check_name)
    if renderer:
        body = renderer(findings)
    elif scope == 'page_each':
        body = _render_page_each(findings, response)
    else:
        body = _render_generic(findings, response)

    if pass_flag:
        pass_pill = '<span class="pass-pill pass">Pass</span>'
    else:
        pass_pill = '<span class="pass-pill fail">Fail</span>'

    return f"""
    <details class='check-section' open>
      <summary>
        <span class='check-name'>{html.escape(check_name)}</span>
        <span class='check-meta'>
          <span class='scope-pill'>scope: {html.escape(scope)}</span>
          <span class='score-pill {_score_class(score)}'>{score}</span>
          {pass_pill}
        </span>
      </summary>
      <p class='check-summary'>{html.escape(summary)}</p>
      <div class='check-body'>{body}</div>
    </details>
    """
|
||
|
||
|
||
def _render_at_a_glance(check_summaries: Dict[str, Dict]) -> str:
    """Render the "Findings at a glance" table: one row per check.

    Each row links to the check's detail section (anchor ``check-<name>``)
    and shows its scope, score, Pass/Fail pill and headline summary.

    Missing or ``None`` fields are tolerated: score defaults to 0, scope to
    '?' and summary to an empty string. Scope and summary are stringified
    before escaping so a ``None`` or non-string value cannot raise.
    """
    rows = []
    for name, entry in check_summaries.items():
        score = entry.get('score', 0) or 0
        scope = html.escape(str(entry.get('scope') or '?'))
        summary = html.escape(str(entry.get('summary') or ''))
        anchor = html.escape(name)
        if entry.get('pass'):
            pass_pill = '<span class="pass-pill pass">Pass</span>'
        else:
            pass_pill = '<span class="pass-pill fail">Fail</span>'
        rows.append(f"""
        <tr>
          <td><a href='#check-{anchor}'>{anchor}</a></td>
          <td><span class='scope-pill'>{scope}</span></td>
          <td class='{_score_class(score)}'><strong>{score}</strong></td>
          <td>{pass_pill}</td>
          <td>{summary}</td>
        </tr>
        """)
    return f"""
    <table class='glance-table'>
      <thead><tr><th>Check</th><th>Scope</th><th>Score</th><th>Status</th><th>Headline finding</th></tr></thead>
      <tbody>{''.join(rows)}</tbody>
    </table>
    """
|
||
|
||
|
||
def _render_page_strip(pages: List[Dict]) -> str:
    """Render the per-page strip: one collapsed row per page listing its fonts.

    Each entry of *pages* needs ``page_num``; ``page_type`` defaults to
    'artwork' and ``fonts_used`` to an empty list. The row header shows the
    page number, the page-type pill and the font count ("1 font",
    "2 fonts", ...); expanding it reveals the comma-separated font names.

    Returns an empty string when *pages* is empty or ``None``.
    """
    if not pages:
        return ''
    rows = []
    for page in pages:
        fonts = page.get('fonts_used') or []
        ptype = page.get('page_type', 'artwork')
        # Pluralise the counter like the other counters in this report.
        noun = 'font' if len(fonts) == 1 else 'fonts'
        font_list = html.escape(', '.join(str(font) for font in fonts))
        rows.append(f"""
        <details class='page-row'>
          <summary>
            <span class='page-num'>Page {page['page_num']}</span>
            {_page_type_pill(ptype)}
            <span class='font-count'>{len(fonts)} {noun}</span>
          </summary>
          <p class='fonts-on-page'>{font_list}</p>
        </details>
        """)
    return ''.join(rows)
|
||
|
||
|
||
def _render_html(result: Dict, original_filename: str) -> str:
    """Build the complete, self-contained HTML report for one document.

    Reads from *result*: ``document_summary`` (overall score, grade, per-check
    summaries, strict-grade information), ``check_results``, ``pages``,
    ``ingest_metadata['fonts_inventory']``, ``profile_name``, ``timestamp``,
    ``page_count``, ``pages_processed`` and ``truncated``.

    The page contains, in order: title and metadata line, an optional
    truncation banner, an optional strict-grade banner, the overall score
    card, the at-a-glance table, one section per check, the fonts inventory
    and the per-page strip. All CSS is inlined.

    Text values (grade, profile name, timestamp, check names, font names) are
    stringified and HTML-escaped before insertion, and top-level sections that
    are missing or ``None`` are treated as empty.
    """
    summary = result.get('document_summary') or {}
    overall_score = summary.get('overall_score', 0)
    # Escaped once: the grade is used both in a class attribute and as text.
    grade = html.escape(str(summary.get('grade') or ''))
    check_summaries = summary.get('check_summaries') or {}
    check_results = result.get('check_results') or {}
    pages = result.get('pages') or []
    fonts_inventory = (result.get('ingest_metadata') or {}).get('fonts_inventory') or []
    profile_name = html.escape(str(result.get('profile_name') or ''))
    timestamp = html.escape(str(result.get('timestamp') or ''))
    title = html.escape(original_filename)

    truncated_banner = ''
    if result.get('truncated'):
        truncated_banner = f"""
        <div class='banner banner-warn'>
          ⚠️ Document has {result.get('page_count')} pages — only the first {result.get('pages_processed')} were processed.
        </div>
        """

    strict_banner = ''
    if summary.get('strict_grade'):
        violations = summary.get('strict_violations') or []
        if violations:
            # Group violations by page for readability
            by_page: Dict[int, List[Dict]] = {}
            for violation in violations:
                by_page.setdefault(violation['page'], []).append(violation)
            page_rows = []
            for page_num, page_violations in sorted(by_page.items()):
                check_list = ', '.join(
                    f"<code>{html.escape(str(v['check']))}</code> ({v['score']})"
                    for v in page_violations
                )
                page_rows.append(f"<li>Page {page_num}: {check_list}</li>")
            plural = 's' if len(violations) != 1 else ''
            strict_banner = f"""
            <div class='banner banner-fail'>
              <strong>Strict-grade override triggered.</strong>
              {len(violations)} artwork-page check{plural} scored below 6 — overall grade forced to Fail.
              <ul style='margin:6px 0 0 18px;padding:0;'>{''.join(page_rows)}</ul>
            </div>
            """
        else:
            strict_banner = """
            <div class='banner banner-ok'>
              ✓ Strict-grade override active — no artwork-page checks scored below the threshold.
            </div>
            """

    glance = _render_at_a_glance(check_summaries)
    # Each section is preceded by the anchor the at-a-glance table links to.
    check_sections = '\n'.join(
        f"<a id='check-{html.escape(name)}'></a>{_render_check_section(name, check_results.get(name) or {})}"
        for name in check_summaries
    )

    fonts_pill_strip = ''.join(
        f"<span class='font-pill'>{html.escape(str(font))}</span>" for font in fonts_inventory
    ) or '<em class="muted">No fonts captured.</em>'

    page_strip = _render_page_strip(pages)

    # The overall score is shown on a 0-100 scale; _score_class expects 0-10.
    overall_css = _score_class(overall_score / 10 if overall_score else 0)

    return f"""<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>QC Report — {title}</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; margin: 0; background: #f5f6f8; color: #222; }}
.wrap {{ max-width: 1200px; margin: 0 auto; padding: 24px; }}
h1 {{ margin: 0 0 4px; font-size: 22px; }}
h2 {{ margin: 28px 0 10px; font-size: 16px; color: #111; }}
.meta {{ color: #666; font-size: 13px; margin-bottom: 20px; }}
.ok {{ color: #2a8a3a; }}
.bad {{ color: #b53030; }}
.muted {{ color: #888; }}
.center {{ text-align: center; }}
code {{ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12px; background: #f0f0f0; padding: 1px 4px; border-radius: 3px; }}
.overall-card {{ background: white; border-radius: 8px; padding: 20px; box-shadow: 0 1px 3px rgba(0,0,0,0.06); margin-bottom: 16px; display: flex; align-items: center; gap: 24px; }}
.overall-score {{ font-size: 48px; font-weight: 600; }}
.overall-score.score-good {{ color: #2a8a3a; }}
.overall-score.score-ok {{ color: #b58a00; }}
.overall-score.score-bad {{ color: #b53030; }}
.grade-badge {{ padding: 4px 12px; border-radius: 999px; font-size: 12px; font-weight: 700; letter-spacing: 0.5px; }}
.grade-Pass {{ background: #d6f0d8; color: #1f6a2a; }}
.grade-Fail {{ background: #f4d4d4; color: #8a1f1f; }}
.glance-table, .findings-table {{ width: 100%; border-collapse: collapse; background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.06); margin-bottom: 12px; }}
.glance-table th, .glance-table td, .findings-table th, .findings-table td {{ text-align: left; padding: 8px 12px; border-bottom: 1px solid #eee; font-size: 13px; vertical-align: top; }}
.glance-table th, .findings-table th {{ background: #fafafa; font-weight: 600; color: #555; }}
.glance-table tr:last-child td, .findings-table tr:last-child td {{ border-bottom: none; }}
.glance-table a {{ color: #0a5fc2; text-decoration: none; }}
.glance-table a:hover {{ text-decoration: underline; }}
.score-good, td.score-good {{ color: #2a8a3a; }}
.score-ok, td.score-ok {{ color: #b58a00; }}
.score-bad, td.score-bad {{ color: #b53030; }}
.score-pill {{ font-weight: 700; padding: 3px 10px; border-radius: 6px; background: #f0f0f0; font-size: 14px; }}
.score-pill.score-good {{ background: #d6f0d8; color: #1f6a2a; }}
.score-pill.score-ok {{ background: #fceac0; color: #7a5a00; }}
.score-pill.score-bad {{ background: #f4d4d4; color: #8a1f1f; }}
.scope-pill {{ font-size: 11px; background: #eef2f7; color: #4a5a72; padding: 2px 8px; border-radius: 999px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }}
.pass-pill {{ font-size: 11px; padding: 2px 8px; border-radius: 999px; font-weight: 600; }}
.pass-pill.pass {{ background: #d6f0d8; color: #1f6a2a; }}
.pass-pill.fail {{ background: #f4d4d4; color: #8a1f1f; }}
.check-section {{ background: white; border-radius: 8px; padding: 14px 18px; margin-bottom: 10px; box-shadow: 0 1px 3px rgba(0,0,0,0.06); }}
.check-section[open] {{ padding-bottom: 18px; }}
.check-section summary {{ cursor: pointer; display: flex; align-items: center; justify-content: space-between; gap: 12px; list-style: none; }}
.check-section summary::-webkit-details-marker {{ display: none; }}
.check-section summary::before {{ content: '▸'; display: inline-block; transition: transform .15s; margin-right: 6px; color: #888; }}
.check-section[open] summary::before {{ transform: rotate(90deg); }}
.check-section .check-name {{ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 14px; flex: 1; }}
.check-section .check-meta {{ display: flex; align-items: center; gap: 8px; }}
.check-summary {{ color: #444; font-size: 13px; margin: 8px 0 14px; padding-left: 18px; }}
.check-body {{ padding-left: 18px; }}
.violation-block {{ background: #fafbfc; border-left: 3px solid #b53030; padding: 8px 12px; margin: 6px 0; border-radius: 4px; }}
.violation-block summary {{ cursor: pointer; font-size: 13px; }}
.violation-block[open] summary {{ margin-bottom: 8px; }}
.response-block {{ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12px; background: #fafafa; padding: 10px; border-radius: 6px; max-height: 400px; overflow: auto; white-space: pre-wrap; }}
.font-pill {{ display: inline-block; padding: 2px 8px; background: #eef2f7; border-radius: 999px; margin: 2px 4px 2px 0; font-size: 12px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }}
.banner {{ padding: 12px 16px; border-radius: 6px; margin-bottom: 12px; font-size: 13px; }}
.banner-warn {{ background: #fff4d6; color: #7a5a00; border: 1px solid #f0d77b; }}
.banner-fail {{ background: #fbe5e5; color: #8a1f1f; border: 1px solid #e9b2b2; }}
.banner-ok {{ background: #e2f4e4; color: #1f6a2a; border: 1px solid #b2d9b8; }}
.page-type-pill {{ display: inline-block; padding: 2px 8px; border-radius: 999px; font-size: 11px; font-weight: 600; letter-spacing: 0.3px; }}
.page-type-pill.page-type-artwork {{ background: #d6f0d8; color: #1f6a2a; }}
.page-type-pill.page-type-info {{ background: #eef2f7; color: #4a5a72; }}
.page-response summary {{ cursor: pointer; font-size: 12px; padding: 4px 0; }}
.page-row {{ background: white; padding: 6px 12px; margin-bottom: 4px; border-radius: 4px; box-shadow: 0 1px 2px rgba(0,0,0,0.04); }}
.page-row summary {{ display: flex; justify-content: space-between; cursor: pointer; font-size: 13px; }}
.page-num {{ font-weight: 600; }}
.font-count {{ color: #888; font-size: 12px; }}
.fonts-on-page {{ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12px; color: #555; margin: 6px 0 0; }}
.filter-bar {{ background: white; border-radius: 6px; padding: 10px 14px; margin-bottom: 8px; box-shadow: 0 1px 2px rgba(0,0,0,0.04); font-size: 13px; }}
.filter-bar label {{ cursor: pointer; }}
</style>
</head>
<body>
<div class='wrap'>
<h1>QC Report — {title}</h1>
<div class='meta'>
Profile: <strong>{profile_name}</strong>
· Pages processed: {result.get('pages_processed', 0)} / {result.get('page_count', 0)}
· {timestamp}
</div>
{truncated_banner}
{strict_banner}

<div class='overall-card'>
<div>
<div class='overall-score {overall_css}'>{overall_score}</div>
<div style='font-size:12px;color:#666;'>Overall score (0-100)</div>
</div>
<div>
<span class='grade-badge grade-{grade}'>{grade}</span>
</div>
</div>

<h2>Findings at a glance</h2>
{glance}

<h2>Check details</h2>
{check_sections}

<h2>Fonts inventory</h2>
<div class='overall-card' style='display:block;'>
{fonts_pill_strip}
</div>

<h2>Per-page summary</h2>
{page_strip}
</div>
</body>
</html>
"""
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Public entrypoint
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
|
||
def write_document_report(
    result: Dict,
    original_filename: str,
    session_id: str,
    output_dir: str,
    output_mode: str = 'both',
) -> Dict[str, Optional[str]]:
    """Write the JSON and/or HTML report files for a document-mode analysis.

    Files are named ``<session_id>_<slug>_data.json`` and
    ``<session_id>_<slug>_report.html`` inside *output_dir*, where the slug
    is derived from *original_filename*. The directory is created if needed.

    Args:
        result: aggregated analysis dict; dumped as JSON (values that are
            not JSON-serialisable are stringified) and rendered to HTML.
        original_filename: source PDF filename (for naming + display).
        session_id: session id, used as the filename prefix.
        output_dir: directory the report files are written to.
        output_mode: 'json', 'html', or 'both'. Any other value writes
            nothing.

    Returns:
        ``{'json': path or None, 'html': path or None}`` with the path of
        each file that was written.
    """
    os.makedirs(output_dir, exist_ok=True)
    prefix = f"{session_id}_{_slugify_filename(original_filename)}"
    written: Dict[str, Optional[str]] = {'json': None, 'html': None}
    wants_json = output_mode in ('json', 'both')
    wants_html = output_mode in ('html', 'both')

    if wants_json:
        json_target = os.path.join(output_dir, prefix + "_data.json")
        with open(json_target, 'w', encoding='utf-8') as handle:
            json.dump(result, handle, indent=2, default=str)
        written['json'] = json_target

    if wants_html:
        html_target = os.path.join(output_dir, prefix + "_report.html")
        document = _render_html(result, original_filename)
        with open(html_target, 'w', encoding='utf-8') as handle:
            handle.write(document)
        written['html'] = html_target

    return written
|