From a5784feda66ab2da0dc6e4b02ab1414863242263 Mon Sep 17 00:00:00 2001 From: Vadym Samoilenko Date: Wed, 18 Mar 2026 10:51:10 +0000 Subject: [PATCH] Address client feedback: WCAG badges, table grouping, retention, history UX, AI prompt ethics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Issue 1: Recompute WCAG A/AA compliance badges after dismissing issues (JS + backend); exported reports now reflect updated pass/fail status - Issue 2: Group document-wide table issues into collapsible cards with Dismiss All button; reduces noise for multi-table documents - Issue 3: Split cleanup retention — uploads deleted after 24h, result/meta JSONs retained 30 days (RESULTS_RETENTION_HOURS env var, default 720h) - Issue 4A: Library shows adjusted score when available (.adjusted.json preferred) - Issue 4B: History page groups documents by retention countdown (red/yellow/green sections); adds 30-day retention banner - Issue 5+6: AI prompt updated — describe people by role/action not appearance, use specific brand names; flags images with people for human review Co-Authored-By: Claude Sonnet 4.6 --- .env.example | 6 ++ api.php | 52 +++++++++-- cleanup.py | 33 +++++-- css/styles.css | 32 +++++++ enterprise_pdf_checker.py | 36 +++++++- history.html | 5 +- js/history.js | 181 ++++++++++++++++++++++++++------------ js/results.js | 113 +++++++++++++++++++++++- 8 files changed, 381 insertions(+), 77 deletions(-) diff --git a/.env.example b/.env.example index 5c46c1e..bbe6060 100644 --- a/.env.example +++ b/.env.example @@ -35,6 +35,12 @@ GCP_SA_KEY_PATH=./pdf-api-invoker-key.json # GCS bucket for page images GCS_BUCKET_NAME=optical-pdf-images +# File retention +# Uploaded PDFs are deleted after RETENTION_HOURS (default 24h) +# Result/meta JSON files are kept for RESULTS_RETENTION_HOURS (default 720h = 30 days) +RETENTION_HOURS=24 +RESULTS_RETENTION_HOURS=720 + # Azure AD / MSAL Authentication AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385 AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef diff --git a/api.php b/api.php index 7b6964c..383d2e1 100644 --- a/api.php +++ b/api.php @@ -713,16 +713,19 @@ function handleList() { if ($job_user_id !== null) continue; } - // Enrich with result summary if available + // Enrich with result summary — prefer adjusted result if available $result_file = str_replace('.meta.json', '.result.json', $file); - if (file_exists($result_file)) { + $adjusted_file = str_replace('.meta.json', '.adjusted.json', $file); + $source_file = file_exists($adjusted_file) ? $adjusted_file : $result_file; + if (file_exists($source_file)) { $job_data['status'] = 'completed'; - $result = json_decode(file_get_contents($result_file), true); + $result = json_decode(file_get_contents($source_file), true); $job_data['score'] = $result['accessibility_score'] ?? ($result['score'] ?? null); $job_data['grade'] = $result['grade'] ?? null; $job_data['total_issues'] = $result['total_issues'] ?? null; $job_data['critical_count'] = $result['severity_counts']['critical'] ?? 0; $job_data['error_count'] = $result['severity_counts']['error'] ?? 0; + $job_data['score_adjusted'] = file_exists($adjusted_file); } $jobs[] = $job_data; @@ -1303,7 +1306,46 @@ function handleSaveAdjustedResult() { $result['score_breakdown']['penalty'] = $new_penalty; $result['score_breakdown']['adjusted'] = true; - // 3. Mark overridden checks in checks_performed + // 3. Recompute WCAG compliance badges based on non-dismissed CRITICAL/ERROR issues + $wcag_levels = [ + '1.1.1'=>'A','1.2.1'=>'A','1.2.2'=>'A','1.2.3'=>'A', + '1.2.4'=>'AA','1.2.5'=>'AA', + '1.3.1'=>'A','1.3.2'=>'A','1.3.3'=>'A', + '1.3.4'=>'AA','1.3.5'=>'AA', + '1.4.1'=>'A','1.4.2'=>'A', + '1.4.3'=>'AA','1.4.4'=>'AA','1.4.5'=>'AA', + '1.4.10'=>'AA','1.4.11'=>'AA','1.4.12'=>'AA','1.4.13'=>'AA', + '2.1.1'=>'A','2.1.2'=>'A','2.1.4'=>'A', + '2.2.1'=>'A','2.2.2'=>'A', + '2.3.1'=>'A', + '2.4.1'=>'A','2.4.2'=>'A','2.4.3'=>'A','2.4.4'=>'A', + '2.4.5'=>'AA','2.4.6'=>'AA','2.4.7'=>'AA', + '2.5.1'=>'A','2.5.2'=>'A','2.5.3'=>'A','2.5.4'=>'A', + '3.1.1'=>'A','3.1.2'=>'AA', + '3.2.1'=>'A','3.2.2'=>'A','3.2.3'=>'AA','3.2.4'=>'AA', + '3.3.1'=>'A','3.3.2'=>'A','3.3.3'=>'AA','3.3.4'=>'AA', + '4.1.1'=>'A','4.1.2'=>'A','4.1.3'=>'AA', + ]; + $failing_a = []; + $failing_aa = []; + if (isset($result['issues'])) { + foreach ($result['issues'] as $issue) { + if (!empty($issue['dismissed'])) continue; + $sev = strtoupper($issue['severity'] ?? ''); + if ($sev !== 'CRITICAL' && $sev !== 'ERROR') continue; + $crit = $issue['wcag_criterion'] ?? ''; + if (!$crit || !isset($wcag_levels[$crit])) continue; + $lvl = $wcag_levels[$crit]; + if ($lvl === 'A' && !in_array($crit, $failing_a)) $failing_a[] = $crit; + if ($lvl === 'AA' && !in_array($crit, $failing_aa)) $failing_aa[] = $crit; + } + } + $result['wcag_compliance']['level_a'] = empty($failing_a); + $result['wcag_compliance']['level_aa'] = empty($failing_a) && empty($failing_aa); + $result['wcag_compliance']['level_a_failures'] = $failing_a; + $result['wcag_compliance']['level_aa_failures'] = $failing_aa; + + // 4. Mark overridden checks in checks_performed if (!empty($overrides) && isset($result['checks_performed'])) { foreach ($result['checks_performed'] as &$check) { if (isset($overrides[$check['name']])) { @@ -1314,7 +1356,7 @@ function handleSaveAdjustedResult() { unset($check); } - // 4. Update Matterhorn checkpoints for H-type CPs linked to overridden checks + // 5. Update Matterhorn checkpoints for H-type CPs linked to overridden checks $check_to_cp = [ 'Color Contrast' => ['04'], 'Image Accessibility' => ['13'], diff --git a/cleanup.py b/cleanup.py index 33a9037..7368401 100644 --- a/cleanup.py +++ b/cleanup.py @@ -31,6 +31,7 @@ UPLOADS_DIR = Path(os.getenv('UPLOADS_DIR', '/opt/pdf-accessibility/uploads')) RESULTS_DIR = Path(os.getenv('RESULTS_DIR', '/opt/pdf-accessibility/results')) RATE_LIMIT_DIR = Path(os.getenv('RATE_LIMIT_DIR', '/opt/pdf-accessibility/rate_limits')) RETENTION_HOURS = int(os.getenv('RETENTION_HOURS', '24')) +RESULTS_RETENTION_HOURS = int(os.getenv('RESULTS_RETENTION_HOURS', '720')) # 30 days def get_age_hours(path: Path) -> float: @@ -38,11 +39,15 @@ def get_age_hours(path: Path) -> float: return (time.time() - path.stat().st_mtime) / 3600 -def cleanup_directory(directory: Path, patterns: list[str], dry_run: bool) -> tuple[int, int]: - """Delete files matching patterns older than RETENTION_HOURS. +def cleanup_directory(directory: Path, patterns: list[str], dry_run: bool, + retention_hours: int = None) -> tuple[int, int]: + """Delete files matching patterns older than retention_hours. Returns (files_deleted, bytes_freed). """ + if retention_hours is None: + retention_hours = RETENTION_HOURS + if not directory.exists(): logger.warning("Directory does not exist: %s", directory) return 0, 0 @@ -54,7 +59,7 @@ def cleanup_directory(directory: Path, patterns: list[str], dry_run: bool) -> tu for path in directory.glob(pattern): try: age = get_age_hours(path) - if age < RETENTION_HOURS: + if age < retention_hours: continue if path.is_dir(): @@ -100,19 +105,29 @@ def main(): if dry_run: logger.info("=== DRY RUN (pass --execute to delete) ===") - logger.info("Retention: %dh | Uploads: %s | Results: %s", - RETENTION_HOURS, UPLOADS_DIR, RESULTS_DIR) + logger.info("Retention: uploads=%dh, results=%dh | Uploads: %s | Results: %s", + RETENTION_HOURS, RESULTS_RETENTION_HOURS, UPLOADS_DIR, RESULTS_DIR) total_deleted = 0 total_freed = 0 - # Clean uploads (PDF files) - d, f = cleanup_directory(UPLOADS_DIR, ['*.pdf'], dry_run) + # Clean uploads (PDF files) — short retention (default 24h) + d, f = cleanup_directory(UPLOADS_DIR, ['*.pdf'], dry_run, RETENTION_HOURS) total_deleted += d total_freed += f - # Clean results (JSON, error logs — page images are on GCS with 7-day lifecycle) - d, f = cleanup_directory(RESULTS_DIR, ['*.result.json', '*.error.log', '*.meta.json'], dry_run) + # Clean error logs — short retention + d, f = cleanup_directory(RESULTS_DIR, ['*.error.log'], dry_run, RETENTION_HOURS) + total_deleted += d + total_freed += f + + # Clean result/meta/dismissed/overrides/adjusted JSONs — long retention (default 30 days) + d, f = cleanup_directory( + RESULTS_DIR, + ['*.result.json', '*.meta.json', '*.dismissed.json', '*.overrides.json', '*.adjusted.json'], + dry_run, + RESULTS_RETENTION_HOURS, + ) total_deleted += d total_freed += f diff --git a/css/styles.css b/css/styles.css index eefe490..297e895 100644 --- a/css/styles.css +++ b/css/styles.css @@ -1210,6 +1210,38 @@ h1::before { background: var(--error-bg); } +/* ── Issue Group Cards (table grouping) ── */ +.issue-group-card { + background: var(--surface); + border: 1px solid var(--border); + border-radius: 8px; + margin-bottom: 12px; + overflow: hidden; +} + +.issue-group-card.dismissed { + opacity: 0.5; +} + +.issue-group-header { + display: flex; + justify-content: space-between; + align-items: center; + padding: 10px 14px; + background: var(--surface-alt); + cursor: pointer; + user-select: none; +} + +.issue-group-header:hover { + background: var(--accent-subtle); +} + +.issue-group-details { + padding: 8px; + display: block; +} + .btn-undismiss { background: none; border: 1px solid var(--border); diff --git a/enterprise_pdf_checker.py b/enterprise_pdf_checker.py index 63fd4e7..555bcc9 100644 --- a/enterprise_pdf_checker.py +++ b/enterprise_pdf_checker.py @@ -936,6 +936,12 @@ class EnterprisePDFChecker: 4. Does it use color as the only means of conveying information? 5. Are there any accessibility concerns? 6. Quality rating (1-10) if this were to be used in a PDF +7. For images of people: describe their role, action, or function — not physical + appearance (race, ethnicity, age, gender, disability) unless directly relevant + to the image's informational purpose. A human reviewer will verify descriptions + of people. +8. If a brand name, logo, or product name is visible, use the specific brand name + in the alt text (e.g., "Scotch tape" not "adhesive tape", "Nike Air Max" not "sneakers"). Respond in JSON format: { @@ -946,7 +952,9 @@ Respond in JSON format: "color_only_info": true|false, "concerns": ["..."], "quality_rating": 1-10, - "recommendation": "..." + "recommendation": "...", + "contains_people": true|false, + "brands_detected": ["..."] }""" } ], @@ -1052,6 +1060,32 @@ Respond in JSON format: coordinates=coordinates ) + # Flag images containing people for human review + if analysis.get('contains_people'): + self.add_issue( + Severity.INFO, + "Images - People", + f"Page {page_num}, Image {img_num}: Image contains people — alt text description " + "should be verified by a human reviewer to ensure ethical and accurate representation.", + wcag_criterion="1.1.1", + recommendation="Review alt text to confirm it describes role/action rather than physical appearance.", + page_number=page_num, + coordinates=coordinates + ) + + # Note any detected brand names for reviewer awareness + brands = [b for b in analysis.get('brands_detected', []) if b] + if brands: + self.add_issue( + Severity.INFO, + "Images - Brands", + f"Page {page_num}, Image {img_num}: Brand name(s) detected: {', '.join(brands[:5])}. " + "Verify the alt text uses the specific brand name.", + wcag_criterion="1.1.1", + page_number=page_num, + coordinates=coordinates + ) + # Quality concerns — capped at 2 per image, downgraded to INFO # (these are advisory notes, not WCAG violations) concerns = analysis.get('concerns', []) diff --git a/history.html b/history.html index 3b5d3b0..0e1150f 100644 --- a/history.html +++ b/history.html @@ -49,10 +49,13 @@
'; } - // Document-wide issues + // Document-wide issues — group table issues by sub-type if (documentWide.length > 0) { + const tableIssues = documentWide.filter(i => i.category === 'Tables' && !i.page_number); + const otherIssues = documentWide.filter(i => !(i.category === 'Tables' && !i.page_number)); + + // Group table issues: scope warnings vs caption infos + const tableGroups = {}; + tableIssues.forEach(issue => { + const desc = issue.description || ''; + const key = desc.includes('scope') ? 'scope' + : desc.includes('Caption') ? 'caption' + : desc.includes('header') ? 'header' + : 'other'; + if (!tableGroups[key]) tableGroups[key] = []; + tableGroups[key].push(issue); + }); + + const groupLabels = { scope: 'Table Scope Issues', caption: 'Table Caption Issues', header: 'Table Header Issues', other: 'Table Issues' }; + const groupSeverity = { scope: 'WARNING', caption: 'INFO', header: 'ERROR', other: 'WARNING' }; + + let tableGroupHtml = ''; + Object.entries(tableGroups).forEach(([key, groupIssues]) => { + if (!groupIssues.length) return; + const groupIndices = groupIssues.map(i => allIssues.indexOf(i)); + const allDismissed = groupIndices.every(idx => dismissedIndices.has(idx)); + const label = groupLabels[key]; + const sev = groupSeverity[key]; + const groupId = `table-group-${key}`; + tableGroupHtml += ` +
+
+
+ ${sev} + ${label} (${groupIssues.length}) +
+
+ + +
+
+
+ ${groupIssues.map(i => createIssueCard(i, issueNumberMap.get(i), allIssues.indexOf(i))).join('')} +
+
`; + }); + + const visibleCount = otherIssues.length + Object.keys(tableGroups).length; html += `

- Document-Wide Issues (${documentWide.length}) + Document-Wide Issues (${visibleCount})

-
${documentWide.map(i => createIssueCard(i, issueNumberMap.get(i), allIssues.indexOf(i))).join('')}
+
+ ${tableGroupHtml} + ${otherIssues.map(i => createIssueCard(i, issueNumberMap.get(i), allIssues.indexOf(i))).join('')} +
`; } @@ -176,6 +245,25 @@ function togglePageSection(pageNum) { } } +function toggleGroupDetails(groupId) { + const section = document.getElementById(groupId); + const toggle = document.getElementById(`toggle-${groupId}`); + if (!section) return; + if (section.style.display === 'none') { + section.style.display = 'block'; + if (toggle) toggle.innerHTML = '▼'; + } else { + section.style.display = 'none'; + if (toggle) toggle.innerHTML = '▶'; + } +} + +function dismissIssueGroup(indices) { + indices.forEach(idx => { + if (!dismissedIndices.has(idx)) dismissIssue(idx); + }); +} + function scrollToPage(pageNum) { const el = document.getElementById(`page-${pageNum}`); if (el) { @@ -567,6 +655,25 @@ function recalculateScore() { updateStatsGrid(adj_crit, adj_err); updateBreakdownSummary(new_passed, bd.checks_total, new_base, new_penalty, new_score); + + // 6. Recompute WCAG compliance badges based on remaining non-dismissed issues + const failingA = [], failingAA = []; + allIssues.forEach((issue, idx) => { + if (dismissedIndices.has(idx)) return; + const sev = (issue.severity || '').toUpperCase(); + if (sev !== 'CRITICAL' && sev !== 'ERROR') return; + const crit = issue.wcag_criterion; + if (!crit) return; + const lvl = WCAG_LEVELS[crit]; + if (lvl === 'A' && !failingA.includes(crit)) failingA.push(crit); + if (lvl === 'AA' && !failingAA.includes(crit)) failingAA.push(crit); + }); + displayWcagCompliance({ + level_a: failingA.length === 0, + level_aa: failingA.length === 0 && failingAA.length === 0, + level_a_failures: failingA, + level_aa_failures: failingAA, + }); } function updateStatsGrid(adj_crit, adj_err) {