diff --git a/.gitignore b/.gitignore index b3bd7ae..9bcec96 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,18 @@ htmlcov/ uploads/ results/ logs/ + +# Frontend (Next.js) +frontend/node_modules/ +frontend/.next/ +frontend/.env.local +frontend/out/ + +# Backend (uv / Python) +backend/.venv/ +backend/.env + +# Supabase local +supabase/.branches/ +supabase/.temp/ +frontend/tsconfig.tsbuildinfo diff --git a/CLAUDE.md b/CLAUDE.md index 4e3b1f0..173261d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,100 +1,81 @@ -# CLAUDE.md +# PDF Accessibility SaaS — Claude Code Briefing -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. +## What this is -## Project Overview +A SaaS product by **Aimpress** that checks PDF documents for WCAG 2.1 AA / PDF/UA-1 compliance. +Regulatory tailwind: EU Accessibility Act (June 2025) requires accessible documents from banks, e-commerce, e-learning, and government. -AI-powered PDF accessibility checker that validates documents against WCAG 2.1 Level A & AA standards. Combines traditional PDF analysis (pypdf, pdfplumber) with AI models (Anthropic Claude, Google Cloud Vision) for ~95% automated WCAG coverage. Branded for "Oliver" (Montserrat font, black/#FFC407 palette). +## Stack -## Commands +- **Backend:** FastAPI + Python (single language — PHP removed) +- **Frontend:** Next.js 15 + shadcn/ui + Tailwind CSS +- **Auth:** Supabase Auth (email + magic link) +- **DB:** PostgreSQL 16 + Alembic migrations + Row-Level Security +- **Queue:** Celery + Redis +- **Storage:** MinIO (S3-compatible, self-hosted) +- **Deploy:** Docker Compose + Caddy on homelab Proxmox VM +- **CI:** Forgejo Actions -### Testing -```bash -source venv/bin/activate -pytest tests/ -v # Run all tests (31 tests) -pytest tests/ --cov=. 
--cov-report=html # With coverage report -pytest tests/test_checker.py -v # Single test file -pytest tests/ -m "not integration" # Skip integration tests +## Core AI/Checking Engine (DO NOT MODIFY without strong reason) + +- `enterprise_pdf_checker.py` — 30+ WCAG checks, ~2000 lines, uses Claude Sonnet + Google Vision +- `pdf_remediation.py` — auto-fix engine +- `report_generator.py` — converts JSON results → HTML/PDF report + +These are the product's moat. All other files wrap them. + +## Project structure + +``` +PDF-accessibility-saas/ +├── backend/ # FastAPI app +│ ├── app/ +│ │ ├── config.py # pydantic-settings +│ │ ├── deps.py # Supabase JWT auth dependency +│ │ ├── db.py # SQLAlchemy async engine +│ │ ├── main.py # FastAPI app entry +│ │ ├── routers/ # jobs, auth, billing +│ │ ├── services/ # checker, storage, queue +│ │ └── models/ # job, workspace +│ ├── alembic/ # DB migrations +│ └── pyproject.toml # uv-based deps +│ +├── frontend/ # Next.js 15 +│ ├── app/ +│ │ ├── (marketing)/ # Landing, Pricing (public) +│ │ ├── (auth)/ # Login, Signup (Supabase) +│ │ └── (app)/ # Dashboard, Jobs, Settings (auth-gated) +│ └── lib/supabase/ # SSR client/server helpers +│ +├── enterprise_pdf_checker.py # Core WCAG engine (from Oliver, reused 1:1) +├── pdf_remediation.py # Auto-fix engine +├── report_generator.py # HTML/PDF report generator +├── docker-compose.yml # Dev: postgres, redis, minio, api, celery +├── docker-compose.prod.yml # Prod: + nextjs, caddy +├── Caddyfile # Auto-SSL for pdfaccess.ai-impress.com +└── .forgejo/workflows/ci.yml # CI: test → build → deploy ``` -### Running Locally -```bash -source venv/bin/activate -php -S localhost:8000 # Start PHP dev server -``` +## Conventions -### Docker -```bash -docker-compose up # Development stack -docker-compose -f docker-compose.prod.yml up -d # Production stack -docker-compose exec worker pytest tests/ -v # Tests in container -``` +- All env vars via `pydantic-settings` in `backend/app/config.py` +- Logging via 
`structlog` (JSON in prod, pretty in dev) +- HTTP clients via `httpx` (async) +- Migrations via `alembic` — never raw ALTER TABLE in code +- Auth: Supabase JWT verified in `backend/app/deps.py::get_current_user()` +- Every endpoint requires workspace isolation (workspace_id from JWT) +- RLS active on jobs, workspaces, workspace_members, usage_events -### CLI Usage -```bash -python enterprise_pdf_checker.py document.pdf --output report.json # Full check -python enterprise_pdf_checker.py document.pdf --quick # Skip AI checks -python pdf_remediation.py document.pdf --output fixed.pdf --all # Auto-remediate -``` +## Branding -## Architecture +- Primary color: `#6366F1` (indigo) +- Font: Inter +- Product name: "Aimpress PDF Accessibility" +- Tagline: "WCAG-compliant PDFs in 60 seconds" +- Domain: `pdfaccess.ai-impress.com` -### Three Interfaces -- **Web UI** (`index.html` + `js/` + `css/`) — vanilla JS, drag-drop upload, visual inspector -- **REST API** (`api.php`) — PHP endpoints: upload, check, status, result, remediate, download -- **CLI** (`enterprise_pdf_checker.py`) — direct Python execution +## Pricing -### Request Flow (Docker/Production) -1. `api.php` receives upload, validates via `auth.php`, saves to `uploads/` -2. Job pushed to Redis queue (`pdf:queue`) and tracked in PostgreSQL -3. `worker.py` daemon pops jobs, runs `EnterprisePDFChecker.check_all()` -4. Results written to `results/{job_id}.result.json`, DB updated -5. 
Client polls `api.php?action=status` then fetches results - -### Key Source Files -| File | Purpose | -|------|---------| -| `enterprise_pdf_checker.py` | Core engine — 30+ WCAG checks, AI image analysis, scoring | -| `api.php` | REST API — file handling, job queue integration, CORS | -| `auth.php` | Authentication — Bearer/X-API-Key, dev mode localhost bypass | -| `worker.py` | Background daemon — Redis queue consumer, graceful shutdown | -| `db_manager.py` | PostgreSQL ORM — jobs CRUD, audit logging | -| `redis_queue.py` | Redis operations — job queue, status tracking, rate limiting | -| `pdf_remediation.py` | Auto-fix — metadata, tagging, language tags | -| `retry_helper.py` | Exponential backoff for external API calls | -| `report_generator.py` | Result formatting and report generation | -| `logger_config.py` | Structured logging with rotation (10MB max) | -| `cleanup.py` | File retention cleanup (24h for uploads/results) | - -### Data Layer -- **PostgreSQL** — `jobs` table (status, score, grade, result JSON), `audit_log` table. Schema in `db/init.sql` -- **Redis** — Job queue (`pdf:queue`), status tracking (`pdf:status:*`), rate limiting (`pdf:rate:*`) - -### External APIs -- **Anthropic Claude 3.5 Sonnet** — alt text validation, image classification, text-in-images -- **Google Cloud Vision** — OCR, text detection -- **veraPDF** (optional) — PDF/UA-1 compliance validation - -### Frontend Structure -`js/app.js` (controller), `js/upload.js` (drag-drop), `js/api.js` (HTTP client), `js/results.js` (display), `js/page-viewer.js` (PDF inspector), `js/batch.js` (batch processing), `js/utils.js` (helpers) - -## Tech Stack -- **Backend**: Python 3.11 (processing), PHP 8.2 (API) -- **Frontend**: Vanilla HTML/CSS/JS -- **Database**: PostgreSQL 16, Redis 7 -- **Infrastructure**: Docker, Nginx/Apache, PHP-FPM -- **System deps**: Tesseract OCR, Poppler, Ghostscript - -## Configuration -Environment variables via `.env` (see `.env.example`). 
Key settings: -- `ANTHROPIC_API_KEY` / `GOOGLE_API_KEY` — AI API credentials -- `DEV_MODE=true` — bypasses auth for localhost requests -- `DB_HOST`, `DB_PORT`, `REDIS_HOST`, `REDIS_PORT` — infrastructure endpoints -- Production uses ports 1220 (Redis) and 1221 (PostgreSQL) to avoid host conflicts - -## Testing -- pytest with markers: `integration`, `slow`, `api` -- Config in `pytest.ini` -- Fixtures in `tests/conftest.py` -- Sample PDFs in `Test_files/` -- No linter currently configured +- Free: 5 PDF/month +- Pro ($29/mo): 100 PDF + auto-fix +- Business ($149/mo): unlimited + API + team diff --git a/backend/alembic/script.py.mako b/backend/alembic/script.py.mako new file mode 100644 index 0000000..fbc4b07 --- /dev/null +++ b/backend/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/frontend/app/(app)/jobs/[id]/page.tsx b/frontend/app/(app)/jobs/[id]/page.tsx new file mode 100644 index 0000000..48cdde3 --- /dev/null +++ b/frontend/app/(app)/jobs/[id]/page.tsx @@ -0,0 +1,264 @@ +"use client"; +import { useEffect, useState } from "react"; +import { useParams } from "next/navigation"; + +interface Issue { + severity: string; + category: string; + description: string; + recommendation: string; + wcag_criterion: string; + page_number?: number; +} + +interface JobResult { + id: string; + filename: string; + status: string; + accessibility_score: number | null; + result: { + accessibility_score: number; + severity_counts: Record; + wcag_compliance: { level_a: boolean; level_aa: boolean }; + issues: Issue[]; + next_steps: Array<{ priority: number; category: string; action: string; wcag: string }>; + score_breakdown?: { adjusted: boolean }; + stats?: { duration: number; api_calls: number; total_cost_estimate: number }; + } | null; + error_message: string | null; +} + +const SEV_COLOR: Record = { + CRITICAL: "bg-red-100 text-red-700 border-red-200", + ERROR: "bg-orange-100 text-orange-700 border-orange-200", + WARNING: "bg-yellow-100 text-yellow-700 border-yellow-200", + INFO: "bg-blue-100 text-blue-700 border-blue-200", +}; + +const SEV_ICON: Record = { + CRITICAL: "🚨", ERROR: "❌", WARNING: "⚠️", INFO: "ℹ️", +}; + +function ScoreRing({ score }: { score: number }) { + const color = score >= 80 ? "#10b981" : score >= 60 ? "#f59e0b" : "#ef4444"; + const grade = score >= 90 ? "A" : score >= 80 ? "B" : score >= 70 ? "C" : score >= 60 ? "D" : "F"; + return ( +
+
+ + + + +
+
{score}
+
/ 100
+
+
+
+
{grade}
+
Grade
+
+
+  );
+}
+
+/**
+ * Report page for one accessibility-check job.
+ *
+ * Reads the job id from the route, polls `GET /api/v1/jobs/{id}` every 3 s
+ * until the job reaches a terminal status ("completed" or "failed"), and
+ * renders the loading / in-progress / failed / report views from that state.
+ */
+export default function JobReportPage() {
+  const { id } = useParams<{ id: string }>();
+  // Explicit type argument: a bare `useState(null)` infers the type `null`,
+  // which makes every later `job.*` access a type error.
+  const [job, setJob] = useState<JobResult | null>(null);
+  const [loading, setLoading] = useState(true);
+  const [filter, setFilter] = useState("ALL");
+
+  useEffect(() => {
+    // Set by the cleanup so a response that arrives after unmount (or after
+    // `id` changed) cannot write stale state or schedule another request.
+    let cancelled = false;
+    let timer: ReturnType<typeof setTimeout> | undefined;
+
+    const poll = async () => {
+      let keepPolling = true;
+      try {
+        const res = await fetch(`/api/v1/jobs/${id}`, { credentials: "include" });
+        if (cancelled) return;
+        if (res.ok) {
+          const data: JobResult = await res.json();
+          if (cancelled) return;
+          setJob(data);
+          setLoading(false);
+          keepPolling = data.status !== "completed" && data.status !== "failed";
+        } else if (res.status >= 400 && res.status < 500) {
+          // A client error (unknown id, missing session, ...) will not resolve
+          // by retrying: leave the loading state so the "Job not found" branch
+          // can render, and stop polling.
+          setLoading(false);
+          keepPolling = false;
+        }
+        // 5xx: treated as transient; keep the current view and retry.
+      } catch (err: unknown) {
+        // Network failure or unparsable body: log it and retry on the next tick.
+        console.error("Failed to poll job status", err);
+      }
+      // Chained timeouts instead of setInterval: the next request is scheduled
+      // only after the previous one settled, so slow responses never overlap.
+      if (!cancelled && keepPolling) {
+        timer = setTimeout(poll, 3000);
+      }
+    };
+
+    void poll();
+    return () => {
+      cancelled = true;
+      clearTimeout(timer);
+    };
+  }, [id]);
+
+  /** Asks the backend to start auto-remediation for this job. */
+  async function handleRemediate() {
+    try {
+      const res = await fetch(`/api/v1/jobs/${id}/remediate`, { method: "POST", credentials: "include" });
+      if (!res.ok) {
+        console.error(`Remediation request failed with HTTP ${res.status}`);
+      }
+    } catch (err: unknown) {
+      // Called from an event handler: an uncaught rejection would go unhandled.
+      console.error("Remediation request failed", err);
+    }
+  }
+
+  /**
+   * Fetches the report in the given format and saves it as
+   * `report-{id}.{format}` through a temporary object URL.
+   */
+  async function downloadReport(format: "html" | "json") {
+    try {
+      const res = await fetch(`/api/v1/jobs/${id}?format=${format}`, { credentials: "include" });
+      if (!res.ok) {
+        // Without this check the error response body would be saved as the report.
+        console.error(`Report download failed with HTTP ${res.status}`);
+        return;
+      }
+      const blob = await res.blob();
+      const url = URL.createObjectURL(blob);
+      const a = document.createElement("a");
+      a.href = url;
+      a.download = `report-${id}.${format}`;
+      a.click();
+      // Release the blob once the click has been dispatched; object URLs
+      // otherwise stay alive until the document is unloaded.
+      setTimeout(() => URL.revokeObjectURL(url), 0);
+    } catch (err: unknown) {
+      console.error("Report download failed", err);
+    }
+  }
+
+  if (loading) {
+    return (
+
+
+
⚙️
+

Loading report...

+
+
+ ); + } + + if (!job) return
Job not found
; + + if (job.status === "pending" || job.status === "processing") { + return ( +
+
🔍
+

Checking accessibility...

+

{job.filename}

+
+
+
+

Running 30+ WCAG checks · This takes 15–60 seconds

+
+ ); + } + + if (job.status === "failed") { + return ( +
+
+

Check failed

+

{job.error_message || "Unknown error"}

+
+ ); + } + + const r = job.result!; + const sc = r.severity_counts || {}; + const issues = (r.issues || []).filter((i) => filter === "ALL" || i.severity === filter); + + return ( +
+ {/* Header */} +
+
+

{job.filename}

+

WCAG 2.1 AA + PDF/UA-1 Accessibility Report

+
+
+ + + +
+
+ + {/* Score + WCAG compliance */} +
+
+ +
+ {Object.entries(sc).map(([sev, count]) => ( +
+
{count as number}
+
{sev.toLowerCase()}
+
+ ))} +
+
+ +
+

WCAG 2.1 Conformance

+ {[ + { label: "Level A", pass: r.wcag_compliance?.level_a }, + { label: "Level AA", pass: r.wcag_compliance?.level_aa }, + ].map((lvl) => ( +
+ WCAG 2.1 {lvl.label} + {lvl.pass ? "✓ Pass" : "✗ Fail"} +
+ ))} +
+
+ + {/* Issues */} +
+
+

Issues ({r.issues?.length || 0})

+
+ {["ALL", "CRITICAL", "ERROR", "WARNING", "INFO"].map((sev) => ( + + ))} +
+
+ + {issues.length === 0 ? ( +
No issues for this filter
+ ) : ( +
+ {issues.map((issue, i) => ( +
+
+ + {SEV_ICON[issue.severity]} {issue.severity} + +
+
+ {issue.category} + {issue.wcag_criterion && ( + {issue.wcag_criterion} + )} + {issue.page_number && ( + Page {issue.page_number} + )} +
+

{issue.description}

+ {issue.recommendation && ( +

{issue.recommendation}

+ )} +
+
+
+ ))} +
+ )} +
+ + {/* Next steps */} + {r.next_steps && r.next_steps.length > 0 && ( +
+
+

Recommended Next Steps

+
+
+ {r.next_steps.slice(0, 10).map((step, i) => ( +
+ {i + 1} +
+ {step.wcag} + {step.action} +
+
+ ))} +
+
+ )} +
+ ); +} diff --git a/frontend/lib/supabase/server.ts b/frontend/lib/supabase/server.ts index b12ec08..63605c9 100644 --- a/frontend/lib/supabase/server.ts +++ b/frontend/lib/supabase/server.ts @@ -1,4 +1,4 @@ -import { createServerClient } from "@supabase/ssr"; +import { createServerClient, type CookieOptions } from "@supabase/ssr"; import { cookies } from "next/headers"; export async function createClient() { @@ -9,7 +9,7 @@ export async function createClient() { { cookies: { getAll: () => cookieStore.getAll(), - setAll: (cookiesToSet) => { + setAll: (cookiesToSet: Array<{ name: string; value: string; options?: CookieOptions }>) => { cookiesToSet.forEach(({ name, value, options }) => cookieStore.set(name, value, options) ); diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js new file mode 100644 index 0000000..12a703d --- /dev/null +++ b/frontend/postcss.config.js @@ -0,0 +1,6 @@ +module.exports = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +};