hp-studios-ai-content-agent/backend/app/api/documents.py
DJP 72c8a0d0fe Initial import — HP Studios AI Content Agent
Full-stack app that turns HP customer briefs (master asset + regional
supporting docs) into a set of branded Word deliverables via a RAG +
agent pipeline.

Stack
- FastAPI + SQLAlchemy + pgvector + RQ (backend, Python 3.12)
- React + Vite + TypeScript + Tailwind + TanStack Query (frontend)
- Claude Opus 4.7 (generation) + Haiku 4.5 (translation/OCR)
- Voyage voyage-3 or OpenAI text-embedding-3-small (embeddings)
- python-docx (branded Word output, Montserrat + HP blue)
- Docker Compose (5 services)

Features
- 6 built-in deliverable types (leadership themes, regional enrichment,
  LinkedIn posts, webinar spec, infographic specs, ABM enablement)
- Data-driven deliverable types: admins add new types at runtime via
  prompt + JSON schema + template_json — no code, no deploy
- Generic schema-driven review form + generic Word template renderer
- Document ingestion pipeline with translation, chunking, pgvector RAG
- Pluggable auth provider (password now, Entra SSO later); admin/user roles
- Re-roll / retry on every deliverable; cascading delete; brief editing;
  inline document upload; progress hints; router-level ErrorBoundary
- Admin panel with test-render preview for new deliverable types
- Help page at /help with architecture overview and usage guide

82 backend tests passing, 18 skipped (gated live-API tests).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 17:11:25 -04:00

148 lines
4.6 KiB
Python

from __future__ import annotations

import logging
import os
import uuid
from pathlib import Path
from typing import Any, List, Optional

from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
from pydantic import BaseModel
from sqlalchemy.orm import Session

from app.core.config import settings
from app.core.deps import get_current_user
from app.db.models import Brief, Document, User
from app.db.session import get_db

router = APIRouter(tags=["documents"])
# ---------------------------------------------------------------------------
# Schemas
# ---------------------------------------------------------------------------
class DocumentOut(BaseModel):
id: str
brief_id: str
kind: str
filename: str
storage_path: str
mime_type: Optional[str]
language: Optional[str]
page_count: Optional[int]
ingestion_status: str
model_config = {"from_attributes": True}
def _doc_out(d: Document) -> DocumentOut:
return DocumentOut(
id=str(d.id),
brief_id=str(d.brief_id),
kind=d.kind,
filename=d.filename,
storage_path=d.storage_path,
mime_type=d.mime_type,
language=d.language,
page_count=d.page_count,
ingestion_status=d.ingestion_status,
)
def _get_brief_or_403(brief_id: str, db: Session, current_user: User) -> Brief:
brief: Optional[Brief] = db.query(Brief).filter(Brief.id == uuid.UUID(brief_id)).first()
if brief is None:
raise HTTPException(status_code=404, detail="Brief not found")
if current_user.role != "admin" and str(brief.created_by) != str(current_user.id):
raise HTTPException(status_code=403, detail="Not authorised")
return brief
# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@router.post("/briefs/{brief_id}/documents", response_model=DocumentOut, status_code=status.HTTP_201_CREATED)
async def upload_document(
brief_id: str,
file: UploadFile = File(...),
kind: str = Form(...),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
) -> Any:
if kind not in ("master", "supporting"):
raise HTTPException(status_code=400, detail="kind must be 'master' or 'supporting'")
brief = _get_brief_or_403(brief_id, db, current_user)
# Build storage path: /app/data/uploads/<brief_id>/<uuid>_<filename>
upload_dir = Path(settings.UPLOAD_DIR) / str(brief.id)
upload_dir.mkdir(parents=True, exist_ok=True)
safe_filename = file.filename or "upload"
dest_filename = f"{uuid.uuid4()}_{safe_filename}"
dest_path = upload_dir / dest_filename
content = await file.read()
dest_path.write_bytes(content)
doc = Document(
brief_id=brief.id,
kind=kind,
filename=safe_filename,
storage_path=str(dest_path),
mime_type=file.content_type,
ingestion_status="pending",
)
db.add(doc)
db.commit()
# Enqueue ingestion task (lazy import to tolerate missing worker deps at boot)
try:
from app.workers.tasks import enqueue_ingest # noqa: PLC0415
enqueue_ingest(str(doc.id))
except Exception: # noqa: BLE001
pass # worker not available — ingestion will be triggered manually
return _doc_out(doc)
@router.get("/briefs/{brief_id}/documents", response_model=List[DocumentOut])
def list_documents(
brief_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
) -> Any:
brief = _get_brief_or_403(brief_id, db, current_user)
docs = (
db.query(Document)
.filter(Document.brief_id == brief.id)
.order_by(Document.uploaded_at)
.all()
)
return [_doc_out(d) for d in docs]
@router.delete("/documents/{document_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_document(
document_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
) -> None:
doc: Optional[Document] = db.query(Document).filter(Document.id == uuid.UUID(document_id)).first()
if doc is None:
raise HTTPException(status_code=404, detail="Document not found")
# Authorisation: check the parent brief
brief: Optional[Brief] = db.query(Brief).filter(Brief.id == doc.brief_id).first()
if brief and current_user.role != "admin" and str(brief.created_by) != str(current_user.id):
raise HTTPException(status_code=403, detail="Not authorised")
# Remove file from disk
try:
Path(doc.storage_path).unlink(missing_ok=True)
except Exception: # noqa: BLE001
pass
db.delete(doc)
db.commit()