video-accessibility/backend/app/api/v1/routes_glossaries.py
Vadym Samoilenko 4645e67611 fix(glossary-list): show real embedding progress in glossary list view
- Add current_version_embedding_status/embedded_count/term_count to GlossaryResponse
- Batch-fetch current versions in list endpoint (single extra query, not N queries)
- Add get_versions_by_ids() helper to glossary_service
- Fix GlossaryList.tsx: embeddingBadge('') → embeddingBadge(g) with real status + pct

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 19:00:56 +01:00

326 lines
13 KiB
Python

"""
Glossary management endpoints.
Access:
- All glossary mutations (upload, activate, archive) → Admin or PM of the client
- Glossary reads (list, detail, terms) → Admin, PM, or staff members
Routes are nested under /clients/{client_id}/glossaries to keep ownership clear.
"""
from __future__ import annotations
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
from ...core.authz import MembershipContext, assert_user_in_org, get_membership_context
from ...core.logging import get_logger
from ...models.audit_log import AuditAction
from ...models.glossary import (
GlossaryDetailResponse,
GlossaryResponse,
GlossaryVersionResponse,
)
from ...models.organization import OrgRole
from ...services import audit_logger as audit_svc
from ...services import glossary_service as svc
logger = get_logger(__name__)

# Upload size ceiling, in megabytes.
_MAX_FILE_SIZE_MB = 50

# MIME types accepted for glossary uploads: the Office Open XML spreadsheet
# type (.xlsx) and the legacy Excel type.
_ALLOWED_CONTENT_TYPES = {
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.ms-excel",
}

# Every route in this module is nested under the owning client's path.
router = APIRouter(prefix="/clients/{client_id}/glossaries", tags=["glossaries"])
# ── List glossaries ───────────────────────────────────────────────────────────
@router.get("", response_model=list[GlossaryResponse])
async def list_glossaries(
    client_id: str,
    ctx: MembershipContext = Depends(get_membership_context),
):
    """List all active glossaries for a client.

    Each entry is enriched with the embedding status and counts of the
    glossary's current version. Those versions are loaded with a single
    ``get_versions_by_ids`` call instead of one lookup per glossary.
    """
    assert_user_in_org(ctx, client_id, OrgRole.VIEWER)

    client_glossaries = await svc.get_glossaries_for_client(client_id)

    # Glossaries without a current version contribute nothing to the lookup.
    current_ids = []
    for item in client_glossaries:
        if item.current_version_id:
            current_ids.append(item.current_version_id)
    versions_by_id = await svc.get_versions_by_ids(current_ids)

    responses = []
    for item in client_glossaries:
        current = versions_by_id.get(item.current_version_id)
        responses.append(_to_response(item, current))
    return responses
# ── Upload new glossary ───────────────────────────────────────────────────────
@router.post("", response_model=GlossaryDetailResponse, status_code=201)
async def upload_glossary(
    client_id: str,
    file: UploadFile = File(..., description="xlsx glossary file"),
    name: str = Form(...),
    source_locale: str = Form(..., description="BCP-47 source locale, e.g. en-GB"),
    source_locale_col: str = Form(..., description="xlsx column header for the source language, e.g. en_gb"),
    description: str | None = Form(None),
    change_note: str | None = Form(None),
    ctx: MembershipContext = Depends(get_membership_context),
):
    """Create a glossary for a client from an uploaded xlsx file.

    Requires ``OrgRole.MANAGER`` on the client. The file is checked by
    ``_validate_xlsx`` and then handed to ``glossary_service.ingest_glossary``;
    a ``ValueError`` raised there is reported as HTTP 422. On success the
    upload is written to the audit log and the new glossary is returned
    together with its versions.
    """
    assert_user_in_org(ctx, client_id, OrgRole.MANAGER)
    _validate_xlsx(file)

    try:
        created, first_version = await svc.ingest_glossary(
            client_id=client_id,
            name=name,
            source_locale=source_locale,
            source_locale_col=source_locale_col,
            file=file,
            user_id=str(ctx.user.id),
            description=description,
            change_note=change_note,
        )
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    audit_details = {
        "term_count": first_version.term_count,
        "source_locale": source_locale,
    }
    await audit_svc.audit_logger.log_action(
        action=AuditAction.GLOSSARY_UPLOAD,
        description=f"Glossary '{name}' uploaded for client {client_id}",
        user=ctx.user,
        resource_type="glossary",
        resource_id=created.id,
        details=audit_details,
    )

    all_versions = await svc.get_versions(created.id)
    return _to_detail_response(created, all_versions)
# ── Get glossary detail ───────────────────────────────────────────────────────
@router.get("/{glossary_id}", response_model=GlossaryDetailResponse)
async def get_glossary(
    client_id: str,
    glossary_id: str,
    ctx: MembershipContext = Depends(get_membership_context),
):
    """Return one glossary of a client together with all of its versions.

    Requires ``OrgRole.VIEWER`` on the client. Responds with HTTP 404 when
    the glossary does not exist or is owned by a different client.
    """
    assert_user_in_org(ctx, client_id, OrgRole.VIEWER)

    found = await svc.get_glossary(glossary_id)
    # A glossary owned by another client is reported exactly like a missing one.
    if not (found and found.client_id == client_id):
        raise HTTPException(status_code=404, detail="Glossary not found")

    return _to_detail_response(found, await svc.get_versions(glossary_id))
# ── Browse terms ──────────────────────────────────────────────────────────────
@router.get("/{glossary_id}/terms")
async def list_terms(
    client_id: str,
    glossary_id: str,
    version_id: str | None = Query(None, description="Specific version; defaults to active"),
    search: str | None = Query(None),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
    ctx: MembershipContext = Depends(get_membership_context),
):
    """Return one page of terms from a glossary version.

    Requires ``OrgRole.VIEWER`` on the client. When ``version_id`` is omitted
    (or empty) the glossary's current version is used; a glossary with no
    current version yields an empty page.

    Raises:
        HTTPException: 404 when the glossary does not exist or belongs to a
            different client, or when ``version_id`` is not a version of
            this glossary.
    """
    assert_user_in_org(ctx, client_id, OrgRole.VIEWER)

    glossary = await svc.get_glossary(glossary_id)
    if not glossary or glossary.client_id != client_id:
        raise HTTPException(status_code=404, detail="Glossary not found")

    if version_id:
        # The version id comes from the caller, and the term lookup below is
        # keyed on it alone. Confirm it belongs to this glossary so a member
        # of one client cannot page through another client's terms.
        versions = await svc.get_versions(glossary_id)
        if not any(str(v.id) == version_id for v in versions):
            raise HTTPException(status_code=404, detail="Version not found")
        vid = version_id
    else:
        vid = glossary.current_version_id

    if not vid:
        return {"terms": [], "total": 0, "page": page, "page_size": page_size}

    terms, total = await svc.get_terms_page(vid, search=search, page=page, page_size=page_size)
    return {
        # Expose only the source term and its translations from each record.
        "terms": [{"source_term": t["source_term"], "translations": t["translations"]} for t in terms],
        "total": total,
        "page": page,
        "page_size": page_size,
    }
# ── Upload new version ────────────────────────────────────────────────────────
@router.post("/{glossary_id}/versions", response_model=GlossaryVersionResponse, status_code=201)
async def upload_version(
    client_id: str,
    glossary_id: str,
    file: UploadFile = File(...),
    source_locale_col: str = Form(...),
    change_note: str | None = Form(None),
    ctx: MembershipContext = Depends(get_membership_context),
):
    """Upload a new xlsx file as a new version of an existing glossary.

    Requires ``OrgRole.MANAGER`` on the client. Responds with HTTP 404 when
    the glossary is missing or owned by another client, and with HTTP 422
    when ``glossary_service.ingest_new_version`` raises ``ValueError``.
    A successful upload is recorded in the audit log.
    """
    assert_user_in_org(ctx, client_id, OrgRole.MANAGER)
    _validate_xlsx(file)

    target = await svc.get_glossary(glossary_id)
    if not (target and target.client_id == client_id):
        raise HTTPException(status_code=404, detail="Glossary not found")

    try:
        new_version = await svc.ingest_new_version(
            glossary_id=glossary_id,
            source_locale_col=source_locale_col,
            file=file,
            user_id=str(ctx.user.id),
            change_note=change_note,
        )
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    audit_details = {
        "term_count": new_version.term_count,
        "version_number": new_version.version_number,
    }
    await audit_svc.audit_logger.log_action(
        action=AuditAction.GLOSSARY_VERSION_UPLOAD,
        description=f"New glossary version uploaded for glossary {glossary_id}",
        user=ctx.user,
        resource_type="glossary_version",
        resource_id=new_version.id,
        details=audit_details,
    )
    return _version_to_response(new_version)
# ── Activate a version ────────────────────────────────────────────────────────
@router.post("/{glossary_id}/activate")
async def activate_version(
    client_id: str,
    glossary_id: str,
    version_id: str = Form(...),
    ctx: MembershipContext = Depends(get_membership_context),
):
    """Activate a version of a glossary.

    Requires ``OrgRole.MANAGER`` on the client; ``version_id`` is read from
    the form body. Responds with HTTP 404 when the glossary is missing or
    owned by another client, or when ``glossary_service.activate_version``
    raises ``ValueError``. A successful activation is audit-logged.
    """
    assert_user_in_org(ctx, client_id, OrgRole.MANAGER)

    target = await svc.get_glossary(glossary_id)
    if not (target and target.client_id == client_id):
        raise HTTPException(status_code=404, detail="Glossary not found")

    try:
        await svc.activate_version(glossary_id, version_id)
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc

    await audit_svc.audit_logger.log_action(
        action=AuditAction.GLOSSARY_ACTIVATE,
        description=f"Glossary version {version_id} activated",
        user=ctx.user,
        resource_type="glossary",
        resource_id=glossary_id,
        details={"version_id": version_id},
    )

    result = {"status": "ok"}
    result["active_version_id"] = version_id
    return result
# ── Re-queue embedding ────────────────────────────────────────────────────────
@router.post("/{glossary_id}/versions/{version_id}/reembed", status_code=202)
async def reembed_version(
    client_id: str,
    glossary_id: str,
    version_id: str,
    ctx: MembershipContext = Depends(get_membership_context),
):
    """Re-queue the embedding task for a glossary version (resets failed/pending/stuck embeds).

    Requires ``OrgRole.MANAGER`` on the client. The version document is reset
    to ``embedding_status="pending"`` with ``embedded_count=0`` and the
    embedding task is then enqueued.

    Raises:
        HTTPException: 404 when the glossary is missing or owned by another
            client, or when ``version_id`` is not one of its versions;
            500 when resetting the version or enqueueing the task fails.
    """
    assert_user_in_org(ctx, client_id, OrgRole.MANAGER)

    glossary = await svc.get_glossary(glossary_id)
    if not glossary or glossary.client_id != client_id:
        raise HTTPException(status_code=404, detail="Glossary not found")

    versions = await svc.get_versions(glossary_id)
    version = next((v for v in versions if str(v.id) == version_id), None)
    if not version:
        raise HTTPException(status_code=404, detail="Version not found")

    try:
        # Imported lazily, as in the original implementation.
        import motor.motor_asyncio
        from bson import ObjectId

        from ...core.config import settings
        from ...tasks.embed_glossary import embed_glossary_version_task

        client_db = motor.motor_asyncio.AsyncIOMotorClient(settings.mongodb_uri)
        try:
            db = client_db[settings.mongodb_db]
            await db.glossary_versions.update_one(
                {"_id": ObjectId(version_id)},
                {"$set": {"embedding_status": "pending", "embedded_count": 0}},
            )
        finally:
            # Close the per-request client even when the update fails, so a
            # failing request does not leave a connection pool behind.
            client_db.close()

        embed_glossary_version_task.delay(version_id)
    except Exception as exc:
        # Record the failure server-side; the HTTP response is unchanged.
        logger.exception(f"Failed to queue embedding for glossary version {version_id}")
        raise HTTPException(status_code=500, detail=f"Failed to queue embedding: {exc}") from exc

    return {"status": "queued", "version_id": version_id}
# ── Delete ───────────────────────────────────────────────────────────────────
@router.delete("/{glossary_id}", status_code=204)
async def archive_glossary(
    client_id: str,
    glossary_id: str,
    ctx: MembershipContext = Depends(get_membership_context),
):
    """Archive a glossary of a client and record the action in the audit log.

    Requires ``OrgRole.ADMIN`` on the client, whereas the other mutating
    routes in this module require ``OrgRole.MANAGER``.
    NOTE(review): the module docstring lists archive under "Admin or PM";
    confirm which of the two is intended.

    Responds with HTTP 404 when the glossary is missing or owned by another
    client; on success the response has no body (HTTP 204).
    """
    assert_user_in_org(ctx, client_id, OrgRole.ADMIN)

    target = await svc.get_glossary(glossary_id)
    if not (target and target.client_id == client_id):
        raise HTTPException(status_code=404, detail="Glossary not found")

    await svc.archive_glossary(glossary_id)

    audit_entry = dict(
        action=AuditAction.GLOSSARY_ARCHIVE,
        description=f"Glossary {glossary_id} archived",
        user=ctx.user,
        resource_type="glossary",
        resource_id=glossary_id,
    )
    await audit_svc.audit_logger.log_action(**audit_entry)
# ── Helpers ───────────────────────────────────────────────────────────────────
def _validate_xlsx(file: UploadFile) -> None:
    """Reject uploads that are not spreadsheets or that exceed the size limit.

    An upload is accepted when its content type is one of
    ``_ALLOWED_CONTENT_TYPES`` or its filename ends in ``.xlsx``. The
    extension is compared case-insensitively so that ``GLOSSARY.XLSX`` is
    treated like ``glossary.xlsx``. When the upload reports a size, it must
    not exceed ``_MAX_FILE_SIZE_MB``.

    Raises:
        HTTPException: 422 when neither the content type nor the extension
            identifies a spreadsheet; 413 when the reported size is above
            the limit.
    """
    filename = file.filename or ""
    has_xlsx_extension = filename.lower().endswith(".xlsx")
    if file.content_type not in _ALLOWED_CONTENT_TYPES and not has_xlsx_extension:
        raise HTTPException(
            status_code=422,
            detail="Only .xlsx files are accepted",
        )

    # ``size`` may be absent or None depending on the framework version; the
    # limit is only enforced when a size is actually reported.
    size = getattr(file, "size", None)
    if size is not None and size > _MAX_FILE_SIZE_MB * 1024 * 1024:
        raise HTTPException(
            status_code=413,
            detail=f"File exceeds the maximum size of {_MAX_FILE_SIZE_MB} MB",
        )
def _to_response(g, current_version=None) -> GlossaryResponse:
    """Build the API representation of a glossary.

    ``current_version`` is optional; when it is missing (or falsy) the three
    ``current_version_*`` fields of the response are set to ``None``.
    """
    if current_version:
        embedding_status = current_version.embedding_status
        embedded_count = current_version.embedded_count
        term_count = current_version.term_count
    else:
        embedding_status = embedded_count = term_count = None

    return GlossaryResponse(
        id=str(g.id),  # the only field converted; the rest are passed through
        client_id=g.client_id,
        name=g.name,
        description=g.description,
        source_locale=g.source_locale,
        source=g.source,
        status=g.status,
        current_version_id=g.current_version_id,
        current_version_embedding_status=embedding_status,
        current_version_embedded_count=embedded_count,
        current_version_term_count=term_count,
        created_at=g.created_at,
        created_by=g.created_by,
    )
def _version_to_response(v) -> GlossaryVersionResponse:
    """Build the API representation of a glossary version.

    The id is converted to ``str``; every other field is copied from the
    attribute of the same name on ``v``.
    """
    copied_attrs = (
        "glossary_id",
        "version_number",
        "term_count",
        "embedded_count",
        "embedding_status",
        "created_at",
        "created_by",
        "change_note",
    )
    copied = {attr: getattr(v, attr) for attr in copied_attrs}
    return GlossaryVersionResponse(id=str(v.id), **copied)
def _to_detail_response(glossary, versions) -> GlossaryDetailResponse:
    """Build the detail representation of a glossary with its versions.

    The glossary's current version is looked up among ``versions`` so the
    ``current_version_*`` fields are populated here as they are in the list
    endpoint, instead of always being ``None``. They stay ``None`` when the
    glossary has no current version or it is not among ``versions``.
    """
    # Materialise once: the versions are scanned for the current one and
    # then iterated again to build the response list.
    versions = list(versions)

    current_id = glossary.current_version_id
    current_version = None
    if current_id:
        current_version = next(
            (v for v in versions if str(v.id) == str(current_id)),
            None,
        )

    return GlossaryDetailResponse(
        **_to_response(glossary, current_version).model_dump(),
        versions=[_version_to_response(v) for v in versions],
    )