- Add current_version_embedding_status/embedded_count/term_count to GlossaryResponse
- Batch-fetch current versions in list endpoint (single extra query, not N queries)
- Add get_versions_by_ids() helper to glossary_service
- Fix GlossaryList.tsx: embeddingBadge('') → embeddingBadge(g) with real status + pct
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
142 lines
4 KiB
Python
142 lines
4 KiB
Python
from __future__ import annotations

from datetime import datetime, timezone
from enum import StrEnum

from pydantic import BaseModel, Field
|
|
|
|
|
|
class GlossarySource(StrEnum):
|
|
XLSX_UPLOAD = "xlsx_upload"
|
|
FRAZE_API = "fraze_api" # reserved for future FRAZE integration
|
|
|
|
|
|
class GlossaryStatus(StrEnum):
|
|
ACTIVE = "active"
|
|
ARCHIVED = "archived"
|
|
|
|
|
|
class EmbeddingStatus(StrEnum):
|
|
PENDING = "pending"
|
|
IN_PROGRESS = "in_progress"
|
|
DONE = "done"
|
|
FAILED = "failed"
|
|
|
|
|
|
class Glossary(BaseModel):
    """Glossary record, validated from raw documents via ``glossary_from_doc``.

    Carries ownership, naming, source-locale and lifecycle fields plus a
    pointer to the glossary's current version.
    """

    # ``populate_by_name`` lets callers pass either ``id`` or the ``_id`` alias.
    model_config = {"populate_by_name": True, "arbitrary_types_allowed": True}

    # Document identifier; aliased to ``_id`` (presumably the database key).
    id: str | None = Field(default=None, alias="_id")
    client_id: str
    name: str
    description: str | None = None
    source_locale: str  # BCP-47 source column, e.g. "en-GB"
    source: GlossarySource = GlossarySource.XLSX_UPLOAD
    status: GlossaryStatus = GlossaryStatus.ACTIVE
    # Identifier of the version currently in use; None until one is set.
    current_version_id: str | None = None
    # ``datetime.utcnow`` is deprecated since Python 3.12. This factory yields
    # the same naive-UTC timestamp (tzinfo stripped) so stored and compared
    # values keep their existing shape.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    created_by: str  # user_id
|
|
|
|
|
|
class GlossaryVersion(BaseModel):
    """One numbered version of a glossary, with its embedding progress.

    Validated from raw documents via ``glossary_version_from_doc``.
    """

    # ``populate_by_name`` lets callers pass either ``id`` or the ``_id`` alias.
    model_config = {"populate_by_name": True}

    # Document identifier; aliased to ``_id`` (presumably the database key).
    id: str | None = Field(default=None, alias="_id")
    glossary_id: str
    version_number: int
    source_xlsx_gcs_path: str | None = None  # GCS path to original file
    term_count: int = 0
    # Presumably the number of terms embedded so far -- confirm with the job.
    embedded_count: int = 0
    embedding_status: EmbeddingStatus = EmbeddingStatus.PENDING
    # ``datetime.utcnow`` is deprecated since Python 3.12. This factory yields
    # the same naive-UTC timestamp (tzinfo stripped) so stored and compared
    # values keep their existing shape.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    created_by: str
    change_note: str | None = None
|
|
|
|
|
|
class GlossaryTerm(BaseModel):
    """A single source term together with its translations, keyed by locale.

    Each term belongs to one glossary and one version of that glossary.
    """

    # ``populate_by_name`` lets callers pass either ``id`` or the ``_id`` alias.
    model_config = {"populate_by_name": True}

    # Document identifier; aliased to ``_id`` (presumably the database key).
    id: str | None = Field(default=None, alias="_id")
    glossary_id: str
    version_id: str
    cid: str | None = None  # 3M Content ID from xlsx
    tid: str | None = None  # 3M Term ID from xlsx
    source_term: str  # canonical source text (whitespace-normalised)
    source_term_lower: str  # lowercase for case-insensitive index
    translations: dict[str, str] = {}  # {locale_code: translated_text}
    embedding: list[float] | None = None  # 768-dim Gemini embedding
|
|
|
|
|
|
# ── Schema models (API request/response) ──────────────────────────────────────
|
|
|
|
class GlossaryCreate(BaseModel):
    """Request schema for creating a glossary.

    ``name`` and ``source_locale`` are required; the description and change
    note are optional.
    """

    name: str
    description: str | None = None
    source_locale: str
    # Optional free-text note; presumably stored on the first version.
    change_note: str | None = None
|
|
|
|
|
|
class GlossaryVersionCreate(BaseModel):
    """Request schema for adding a new version to an existing glossary."""

    source_locale: str
    # Optional free-text note describing what changed in this version.
    change_note: str | None = None
|
|
|
|
|
|
class GlossaryResponse(BaseModel):
    """Response schema describing one glossary.

    Besides the glossary's own fields it carries an optional summary of the
    current version's embedding progress.
    """

    id: str
    client_id: str
    name: str
    description: str | None = None
    source_locale: str
    source: GlossarySource
    status: GlossaryStatus
    current_version_id: str | None = None
    # Embedding summary of the current version. All three default to None,
    # presumably when there is no current version or it was not fetched.
    current_version_embedding_status: EmbeddingStatus | None = None
    current_version_embedded_count: int | None = None
    current_version_term_count: int | None = None
    created_at: datetime
    created_by: str
|
|
|
|
|
|
class GlossaryVersionResponse(BaseModel):
    """Response schema describing one glossary version.

    Every field is required except ``change_note``.
    """

    id: str
    glossary_id: str
    version_number: int
    term_count: int
    embedded_count: int
    embedding_status: EmbeddingStatus
    created_at: datetime
    created_by: str
    change_note: str | None = None
|
|
|
|
|
|
class GlossaryDetailResponse(GlossaryResponse):
    """``GlossaryResponse`` extended with the glossary's versions."""

    # Defaults to an empty list when no versions are supplied.
    versions: list[GlossaryVersionResponse] = []
|
|
|
|
|
|
class GlossaryTermPreview(BaseModel):
    """Reduced view of ``GlossaryTerm`` for UI previews.

    Only the source text and its translations are exposed.
    """

    source_term: str
    # Same shape as ``GlossaryTerm.translations``: {locale_code: translated_text}.
    translations: dict[str, str]
|
|
|
|
|
|
class MatchedTerm(BaseModel):
    """Glossary term found in VTT source text, with its target-locale translation.

    ``match_kind`` records how the term was found and ``score`` how strong
    the match is.
    """

    source_term: str
    target_translation: str
    match_kind: str  # "exact" | "vector"
    score: float  # 1.0 for exact, cosine similarity for vector
|
|
|
|
|
|
def glossary_from_doc(doc: dict) -> Glossary:
    """Build a ``Glossary`` from a raw document mapping.

    The input is shallow-copied first, so the caller's mapping is never
    modified. When an ``_id`` key is present its value is converted with
    ``str()`` so it fits the model's string ``id`` field.

    Args:
        doc: Raw document; presumably a database record -- confirm with callers.

    Returns:
        The validated ``Glossary`` instance.
    """
    fields = dict(doc)
    if "_id" in fields:
        fields.update(_id=str(fields["_id"]))
    return Glossary.model_validate(fields)
|
|
|
|
|
|
def glossary_version_from_doc(doc: dict) -> GlossaryVersion:
    """Build a ``GlossaryVersion`` from a raw document mapping.

    The input is shallow-copied first, so the caller's mapping is never
    modified. When an ``_id`` key is present its value is converted with
    ``str()`` so it fits the model's string ``id`` field.

    Args:
        doc: Raw document; presumably a database record -- confirm with callers.

    Returns:
        The validated ``GlossaryVersion`` instance.
    """
    fields = dict(doc)
    if "_id" in fields:
        fields.update(_id=str(fields["_id"]))
    return GlossaryVersion.model_validate(fields)
|