216 lines
6.5 KiB
Python
216 lines
6.5 KiB
Python
"""VTT version control service — DB-backed snapshots with GCS storage."""
|
|
|
|
import difflib
|
|
from datetime import datetime
|
|
|
|
from motor.motor_asyncio import AsyncIOMotorDatabase
|
|
|
|
from ..core.config import settings
|
|
from ..core.logging import get_logger
|
|
from ..models.user import User
|
|
from ..models.vtt_version import (
|
|
DiffLine,
|
|
VttDiffResponse,
|
|
VttKind,
|
|
VttVersion,
|
|
VttVersionActor,
|
|
VttVersionListResponse,
|
|
VttVersionSummary,
|
|
)
|
|
from ..services.gcs import gcs_service
|
|
|
|
# Names of the MongoDB collections used by this service: one holds the
# per job/lang/kind version counters, the other the version documents.
_COUNTER_COLLECTION = "vtt_version_counters"
_VERSION_COLLECTION = "vtt_versions"

# Module-level logger keyed on this module's import name.
logger = get_logger(__name__)
|
|
|
|
|
|
def _snapshot_path(job_id: str, lang: str, kind: VttKind, version: int) -> str:
|
|
return f"{job_id}/{lang}/versions/{kind}/v{version}.vtt"
|
|
|
|
|
|
def _count_cues(content: str) -> int:
|
|
"""Count WebVTT cue blocks (lines containing ' --> ')."""
|
|
return sum(1 for line in content.splitlines() if " --> " in line)
|
|
|
|
|
|
async def _next_version(db: AsyncIOMotorDatabase, job_id: str, lang: str, kind: VttKind) -> int:
    """Reserve and return the next version number for a job/lang/kind triple.

    One ``find_one_and_update`` call increments the ``count`` field of the
    counter document whose ``_id`` is ``"<job_id>:<lang>:<kind>"``, creating
    that document on first use (upsert).
    """
    counters = db[_COUNTER_COLLECTION]
    selector = {"_id": f"{job_id}:{lang}:{kind}"}
    bump = {"$inc": {"count": 1}}
    # return_document=True asks for the document as it is *after* the update,
    # so the value read below is the freshly reserved number.
    updated = await counters.find_one_and_update(
        selector,
        bump,
        upsert=True,
        return_document=True,
    )
    return updated["count"]
|
|
|
|
|
|
async def create_version(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    kind: VttKind,
    content: str,
    user: User,
    note: str | None = None,
    parent_version: int | None = None,
) -> VttVersionSummary:
    """Store ``content`` as a new numbered version and return its summary.

    Steps: reserve a version number, try to upload the text to GCS, then
    insert a version document (including the full content) into MongoDB.

    Args:
        db: Database handle holding the counter and version collections.
        job_id, lang, kind: Identify the VTT track being versioned.
        content: Full VTT text to snapshot.
        user: Author of the version; ``id`` and ``email`` are recorded.
        note: Optional free-text note stored with the version.
        parent_version: Optional number of the version this one derives from.

    Returns:
        A ``VttVersionSummary`` built from the stored document with
        ``content`` set to ``None``.
    """
    number = await _next_version(db, job_id, lang, kind)
    path = _snapshot_path(job_id, lang, kind, number)

    # The GCS copy is best-effort: a failed upload is logged and the version
    # is still recorded, since the content also lives in the database document.
    # NOTE(review): gcs_uri is stored even when the upload failed, so it may
    # point at a blob that does not exist.
    try:
        await gcs_service.upload_text_to_gcs(content, path, "text/vtt")
    except Exception as exc:
        logger.warning(f"VTT version GCS upload failed (non-fatal): {exc}")

    uri = f"gs://{settings.gcs_bucket}/{path}"
    author = VttVersionActor(user_id=str(user.id), user_email=user.email)
    record = {
        "job_id": job_id,
        "lang": lang,
        "kind": kind,
        "version": number,
        "content": content,
        "gcs_uri": uri,
        "created_at": datetime.utcnow(),
        "created_by": author.dict(),
        "note": note,
        "parent_version": parent_version,
        "cue_count": _count_cues(content),
        "byte_size": len(content.encode()),
    }
    outcome = await db[_VERSION_COLLECTION].insert_one(record)

    # Reuse the stored record for the summary: stringify the id and blank the
    # content so the (potentially large) text is not carried in the result.
    record["_id"] = str(outcome.inserted_id)
    record["content"] = None
    return VttVersionSummary(**record)
|
|
|
|
|
|
async def list_versions(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    kind: VttKind,
    skip: int = 0,
    limit: int = 50,
) -> VttVersionListResponse:
    """Return one page of version summaries, highest version number first.

    Args:
        db: Database handle holding the version collection.
        job_id, lang, kind: Identify the VTT track whose versions are listed.
        skip: Number of matching versions to skip before the page starts.
        limit: Maximum number of versions in the page.

    Returns:
        A ``VttVersionListResponse`` with the page of summaries and ``total``,
        the count of all matching versions (not just those on the page).
    """
    collection = db[_VERSION_COLLECTION]
    selector = {"job_id": job_id, "lang": lang, "kind": kind}
    total = await collection.count_documents(selector)

    # The projection leaves the large ``content`` field out of listed rows.
    page = (
        collection.find(selector, {"content": 0})
        .sort("version", -1)
        .skip(skip)
        .limit(limit)
    )
    rows = await page.to_list(length=limit)

    # Replace each row's ObjectId with its string form before building models.
    summaries = [
        VttVersionSummary(**{**row, "_id": str(row["_id"])})
        for row in rows
    ]
    return VttVersionListResponse(versions=summaries, total=total)
|
|
|
|
|
|
async def get_version(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    kind: VttKind,
    version: int,
) -> VttVersion | None:
    """Fetch a single stored version, or ``None`` when no document matches.

    The lookup matches on job_id, lang, kind and version number; the found
    document's ``_id`` is converted to a string before the model is built.
    """
    criteria = {"job_id": job_id, "lang": lang, "kind": kind, "version": version}
    found = await db[_VERSION_COLLECTION].find_one(criteria)
    if found:
        found["_id"] = str(found["_id"])
        return VttVersion(**found)
    return None
|
|
|
|
|
|
async def restore_version(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    kind: VttKind,
    version: int,
    user: User,
) -> VttVersionSummary | None:
    """Re-publish an older version's content as a brand-new version.

    Nothing is overwritten: the content of ``version`` is passed to
    ``create_version`` with a "Restored from" note and ``parent_version`` set
    to the source number.

    Returns:
        The summary of the newly created version, or ``None`` when the
        requested source version is not found.
    """
    source = await get_version(db, job_id, lang, kind, version)
    if source:
        return await create_version(
            db,
            job_id,
            lang,
            kind,
            source.content,
            user,
            note=f"Restored from v{version}",
            parent_version=version,
        )
    return None
|
|
|
|
|
|
def diff_versions(
    job_id: str,
    lang: str,
    kind: VttKind,
    from_version: VttVersion,
    to_version: VttVersion,
) -> VttDiffResponse:
    """Compute a line-by-line diff from ``from_version`` to ``to_version``.

    Both contents are split into lines and compared with
    ``difflib.SequenceMatcher`` (autojunk disabled). Every line is emitted as
    a ``DiffLine`` tagged ``unchanged``, ``removed`` or ``added`` and carrying
    its 1-based line number in the old and/or new text. Within a replaced
    region all removed lines are listed before the added ones.

    Args:
        job_id, lang, kind: Copied unchanged into the response.
        from_version: Version providing the old text.
        to_version: Version providing the new text.

    Returns:
        A ``VttDiffResponse`` with the diff lines and the added/removed totals.
    """
    before = from_version.content.splitlines()
    after = to_version.content.splitlines()

    entries: list[DiffLine] = []
    added = 0
    removed = 0

    opcodes = difflib.SequenceMatcher(None, before, after, autojunk=False).get_opcodes()
    for tag, i1, i2, j1, j2 in opcodes:
        # Opcodes tile both sequences in order, so i1 / j1 are exactly the
        # number of old / new lines already consumed: line numbers are i1+1...
        # and j1+1... with no separate running counters needed.
        if tag == "equal":
            entries.extend(
                DiffLine(
                    type="unchanged",
                    content=text,
                    line_no_old=i1 + offset,
                    line_no_new=j1 + offset,
                )
                for offset, text in enumerate(before[i1:i2], start=1)
            )
            continue

        # 'replace', 'delete' and 'insert' share one path: a delete has an
        # empty new-side slice (j1 == j2) and an insert an empty old-side
        # slice (i1 == i2), so the irrelevant half simply contributes nothing.
        dropped = before[i1:i2]
        gained = after[j1:j2]
        entries.extend(
            DiffLine(type="removed", content=text, line_no_old=number)
            for number, text in enumerate(dropped, start=i1 + 1)
        )
        entries.extend(
            DiffLine(type="added", content=text, line_no_new=number)
            for number, text in enumerate(gained, start=j1 + 1)
        )
        removed += len(dropped)
        added += len(gained)

    return VttDiffResponse(
        job_id=job_id,
        lang=lang,
        kind=kind,
        from_version=from_version.version,
        to_version=to_version.version,
        lines=entries,
        added_count=added,
        removed_count=removed,
    )
|