Compare commits
No commits in common. "main" and "docs/en-first-help-content" have entirely different histories.
main
...
docs/en-fi
68 changed files with 581 additions and 1678 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -110,4 +110,3 @@ package-lock.json
|
|||
|
||||
# Test videos
|
||||
test-video.mp4
|
||||
.worktrees/
|
||||
|
|
|
|||
|
|
@ -253,7 +253,7 @@ async def update_user(
|
|||
action = AuditAction.USER_ROLE_CHANGE if user_update.role else AuditAction.USER_UPDATE
|
||||
await log_user_management(
|
||||
action, user_id, current_user, request,
|
||||
details=dict(user_update.dict(exclude_none=True).items()),
|
||||
details={k: v for k, v in user_update.dict(exclude_none=True).items()},
|
||||
)
|
||||
|
||||
return UserResponse(
|
||||
|
|
@ -439,7 +439,7 @@ async def detailed_health_check(
|
|||
try:
|
||||
from ...services.gcs import gcs_service
|
||||
# Simple check to see if bucket is accessible
|
||||
await gcs_service.file_exists("health_check_dummy") # This will return False but won't error if bucket accessible
|
||||
bucket_exists = await gcs_service.file_exists("health_check_dummy") # This will return False but won't error if bucket accessible
|
||||
health_status["components"]["gcs"] = {"status": "healthy"}
|
||||
except Exception as e:
|
||||
health_status["components"]["gcs"] = {"status": "unhealthy", "error": str(e)}
|
||||
|
|
|
|||
|
|
@ -143,13 +143,13 @@ async def microsoft_login(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail=f"Microsoft authentication failed: {str(e)}",
|
||||
) from None
|
||||
)
|
||||
except MicrosoftAuthError as e:
|
||||
await log_auth_failure("microsoft-sso", request, f"MS auth service error: {e}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Microsoft authentication service error",
|
||||
) from None
|
||||
)
|
||||
|
||||
# Look up by Microsoft-derived ID first — handles email casing changes across logins
|
||||
ms_user_id = f"ms-{user_info.sub[:20]}"
|
||||
|
|
@ -287,7 +287,7 @@ async def refresh_token(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid refresh token",
|
||||
) from None
|
||||
)
|
||||
|
||||
|
||||
@router.post("/logout", response_model=LogoutResponse)
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
"""Job Brief CRUD endpoints."""
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ...core.authz import MembershipContext, assert_user_in_org, get_membership_context
|
||||
from ...core.database import get_database
|
||||
from ...core.logging import get_logger
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.job_brief import (
|
||||
BriefStatus,
|
||||
JobBriefCreate,
|
||||
|
|
@ -15,7 +14,6 @@ from ...models.job_brief import (
|
|||
JobBriefUpdate,
|
||||
)
|
||||
from ...models.organization import OrgRole
|
||||
from ...services.audit_logger import audit_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
router = APIRouter(prefix="/briefs", tags=["briefs"])
|
||||
|
|
@ -63,7 +61,6 @@ async def list_briefs(
|
|||
@router.post("", response_model=JobBriefResponse, status_code=status.HTTP_201_CREATED)
|
||||
async def create_brief(
|
||||
payload: JobBriefCreate,
|
||||
http_request: Request,
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -106,15 +103,6 @@ async def create_brief(
|
|||
"approved_by": None,
|
||||
}
|
||||
await db.job_briefs.insert_one(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.BRIEF_CREATE,
|
||||
description=f"Brief '{payload.title}' created",
|
||||
user=ctx.user,
|
||||
request=http_request,
|
||||
resource_type="brief",
|
||||
resource_id=str(doc["_id"]),
|
||||
details={"title": payload.title, "organization_id": org_id},
|
||||
)
|
||||
return _doc_to_response(doc)
|
||||
|
||||
|
||||
|
|
@ -135,7 +123,6 @@ async def get_brief(
|
|||
async def update_brief(
|
||||
brief_id: str,
|
||||
payload: JobBriefUpdate,
|
||||
http_request: Request,
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -163,22 +150,12 @@ async def update_brief(
|
|||
{"$set": updates},
|
||||
return_document=True,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.BRIEF_UPDATE,
|
||||
description=f"Brief '{brief_id}' updated",
|
||||
user=ctx.user,
|
||||
request=http_request,
|
||||
resource_type="brief",
|
||||
resource_id=brief_id,
|
||||
details={"fields_updated": list(updates.keys())},
|
||||
)
|
||||
return _doc_to_response(result)
|
||||
|
||||
|
||||
@router.post("/{brief_id}/submit", response_model=JobBriefResponse)
|
||||
async def submit_brief(
|
||||
brief_id: str,
|
||||
http_request: Request,
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -195,22 +172,12 @@ async def submit_brief(
|
|||
{"$set": {"status": BriefStatus.SUBMITTED.value, "submitted_at": now, "updated_at": now}},
|
||||
return_document=True,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.BRIEF_SUBMIT,
|
||||
description=f"Brief '{brief_id}' submitted for review",
|
||||
user=ctx.user,
|
||||
request=http_request,
|
||||
resource_type="brief",
|
||||
resource_id=brief_id,
|
||||
details={"organization_id": result.get("organization_id")},
|
||||
)
|
||||
return _doc_to_response(result)
|
||||
|
||||
|
||||
@router.post("/{brief_id}/approve", response_model=JobBriefResponse)
|
||||
async def approve_brief(
|
||||
brief_id: str,
|
||||
http_request: Request,
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -233,13 +200,4 @@ async def approve_brief(
|
|||
},
|
||||
return_document=True,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.BRIEF_APPROVE,
|
||||
description=f"Brief '{brief_id}' approved",
|
||||
user=ctx.user,
|
||||
request=http_request,
|
||||
resource_type="brief",
|
||||
resource_id=brief_id,
|
||||
details={"organization_id": result.get("organization_id")},
|
||||
)
|
||||
return _doc_to_response(result)
|
||||
|
|
|
|||
|
|
@ -12,13 +12,12 @@ Access rules:
|
|||
from datetime import UTC, datetime
|
||||
|
||||
from bson import ObjectId
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user, require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.client import (
|
||||
Client,
|
||||
ClientCreate,
|
||||
|
|
@ -31,7 +30,6 @@ from ...models.client import (
|
|||
TeamUpdate,
|
||||
)
|
||||
from ...models.user import User, UserRole
|
||||
from ...services.audit_logger import audit_logger
|
||||
|
||||
router = APIRouter(prefix="/clients", tags=["clients"])
|
||||
|
||||
|
|
@ -123,7 +121,6 @@ async def list_clients(
|
|||
@router.post("", response_model=Client)
|
||||
async def create_client(
|
||||
body: ClientCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -140,18 +137,7 @@ async def create_client(
|
|||
"updated_at": now,
|
||||
})
|
||||
doc = await db.clients.find_one({"_id": client_id})
|
||||
client = _client_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_CREATE,
|
||||
description=f"Client '{client.name}' created",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=str(client.id),
|
||||
resource_name=client.name,
|
||||
details={"slug": client.slug},
|
||||
)
|
||||
return client
|
||||
return _client_from_doc(doc)
|
||||
|
||||
|
||||
@router.get("/{client_id}", response_model=Client)
|
||||
|
|
@ -172,12 +158,11 @@ async def get_client(
|
|||
async def update_client(
|
||||
client_id: str,
|
||||
body: ClientUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
update: dict = dict(body.model_dump(exclude_none=True).items())
|
||||
update: dict = {k: v for k, v in body.model_dump(exclude_none=True).items()}
|
||||
if not update:
|
||||
raise HTTPException(status_code=422, detail="No fields to update")
|
||||
if "slug" in update and await db.clients.find_one({"slug": update["slug"], "_id": {"$ne": client_id}}):
|
||||
|
|
@ -185,39 +170,17 @@ async def update_client(
|
|||
update["updated_at"] = _now()
|
||||
await db.clients.update_one({"_id": client_id}, {"$set": update})
|
||||
doc = await db.clients.find_one({"_id": client_id})
|
||||
client = _client_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_UPDATE,
|
||||
description=f"Client '{client.name}' updated",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client.name,
|
||||
details={"fields_updated": list(body.model_dump(exclude_none=True).keys())},
|
||||
)
|
||||
return client
|
||||
return _client_from_doc(doc)
|
||||
|
||||
|
||||
@router.delete("/{client_id}", status_code=204)
|
||||
async def deactivate_client(
|
||||
client_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await db.clients.update_one({"_id": client_id}, {"$set": {"is_active": False, "updated_at": _now()}})
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_DEACTIVATE,
|
||||
description=f"Client '{doc['name']}' deactivated",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=doc["name"],
|
||||
details={"was_active": doc.get("is_active", True)},
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -232,11 +195,10 @@ class AssignPMRequest(BaseModel):
|
|||
async def assign_pm(
|
||||
client_id: str,
|
||||
body: AssignPMRequest,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
user_doc = await db.users.find_one({"_id": body.user_id})
|
||||
if not user_doc:
|
||||
raise HTTPException(status_code=404, detail="User not found")
|
||||
|
|
@ -247,28 +209,16 @@ async def assign_pm(
|
|||
"$set": {"role": UserRole.PROJECT_MANAGER.value, "updated_at": _now()},
|
||||
},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PM_ASSIGN,
|
||||
description=f"PM '{user_doc.get('email', body.user_id)}' assigned to client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"pm_user_id": body.user_id, "pm_email": user_doc.get("email")},
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{client_id}/pm/{user_id}", status_code=204)
|
||||
async def remove_pm(
|
||||
client_id: str,
|
||||
user_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
pm_doc = await db.users.find_one({"_id": user_id})
|
||||
await _get_client_or_404(client_id, db)
|
||||
await db.users.update_one(
|
||||
{"_id": user_id},
|
||||
{"$pull": {"pm_client_ids": client_id}, "$set": {"updated_at": _now()}},
|
||||
|
|
@ -280,16 +230,6 @@ async def remove_pm(
|
|||
{"_id": user_id},
|
||||
{"$set": {"role": UserRole.CLIENT.value, "updated_at": _now()}},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PM_REMOVE,
|
||||
description=f"PM '{pm_doc.get('email', user_id) if pm_doc else user_id}' removed from client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"pm_user_id": user_id, "pm_email": pm_doc.get("email") if pm_doc else None},
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{client_id}/pm", response_model=list[dict])
|
||||
|
|
@ -326,11 +266,10 @@ async def list_teams(
|
|||
async def create_team(
|
||||
client_id: str,
|
||||
body: TeamCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
now = _now()
|
||||
team_id = str(ObjectId())
|
||||
|
|
@ -343,18 +282,7 @@ async def create_team(
|
|||
"updated_at": now,
|
||||
})
|
||||
doc = await db.teams.find_one({"_id": team_id})
|
||||
team = _team_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_CREATE,
|
||||
description=f"Team '{team.name}' created for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team.name},
|
||||
)
|
||||
return team
|
||||
return _team_from_doc(doc)
|
||||
|
||||
|
||||
@router.patch("/{client_id}/teams/{team_id}", response_model=Team)
|
||||
|
|
@ -362,55 +290,32 @@ async def update_team(
|
|||
client_id: str,
|
||||
team_id: str,
|
||||
body: TeamUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
await _get_team_or_404(team_id, client_id, db)
|
||||
update = dict(body.model_dump(exclude_none=True).items())
|
||||
update = {k: v for k, v in body.model_dump(exclude_none=True).items()}
|
||||
if not update:
|
||||
raise HTTPException(status_code=422, detail="No fields to update")
|
||||
update["updated_at"] = _now()
|
||||
await db.teams.update_one({"_id": team_id}, {"$set": update})
|
||||
doc = await db.teams.find_one({"_id": team_id})
|
||||
team = _team_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_UPDATE,
|
||||
description=f"Team '{team.name}' updated for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team.name, "fields_updated": list(body.model_dump(exclude_none=True).keys())},
|
||||
)
|
||||
return team
|
||||
return _team_from_doc(doc)
|
||||
|
||||
|
||||
@router.delete("/{client_id}/teams/{team_id}", status_code=204)
|
||||
async def delete_team(
|
||||
client_id: str,
|
||||
team_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
team_doc = await _get_team_or_404(team_id, client_id, db)
|
||||
await _get_team_or_404(team_id, client_id, db)
|
||||
await db.teams.delete_one({"_id": team_id})
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_DELETE,
|
||||
description=f"Team '{team_doc['name']}' deleted from client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team_doc["name"]},
|
||||
)
|
||||
|
||||
|
||||
# Team membership
|
||||
|
|
@ -424,15 +329,13 @@ async def add_team_member(
|
|||
client_id: str,
|
||||
team_id: str,
|
||||
body: AddMemberRequest,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
team_doc = await _get_team_or_404(team_id, client_id, db)
|
||||
member_doc = await db.users.find_one({"_id": body.user_id})
|
||||
if not member_doc:
|
||||
await _get_team_or_404(team_id, client_id, db)
|
||||
if not await db.users.find_one({"_id": body.user_id}):
|
||||
raise HTTPException(status_code=404, detail="User not found")
|
||||
# Write to both Team.member_user_ids (legacy) and Membership.team_ids (MT-17)
|
||||
await db.teams.update_one(
|
||||
|
|
@ -443,16 +346,6 @@ async def add_team_member(
|
|||
{"user_id": body.user_id, "organization_id": client_id},
|
||||
{"$addToSet": {"team_ids": team_id}},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_MEMBER_ADD,
|
||||
description=f"User '{member_doc.get('email', body.user_id)}' added to team '{team_doc['name']}' of client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team_doc["name"], "member_user_id": body.user_id, "member_email": member_doc.get("email")},
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{client_id}/teams/{team_id}/members/{user_id}", status_code=204)
|
||||
|
|
@ -460,14 +353,12 @@ async def remove_team_member(
|
|||
client_id: str,
|
||||
team_id: str,
|
||||
user_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
team_doc = await _get_team_or_404(team_id, client_id, db)
|
||||
member_doc = await db.users.find_one({"_id": user_id})
|
||||
await _get_team_or_404(team_id, client_id, db)
|
||||
await db.teams.update_one(
|
||||
{"_id": team_id},
|
||||
{"$pull": {"member_user_ids": user_id}, "$set": {"updated_at": _now()}},
|
||||
|
|
@ -476,16 +367,6 @@ async def remove_team_member(
|
|||
{"user_id": user_id, "organization_id": client_id},
|
||||
{"$pull": {"team_ids": team_id}},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_MEMBER_REMOVE,
|
||||
description=f"User '{member_doc.get('email', user_id) if member_doc else user_id}' removed from team '{team_doc['name']}' of client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team_doc["name"], "member_user_id": user_id, "member_email": member_doc.get("email") if member_doc else None},
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -526,11 +407,10 @@ async def list_projects(
|
|||
async def create_project(
|
||||
client_id: str,
|
||||
body: ProjectCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_client_member(current_user, client_id, db)
|
||||
now = _now()
|
||||
project_id = str(ObjectId())
|
||||
|
|
@ -546,18 +426,7 @@ async def create_project(
|
|||
"updated_at": now,
|
||||
})
|
||||
doc = await db.projects.find_one({"_id": project_id})
|
||||
project = _project_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PROJECT_CREATE,
|
||||
description=f"Project '{project.name}' created for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"project_id": project_id, "project_name": project.name, "default_languages": body.default_languages},
|
||||
)
|
||||
return project
|
||||
return _project_from_doc(doc)
|
||||
|
||||
|
||||
@router.patch("/{client_id}/projects/{project_id}", response_model=Project)
|
||||
|
|
@ -565,58 +434,35 @@ async def update_project(
|
|||
client_id: str,
|
||||
project_id: str,
|
||||
body: ProjectUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
await _get_project_or_404(project_id, client_id, db)
|
||||
update = dict(body.model_dump(exclude_none=True).items())
|
||||
update = {k: v for k, v in body.model_dump(exclude_none=True).items()}
|
||||
if not update:
|
||||
raise HTTPException(status_code=422, detail="No fields to update")
|
||||
update["updated_at"] = _now()
|
||||
await db.projects.update_one({"_id": project_id}, {"$set": update})
|
||||
doc = await db.projects.find_one({"_id": project_id})
|
||||
project = _project_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PROJECT_UPDATE,
|
||||
description=f"Project '{project.name}' updated for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"project_id": project_id, "project_name": project.name, "fields_updated": list(body.model_dump(exclude_none=True).keys())},
|
||||
)
|
||||
return project
|
||||
return _project_from_doc(doc)
|
||||
|
||||
|
||||
@router.delete("/{client_id}/projects/{project_id}", status_code=204)
|
||||
async def archive_project(
|
||||
client_id: str,
|
||||
project_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
project_doc = await _get_project_or_404(project_id, client_id, db)
|
||||
await _get_project_or_404(project_id, client_id, db)
|
||||
await db.projects.update_one(
|
||||
{"_id": project_id},
|
||||
{"$set": {"is_active": False, "updated_at": _now()}},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PROJECT_ARCHIVE,
|
||||
description=f"Project '{project_doc['name']}' archived for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"project_id": project_id, "project_name": project_doc["name"]},
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -62,4 +62,4 @@ async def get_signed_upload_url(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to generate signed upload URL: {str(e)}"
|
||||
) from None
|
||||
)
|
||||
|
|
|
|||
|
|
@ -47,8 +47,7 @@ async def list_glossaries(
|
|||
"""List all active glossaries for a client."""
|
||||
assert_user_in_org(ctx, client_id, OrgRole.VIEWER)
|
||||
glossaries = await svc.get_glossaries_for_client(client_id)
|
||||
version_map = await svc.get_versions_by_ids([g.current_version_id for g in glossaries if g.current_version_id])
|
||||
return [_to_response(g, version_map.get(g.current_version_id)) for g in glossaries]
|
||||
return [_to_response(g) for g in glossaries]
|
||||
|
||||
|
||||
# ── Upload new glossary ───────────────────────────────────────────────────────
|
||||
|
|
@ -253,7 +252,7 @@ async def reembed_version(
|
|||
return {"status": "queued", "version_id": version_id}
|
||||
|
||||
|
||||
# ── Delete ───────────────────────────────────────────────────────────────────
|
||||
# ── Archive (soft-delete) ─────────────────────────────────────────────────────
|
||||
|
||||
@router.delete("/{glossary_id}", status_code=204)
|
||||
async def archive_glossary(
|
||||
|
|
@ -287,7 +286,7 @@ def _validate_xlsx(file: UploadFile) -> None:
|
|||
)
|
||||
|
||||
|
||||
def _to_response(g, current_version=None) -> GlossaryResponse:
|
||||
def _to_response(g) -> GlossaryResponse:
|
||||
return GlossaryResponse(
|
||||
id=str(g.id),
|
||||
client_id=g.client_id,
|
||||
|
|
@ -297,9 +296,6 @@ def _to_response(g, current_version=None) -> GlossaryResponse:
|
|||
source=g.source,
|
||||
status=g.status,
|
||||
current_version_id=g.current_version_id,
|
||||
current_version_embedding_status=current_version.embedding_status if current_version else None,
|
||||
current_version_embedded_count=current_version.embedded_count if current_version else None,
|
||||
current_version_term_count=current_version.term_count if current_version else None,
|
||||
created_at=g.created_at,
|
||||
created_by=g.created_by,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ import re
|
|||
import secrets
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ...core.authz import bump_user_membership_cache
|
||||
|
|
@ -27,7 +27,6 @@ from ...core.security import (
|
|||
create_refresh_token,
|
||||
get_password_hash,
|
||||
)
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.invitation import (
|
||||
InvitationAcceptRequest,
|
||||
InvitationCreate,
|
||||
|
|
@ -36,7 +35,6 @@ from ...models.invitation import (
|
|||
)
|
||||
from ...models.organization import OrgRole
|
||||
from ...models.user import AuthProvider, User, UserRole
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...services.emailer import email_service
|
||||
from ...services.membership_service import get_membership, upsert_membership
|
||||
|
||||
|
|
@ -105,7 +103,6 @@ org_router = APIRouter(prefix="/organizations", tags=["invitations"])
|
|||
async def create_invitation(
|
||||
org_id: str,
|
||||
body: InvitationCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -172,17 +169,7 @@ async def create_invitation(
|
|||
expires_at=expires_at,
|
||||
)
|
||||
|
||||
inv = _inv_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.INVITATION_CREATE,
|
||||
description=f"Invitation created for '{email_lower}' to organization '{org_id}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="invitation",
|
||||
resource_id=inv.id,
|
||||
details={"invited_email": email_lower, "org_id": org_id, "role": body.role_in_org},
|
||||
)
|
||||
return inv
|
||||
return _inv_from_doc(doc)
|
||||
|
||||
|
||||
@org_router.get("/{org_id}/invitations", response_model=list[InvitationResponse])
|
||||
|
|
@ -202,30 +189,16 @@ async def list_invitations(
|
|||
async def revoke_invitation(
|
||||
org_id: str,
|
||||
invitation_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _assert_org_admin(org_id, current_user, db)
|
||||
inv_doc = await db.invitations.find_one({"_id": invitation_id, "organization_id": org_id})
|
||||
result = await db.invitations.update_one(
|
||||
{"_id": invitation_id, "organization_id": org_id, "accepted_at": None, "revoked_at": None},
|
||||
{"$set": {"revoked_at": _now()}},
|
||||
)
|
||||
if result.matched_count == 0:
|
||||
raise HTTPException(status_code=404, detail="Invitation not found or already accepted/revoked")
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.INVITATION_REVOKE,
|
||||
description=f"Invitation '{invitation_id}' revoked in organization '{org_id}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="invitation",
|
||||
resource_id=invitation_id,
|
||||
details={
|
||||
"invited_email": inv_doc["email"] if inv_doc else None,
|
||||
"org_id": org_id,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -297,7 +270,6 @@ async def preview_invitation(
|
|||
@router.post("/invitations/accept")
|
||||
async def accept_invitation(
|
||||
body: InvitationAcceptRequest,
|
||||
request: Request,
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Accept an invitation. Creates user if needed, creates membership, returns tokens."""
|
||||
|
|
@ -387,16 +359,6 @@ async def accept_invitation(
|
|||
|
||||
org_name, org_slug = await _get_org_name(org_id, db)
|
||||
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.INVITATION_ACCEPT,
|
||||
description=f"Invitation accepted by '{email_lower}' for organization '{org_id}'",
|
||||
user=None,
|
||||
request=request,
|
||||
resource_type="invitation",
|
||||
resource_id=str(doc["_id"]),
|
||||
details={"invited_email": email_lower, "org_id": org_id},
|
||||
)
|
||||
|
||||
return {
|
||||
"access_token": access_token,
|
||||
"refresh_token": refresh_token,
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ async def complete_chunked_upload(
|
|||
outputs_data = json.loads(json.dumps(payload.requested_outputs))
|
||||
outputs = RequestedOutputs(**outputs_data)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=400, detail="Invalid requested_outputs format") from None
|
||||
raise HTTPException(status_code=400, detail="Invalid requested_outputs format")
|
||||
|
||||
organization_id: str | None = None
|
||||
brief_doc = None
|
||||
|
|
@ -196,7 +196,7 @@ async def complete_chunked_upload(
|
|||
logger.info("Dispatched ingest task for chunked-upload job %s", payload.job_id)
|
||||
except Exception as e:
|
||||
logger.error("Failed to dispatch ingest task for job %s: %s", payload.job_id, e)
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start processing: {e}") from None
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start processing: {e}")
|
||||
|
||||
await log_job_action(
|
||||
AuditAction.JOB_CREATE, payload.job_id, current_user, request,
|
||||
|
|
@ -246,7 +246,7 @@ async def create_job(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Invalid requested_outputs format"
|
||||
) from None
|
||||
)
|
||||
|
||||
# Resolve brief if provided — overrides some fields and sets organization_id
|
||||
brief_doc = None
|
||||
|
|
@ -330,7 +330,7 @@ async def create_job(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to start processing: {e}",
|
||||
) from None
|
||||
)
|
||||
|
||||
await log_job_action(
|
||||
AuditAction.JOB_CREATE, job_id, current_user, request,
|
||||
|
|
@ -774,6 +774,7 @@ async def get_job(
|
|||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
job_doc = await get_job_or_403(job_id, ctx, db)
|
||||
current_user = ctx.user
|
||||
|
||||
# Check task status if task_id exists
|
||||
task_id = job_doc.get("task_id")
|
||||
|
|
@ -1369,21 +1370,20 @@ async def get_job_downloads(
|
|||
if _ap is not None and not _own and not (_jpid and _jpid in (_ap or [])):
|
||||
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
|
||||
|
||||
# Block only the statuses where outputs can't possibly exist yet
|
||||
_no_output_statuses = {
|
||||
JobStatus.CREATED.value,
|
||||
JobStatus.INGESTING.value,
|
||||
JobStatus.AI_PROCESSING.value,
|
||||
}
|
||||
if job_doc["status"] in _no_output_statuses:
|
||||
# Allow downloads for jobs that have outputs available
|
||||
# (PENDING_QC, APPROVED_ENGLISH, TRANSLATING, COMPLETED, etc.)
|
||||
if job_doc["status"] in [JobStatus.CREATED.value, JobStatus.INGESTING.value, JobStatus.AI_PROCESSING.value]:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Job is still being processed"
|
||||
)
|
||||
|
||||
# No outputs yet — return empty instead of 400 (e.g. failed jobs with partial state)
|
||||
# Check if job has outputs
|
||||
if not job_doc.get("outputs"):
|
||||
return JobDownloadsResponse(downloads={})
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="No outputs available for this job"
|
||||
)
|
||||
|
||||
# Generate signed URLs for all outputs
|
||||
downloads = {}
|
||||
|
|
@ -1585,7 +1585,7 @@ async def update_job_vtt_content(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail=f"You are not assigned to language '{target_language}'"
|
||||
) from None
|
||||
)
|
||||
|
||||
outputs = job_doc.get("outputs", {})
|
||||
lang_output = outputs.get(target_language, {})
|
||||
|
|
@ -1617,9 +1617,8 @@ async def update_job_vtt_content(
|
|||
|
||||
# Validate and update captions VTT
|
||||
if request.captions_vtt: # treat empty string same as None — nothing to update
|
||||
# Auto-fix minor overlaps before validation (mirrors AI-generation pipeline)
|
||||
captions_vtt_fixed = VTTEditor.fix_overlapping_cues(request.captions_vtt)
|
||||
is_valid, errors = VTTEditor.validate_vtt(captions_vtt_fixed)
|
||||
# Validate VTT format
|
||||
is_valid, errors = VTTEditor.validate_vtt(request.captions_vtt)
|
||||
if not is_valid:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
|
|
@ -1628,21 +1627,20 @@ async def update_job_vtt_content(
|
|||
|
||||
# Snapshot before overwriting live file
|
||||
await vtt_versioning.create_version(
|
||||
db, job_id, target_language, "captions", captions_vtt_fixed, current_user,
|
||||
note=request.note,
|
||||
db, job_id, target_language, "captions", request.captions_vtt, current_user
|
||||
)
|
||||
|
||||
# Upload updated VTT
|
||||
new_captions_uri = await upload_vtt_to_gcs(
|
||||
captions_vtt_fixed,
|
||||
request.captions_vtt,
|
||||
f"{job_id}/{target_language}/captions.vtt"
|
||||
)
|
||||
lang_output["captions_vtt_gcs"] = new_captions_uri
|
||||
|
||||
# Validate and update audio description VTT
|
||||
if request.audio_description_vtt: # treat empty string same as None — nothing to update
|
||||
ad_vtt_fixed = VTTEditor.fix_overlapping_cues(request.audio_description_vtt)
|
||||
is_valid, errors = VTTEditor.validate_vtt(ad_vtt_fixed)
|
||||
# Validate VTT format
|
||||
is_valid, errors = VTTEditor.validate_vtt(request.audio_description_vtt)
|
||||
if not is_valid:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
|
|
@ -1667,7 +1665,7 @@ async def update_job_vtt_content(
|
|||
except Exception as _e:
|
||||
logger.warning(f"Could not read old AD VTT for diff: {_e}")
|
||||
|
||||
new_cues = [c["text"] for c in _parse_ad_cues_for_diff(ad_vtt_fixed)]
|
||||
new_cues = [c["text"] for c in _parse_ad_cues_for_diff(request.audio_description_vtt)]
|
||||
|
||||
# Queue TTS regeneration for any cue whose text changed or that is newly added
|
||||
edit_state = lang_output.get("accessible_video_edit_state") or {}
|
||||
|
|
@ -1714,13 +1712,12 @@ async def update_job_vtt_content(
|
|||
|
||||
# Snapshot before overwriting live file
|
||||
await vtt_versioning.create_version(
|
||||
db, job_id, target_language, "ad", ad_vtt_fixed, current_user,
|
||||
note=request.note,
|
||||
db, job_id, target_language, "ad", request.audio_description_vtt, current_user
|
||||
)
|
||||
|
||||
# Upload updated VTT
|
||||
new_ad_uri = await upload_vtt_to_gcs(
|
||||
ad_vtt_fixed,
|
||||
request.audio_description_vtt,
|
||||
f"{job_id}/{target_language}/ad.vtt"
|
||||
)
|
||||
lang_output["ad_vtt_gcs"] = new_ad_uri
|
||||
|
|
@ -1733,7 +1730,7 @@ async def update_job_vtt_content(
|
|||
generate_descriptive_transcript as _gen_transcript,
|
||||
)
|
||||
|
||||
captions_text = captions_vtt_fixed if request.captions_vtt else None
|
||||
captions_text = request.captions_vtt
|
||||
if not captions_text:
|
||||
cc_gcs = lang_output.get("captions_vtt_gcs")
|
||||
if cc_gcs:
|
||||
|
|
@ -1744,7 +1741,7 @@ async def update_job_vtt_content(
|
|||
gcs_service.executor, _cc_blob.download_as_text
|
||||
)
|
||||
|
||||
ad_text = ad_vtt_fixed if request.audio_description_vtt else None
|
||||
ad_text = request.audio_description_vtt
|
||||
if not ad_text:
|
||||
ad_gcs = lang_output.get("ad_vtt_gcs")
|
||||
if ad_gcs:
|
||||
|
|
@ -2025,7 +2022,7 @@ async def adjust_vtt_timing(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to adjust captions timing"
|
||||
) from None
|
||||
)
|
||||
|
||||
# Adjust audio description VTT if requested and exists
|
||||
if request.adjust_audio_description and "ad_vtt_gcs" in outputs:
|
||||
|
|
@ -2056,7 +2053,7 @@ async def adjust_vtt_timing(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to adjust audio description timing"
|
||||
) from None
|
||||
)
|
||||
|
||||
if not update_operations:
|
||||
raise HTTPException(
|
||||
|
|
@ -2197,7 +2194,7 @@ async def delete_job(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to delete job: {str(e)}"
|
||||
) from None
|
||||
)
|
||||
|
||||
|
||||
async def _delete_job_gcs_assets(job_id: str, job_doc: dict):
|
||||
|
|
@ -2323,7 +2320,7 @@ async def retry_tts(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to start TTS retry"
|
||||
) from None
|
||||
)
|
||||
|
||||
return JobResponse(
|
||||
id=str(result["_id"]),
|
||||
|
|
@ -2461,7 +2458,7 @@ async def retry_job(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to start retry task",
|
||||
) from None
|
||||
)
|
||||
|
||||
return JobResponse(
|
||||
id=str(result["_id"]),
|
||||
|
|
@ -2931,7 +2928,7 @@ async def trigger_accessible_video_rerender(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail=f"You are not assigned to language '{language}'"
|
||||
) from None
|
||||
)
|
||||
|
||||
# Get edit state
|
||||
lang_output = job_doc.get("outputs", {}).get(language)
|
||||
|
|
@ -2951,6 +2948,7 @@ async def trigger_accessible_video_rerender(
|
|||
]
|
||||
|
||||
# Update job status to RENDERING_QC — conditional to prevent concurrent render races
|
||||
job_title = job_doc.get("title", "Untitled Job")
|
||||
transition_result = await db.jobs.update_one(
|
||||
{"_id": job_id, "status": JobStatus.PENDING_QC.value}, # Only transition from PENDING_QC
|
||||
{
|
||||
|
|
|
|||
|
|
@ -2,17 +2,15 @@
|
|||
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi import APIRouter, Depends, Query, Request
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.job import LanguageQCComment, LanguageQCState
|
||||
from ...models.user import User, UserRole
|
||||
from ...services import language_qc as lqc
|
||||
from ...services.audit_logger import audit_logger
|
||||
|
||||
router = APIRouter(tags=["language-qc"])
|
||||
|
||||
|
|
@ -133,15 +131,6 @@ async def assign_language(
|
|||
db, job_id, lang, request.linguist_user_id, current_user,
|
||||
http_request=http_request, notes=request.notes, deadline=request.deadline,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_ASSIGN,
|
||||
description=f"Language '{lang}' assigned to linguist '{request.linguist_user_id}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "linguist_user_id": request.linguist_user_id},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -160,15 +149,6 @@ async def reassign_language(
|
|||
db, job_id, lang, request.linguist_user_id, current_user,
|
||||
http_request=http_request, notes=request.notes, deadline=request.deadline,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REASSIGN,
|
||||
description=f"Language '{lang}' reassigned to linguist '{request.linguist_user_id}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "linguist_user_id": request.linguist_user_id},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -189,15 +169,6 @@ async def assign_reviewer(
|
|||
db, job_id, lang, request.reviewer_user_id, current_user,
|
||||
http_request=http_request, notes=request.notes, deadline=request.deadline,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REVIEWER_ASSIGN,
|
||||
description=f"Reviewer '{request.reviewer_user_id}' assigned to language '{lang}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "reviewer_user_id": request.reviewer_user_id},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -216,15 +187,6 @@ async def reassign_reviewer(
|
|||
db, job_id, lang, request.reviewer_user_id, current_user,
|
||||
http_request=http_request, notes=request.notes, deadline=request.deadline,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REVIEWER_REASSIGN,
|
||||
description=f"Reviewer reassigned to '{request.reviewer_user_id}' for language '{lang}', job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "reviewer_user_id": request.reviewer_user_id},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -243,6 +205,7 @@ async def bulk_assign_languages(
|
|||
"""Assign one linguist (and optionally one reviewer) to multiple languages in one call."""
|
||||
job_doc = await db["jobs"].find_one({"_id": job_id})
|
||||
if not job_doc:
|
||||
from fastapi import HTTPException
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
available = list((job_doc.get("outputs") or {}).keys())
|
||||
|
|
@ -286,21 +249,6 @@ async def bulk_assign_languages(
|
|||
|
||||
assigned.append(lang)
|
||||
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_BULK_ASSIGN,
|
||||
description=f"Bulk assignment for job {job_id}: {len(assigned)} language(s) assigned to linguist '{request.linguist_user_id}'",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={
|
||||
"languages": assigned,
|
||||
"linguist_user_id": request.linguist_user_id,
|
||||
"reviewer_user_id": request.reviewer_user_id,
|
||||
"skipped": skipped,
|
||||
"errors": errors,
|
||||
},
|
||||
)
|
||||
return BulkAssignResponse(assigned=assigned, skipped=skipped, errors=errors)
|
||||
|
||||
|
||||
|
|
@ -318,15 +266,6 @@ async def start_linguist_work(
|
|||
):
|
||||
"""Linguist opens the language — pending → in_progress."""
|
||||
state = await lqc.start_linguist_work(db, job_id, lang, current_user)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_START_WORK,
|
||||
description=f"Linguist started work on language '{lang}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -342,15 +281,6 @@ async def submit_for_review(
|
|||
):
|
||||
"""Linguist submits — in_progress → pending_review. Notifies reviewer by email."""
|
||||
state = await lqc.submit_for_review(db, job_id, lang, current_user, http_request=http_request)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_SUBMIT,
|
||||
description=f"Language '{lang}' submitted for review for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -366,15 +296,6 @@ async def open_review(
|
|||
):
|
||||
"""Reviewer opens the review — pending_review → in_review."""
|
||||
state = await lqc.open_review(db, job_id, lang, current_user, http_request=http_request)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_OPEN_REVIEW,
|
||||
description=f"Reviewer opened review for language '{lang}', job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -394,15 +315,6 @@ async def approve_language(
|
|||
state = await lqc.approve_language(
|
||||
db, job_id, lang, current_user, http_request=http_request, notes=request.notes,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_APPROVE,
|
||||
description=f"Language '{lang}' approved for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "notes": request.notes},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -420,15 +332,6 @@ async def reject_language(
|
|||
state = await lqc.reject_language(
|
||||
db, job_id, lang, current_user, request.notes, category=request.category, http_request=http_request,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REJECT,
|
||||
description=f"Language '{lang}' rejected for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "notes": request.notes, "category": request.category},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -441,7 +344,6 @@ async def mark_cue_reviewed(
|
|||
job_id: str,
|
||||
lang: str,
|
||||
request: MarkCueReviewedRequest,
|
||||
http_request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -450,6 +352,10 @@ async def mark_cue_reviewed(
|
|||
if not job_doc:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
update: dict = {
|
||||
f"language_qc.{lang}.reviewed_cues": 1, # will use $inc below
|
||||
"updated_at": datetime.utcnow(),
|
||||
}
|
||||
inc_op: dict = {f"language_qc.{lang}.reviewed_cues": 1}
|
||||
set_op: dict = {"updated_at": datetime.utcnow()}
|
||||
|
||||
|
|
@ -477,15 +383,6 @@ async def reopen_language(
|
|||
state = await lqc.reopen_language(
|
||||
db, job_id, lang, current_user, http_request=http_request, notes=request.notes,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REOPEN,
|
||||
description=f"Language '{lang}' reopened for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "notes": request.notes},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -506,15 +403,6 @@ async def add_comment(
|
|||
comment = await lqc.add_comment(
|
||||
db, job_id, lang, current_user, request.body, http_request=http_request,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_COMMENT,
|
||||
description=f"Comment added to language '{lang}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "comment_id": str(comment.id) if hasattr(comment, "id") else None},
|
||||
)
|
||||
return comment
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,14 +14,13 @@ endpoints coexist without data duplication.
|
|||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ...core.authz import bump_user_membership_cache
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user, require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.membership import MemberDetail, MembershipCreate, MembershipUpdate
|
||||
from ...models.organization import (
|
||||
Organization,
|
||||
|
|
@ -30,7 +29,6 @@ from ...models.organization import (
|
|||
OrgRole,
|
||||
)
|
||||
from ...models.user import User, UserRole
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...services.membership_service import (
|
||||
get_membership,
|
||||
get_memberships_for_user,
|
||||
|
|
@ -121,7 +119,6 @@ class _OrgCreate(BaseModel):
|
|||
@router.post("", response_model=Organization, status_code=201)
|
||||
async def create_organization(
|
||||
body: OrganizationCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -140,25 +137,13 @@ async def create_organization(
|
|||
"updated_at": now,
|
||||
}
|
||||
await db.clients.insert_one(doc)
|
||||
org = _org_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_CREATE,
|
||||
description=f"Organization '{org.name}' created",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=str(org.id),
|
||||
resource_name=org.name,
|
||||
details={"slug": org.slug},
|
||||
)
|
||||
return org
|
||||
return _org_from_doc(doc)
|
||||
|
||||
|
||||
@router.patch("/{org_id}", response_model=Organization)
|
||||
async def update_organization(
|
||||
org_id: str,
|
||||
body: OrganizationUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -175,18 +160,7 @@ async def update_organization(
|
|||
|
||||
await db.clients.update_one({"_id": org_id}, {"$set": updates})
|
||||
updated = {**doc, **updates}
|
||||
org = _org_from_doc(updated)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_UPDATE,
|
||||
description=f"Organization '{org.name}' updated",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=str(org.id),
|
||||
resource_name=org.name,
|
||||
details={k: v for k, v in updates.items() if k != "updated_at"},
|
||||
)
|
||||
return org
|
||||
return _org_from_doc(updated)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -208,7 +182,6 @@ async def list_members(
|
|||
async def add_member(
|
||||
org_id: str,
|
||||
body: MembershipCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -224,15 +197,6 @@ async def add_member(
|
|||
members = await list_org_members(org_id, db)
|
||||
for m in members:
|
||||
if m.user_id == body.user_id:
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_MEMBER_ADD,
|
||||
description=f"Member '{body.user_id}' added to organization '{org_id}' with role '{body.role_in_org}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=org_id,
|
||||
details={"user_id": body.user_id, "role": body.role_in_org},
|
||||
)
|
||||
return m
|
||||
raise HTTPException(status_code=500, detail="Membership created but could not be retrieved")
|
||||
|
||||
|
|
@ -242,7 +206,6 @@ async def update_member(
|
|||
org_id: str,
|
||||
user_id: str,
|
||||
body: MembershipUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -259,15 +222,6 @@ async def update_member(
|
|||
members = await list_org_members(org_id, db)
|
||||
for m in members:
|
||||
if m.user_id == user_id:
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_MEMBER_UPDATE,
|
||||
description=f"Member '{user_id}' role updated in organization '{org_id}' to '{body.role_in_org}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=org_id,
|
||||
details={"user_id": user_id, "role": body.role_in_org},
|
||||
)
|
||||
return m
|
||||
raise HTTPException(status_code=500, detail="Could not retrieve updated membership")
|
||||
|
||||
|
|
@ -276,7 +230,6 @@ async def update_member(
|
|||
async def remove_member(
|
||||
org_id: str,
|
||||
user_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -290,15 +243,6 @@ async def remove_member(
|
|||
|
||||
await remove_membership(user_id, org_id, db)
|
||||
await bump_user_membership_cache(user_id)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_MEMBER_REMOVE,
|
||||
description=f"Member '{user_id}' removed from organization '{org_id}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=org_id,
|
||||
details={"user_id": user_id, "role": existing.role_in_org},
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -4,17 +4,15 @@ import secrets
|
|||
from datetime import datetime, timedelta
|
||||
from typing import Literal
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ...core.config import settings
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.share_token import ShareTokenResponse
|
||||
from ...models.user import User, UserRole
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...services.gcs import get_signed_download_url
|
||||
|
||||
router = APIRouter(tags=["share"])
|
||||
|
|
@ -71,7 +69,6 @@ class ClientDecisionResponse(BaseModel):
|
|||
async def create_share_token(
|
||||
job_id: str,
|
||||
request: CreateShareTokenRequest,
|
||||
http_request: Request,
|
||||
current_user: User = Depends(require_roles(
|
||||
UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
|
||||
)),
|
||||
|
|
@ -98,15 +95,6 @@ async def create_share_token(
|
|||
"label": request.label,
|
||||
}
|
||||
await db[_TOKENS].insert_one(token_doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.SHARE_TOKEN_CREATE,
|
||||
description=f"Share token created for job '{job_id}'",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"token_id": token_id, "label": request.label, "expires_in_days": request.expires_in_days},
|
||||
)
|
||||
|
||||
return ShareTokenResponse(
|
||||
id=token_id,
|
||||
|
|
@ -153,7 +141,6 @@ async def list_share_tokens(
|
|||
async def revoke_share_token(
|
||||
job_id: str,
|
||||
token_id: str,
|
||||
http_request: Request,
|
||||
current_user: User = Depends(require_roles(
|
||||
UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
|
||||
)),
|
||||
|
|
@ -166,15 +153,6 @@ async def revoke_share_token(
|
|||
)
|
||||
if result.matched_count == 0:
|
||||
raise HTTPException(status_code=404, detail="Token not found")
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.SHARE_TOKEN_REVOKE,
|
||||
description=f"Share token '{token_id}' revoked for job '{job_id}'",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"token_id": token_id},
|
||||
)
|
||||
|
||||
|
||||
# ── Public route (no auth) ────────────────────────────────────────────────────
|
||||
|
|
@ -249,7 +227,6 @@ async def get_public_job_preview(
|
|||
async def client_decision(
|
||||
token: str,
|
||||
request: ClientDecisionRequest,
|
||||
http_request: Request,
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Submit client approval or rejection via a share link. No authentication required."""
|
||||
|
|
@ -328,22 +305,6 @@ async def client_decision(
|
|||
detail="Decision could not be submitted — the job status may have changed"
|
||||
)
|
||||
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.SHARE_CLIENT_DECISION,
|
||||
description=f"Client '{request.client_name or 'anonymous'}' submitted decision '{request.action}' for job '{job_id}' via share token",
|
||||
user=None,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={
|
||||
"action": request.action,
|
||||
"token": token,
|
||||
"client_name": request.client_name,
|
||||
"new_status": new_status,
|
||||
"notes": request.notes,
|
||||
},
|
||||
)
|
||||
|
||||
if request.action == "approve":
|
||||
try:
|
||||
from ...tasks.notify import notify_client_task
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ async def restore_vtt_version(
|
|||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Version snapshot created (v{new_ver.version}) but live file update failed: {exc}",
|
||||
) from None
|
||||
)
|
||||
|
||||
# Update the GCS URI pointer in the job document
|
||||
gcs_uri_key = "captions_vtt_gcs" if kind == "captions" else "ad_vtt_gcs"
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ async def _cached_memberships(
|
|||
"""Load memberships, with Redis cache (60s TTL)."""
|
||||
cache_key = f"mem:user:{user_id}"
|
||||
try:
|
||||
redis = await get_redis()
|
||||
redis = get_redis()
|
||||
if redis:
|
||||
cached = await redis.get(cache_key)
|
||||
if cached:
|
||||
|
|
@ -77,7 +77,7 @@ async def _cached_memberships(
|
|||
memberships = await _load_memberships(user_id, db)
|
||||
|
||||
try:
|
||||
redis = await get_redis()
|
||||
redis = get_redis()
|
||||
if redis:
|
||||
await redis.setex(
|
||||
cache_key,
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ class Settings(BaseSettings):
|
|||
|
||||
# GCP
|
||||
gcp_project_id: str
|
||||
gcp_location: str = "us-central1"
|
||||
gcs_bucket: str = "accessible-video"
|
||||
google_application_credentials: str = ""
|
||||
|
||||
|
|
@ -223,8 +222,8 @@ class Settings(BaseSettings):
|
|||
|
||||
# Gemini TTS Model Options
|
||||
gemini_tts_models: dict[str, str] = {
|
||||
"flash": "gemini-3.1-flash-tts-preview", # Fast, cost-efficient (Preview)
|
||||
"pro": "gemini-2.5-pro-tts", # Higher quality (GA)
|
||||
"flash": "gemini-3.1-flash-tts-preview", # Fast, cost-efficient
|
||||
"pro": "gemini-2.5-pro-preview-tts", # Higher quality
|
||||
}
|
||||
|
||||
# Gemini TTS Style Presets - prompts prepended to text for style control
|
||||
|
|
|
|||
|
|
@ -58,4 +58,4 @@ def decode_token(token: str) -> dict[str, Any]:
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Could not validate credentials",
|
||||
) from None
|
||||
)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ class VTTCue:
|
|||
end_time: float # seconds
|
||||
text: str
|
||||
identifier: str | None = None
|
||||
settings: str = ""
|
||||
|
||||
|
||||
class VTTParser:
|
||||
|
|
@ -38,11 +37,10 @@ class VTTParser:
|
|||
|
||||
# Parse timing line
|
||||
if " --> " in line:
|
||||
timing_match = re.match(r'([\d:.,]+)\s+-->\s+([\d:.,]+)\s*(.*)', line)
|
||||
timing_match = re.match(r'([\d:.,]+)\s+-->\s+([\d:.,]+)', line)
|
||||
if timing_match:
|
||||
start_time = VTTParser._parse_timestamp(timing_match.group(1))
|
||||
end_time = VTTParser._parse_timestamp(timing_match.group(2))
|
||||
settings = timing_match.group(3).strip()
|
||||
|
||||
# Collect text lines until empty line or next cue
|
||||
i += 1
|
||||
|
|
@ -55,8 +53,7 @@ class VTTParser:
|
|||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
text="\n".join(text_lines),
|
||||
identifier=identifier,
|
||||
settings=settings,
|
||||
identifier=identifier
|
||||
))
|
||||
else:
|
||||
i += 1
|
||||
|
|
@ -73,13 +70,10 @@ class VTTParser:
|
|||
if cue.identifier:
|
||||
lines.append(cue.identifier)
|
||||
|
||||
# Add timing line (preserve cue settings like line:0%)
|
||||
# Add timing line
|
||||
start_timestamp = VTTParser._format_timestamp(cue.start_time)
|
||||
end_timestamp = VTTParser._format_timestamp(cue.end_time)
|
||||
timing_line = f"{start_timestamp} --> {end_timestamp}"
|
||||
if cue.settings:
|
||||
timing_line += f" {cue.settings}"
|
||||
lines.append(timing_line)
|
||||
lines.append(f"{start_timestamp} --> {end_timestamp}")
|
||||
|
||||
# Add text (can be multi-line)
|
||||
lines.append(cue.text)
|
||||
|
|
@ -162,11 +156,11 @@ class VTTEditor:
|
|||
raise ValueError(
|
||||
f"Cue count mismatch for {lang}: EN has {len(en_cues)}, target has {len(tgt_cues)}"
|
||||
)
|
||||
for i, (en, tgt) in enumerate(zip(en_cues, tgt_cues, strict=True)):
|
||||
if en.start_time != tgt.start_time or en.end_time != tgt.end_time:
|
||||
for i, (en, tgt) in enumerate(zip(en_cues, tgt_cues)):
|
||||
if en.start != tgt.start or en.end != tgt.end:
|
||||
raise ValueError(
|
||||
f"Timestamp mismatch for {lang} cue {i}: "
|
||||
f"EN {en.start_time}-->{en.end_time}, target {tgt.start_time}-->{tgt.end_time}"
|
||||
f"EN {en.start}-->{en.end}, target {tgt.start}-->{tgt.end}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -207,20 +201,6 @@ class VTTEditor:
|
|||
|
||||
return len(errors) == 0, errors
|
||||
|
||||
@staticmethod
|
||||
def fix_overlapping_cues(vtt_content: str) -> str:
|
||||
"""Trim end_time of each cue so it does not overlap the next cue's start_time."""
|
||||
cues = VTTParser.parse(vtt_content)
|
||||
for i in range(1, len(cues)):
|
||||
if cues[i].start_time < cues[i - 1].end_time:
|
||||
# Clamp previous cue end to 1ms before next cue start
|
||||
new_end = cues[i].start_time - 0.001
|
||||
# Never let end_time go at or below start_time
|
||||
if new_end <= cues[i - 1].start_time:
|
||||
new_end = cues[i - 1].start_time + 0.001
|
||||
cues[i - 1].end_time = new_end
|
||||
return VTTParser.build(cues)
|
||||
|
||||
@staticmethod
|
||||
def get_cue_count(vtt_content: str) -> int:
|
||||
"""Get the number of cues in VTT content"""
|
||||
|
|
@ -256,7 +236,7 @@ class VTTEditor:
|
|||
)
|
||||
return False, errors
|
||||
|
||||
for i, (src, tgt) in enumerate(zip(source_cues, translated_cues, strict=False)):
|
||||
for i, (src, tgt) in enumerate(zip(source_cues, translated_cues)):
|
||||
if abs(src.start_time - tgt.start_time) > 0.001:
|
||||
errors.append(
|
||||
f"Cue {i + 1}: start time changed "
|
||||
|
|
@ -286,33 +266,3 @@ class VTTEditor:
|
|||
|
||||
return VTTParser.build(cues)
|
||||
|
||||
# DCMP §6.01 filler patterns per language (whole-word, case-insensitive)
|
||||
_FILLER_PATTERNS: dict[str, str] = {
|
||||
"en": r'\b(um+|uh+|ah+|er+|hmm+|you know|i mean|sort of|kind of|basically|literally|honestly|actually|right\?|so yeah)\b',
|
||||
"es": r'\b(eh+|este|o sea|pues|bueno|o sea que|mmm+)\b',
|
||||
"fr": r'\b(euh+|beh|ben|donc|quoi|enfin|voilà|genre)\b',
|
||||
"de": r'\b(äh+|ähm+|halt|ne|also|naja|sozusagen|quasi)\b',
|
||||
"it": r'\b(ehm+|allora|cioè|tipo|praticamente|insomma|ecco)\b',
|
||||
"nl": r'\b(eh+|nou|zeg|eigenlijk|gewoon|toch|zo van|hè)\b',
|
||||
"pt": r'\b(ahn+|hã+|né|sabe|tipo|então|assim)\b',
|
||||
"pl": r'\b(no|że|bo|znaczy|właśnie|jakby|wiesz)\b',
|
||||
"uk": r'\b(ну+|ем+|типу|знаєш|значить|власне|от)\b',
|
||||
"ru": r'\b(ну+|эм+|типа|знаешь|значит|вот|собственно)\b',
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def clean_disfluencies(vtt_content: str, lang: str) -> str:
|
||||
"""Remove filler words and hesitations per DCMP §6.01 for supported languages."""
|
||||
pattern = VTTEditor._FILLER_PATTERNS.get(lang.split("-")[0].lower())
|
||||
if not pattern:
|
||||
return vtt_content
|
||||
cues = VTTParser.parse(vtt_content)
|
||||
compiled = re.compile(pattern, re.IGNORECASE)
|
||||
for cue in cues:
|
||||
cleaned = compiled.sub("", cue.text)
|
||||
# Collapse multiple spaces and strip leading/trailing punctuation artifacts
|
||||
cleaned = re.sub(r'[ \t]{2,}', ' ', cleaned).strip().strip(',').strip()
|
||||
if cleaned:
|
||||
cue.text = cleaned
|
||||
return VTTParser.build(cues)
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ class RateLimiter:
|
|||
) -> tuple[bool, dict[str, int]]:
|
||||
"""
|
||||
Check if request is allowed under rate limit.
|
||||
|
||||
|
||||
Returns:
|
||||
Tuple of (is_allowed, rate_limit_info)
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -57,8 +57,8 @@ class RequestValidator:
|
|||
r"%2e%2e%2f",
|
||||
r"%2e%2e\\",
|
||||
|
||||
# Command injection (removed $ and ; — semicolons are common in natural language)
|
||||
r"[&|`](?!\s*$)",
|
||||
# Command injection (removed $ to allow MongoDB operators in controlled contexts)
|
||||
r"[;&|`](?!\s*$)", # Allow $ but not as command separator
|
||||
r"\b(rm|wget|curl|nc|bash|sh|cmd|powershell)\b\s+",
|
||||
|
||||
# MongoDB injection — NoSQL operator abuse
|
||||
|
|
@ -125,9 +125,9 @@ class RequestValidator:
|
|||
subtitle_extensions = {'vtt', 'srt', 'txt'}
|
||||
|
||||
if expected_type == "video" and ext not in video_extensions:
|
||||
raise ValidationError(f"Invalid video file extension: {ext}") from None
|
||||
raise ValidationError(f"Invalid video file extension: {ext}")
|
||||
elif expected_type == "subtitle" and ext not in subtitle_extensions:
|
||||
raise ValidationError(f"Invalid subtitle file extension: {ext}") from None
|
||||
raise ValidationError(f"Invalid subtitle file extension: {ext}")
|
||||
return
|
||||
|
||||
if expected_type == "video" and detected_type not in self.allowed_video_types:
|
||||
|
|
@ -186,10 +186,7 @@ class RequestValidator:
|
|||
return payload
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValidationError(f"Invalid JSON: {e}") from e
|
||||
|
||||
# Fields that contain free-form natural language — skip injection pattern checks
|
||||
_FREETEXT_FIELDS = {"captions_vtt", "audio_description_vtt", "text", "notes", "change_note", "description"}
|
||||
raise ValidationError(f"Invalid JSON: {e}")
|
||||
|
||||
def _validate_json_values(self, obj: Any, path: str = "root") -> None:
|
||||
"""Recursively validate JSON values."""
|
||||
|
|
@ -199,9 +196,7 @@ class RequestValidator:
|
|||
|
||||
for key, value in obj.items():
|
||||
self.validate_string_content(key, f"{path}.key")
|
||||
# Skip pattern scanning for free-text fields (VTT content, notes, etc.)
|
||||
if key not in self._FREETEXT_FIELDS:
|
||||
self._validate_json_values(value, f"{path}.{key}")
|
||||
self._validate_json_values(value, f"{path}.{key}")
|
||||
|
||||
elif isinstance(obj, list):
|
||||
if len(obj) > 1000: # Prevent large arrays
|
||||
|
|
|
|||
|
|
@ -141,10 +141,10 @@ class MigrationManager:
|
|||
async def migrate_up(self, target_version: str | None = None) -> list[str]:
|
||||
"""
|
||||
Apply migrations up to the target version.
|
||||
|
||||
|
||||
Args:
|
||||
target_version: Version to migrate to. If None, applies all pending migrations.
|
||||
|
||||
|
||||
Returns:
|
||||
List of applied migration versions.
|
||||
"""
|
||||
|
|
@ -189,10 +189,10 @@ class MigrationManager:
|
|||
async def migrate_down(self, target_version: str) -> list[str]:
|
||||
"""
|
||||
Rollback migrations down to the target version.
|
||||
|
||||
|
||||
Args:
|
||||
target_version: Version to rollback to.
|
||||
|
||||
|
||||
Returns:
|
||||
List of rolled back migration versions.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
"""Entry point for running migrations: python -m app.migrations.run"""
|
||||
import asyncio
|
||||
|
||||
from app.core.database import close_mongo_connection, connect_to_mongo
|
||||
from app.core.database import connect_to_mongo, close_mongo_connection
|
||||
from app.migrations.migrator import MigrationManager
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,26 +0,0 @@
|
|||
"""Backfill source_has_ad=False on existing jobs and job_briefs."""
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
||||
class Migration(Migration):
|
||||
version = "2026-05-08-000000"
|
||||
description = "Add source_has_ad field to jobs.source and job_briefs"
|
||||
|
||||
async def up(self) -> None:
|
||||
db = self.db
|
||||
|
||||
jobs_result = await db.jobs.update_many(
|
||||
{"source.source_has_ad": {"$exists": False}},
|
||||
{"$set": {"source.source_has_ad": False}},
|
||||
)
|
||||
briefs_result = await db.job_briefs.update_many(
|
||||
{"source_has_ad": {"$exists": False}},
|
||||
{"$set": {"source_has_ad": False}},
|
||||
)
|
||||
|
||||
print(f"✅ Backfilled source_has_ad on {jobs_result.modified_count} jobs, {briefs_result.modified_count} job_briefs")
|
||||
|
||||
async def down(self) -> None:
|
||||
db = self.db
|
||||
await db.jobs.update_many({}, {"$unset": {"source.source_has_ad": ""}})
|
||||
await db.job_briefs.update_many({}, {"$unset": {"source_has_ad": ""}})
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
"""Audit log model for tracking sensitive operations."""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from bson import ObjectId
|
||||
|
|
@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
|
|||
from .user import PyObjectId
|
||||
|
||||
|
||||
class AuditAction(StrEnum):
|
||||
class AuditAction(str, Enum):
|
||||
"""Enumeration of auditable actions."""
|
||||
|
||||
# Authentication actions
|
||||
|
|
@ -77,49 +77,6 @@ class AuditAction(StrEnum):
|
|||
GLOSSARY_ACTIVATE = "glossary.activate"
|
||||
GLOSSARY_ARCHIVE = "glossary.archive"
|
||||
|
||||
# Client management
|
||||
CLIENT_CREATE = "client.create"
|
||||
CLIENT_UPDATE = "client.update"
|
||||
CLIENT_DEACTIVATE = "client.deactivate"
|
||||
CLIENT_PM_ASSIGN = "client.pm_assign"
|
||||
CLIENT_PM_REMOVE = "client.pm_remove"
|
||||
CLIENT_TEAM_CREATE = "client.team_create"
|
||||
CLIENT_TEAM_UPDATE = "client.team_update"
|
||||
CLIENT_TEAM_DELETE = "client.team_delete"
|
||||
CLIENT_TEAM_MEMBER_ADD = "client.team_member_add"
|
||||
CLIENT_TEAM_MEMBER_REMOVE = "client.team_member_remove"
|
||||
CLIENT_PROJECT_CREATE = "client.project_create"
|
||||
CLIENT_PROJECT_UPDATE = "client.project_update"
|
||||
CLIENT_PROJECT_ARCHIVE = "client.project_archive"
|
||||
|
||||
# Organization management
|
||||
ORG_CREATE = "org.create"
|
||||
ORG_UPDATE = "org.update"
|
||||
ORG_MEMBER_ADD = "org.member_add"
|
||||
ORG_MEMBER_UPDATE = "org.member_update"
|
||||
ORG_MEMBER_REMOVE = "org.member_remove"
|
||||
|
||||
# Invitations
|
||||
INVITATION_CREATE = "invitation.create"
|
||||
INVITATION_REVOKE = "invitation.revoke"
|
||||
INVITATION_ACCEPT = "invitation.accept"
|
||||
|
||||
# Language QC (additional)
|
||||
LANGUAGE_QC_BULK_ASSIGN = "language_qc.bulk_assign"
|
||||
LANGUAGE_QC_START_WORK = "language_qc.start_work"
|
||||
LANGUAGE_QC_MARK_CUE_REVIEWED = "language_qc.mark_cue_reviewed"
|
||||
|
||||
# Brief management
|
||||
BRIEF_CREATE = "brief.create"
|
||||
BRIEF_UPDATE = "brief.update"
|
||||
BRIEF_SUBMIT = "brief.submit"
|
||||
BRIEF_APPROVE = "brief.approve"
|
||||
|
||||
# Share tokens
|
||||
SHARE_TOKEN_CREATE = "share.token_create"
|
||||
SHARE_TOKEN_REVOKE = "share.token_revoke"
|
||||
SHARE_CLIENT_DECISION = "share.client_decision"
|
||||
|
||||
# Security events
|
||||
RATE_LIMIT_EXCEEDED = "security.rate_limit.exceeded"
|
||||
VALIDATION_FAILURE = "security.validation.failure"
|
||||
|
|
@ -127,7 +84,7 @@ class AuditAction(StrEnum):
|
|||
SUSPICIOUS_ACTIVITY = "security.suspicious.activity"
|
||||
|
||||
|
||||
class AuditLogSeverity(StrEnum):
|
||||
class AuditLogSeverity(str, Enum):
|
||||
"""Severity levels for audit events."""
|
||||
|
||||
INFO = "info" # Normal operations
|
||||
|
|
|
|||
|
|
@ -91,9 +91,6 @@ class GlossaryResponse(BaseModel):
|
|||
source: GlossarySource
|
||||
status: GlossaryStatus
|
||||
current_version_id: str | None = None
|
||||
current_version_embedding_status: EmbeddingStatus | None = None
|
||||
current_version_embedded_count: int | None = None
|
||||
current_version_term_count: int | None = None
|
||||
created_at: datetime
|
||||
created_by: str
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from enum import Enum
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field, constr
|
||||
|
|
@ -7,7 +7,7 @@ from pydantic import BaseModel, Field, constr
|
|||
FailureStep = Literal["ingestion", "ai_processing", "translation", "tts", "render"]
|
||||
|
||||
|
||||
class JobStatus(StrEnum):
|
||||
class JobStatus(str, Enum):
|
||||
CREATED = "created"
|
||||
INGESTING = "ingesting"
|
||||
AI_PROCESSING = "ai_processing"
|
||||
|
|
@ -50,7 +50,6 @@ class Source(BaseModel):
|
|||
language: constr(min_length=2, max_length=10) = "en" # Final source language (from detection or explicit)
|
||||
language_hint: str | None = None # User-provided hint for non-English videos
|
||||
detected_language: str | None = None # AI-detected language from Gemini
|
||||
source_has_ad: bool = False # Source video already contains professional audio descriptions
|
||||
|
||||
|
||||
class TTSPreferences(BaseModel):
|
||||
|
|
@ -158,7 +157,7 @@ class Review(BaseModel):
|
|||
|
||||
# ── Per-language QC ───────────────────────────────────────────────────────────
|
||||
|
||||
class LanguageQCStatus(StrEnum):
|
||||
class LanguageQCStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
IN_PROGRESS = "in_progress" # linguist is working
|
||||
PENDING_REVIEW = "pending_review" # linguist submitted, awaiting reviewer
|
||||
|
|
@ -282,7 +281,6 @@ class JobCreate(BaseModel):
|
|||
language_hint: str | None = None # Optional hint when source_is_english=False
|
||||
requested_outputs: RequestedOutputs
|
||||
brand_context: str | None = None # Comma-separated brand names present in the video (e.g. "Sellotape, Coca-Cola")
|
||||
source_has_ad: bool = False # Source video already contains professional audio descriptions
|
||||
|
||||
|
||||
class JobUpdate(BaseModel):
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
"""Job Brief model — pre-approved work order submitted before job creation."""
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .job import RequestedOutputs
|
||||
|
||||
|
||||
class BriefStatus(StrEnum):
|
||||
class BriefStatus(str, Enum):
|
||||
DRAFT = "draft"
|
||||
SUBMITTED = "submitted"
|
||||
APPROVED = "approved"
|
||||
|
|
@ -45,7 +45,6 @@ class JobBriefCreate(BaseModel):
|
|||
deadline: datetime | None = None
|
||||
project_id: str | None = None
|
||||
assignee_id: str | None = None
|
||||
source_has_ad: bool = False # Source video already contains professional audio descriptions
|
||||
|
||||
|
||||
class JobBriefUpdate(BaseModel):
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class OrgRole(StrEnum):
|
||||
class OrgRole(str, Enum):
|
||||
OWNER = "owner"
|
||||
ADMIN = "admin"
|
||||
MANAGER = "manager"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from enum import Enum
|
||||
from typing import Annotated
|
||||
|
||||
from bson import ObjectId
|
||||
|
|
@ -18,7 +18,7 @@ def validate_object_id(v) -> str:
|
|||
PyObjectId = Annotated[str, BeforeValidator(validate_object_id)]
|
||||
|
||||
|
||||
class UserRole(StrEnum):
|
||||
class UserRole(str, Enum):
|
||||
CLIENT = "client"
|
||||
REVIEWER = "reviewer"
|
||||
LINGUIST = "linguist"
|
||||
|
|
@ -27,7 +27,7 @@ class UserRole(StrEnum):
|
|||
ADMIN = "admin"
|
||||
|
||||
|
||||
class AuthProvider(StrEnum):
|
||||
class AuthProvider(str, Enum):
|
||||
LOCAL = "local"
|
||||
MICROSOFT = "microsoft"
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ You are given a video. Return a JSON object with:
|
|||
- captions_vtt: a valid WebVTT file as a single string, with accurate timings and no styling (in the detected language)
|
||||
- audio_description_vtt: a valid WebVTT file as a single string, describing key visual elements (no spoilers), synchronized with the program (MUST be written in the detected language)
|
||||
{SDH_FIELD}
|
||||
{SOURCE_HAS_AD}
|
||||
|
||||
CRITICAL LANGUAGE REQUIREMENT:
|
||||
- First, detect the language spoken in the video
|
||||
|
|
@ -37,7 +36,7 @@ CRITICAL TIMING REQUIREMENTS:
|
|||
- Each caption cue should end exactly when the speaker finishes that phrase/sentence
|
||||
- Listen carefully to detect natural speech pauses and word boundaries
|
||||
- Avoid starting captions too early or ending them too late
|
||||
- Caption ALL audible speech — include off-screen narrators, voiceover, and any speaker not visible on screen. Do NOT omit speech because the speaker is not visible or because it plays over non-dialogue segments.
|
||||
- Ensure captions align with lip movement and speech rhythm
|
||||
- For audio descriptions, time them during natural speech gaps or over non-dialogue audio
|
||||
- Validate that all timestamps are monotonically increasing (each cue starts after the previous one ends)
|
||||
|
||||
|
|
@ -58,14 +57,6 @@ CAPTION FORMATTING (DCMP standard):
|
|||
- Minimum caption duration: approximately 1.3 seconds. Maximum: 6 seconds
|
||||
- Use mixed case. Use ALL CAPS only for screaming or shouting
|
||||
|
||||
DISFLUENCY REMOVAL (DCMP §6.01):
|
||||
- MANDATORY: Never include filler words, false starts, or hesitations in captions — remove them silently
|
||||
- English fillers to remove: "um", "uh", "ah", "er", "hmm", "you know", "I mean", "sort of", "kind of", "basically", "literally", "honestly"
|
||||
- Language-specific fillers: French "euh"/"beh"/"ben"/"genre", German "äh"/"ähm"/"halt"/"also", Spanish "eh"/"este"/"o sea"/"pues", Italian "ehm"/"allora"/"cioè"/"tipo", Dutch "eh"/"nou"/"zeg"/"eigenlijk", Portuguese "ahn"/"né"/"sabe"/"tipo"
|
||||
- Remove false starts when the speaker self-corrects immediately (e.g., "I was — I went to the store" → "I went to the store")
|
||||
- Do NOT remove meaningful repetition, emphasis, or intentional stylistic choices
|
||||
- When in doubt whether a word is a filler or content: omit it — clean captions are preferred over over-inclusive ones
|
||||
|
||||
SOUND AND MUSIC FORMATTING (DCMP standard):
|
||||
- Sound effects: lowercase in square brackets — e.g., [door slams], [footsteps approaching]
|
||||
- Use present participle for sustained sounds: [dog barking]; use third person for abrupt sounds: [dog barks]
|
||||
|
|
@ -78,9 +69,7 @@ SOUND AND MUSIC FORMATTING (DCMP standard):
|
|||
|
||||
CAPTION PLACEMENT:
|
||||
- Captions are normally positioned at the bottom of the screen
|
||||
- CRITICAL: When ANY of the following are visible at the BOTTOM of the frame during a caption cue — on-screen text, lower-thirds, name plates, location titles, graphics, logos, product labels, URLs, or any visual information — you MUST add the VTT cue setting "line:0%" to move that cue to the top of the screen. Format: "00:00:01.000 --> 00:00:03.000 line:0%"
|
||||
- When in doubt whether bottom content conflicts with captions, use "line:0%" — it is better to be at the top than to obstruct important on-screen information
|
||||
- Example: if a lower-third name plate is visible at seconds 0:05–0:08, all caption cues overlapping that range must have "line:0%"
|
||||
- When visible text, graphics, logos, or on-screen information appear at the bottom of the frame during a caption cue, add the VTT cue setting "line:0%" to move that caption to the top — format: "00:00:01.000 --> 00:00:03.000 line:0%"
|
||||
|
||||
ETHICAL GUIDELINES FOR DESCRIBING PEOPLE (DCMP standard):
|
||||
- Consistently identify people/characters by name. When a name is not yet known, identify by the most obvious visible attribute (e.g., "the person in the red jacket") until the name is established, then switch to the name and use it consistently
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ You are given a video. Return a JSON object with:
|
|||
- captions_vtt: a valid WebVTT file as a single string, with accurate timings and no styling (written in {TARGET_LANGUAGE})
|
||||
- audio_description_vtt: a valid WebVTT file as a single string, describing key visual elements (no spoilers), synchronized with the program (written in {TARGET_LANGUAGE})
|
||||
{SDH_FIELD}
|
||||
{SOURCE_HAS_AD}
|
||||
|
||||
TARGET LANGUAGE: {TARGET_LANGUAGE}
|
||||
|
||||
|
|
@ -41,7 +40,7 @@ CRITICAL TIMING REQUIREMENTS:
|
|||
- Each caption cue should end exactly when the speaker finishes that phrase/sentence
|
||||
- Listen carefully to detect natural speech pauses and word boundaries
|
||||
- Avoid starting captions too early or ending them too late
|
||||
- Caption ALL audible speech — include off-screen narrators, voiceover, and any speaker not visible on screen. Do NOT omit speech because the speaker is not visible or because it plays over non-dialogue segments.
|
||||
- Ensure captions align with lip movement and speech rhythm
|
||||
- For audio descriptions, time them during natural speech gaps or over non-dialogue audio
|
||||
- Validate that all timestamps are monotonically increasing (each cue starts after the previous one ends)
|
||||
|
||||
|
|
@ -62,13 +61,6 @@ CAPTION FORMATTING (DCMP standard):
|
|||
- Minimum caption duration: approximately 1.3 seconds. Maximum: 6 seconds
|
||||
- Use mixed case. Use ALL CAPS only for screaming or shouting
|
||||
|
||||
DISFLUENCY REMOVAL (DCMP §6.01):
|
||||
- Do NOT include filler words, false starts, or hesitations in captions
|
||||
- Remove: "um", "uh", "ah", "er", "hmm", "like" (as filler), "you know" (as filler), "I mean" (as filler)
|
||||
- Also remove language-specific fillers (e.g., "euh"/"beh" in French, "äh"/"ähm" in German, "eh"/"este" in Spanish, "ehm"/"allora" in Italian)
|
||||
- Remove false starts when the speaker self-corrects immediately (e.g., "I was — I went to the store" → "I went to the store")
|
||||
- Do NOT remove meaningful repetition, emphasis, or intentional stylistic choices
|
||||
|
||||
SOUND AND MUSIC FORMATTING (DCMP standard):
|
||||
- Sound effects: lowercase in square brackets — e.g., [door slams], [footsteps approaching]
|
||||
- Use present participle for sustained sounds: [dog barking]; use third person for abrupt sounds: [dog barks]
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
"""Schemas for accessible video generation with embedded audio descriptions."""
|
||||
|
||||
from enum import StrEnum
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class AccessibleVideoMethod(StrEnum):
|
||||
class AccessibleVideoMethod(str, Enum):
|
||||
"""Method used for integrating audio descriptions into video."""
|
||||
OVERLAY = "overlay"
|
||||
PAUSE_INSERT = "pause_insert"
|
||||
|
|
|
|||
|
|
@ -80,7 +80,6 @@ class VttUpdateRequest(BaseModel):
|
|||
language: str | None = None # If None, defaults to source language
|
||||
if_match: str | None = None # Optimistic locking — SHA1 of expected current content
|
||||
retranslate_languages: bool = False # Re-translate all target languages from updated source VTT
|
||||
note: str | None = None # Optional save message shown in version history
|
||||
|
||||
@field_validator('captions_vtt', 'audio_description_vtt', mode='before')
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class AuditLogger:
|
|||
) -> str:
|
||||
"""
|
||||
Log an audit event.
|
||||
|
||||
|
||||
Returns:
|
||||
The ID of the created audit log entry.
|
||||
"""
|
||||
|
|
@ -94,15 +94,11 @@ class AuditLogger:
|
|||
api_version="v1"
|
||||
)
|
||||
|
||||
# Save to database — non-raising so audit failure never aborts the primary operation
|
||||
# Save to database
|
||||
collection = await self._get_collection()
|
||||
try:
|
||||
result = await collection.insert_one(audit_log.dict(by_alias=True))
|
||||
return str(result.inserted_id)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
import logging
|
||||
logging.getLogger(__name__).error("audit log insert failed: %s", exc)
|
||||
return ""
|
||||
result = await collection.insert_one(audit_log.dict(by_alias=True))
|
||||
|
||||
return str(result.inserted_id)
|
||||
|
||||
@trace_async_operation("audit_logger.query_logs")
|
||||
async def query_logs(self, query: AuditLogQuery) -> AuditLogResponse:
|
||||
|
|
|
|||
|
|
@ -1,135 +0,0 @@
|
|||
"""Align Gemini caption VTT timings against Whisper word-level timestamps.
|
||||
|
||||
Algorithm:
|
||||
For each VTT cue, tokenise its text and search for the token sequence in the
|
||||
Whisper word stream starting from the cursor position (with a look-ahead window).
|
||||
When a match of sufficient confidence is found the cue's start/end timestamps
|
||||
are replaced with the matched Whisper words' start/end. Cues that cannot be
|
||||
matched (music notation, sound effects, empty cues) keep their original Gemini
|
||||
timestamps. The result has Whisper-accurate timings early in the video and
|
||||
graceful fallbacks where Whisper didn't capture the audio.
|
||||
"""
|
||||
|
||||
import bisect
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..core.logging import get_logger
|
||||
from ..lib.vtt import VTTEditor, VTTParser
|
||||
from ..services.whisper_service import WordTimestamp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Characters to strip when comparing tokens
|
||||
_PUNCT = re.compile(r"[^\w']", re.UNICODE)
|
||||
# Tokens shorter than this are considered stop-words and excluded from matching
|
||||
_MIN_TOKEN_LEN = 2
|
||||
# Minimum fraction of cue tokens that must match Whisper words for alignment.
|
||||
# Lowered from 0.5 → 0.35 to handle Gemini paraphrasing and short cues.
|
||||
_MIN_MATCH_RATIO = 0.35
|
||||
# How many Whisper words ahead of the cursor to search for a cue's tokens.
|
||||
# Widened from 60 → 150 so the window stays valid even after several failed cues.
|
||||
_SEARCH_WINDOW = 150
|
||||
|
||||
|
||||
def _tokenise(text: str) -> list[str]:
|
||||
"""Lower-case, strip punctuation, drop short tokens."""
|
||||
return [
|
||||
t for t in (_PUNCT.sub("", w).lower() for w in text.split())
|
||||
if len(t) >= _MIN_TOKEN_LEN
|
||||
]
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Match:
|
||||
first_word_idx: int
|
||||
last_word_idx: int
|
||||
ratio: float # matched_tokens / cue_tokens
|
||||
|
||||
|
||||
def _find_match(
|
||||
cue_tokens: list[str],
|
||||
whisper_words: list[WordTimestamp],
|
||||
cursor: int,
|
||||
) -> _Match | None:
|
||||
"""Return the best match for cue_tokens starting at cursor ± SEARCH_WINDOW."""
|
||||
if not cue_tokens:
|
||||
return None
|
||||
|
||||
best: _Match | None = None
|
||||
end = min(cursor + _SEARCH_WINDOW, len(whisper_words))
|
||||
|
||||
for start_idx in range(cursor, end):
|
||||
matched = 0
|
||||
last_idx = start_idx
|
||||
token_pos = 0
|
||||
|
||||
for w_idx in range(start_idx, end):
|
||||
if token_pos >= len(cue_tokens):
|
||||
break
|
||||
w_tok = _PUNCT.sub("", whisper_words[w_idx].word).lower()
|
||||
if w_tok == cue_tokens[token_pos]:
|
||||
matched += 1
|
||||
last_idx = w_idx
|
||||
token_pos += 1
|
||||
|
||||
ratio = matched / len(cue_tokens)
|
||||
if ratio >= _MIN_MATCH_RATIO:
|
||||
if best is None or ratio > best.ratio:
|
||||
best = _Match(start_idx, last_idx, ratio)
|
||||
if ratio == 1.0:
|
||||
break # perfect match — no need to search further
|
||||
|
||||
return best
|
||||
|
||||
|
||||
def _cursor_for_time(whisper_words: list[WordTimestamp], t: float, from_idx: int) -> int:
|
||||
"""Return the index of the first Whisper word at or after time t, starting from from_idx."""
|
||||
starts = [w.start for w in whisper_words]
|
||||
idx = bisect.bisect_left(starts, t, from_idx)
|
||||
return min(idx, len(whisper_words) - 1)
|
||||
|
||||
|
||||
def align(captions_vtt: str, whisper_words: list[WordTimestamp]) -> str:
|
||||
"""Replace VTT cue timings with Whisper-accurate timestamps where possible.
|
||||
|
||||
Returns a VTT string with the same cue count as the input, with improved
|
||||
timing accuracy on cues that could be matched to Whisper word output.
|
||||
"""
|
||||
if not whisper_words:
|
||||
logger.warning("caption_aligner: no Whisper words supplied — returning original VTT")
|
||||
return captions_vtt
|
||||
|
||||
cues = VTTParser.parse(captions_vtt)
|
||||
cursor = 0
|
||||
aligned = 0
|
||||
|
||||
for cue in cues:
|
||||
tokens = _tokenise(cue.text)
|
||||
if not tokens:
|
||||
continue
|
||||
|
||||
match = _find_match(tokens, whisper_words, cursor)
|
||||
if match is None:
|
||||
# Advance cursor to the Whisper word closest to this cue's start time
|
||||
# so subsequent cues don't search from a stale position.
|
||||
cursor = _cursor_for_time(whisper_words, cue.start_time, cursor)
|
||||
continue
|
||||
|
||||
new_start = whisper_words[match.first_word_idx].start
|
||||
new_end = whisper_words[match.last_word_idx].end
|
||||
|
||||
if new_end > new_start:
|
||||
cue.start_time = new_start
|
||||
cue.end_time = new_end
|
||||
aligned += 1
|
||||
|
||||
cursor = match.last_word_idx + 1
|
||||
|
||||
logger.info(
|
||||
f"caption_aligner: aligned {aligned}/{len(cues)} cues "
|
||||
f"against {len(whisper_words)} Whisper words"
|
||||
)
|
||||
return VTTEditor.translate_preserving_timing(
|
||||
captions_vtt, [c.text for c in cues]
|
||||
) if aligned == 0 else VTTParser.build(cues)
|
||||
|
|
@ -82,7 +82,7 @@ def _celery_fallback(task: str, job_id: str, **extra_args) -> str:
|
|||
from ..tasks.translate_and_synthesize import translate_and_synthesize_task
|
||||
_langs = extra_args.get("languages")
|
||||
if isinstance(_langs, str):
|
||||
_langs = [lang for lang in _langs.split(",") if lang]
|
||||
_langs = [l for l in _langs.split(",") if l]
|
||||
translate_and_synthesize_task.delay(job_id, languages=_langs or None)
|
||||
elif task == "render":
|
||||
from ..tasks.render_accessible_video import render_accessible_video_task
|
||||
|
|
|
|||
|
|
@ -75,10 +75,8 @@ def record(
|
|||
if chars is not None:
|
||||
units["char"] = chars
|
||||
else:
|
||||
if input_tokens:
|
||||
units["token_input"] = input_tokens
|
||||
if output_tokens:
|
||||
units["token_output"] = output_tokens
|
||||
if input_tokens: units["token_input"] = input_tokens
|
||||
if output_tokens: units["token_output"] = output_tokens
|
||||
|
||||
payload: dict = {
|
||||
"source_app": settings.cost_tracker_source_app,
|
||||
|
|
@ -89,10 +87,8 @@ def record(
|
|||
"latency_ms": latency_ms,
|
||||
"status": status,
|
||||
}
|
||||
if project_id:
|
||||
payload["project_external_id"] = project_id
|
||||
if job_external_id:
|
||||
payload["job_external_id"] = job_external_id
|
||||
if project_id: payload["project_external_id"] = project_id
|
||||
if job_external_id: payload["job_external_id"] = job_external_id
|
||||
|
||||
httpx.post(
|
||||
f"{settings.cost_tracker_base_url}/usage/record",
|
||||
|
|
|
|||
|
|
@ -1,15 +1,13 @@
|
|||
"""
|
||||
Embedding service backed by Vertex AI text-multilingual-embedding-002.
|
||||
Embedding service backed by Gemini text-embedding-004.
|
||||
|
||||
Uses the google-genai SDK in Vertex AI mode (Application Default Credentials)
|
||||
instead of AI Studio so we get higher per-project quotas and no per-user limits.
|
||||
|
||||
Batch size: 100 texts per API call.
|
||||
Provides batch embedding with retry/backoff for use in glossary ingestion.
|
||||
Batch size: 100 texts per API call (API limit is 2048 but we keep it conservative
|
||||
for memory and retry ergonomics with large glossaries).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
|
||||
from google import genai
|
||||
|
|
@ -20,29 +18,15 @@ from ..core.logging import get_logger
|
|||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Vertex AI multilingual model — 768-dim, 50+ languages, higher quota than AI Studio
|
||||
_EMBED_MODEL = "text-multilingual-embedding-002"
|
||||
_EMBED_MODEL = "gemini-embedding-001"
|
||||
_BATCH_SIZE = 100
|
||||
_MAX_RETRIES = 5
|
||||
_INITIAL_BACKOFF = 4.0
|
||||
|
||||
# Matches the 'retryDelay': '7s' field in Gemini/Vertex 429 error bodies
|
||||
_RETRY_DELAY_RE = re.compile(r"'retryDelay':\s*'(\d+)s'")
|
||||
|
||||
|
||||
def _parse_retry_delay(exc: Exception) -> float | None:
|
||||
"""Extract the server-suggested retry delay from a 429 error."""
|
||||
m = _RETRY_DELAY_RE.search(str(exc))
|
||||
return float(m.group(1)) if m else None
|
||||
_MAX_RETRIES = 3
|
||||
_INITIAL_BACKOFF = 2.0
|
||||
|
||||
|
||||
class EmbeddingService:
|
||||
def __init__(self) -> None:
|
||||
self._client = genai.Client(
|
||||
vertexai=True,
|
||||
project=settings.gcp_project_id,
|
||||
location=settings.gcp_location,
|
||||
)
|
||||
self._client = genai.Client(api_key=settings.gemini_api_key)
|
||||
|
||||
async def embed_texts(self, texts: Sequence[str]) -> list[list[float]]:
|
||||
"""
|
||||
|
|
@ -78,12 +62,8 @@ class EmbeddingService:
|
|||
if attempt == _MAX_RETRIES:
|
||||
logger.error(f"Embedding batch failed after {_MAX_RETRIES} attempts: {exc}")
|
||||
raise
|
||||
# Honour the server-suggested retryDelay when present (e.g. 429 RESOURCE_EXHAUSTED).
|
||||
# Fall back to our own exponential backoff otherwise.
|
||||
server_delay = _parse_retry_delay(exc)
|
||||
delay = max(server_delay + 1.0, backoff) if server_delay else backoff
|
||||
logger.warning(f"Embedding attempt {attempt} failed, retrying in {delay}s: {exc}")
|
||||
await asyncio.sleep(delay)
|
||||
logger.warning(f"Embedding attempt {attempt} failed, retrying in {backoff}s: {exc}")
|
||||
await asyncio.sleep(backoff)
|
||||
backoff *= 2
|
||||
|
||||
raise RuntimeError("unreachable") # makes type-checker happy
|
||||
|
|
|
|||
|
|
@ -273,7 +273,7 @@ async def run_ffmpeg(request: RunFFmpegRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"FFmpeg operation failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.post("/probe", response_model=ProbeResponse)
|
||||
|
|
@ -328,7 +328,7 @@ async def probe_video(request: ProbeRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Probe failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.post("/encode-segment", response_model=RunFFmpegResponse)
|
||||
|
|
@ -380,7 +380,7 @@ async def encode_segment(request: EncodeSegmentRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Encode segment failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.post("/extract-frame", response_model=RunFFmpegResponse)
|
||||
|
|
@ -425,7 +425,7 @@ async def extract_frame(request: ExtractFrameRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Extract frame failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.post("/create-freeze-segment", response_model=RunFFmpegResponse)
|
||||
|
|
@ -480,7 +480,7 @@ async def create_freeze_segment(request: CreateFreezeSegmentRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Create freeze segment failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.post("/concatenate", response_model=RunFFmpegResponse)
|
||||
|
|
@ -534,4 +534,4 @@ async def concatenate_segments(request: ConcatenateRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Concatenate failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ class GCSService:
|
|||
return await loop.run_in_executor(self.executor, _upload)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to upload file to GCS: {e}")
|
||||
raise HTTPException(status_code=500, detail="File upload failed") from None
|
||||
raise HTTPException(status_code=500, detail="File upload failed")
|
||||
|
||||
async def upload_text_to_gcs(
|
||||
self,
|
||||
|
|
@ -76,7 +76,7 @@ class GCSService:
|
|||
return await loop.run_in_executor(self.executor, _upload)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to upload text to GCS: {e}")
|
||||
raise HTTPException(status_code=500, detail="Text upload failed") from None
|
||||
raise HTTPException(status_code=500, detail="Text upload failed")
|
||||
|
||||
async def get_signed_url(
|
||||
self,
|
||||
|
|
@ -104,10 +104,10 @@ class GCSService:
|
|||
try:
|
||||
return await loop.run_in_executor(self.executor, _get_signed_url)
|
||||
except NotFound:
|
||||
raise HTTPException(status_code=404, detail="File not found") from None
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to generate signed URL: {e}")
|
||||
raise HTTPException(status_code=500, detail="Failed to generate download URL") from None
|
||||
raise HTTPException(status_code=500, detail="Failed to generate download URL")
|
||||
|
||||
async def create_resumable_upload_session(self, blob_path: str, content_type: str) -> str:
|
||||
"""Create a GCS resumable upload session and return the session URI."""
|
||||
|
|
@ -123,7 +123,7 @@ class GCSService:
|
|||
return await loop.run_in_executor(self.executor, _create)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create resumable upload session: {e}")
|
||||
raise HTTPException(status_code=500, detail="Failed to initiate upload session") from None
|
||||
raise HTTPException(status_code=500, detail="Failed to initiate upload session")
|
||||
|
||||
async def delete_file(self, blob_path: str) -> bool:
|
||||
"""Delete a file from GCS"""
|
||||
|
|
@ -139,7 +139,7 @@ class GCSService:
|
|||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete file from GCS: {e}")
|
||||
raise HTTPException(status_code=500, detail="File deletion failed") from None
|
||||
raise HTTPException(status_code=500, detail="File deletion failed")
|
||||
|
||||
async def file_exists(self, blob_path: str) -> bool:
|
||||
"""Check if a file exists in GCS"""
|
||||
|
|
|
|||
|
|
@ -44,39 +44,10 @@ async def _record_gemini_usage(
|
|||
|
||||
|
||||
class GeminiService:
|
||||
_fallback_models: list[str] = [
|
||||
"gemini-3-flash-preview",
|
||||
"gemini-2.5-pro",
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
self.model_name = 'gemini-3.1-pro-preview'
|
||||
self.prompts_dir = Path(__file__).parent.parent / "prompts"
|
||||
|
||||
async def _generate(self, contents: Any, config: Any = None) -> tuple[Any, str]:
|
||||
"""Call generate_content, falling back on 429/503 transient errors. Returns (response, model_used)."""
|
||||
for model in [self.model_name, *self._fallback_models]:
|
||||
try:
|
||||
kw: dict[str, Any] = {"model": model, "contents": contents}
|
||||
if config is not None:
|
||||
kw["config"] = config
|
||||
response = await asyncio.to_thread(client.models.generate_content, **kw)
|
||||
if response.text is None:
|
||||
logger.warning(f"Model {model!r} returned empty response (safety block or overload), trying next fallback")
|
||||
last_exc: Exception = RuntimeError(f"Model {model!r} returned empty response")
|
||||
continue
|
||||
if model != self.model_name:
|
||||
logger.warning(f"Used fallback model {model!r} (primary unavailable)")
|
||||
return response, model
|
||||
except Exception as exc:
|
||||
msg = str(exc)
|
||||
if "429" in msg or "RESOURCE_EXHAUSTED" in msg or "503" in msg or "UNAVAILABLE" in msg:
|
||||
logger.warning(f"Model {model!r} unavailable, trying next fallback")
|
||||
last_exc = exc
|
||||
continue
|
||||
raise
|
||||
raise last_exc # noqa: F821 — set in loop above when all models exhausted
|
||||
|
||||
def _load_prompt(self, prompt_file: str) -> str:
|
||||
"""Load prompt template from prompts directory"""
|
||||
prompt_path = self.prompts_dir / prompt_file
|
||||
|
|
@ -142,18 +113,6 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
return glossary_block.strip()
|
||||
return ""
|
||||
|
||||
def _build_source_has_ad_block(self, source_has_ad: bool) -> str:
|
||||
if source_has_ad:
|
||||
return (
|
||||
"SOURCE AUDIO DESCRIPTION NOTICE: This video already has professional audio descriptions "
|
||||
"embedded in its audio track. "
|
||||
"1) Return an empty audio_description_vtt containing only the WEBVTT header (\"WEBVTT\\n\") — do NOT generate new audio descriptions. "
|
||||
"2) For captions_vtt: transcribe ONLY the original program dialogue and relevant sound effects. "
|
||||
"Do NOT caption the audio description narration — AD narration is spoken during natural pauses "
|
||||
"and describes visual scenes rather than being part of the original dialogue."
|
||||
)
|
||||
return ""
|
||||
|
||||
def _build_brand_context_block(self, brand_context: str | None) -> str:
|
||||
"""Build the brand context instruction block for injection into prompts."""
|
||||
if brand_context and brand_context.strip():
|
||||
|
|
@ -166,7 +125,7 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
)
|
||||
return "No specific brand names have been provided for this video."
|
||||
|
||||
async def extract_accessibility(self, video_file_path: str, brand_context: str | None = None, sdh_requested: bool = False, glossary_block: str | None = None, source_has_ad: bool = False, _cost_ctx: dict | None = None) -> dict[str, Any]:
|
||||
async def extract_accessibility(self, video_file_path: str, brand_context: str | None = None, sdh_requested: bool = False, glossary_block: str | None = None, _cost_ctx: dict | None = None) -> dict[str, Any]:
|
||||
"""
|
||||
Extract captions and audio descriptions from video using Gemini 2.0
|
||||
Returns structured JSON with transcript, captions VTT, and audio description VTT
|
||||
|
|
@ -178,7 +137,6 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
.replace("{GLOSSARY}", self._build_glossary_block(glossary_block))
|
||||
.replace("{SDH_FIELD}", self._build_sdh_field(sdh_requested))
|
||||
.replace("{SDH_GUIDELINES}", self._build_sdh_guidelines(sdh_requested))
|
||||
.replace("{SOURCE_HAS_AD}", self._build_source_has_ad_block(source_has_ad))
|
||||
)
|
||||
uploaded_file = None
|
||||
|
||||
|
|
@ -206,7 +164,9 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
# Generate content using new API - use asyncio.to_thread to avoid blocking
|
||||
logger.info("Generating content with Gemini model...")
|
||||
_t0 = time.monotonic()
|
||||
response, _model_used = await self._generate(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt),
|
||||
genai.types.Part.from_uri(
|
||||
|
|
@ -215,13 +175,13 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
)
|
||||
],
|
||||
config=genai.types.GenerateContentConfig(
|
||||
temperature=0.2,
|
||||
temperature=0.2, # Lower temperature for consistent, deterministic AD output
|
||||
top_p=0.8,
|
||||
top_k=40,
|
||||
),
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
|
||||
# Parse JSON response
|
||||
response_text = response.text.strip()
|
||||
|
|
@ -322,7 +282,9 @@ Fix the JSON and return it:
|
|||
"""
|
||||
|
||||
try:
|
||||
response, _ = await self._generate(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[genai.types.Part.from_text(text=self_heal_prompt)]
|
||||
)
|
||||
|
||||
|
|
@ -358,7 +320,7 @@ Fix the JSON and return it:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Self-heal attempt failed: {e}")
|
||||
raise ValueError("Failed to get valid JSON from Gemini after self-heal attempt") from e
|
||||
raise ValueError("Failed to get valid JSON from Gemini after self-heal attempt")
|
||||
|
||||
async def extract_accessibility_targeted(
|
||||
self,
|
||||
|
|
@ -422,7 +384,9 @@ Fix the JSON and return it:
|
|||
# Generate content using new API
|
||||
logger.info(f"Generating content with Gemini model for {target_language}...")
|
||||
_t0 = time.monotonic()
|
||||
response, _model_used = await self._generate(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt),
|
||||
genai.types.Part.from_uri(
|
||||
|
|
@ -432,7 +396,7 @@ Fix the JSON and return it:
|
|||
]
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
|
||||
# Parse JSON response
|
||||
response_text = response.text.strip()
|
||||
|
|
@ -535,7 +499,9 @@ Fix the JSON and return it:
|
|||
"""
|
||||
|
||||
try:
|
||||
response, _ = await self._generate(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[genai.types.Part.from_text(text=self_heal_prompt)]
|
||||
)
|
||||
|
||||
|
|
@ -567,7 +533,7 @@ Fix the JSON and return it:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Self-heal attempt failed for {target_language}: {e}")
|
||||
raise ValueError(f"Failed to get valid JSON from Gemini targeted extraction for {target_language}") from e
|
||||
raise ValueError(f"Failed to get valid JSON from Gemini targeted extraction for {target_language}")
|
||||
|
||||
def _attempt_json_fix(self, json_text: str) -> dict[str, Any] | None:
|
||||
"""Attempt to fix common JSON syntax issues"""
|
||||
|
|
@ -692,7 +658,9 @@ Fix the JSON and return it:
|
|||
|
||||
# Generate content with video and prompt
|
||||
logger.info("Analyzing video with Gemini for accessible video placement...")
|
||||
response, _ = await self._generate(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt),
|
||||
genai.types.Part.from_uri(
|
||||
|
|
@ -774,7 +742,9 @@ Fix the JSON and return it:
|
|||
"""
|
||||
|
||||
try:
|
||||
response, _ = await self._generate(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[genai.types.Part.from_text(text=self_heal_prompt)]
|
||||
)
|
||||
|
||||
|
|
@ -788,7 +758,7 @@ Fix the JSON and return it:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Self-heal attempt for accessible video analysis failed: {e}")
|
||||
raise ValueError("Failed to get valid JSON from accessible video analysis after self-heal") from e
|
||||
raise ValueError("Failed to get valid JSON from accessible video analysis after self-heal")
|
||||
|
||||
async def transcreate_content(
|
||||
self,
|
||||
|
|
@ -822,11 +792,15 @@ JSON:
|
|||
|
||||
try:
|
||||
_t0 = time.monotonic()
|
||||
response, _model_used = await self._generate(
|
||||
contents=[genai.types.Part.from_text(text=prompt + "\n\n" + user_prompt)]
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt + "\n\n" + user_prompt)
|
||||
]
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
|
||||
response_text = response.text.strip()
|
||||
|
||||
|
|
@ -845,7 +819,7 @@ JSON:
|
|||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse transcreation JSON response: {e}")
|
||||
raise ValueError("Invalid JSON response from transcreation") from e
|
||||
raise ValueError("Invalid JSON response from transcreation")
|
||||
except Exception as e:
|
||||
logger.error(f"Transcreation failed: {e}")
|
||||
raise
|
||||
|
|
@ -894,10 +868,6 @@ JSON:
|
|||
_tgt_label = locale_lib.get_gemini_label(target_language)
|
||||
_glossary_section = self._build_glossary_block(glossary_block)
|
||||
_glossary_line = f"\n\n{_glossary_section}" if _glossary_section else ""
|
||||
_glossary_req = (
|
||||
"\n- MUST use the exact approved terms from the glossary below — these override natural translation choices, even for English terms"
|
||||
if _glossary_section else ""
|
||||
)
|
||||
_adapt_line = _style_instruction.format(tgt=_tgt_label) if style == "transcreate" else ""
|
||||
prompt = f"""Translate the following {cue_count} numbered text segments from {_src_label} to {_tgt_label}.
|
||||
|
||||
|
|
@ -906,17 +876,19 @@ REQUIREMENTS:
|
|||
- Format: "1. translated text", "2. translated text", etc.
|
||||
- Preserve speaker labels like [Speaker 1]: unchanged
|
||||
- {_adapt_line}Use natural, idiomatic {_tgt_label}
|
||||
- Do NOT add any explanation, preamble, or extra lines{extra_instruction}{_glossary_req}{_glossary_line}
|
||||
- Do NOT add any explanation, preamble, or extra lines{extra_instruction}{_glossary_line}
|
||||
|
||||
Segments to translate:
|
||||
{numbered_texts}"""
|
||||
|
||||
_t0 = time.monotonic()
|
||||
response, _model_used = await self._generate(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[genai.types.Part.from_text(text=prompt)]
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
return self._parse_numbered_translation(response.text.strip(), cue_count)
|
||||
|
||||
try:
|
||||
|
|
@ -1003,11 +975,13 @@ Segments to translate:
|
|||
logger.info(f"Rewriting TTS cue for safety: '{original_text[:50]}...'")
|
||||
|
||||
_t0 = time.monotonic()
|
||||
response, _model_used = await self._generate(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[genai.types.Part.from_text(text=prompt)]
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
|
||||
result = response.text.strip()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import io
|
||||
import re
|
||||
|
||||
from google.cloud import texttospeech
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
from pydub import AudioSegment
|
||||
|
||||
from ..core.config import settings
|
||||
|
|
@ -22,26 +22,14 @@ class TTSSynthesisError(Exception):
|
|||
|
||||
|
||||
class GeminiTTSService:
|
||||
"""Text-to-Speech service using Google Cloud Text-to-Speech API with Gemini models."""
|
||||
"""Text-to-Speech service using Gemini TTS API"""
|
||||
|
||||
def __init__(self):
|
||||
self.client = texttospeech.TextToSpeechClient()
|
||||
self.client = genai.Client(api_key=settings.gemini_api_key)
|
||||
self.model = settings.gemini_tts_model
|
||||
self.default_voice = settings.gemini_tts_default_voice
|
||||
logger.info(f"Gemini TTS service initialized with model: {self.model}")
|
||||
|
||||
@staticmethod
|
||||
def _extract_retry_after(error: Exception) -> float | None:
|
||||
"""Return seconds to wait from a Google API 429 retryDelay, or None."""
|
||||
msg = str(error)
|
||||
m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
return None
|
||||
|
||||
async def synthesize_text(
|
||||
self,
|
||||
text: str,
|
||||
|
|
@ -52,56 +40,117 @@ class GeminiTTSService:
|
|||
style_prompt: str = ""
|
||||
) -> bytes:
|
||||
"""
|
||||
Synthesize text to MP3 using Google Cloud TTS with Gemini model.
|
||||
Synthesize text to audio using Gemini TTS.
|
||||
Returns MP3 audio bytes.
|
||||
|
||||
Args:
|
||||
text: The text to synthesize
|
||||
voice_name: Gemini voice name (e.g. "Kore", "Puck")
|
||||
language: Language code (e.g. "en", "en-US", "fr")
|
||||
model: Model variant key — "flash" or "pro"
|
||||
speed: Speech rate multiplier (0.25–4.0)
|
||||
style_prompt: Natural-language style instruction sent as prompt
|
||||
voice_name: Name of the voice to use
|
||||
language: Language code (e.g., "en", "es")
|
||||
model: Model variant - "flash" (fast) or "pro" (quality)
|
||||
speed: Speech rate multiplier (0.5 to 2.0)
|
||||
style_prompt: Style instructions to prepend (e.g., "Speak calmly...")
|
||||
"""
|
||||
if not text.strip():
|
||||
raise ValueError("Text cannot be empty")
|
||||
|
||||
# Validate voice
|
||||
if voice_name not in settings.gemini_tts_voices:
|
||||
logger.warning(f"Unknown voice '{voice_name}', using default '{self.default_voice}'")
|
||||
voice_name = self.default_voice
|
||||
|
||||
# Select model from config
|
||||
model_id = settings.gemini_tts_models.get(model, settings.gemini_tts_model)
|
||||
language_code = locale_lib.get_tts_lang(language)
|
||||
|
||||
# Build the full prompt with style and speed instructions
|
||||
prompt_parts = []
|
||||
|
||||
# Add style prompt if provided
|
||||
if style_prompt:
|
||||
prompt_parts.append(style_prompt)
|
||||
|
||||
# Add speed instruction if not default
|
||||
if speed != 1.0:
|
||||
speed_pct = int(speed * 100)
|
||||
if speed < 1.0:
|
||||
prompt_parts.append(f"Speak slowly at approximately {speed_pct}% of normal speed. ")
|
||||
else:
|
||||
prompt_parts.append(f"Speak quickly at approximately {speed_pct}% of normal speed. ")
|
||||
|
||||
# Combine prompts with actual text
|
||||
full_text = "".join(prompt_parts) + text
|
||||
|
||||
try:
|
||||
synthesis_input = texttospeech.SynthesisInput(text=text)
|
||||
if style_prompt:
|
||||
synthesis_input = texttospeech.SynthesisInput(
|
||||
text=text,
|
||||
prompt=style_prompt,
|
||||
# Generate audio using Gemini TTS
|
||||
response = self.client.models.generate_content(
|
||||
model=model_id,
|
||||
contents=full_text,
|
||||
config=types.GenerateContentConfig(
|
||||
response_modalities=["AUDIO"],
|
||||
speech_config=types.SpeechConfig(
|
||||
voice_config=types.VoiceConfig(
|
||||
prebuilt_voice_config=types.PrebuiltVoiceConfig(
|
||||
voice_name=voice_name,
|
||||
)
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
response = self.client.synthesize_speech(
|
||||
input=synthesis_input,
|
||||
voice=texttospeech.VoiceSelectionParams(
|
||||
language_code=language_code,
|
||||
name=voice_name,
|
||||
model_name=model_id,
|
||||
),
|
||||
audio_config=texttospeech.AudioConfig(
|
||||
audio_encoding=texttospeech.AudioEncoding.MP3,
|
||||
speaking_rate=speed,
|
||||
),
|
||||
)
|
||||
|
||||
if not response.audio_content:
|
||||
raise ValueError("Empty audio content in Cloud TTS response")
|
||||
# Extract PCM audio data from response with proper null-safe checks
|
||||
if not response.candidates:
|
||||
logger.error(
|
||||
f"Gemini TTS response missing candidates. "
|
||||
f"Response type: {type(response)}, Response: {response}"
|
||||
)
|
||||
raise ValueError("No candidates in Gemini TTS response")
|
||||
|
||||
return response.audio_content
|
||||
candidate = response.candidates[0]
|
||||
|
||||
if candidate.content is None:
|
||||
logger.error(
|
||||
f"Gemini TTS candidate has no content. "
|
||||
f"Finish reason: {getattr(candidate, 'finish_reason', 'unknown')}, "
|
||||
f"Safety ratings: {getattr(candidate, 'safety_ratings', 'unknown')}"
|
||||
)
|
||||
raise ValueError(
|
||||
f"Candidate content is None in Gemini TTS response. "
|
||||
f"Finish reason: {getattr(candidate, 'finish_reason', 'unknown')}"
|
||||
)
|
||||
|
||||
if not candidate.content.parts:
|
||||
logger.error(
|
||||
f"Gemini TTS content has no parts. "
|
||||
f"Content role: {getattr(candidate.content, 'role', 'unknown')}"
|
||||
)
|
||||
raise ValueError("No parts in Gemini TTS response content")
|
||||
|
||||
part = candidate.content.parts[0]
|
||||
if not hasattr(part, 'inline_data') or part.inline_data is None:
|
||||
logger.error(
|
||||
f"Gemini TTS part missing inline_data. "
|
||||
f"Part type: {type(part)}, Part: {part}"
|
||||
)
|
||||
raise ValueError("No inline_data in Gemini TTS response part")
|
||||
|
||||
pcm_data = part.inline_data.data
|
||||
|
||||
# Convert PCM to MP3
|
||||
mp3_data = self._pcm_to_mp3(pcm_data)
|
||||
|
||||
return mp3_data
|
||||
|
||||
except Exception as e:
|
||||
# Log comprehensive error information for debugging
|
||||
error_context = {
|
||||
"text_length": len(text),
|
||||
"text_preview": text[:100] + "..." if len(text) > 100 else text,
|
||||
"voice_name": voice_name,
|
||||
"language": language,
|
||||
"model_id": model_id,
|
||||
}
|
||||
logger.error(
|
||||
f"Gemini TTS synthesis failed: {e}. "
|
||||
f"text_len={len(text)}, voice={voice_name}, model={model_id}, lang={language_code}"
|
||||
f"Gemini TTS synthesis failed: {e}. Context: {error_context}"
|
||||
)
|
||||
raise
|
||||
|
||||
|
|
@ -113,18 +162,23 @@ class GeminiTTSService:
|
|||
speed: float = 1.0,
|
||||
style_prompt: str = ""
|
||||
) -> bytes:
|
||||
"""Generate a preview audio sample for voice selection."""
|
||||
"""
|
||||
Generate a preview audio sample for voice selection.
|
||||
Uses language-specific sample text and applies all TTS settings.
|
||||
"""
|
||||
# Get preview sample text — try settings override, then locale registry, then fallback
|
||||
sample_text = (
|
||||
settings.gemini_tts_preview_samples.get(language)
|
||||
or locale_lib.get_preview_sample(language)
|
||||
)
|
||||
|
||||
return await self.synthesize_text(
|
||||
sample_text,
|
||||
voice_name,
|
||||
language,
|
||||
model=model,
|
||||
speed=speed,
|
||||
style_prompt=style_prompt,
|
||||
style_prompt=style_prompt
|
||||
)
|
||||
|
||||
async def _synthesize_cue_with_retry(
|
||||
|
|
@ -139,7 +193,26 @@ class GeminiTTSService:
|
|||
max_attempts: int = 3,
|
||||
base_delay: float = 1.0
|
||||
) -> bytes:
|
||||
"""Synthesize a single cue with retry, honouring API-provided retryDelay on 429."""
|
||||
"""
|
||||
Synthesize a single cue with exponential backoff retry.
|
||||
|
||||
Args:
|
||||
cue_index: Index of the cue (for error reporting)
|
||||
text: Text to synthesize
|
||||
voice_name: TTS voice name
|
||||
language: Language code
|
||||
model: Model variant
|
||||
speed: Speech rate
|
||||
style_prompt: Style instructions
|
||||
max_attempts: Total attempts (1 initial + retries)
|
||||
base_delay: Base delay in seconds for backoff
|
||||
|
||||
Returns:
|
||||
MP3 audio bytes
|
||||
|
||||
Raises:
|
||||
TTSSynthesisError: If all attempts fail
|
||||
"""
|
||||
import asyncio
|
||||
import random
|
||||
|
||||
|
|
@ -154,31 +227,32 @@ class GeminiTTSService:
|
|||
language,
|
||||
model=model,
|
||||
speed=speed,
|
||||
style_prompt=style_prompt,
|
||||
style_prompt=style_prompt
|
||||
)
|
||||
except Exception as e:
|
||||
last_exception = e
|
||||
api_response_info = str(e)
|
||||
|
||||
if attempt < max_attempts - 1:
|
||||
api_delay = self._extract_retry_after(e)
|
||||
delay = api_delay if api_delay else base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||
# Exponential backoff with jitter
|
||||
delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||
logger.warning(
|
||||
f"TTS attempt {attempt + 1}/{max_attempts} failed for cue {cue_index}. "
|
||||
f"TTS synthesis attempt {attempt + 1}/{max_attempts} failed for cue {cue_index}. "
|
||||
f"Retrying in {delay:.2f}s. Error: {e}"
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
else:
|
||||
logger.error(
|
||||
f"TTS FAILED after {max_attempts} attempts for cue {cue_index}. "
|
||||
f"text='{text[:50]}{'...' if len(text) > 50 else ''}'. Error: {e}"
|
||||
f"TTS synthesis FAILED after {max_attempts} attempts for cue {cue_index}. "
|
||||
f"Text: {text[:50]}{'...' if len(text) > 50 else ''}. Error: {e}"
|
||||
)
|
||||
|
||||
# All retries exhausted - raise hard failure
|
||||
raise TTSSynthesisError(
|
||||
message=f"TTS synthesis failed after {max_attempts} attempts: {last_exception}",
|
||||
cue_index=cue_index,
|
||||
cue_text=text,
|
||||
api_response_info=api_response_info,
|
||||
api_response_info=api_response_info
|
||||
)
|
||||
|
||||
async def synthesize_audio_description(
|
||||
|
|
@ -193,38 +267,56 @@ class GeminiTTSService:
|
|||
"""
|
||||
Synthesize full audio description from VTT content.
|
||||
Maintains timing alignment with original VTT cues.
|
||||
|
||||
Args:
|
||||
ad_vtt_content: VTT content with audio description cues
|
||||
language: Language code (e.g., "en", "es")
|
||||
voice_name: Name of the voice to use (defaults to service default)
|
||||
model: Model variant - "flash" (fast) or "pro" (quality)
|
||||
speed: Speech rate multiplier (0.5 to 2.0)
|
||||
style_prompt: Style instructions to prepend to each cue
|
||||
"""
|
||||
if voice_name is None:
|
||||
voice_name = self.default_voice
|
||||
|
||||
# Validate voice
|
||||
if voice_name not in settings.gemini_tts_voices:
|
||||
logger.warning(f"Unknown voice '{voice_name}', using default '{self.default_voice}'")
|
||||
voice_name = self.default_voice
|
||||
|
||||
# Parse VTT cues
|
||||
cues = self._parse_ad_cues(ad_vtt_content)
|
||||
|
||||
if not cues:
|
||||
raise ValueError("No audio description cues found in VTT content")
|
||||
|
||||
logger.info(
|
||||
f"Synthesizing {len(cues)} AD cues: voice='{voice_name}', model='{model}', speed={speed}x"
|
||||
f"Synthesizing {len(cues)} audio description cues with voice '{voice_name}', "
|
||||
f"model '{model}', speed {speed}x"
|
||||
)
|
||||
|
||||
# Synthesize each cue with precise timing anchoring
|
||||
audio_segments = []
|
||||
current_audio_position = 0.0
|
||||
|
||||
for i, cue in enumerate(cues):
|
||||
target_start_time = cue["start_time"]
|
||||
|
||||
# Add silence to reach the exact VTT start time
|
||||
if target_start_time > current_audio_position:
|
||||
silence_duration = target_start_time - current_audio_position
|
||||
audio_segments.append(AudioSegment.silent(duration=int(silence_duration * 1000)))
|
||||
silence = AudioSegment.silent(duration=int(silence_duration * 1000))
|
||||
audio_segments.append(silence)
|
||||
current_audio_position = target_start_time
|
||||
|
||||
# Synthesize this cue's text
|
||||
text = cue["text"].strip()
|
||||
if text:
|
||||
# Ensure proper punctuation for natural TTS flow
|
||||
if not text.endswith(('.', '!', '?')):
|
||||
text += "."
|
||||
|
||||
# Use retry helper - will raise TTSSynthesisError on failure after retries
|
||||
audio_data = await self._synthesize_cue_with_retry(
|
||||
cue_index=i,
|
||||
text=text,
|
||||
|
|
@ -234,62 +326,107 @@ class GeminiTTSService:
|
|||
speed=speed,
|
||||
style_prompt=style_prompt,
|
||||
max_attempts=3,
|
||||
base_delay=1.0,
|
||||
base_delay=1.0
|
||||
)
|
||||
|
||||
# Convert to AudioSegment and get actual duration
|
||||
audio_segment = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")
|
||||
audio_segments.append(audio_segment)
|
||||
current_audio_position += len(audio_segment) / 1000.0
|
||||
|
||||
final_audio = sum(audio_segments, AudioSegment.empty()) if audio_segments else AudioSegment.silent(duration=1000)
|
||||
# Update position based on actual audio duration
|
||||
actual_audio_duration = len(audio_segment) / 1000.0
|
||||
current_audio_position += actual_audio_duration
|
||||
|
||||
# Combine all segments
|
||||
if audio_segments:
|
||||
final_audio = sum(audio_segments, AudioSegment.empty())
|
||||
else:
|
||||
final_audio = AudioSegment.silent(duration=1000)
|
||||
|
||||
# Export to MP3
|
||||
output_buffer = io.BytesIO()
|
||||
final_audio.export(output_buffer, format="mp3", bitrate="128k")
|
||||
|
||||
logger.info(f"Audio description synthesized: {len(output_buffer.getvalue())} bytes")
|
||||
return output_buffer.getvalue()
|
||||
|
||||
def _pcm_to_mp3(self, pcm_data: bytes) -> bytes:
|
||||
"""
|
||||
Convert raw PCM audio (24kHz, 16-bit, mono) to MP3.
|
||||
Uses lameenc (pure Python) — no system ffmpeg required.
|
||||
"""
|
||||
import lameenc
|
||||
encoder = lameenc.Encoder()
|
||||
encoder.set_bit_rate(128)
|
||||
encoder.set_in_sample_rate(24000)
|
||||
encoder.set_channels(1)
|
||||
encoder.set_quality(2) # 2 = high quality
|
||||
mp3_data = encoder.encode(pcm_data)
|
||||
mp3_data += encoder.flush()
|
||||
return bytes(mp3_data)
|
||||
|
||||
def _parse_ad_cues(self, vtt_content: str) -> list[dict]:
|
||||
"""Parse audio description VTT and extract timing + text."""
|
||||
"""Parse audio description VTT and extract timing + text"""
|
||||
lines = vtt_content.strip().split('\n')
|
||||
cues = []
|
||||
|
||||
i = 0
|
||||
while i < len(lines):
|
||||
line = lines[i].strip()
|
||||
if line in ("WEBVTT", "") or line.startswith("NOTE"):
|
||||
|
||||
# Skip header and empty lines
|
||||
if line == "WEBVTT" or line == "" or line.startswith("NOTE"):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Check for timing line
|
||||
if " --> " in line:
|
||||
timing_parts = line.split(" --> ")
|
||||
start_time = self._parse_timestamp(timing_parts[0].strip())
|
||||
end_time = self._parse_timestamp(timing_parts[1].strip())
|
||||
|
||||
# Get text from next line(s)
|
||||
i += 1
|
||||
text_lines = []
|
||||
while i < len(lines) and lines[i].strip():
|
||||
while i < len(lines) and lines[i].strip() != "":
|
||||
text_lines.append(lines[i].strip())
|
||||
i += 1
|
||||
|
||||
if text_lines:
|
||||
cues.append({"start_time": start_time, "end_time": end_time, "text": " ".join(text_lines)})
|
||||
cues.append({
|
||||
"start_time": start_time,
|
||||
"end_time": end_time,
|
||||
"text": " ".join(text_lines)
|
||||
})
|
||||
else:
|
||||
i += 1
|
||||
|
||||
return cues
|
||||
|
||||
def _parse_timestamp(self, timestamp: str) -> float:
|
||||
"""Convert VTT timestamp to seconds."""
|
||||
"""Convert VTT timestamp to seconds"""
|
||||
parts = timestamp.split(":")
|
||||
if len(parts) == 3:
|
||||
|
||||
if len(parts) == 3: # HH:MM:SS.mmm
|
||||
hours, minutes, seconds = parts
|
||||
elif len(parts) == 2:
|
||||
elif len(parts) == 2: # MM:SS.mmm
|
||||
hours, minutes, seconds = "0", parts[0], parts[1]
|
||||
else:
|
||||
raise ValueError(f"Invalid timestamp format: {timestamp}")
|
||||
|
||||
sec_parts = seconds.split(".")
|
||||
return (
|
||||
int(hours) * 3600
|
||||
+ int(minutes) * 60
|
||||
+ int(sec_parts[0])
|
||||
+ (int(sec_parts[1]) / 1000.0 if len(sec_parts) > 1 else 0)
|
||||
seconds_val = int(sec_parts[0])
|
||||
milliseconds = int(sec_parts[1]) if len(sec_parts) > 1 else 0
|
||||
|
||||
total_seconds = (
|
||||
int(hours) * 3600 +
|
||||
int(minutes) * 60 +
|
||||
seconds_val +
|
||||
milliseconds / 1000.0
|
||||
)
|
||||
|
||||
return total_seconds
|
||||
|
||||
|
||||
# Global service instance
|
||||
gemini_tts_service = GeminiTTSService()
|
||||
|
|
|
|||
|
|
@ -334,24 +334,12 @@ async def activate_version(glossary_id: str, version_id: str) -> None:
|
|||
|
||||
|
||||
async def archive_glossary(glossary_id: str) -> None:
|
||||
"""Hard-delete the glossary and all its versions and terms."""
|
||||
db = await get_database()
|
||||
|
||||
versions = await db[_COLL_VERSIONS].find(
|
||||
{"glossary_id": glossary_id}, {"_id": 1}
|
||||
).to_list(length=None)
|
||||
version_ids = [str(v["_id"]) for v in versions]
|
||||
|
||||
if version_ids:
|
||||
terms_result = await db[_COLL_TERMS].delete_many({"version_id": {"$in": version_ids}})
|
||||
logger.info(f"Deleted {terms_result.deleted_count} terms for glossary {glossary_id}")
|
||||
|
||||
await db[_COLL_VERSIONS].delete_many({"glossary_id": glossary_id})
|
||||
logger.info(f"Deleted {len(version_ids)} versions for glossary {glossary_id}")
|
||||
|
||||
await db[_COLL_GLOSSARIES].delete_one({"_id": ObjectId(glossary_id)})
|
||||
await db[_COLL_GLOSSARIES].update_one(
|
||||
{"_id": ObjectId(glossary_id)},
|
||||
{"$set": {"status": GlossaryStatus.ARCHIVED.value}},
|
||||
)
|
||||
await _invalidate_cache(glossary_id)
|
||||
logger.info(f"Deleted glossary {glossary_id}")
|
||||
|
||||
|
||||
# ── Retrieval ─────────────────────────────────────────────────────────────────
|
||||
|
|
@ -559,26 +547,18 @@ async def _vector_match(
|
|||
|
||||
|
||||
def _get_translation(translations: dict[str, str], target_locale: str) -> str | None:
|
||||
"""Look up a translation with locale-fallback.
|
||||
|
||||
Specific → bare: fr-CA → fr-FR siblings → fr
|
||||
Bare → specific: fr → fr-FR, fr-CA (first match)
|
||||
"""
|
||||
"""Look up a translation with locale-fallback: fr-CA → fr-FR → fr → None."""
|
||||
if not translations or not target_locale:
|
||||
return None
|
||||
if target_locale in translations:
|
||||
return translations[target_locale]
|
||||
if "-" in target_locale:
|
||||
# Specific locale: try sibling regions and bare parent (fr-CA → fr-FR → fr)
|
||||
parent = target_locale.split("-")[0]
|
||||
# Try parent language
|
||||
parent = target_locale.split("-")[0] if "-" in target_locale else None
|
||||
if parent:
|
||||
# Try sibling locales, e.g. fr-CA not found → try fr-FR
|
||||
for code, text in translations.items():
|
||||
if code.startswith(parent + "-") or code == parent:
|
||||
return text
|
||||
else:
|
||||
# Bare code (fr): try any fr-* region variant stored in the glossary
|
||||
for code, text in translations.items():
|
||||
if code == target_locale or code.startswith(target_locale + "-"):
|
||||
return text
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -728,17 +708,6 @@ async def get_glossary(glossary_id: str) -> Glossary | None:
|
|||
return glossary_from_doc(doc) if doc else None
|
||||
|
||||
|
||||
async def get_versions_by_ids(version_ids: list[str]) -> dict[str, GlossaryVersion]:
|
||||
"""Batch-fetch versions by ID, returns {version_id: GlossaryVersion}."""
|
||||
if not version_ids:
|
||||
return {}
|
||||
db = await get_database()
|
||||
docs = await db[_COLL_VERSIONS].find(
|
||||
{"_id": {"$in": [ObjectId(vid) for vid in version_ids]}}
|
||||
).to_list(length=len(version_ids))
|
||||
return {str(d["_id"]): glossary_version_from_doc(d) for d in docs}
|
||||
|
||||
|
||||
async def get_versions(glossary_id: str) -> list[GlossaryVersion]:
|
||||
db = await get_database()
|
||||
cursor = db[_COLL_VERSIONS].find(
|
||||
|
|
|
|||
|
|
@ -726,33 +726,6 @@ async def approve_language(
|
|||
return LanguageQCState(**updated_state)
|
||||
except Exception as exc:
|
||||
logger.error(f"Job {job_id}: failed to dispatch translation after EN approval: {exc}")
|
||||
elif (refreshed.get("requested_outputs") or {}).get("accessible_video_mp4"):
|
||||
# Source-only job requesting accessible video: no translation needed,
|
||||
# but TTS+render pipeline must run to produce the accessible MP4.
|
||||
try:
|
||||
from ..services.cloud_run_dispatch import dispatch as _cr_dispatch
|
||||
await db[_JOBS].update_one(
|
||||
{"_id": job_id},
|
||||
{
|
||||
"$set": {
|
||||
"status": JobStatus.TRANSLATING.value,
|
||||
"updated_at": datetime.utcnow(),
|
||||
},
|
||||
"$push": {
|
||||
"review.history": {
|
||||
"at": datetime.utcnow(),
|
||||
"status": JobStatus.TRANSLATING.value,
|
||||
"by": "system",
|
||||
"notes": "EN approved — dispatching TTS and accessible video render (source-only)",
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
await _cr_dispatch("translate", job_id)
|
||||
logger.info(f"Job {job_id}: EN approved (source-only), dispatched TTS+render pipeline")
|
||||
return LanguageQCState(**updated_state)
|
||||
except Exception as exc:
|
||||
logger.error(f"Job {job_id}: failed to dispatch TTS+render after EN approval: {exc}")
|
||||
|
||||
await _maybe_advance_job(db, refreshed)
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ class MicrosoftAuthService:
|
|||
return response.json()
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(f"Failed to fetch OpenID configuration: {e}")
|
||||
raise MicrosoftAuthError("Failed to fetch Microsoft authentication configuration") from e
|
||||
raise MicrosoftAuthError("Failed to fetch Microsoft authentication configuration")
|
||||
|
||||
async def _get_jwks(self, force_refresh: bool = False) -> dict:
|
||||
"""Fetch JSON Web Key Set (JWKS) from Microsoft.
|
||||
|
|
@ -97,7 +97,7 @@ class MicrosoftAuthService:
|
|||
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(f"Failed to fetch JWKS: {e}")
|
||||
raise MicrosoftAuthError("Failed to fetch Microsoft public keys") from e
|
||||
raise MicrosoftAuthError("Failed to fetch Microsoft public keys")
|
||||
|
||||
async def validate_token(self, id_token: str) -> MicrosoftUserInfo:
|
||||
"""Validate Microsoft ID token and extract user information.
|
||||
|
|
@ -145,7 +145,7 @@ class MicrosoftAuthService:
|
|||
issuer=f"https://login.microsoftonline.com/{self.tenant_id}/v2.0"
|
||||
)
|
||||
except JWTError as e:
|
||||
raise MicrosoftTokenValidationError(f"Token validation failed: {str(e)}") from e
|
||||
raise MicrosoftTokenValidationError(f"Token validation failed: {str(e)}")
|
||||
|
||||
email = payload.get('email') or payload.get('preferred_username')
|
||||
if not email:
|
||||
|
|
@ -176,12 +176,12 @@ class MicrosoftAuthService:
|
|||
|
||||
except JWKError as e:
|
||||
logger.error(f"JWK error during token validation: {e}")
|
||||
raise MicrosoftTokenValidationError(f"Key processing error: {str(e)}") from e
|
||||
raise MicrosoftTokenValidationError(f"Key processing error: {str(e)}")
|
||||
except Exception as e:
|
||||
if isinstance(e, (MicrosoftAuthError, MicrosoftTokenValidationError)):
|
||||
raise
|
||||
logger.error(f"Unexpected error during token validation: {e}")
|
||||
raise MicrosoftTokenValidationError(f"Token validation failed: {str(e)}") from e
|
||||
raise MicrosoftTokenValidationError(f"Token validation failed: {str(e)}")
|
||||
|
||||
|
||||
# Singleton instance
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class SecretsManager:
|
|||
logger.info("Secret Manager client initialized")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize Secret Manager client: {e}")
|
||||
raise SecretManagerError(f"Failed to initialize Secret Manager: {e}") from e
|
||||
raise SecretManagerError(f"Failed to initialize Secret Manager: {e}")
|
||||
|
||||
return self.client
|
||||
|
||||
|
|
@ -44,14 +44,14 @@ class SecretsManager:
|
|||
async def get_secret(self, secret_name: str, version: str = "latest") -> str:
|
||||
"""
|
||||
Retrieve a secret from Google Cloud Secret Manager.
|
||||
|
||||
|
||||
Args:
|
||||
secret_name: Name of the secret
|
||||
version: Version of the secret (default: "latest")
|
||||
|
||||
|
||||
Returns:
|
||||
The secret value as a string
|
||||
|
||||
|
||||
Raises:
|
||||
SecretManagerError: If secret cannot be retrieved
|
||||
"""
|
||||
|
|
@ -89,26 +89,26 @@ class SecretsManager:
|
|||
except gcp_exceptions.NotFound:
|
||||
error_msg = f"Secret not found: {secret_name}"
|
||||
logger.error(error_msg)
|
||||
raise SecretManagerError(error_msg) from None
|
||||
raise SecretManagerError(error_msg)
|
||||
|
||||
except gcp_exceptions.PermissionDenied:
|
||||
error_msg = f"Permission denied accessing secret: {secret_name}"
|
||||
logger.error(error_msg)
|
||||
raise SecretManagerError(error_msg) from None
|
||||
raise SecretManagerError(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to retrieve secret {secret_name}: {e}"
|
||||
logger.error(error_msg)
|
||||
raise SecretManagerError(error_msg) from e
|
||||
raise SecretManagerError(error_msg)
|
||||
|
||||
@trace_async_operation("secrets_manager.get_secrets_batch")
|
||||
async def get_secrets_batch(self, secret_names: list[str]) -> dict[str, str]:
|
||||
"""
|
||||
Retrieve multiple secrets efficiently.
|
||||
|
||||
|
||||
Args:
|
||||
secret_names: List of secret names to retrieve
|
||||
|
||||
|
||||
Returns:
|
||||
Dictionary mapping secret names to their values
|
||||
"""
|
||||
|
|
@ -137,12 +137,12 @@ class SecretsManager:
|
|||
async def create_secret(self, secret_name: str, secret_value: str, labels: dict[str, str] | None = None) -> str:
|
||||
"""
|
||||
Create a new secret in Secret Manager.
|
||||
|
||||
|
||||
Args:
|
||||
secret_name: Name of the secret
|
||||
secret_value: Value to store
|
||||
labels: Optional labels for the secret
|
||||
|
||||
|
||||
Returns:
|
||||
The full secret resource name
|
||||
"""
|
||||
|
|
@ -186,12 +186,12 @@ class SecretsManager:
|
|||
except gcp_exceptions.AlreadyExists:
|
||||
error_msg = f"Secret already exists: {secret_name}"
|
||||
logger.error(error_msg)
|
||||
raise SecretManagerError(error_msg) from None
|
||||
raise SecretManagerError(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to create secret {secret_name}: {e}"
|
||||
logger.error(error_msg)
|
||||
raise SecretManagerError(error_msg) from e
|
||||
raise SecretManagerError(error_msg)
|
||||
|
||||
def clear_cache(self) -> None:
|
||||
"""Clear the secrets cache."""
|
||||
|
|
@ -217,7 +217,7 @@ async def get_database_url() -> str:
|
|||
# Fallback to environment variable
|
||||
url = os.getenv("MONGODB_URL")
|
||||
if not url:
|
||||
raise SecretManagerError("MongoDB URL not available in secrets or environment") from None
|
||||
raise SecretManagerError("MongoDB URL not available in secrets or environment")
|
||||
return url
|
||||
|
||||
|
||||
|
|
@ -229,7 +229,7 @@ async def get_redis_url() -> str:
|
|||
# Fallback to environment variable
|
||||
url = os.getenv("REDIS_URL")
|
||||
if not url:
|
||||
raise SecretManagerError("Redis URL not available in secrets or environment") from None
|
||||
raise SecretManagerError("Redis URL not available in secrets or environment")
|
||||
return url
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ class TTSService:
|
|||
audio_segments = []
|
||||
current_audio_position = 0.0 # Track actual audio timeline position
|
||||
|
||||
for _i, cue in enumerate(cues):
|
||||
for i, cue in enumerate(cues):
|
||||
# Calculate where this cue should start (anchored to VTT timing)
|
||||
target_start_time = cue["start_time"]
|
||||
|
||||
|
|
@ -298,7 +298,7 @@ class TTSService:
|
|||
audio_segments = []
|
||||
current_audio_position = 0.0 # Track actual audio timeline position
|
||||
|
||||
for _i, cue in enumerate(cues):
|
||||
for i, cue in enumerate(cues):
|
||||
# Calculate where this cue should start (anchored to VTT timing)
|
||||
target_start_time = cue["start_time"]
|
||||
|
||||
|
|
|
|||
|
|
@ -231,7 +231,7 @@ class VideoRendererService:
|
|||
error_detail = e.response.json().get("detail", str(e))
|
||||
except Exception:
|
||||
error_detail = str(e)
|
||||
raise FFmpegExecutionError(f"Cloud Run {endpoint} failed: {error_detail}") from e
|
||||
raise FFmpegExecutionError(f"Cloud Run {endpoint} failed: {error_detail}")
|
||||
|
||||
async def _dispatch_ffmpeg(self, cmd: list[str], timeout: int = 3600) -> dict[str, Any]:
|
||||
"""
|
||||
|
|
@ -391,7 +391,8 @@ class VideoRendererService:
|
|||
logger.info(f"Starting overlay render for {source_video_path}")
|
||||
placements = analysis.get("placements", [])
|
||||
|
||||
with tempfile.TemporaryDirectory() as _temp_dir:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
temp_dir_path = Path(temp_dir)
|
||||
|
||||
# Get source video duration
|
||||
duration = await self._get_video_duration(source_video_path)
|
||||
|
|
@ -414,7 +415,7 @@ class VideoRendererService:
|
|||
filter_parts = []
|
||||
|
||||
# Add each AD segment as input
|
||||
for _cue_index, mp3_path in ad_segments:
|
||||
for cue_index, mp3_path in ad_segments:
|
||||
inputs.extend(["-i", mp3_path])
|
||||
|
||||
# Build complex filter
|
||||
|
|
@ -428,7 +429,7 @@ class VideoRendererService:
|
|||
|
||||
# Add delay to each AD segment and mix
|
||||
ad_labels = []
|
||||
for i, (cue_index, _mp3_path) in enumerate(ad_segments):
|
||||
for i, (cue_index, mp3_path) in enumerate(ad_segments):
|
||||
# Find the placement for this cue
|
||||
placement = next(
|
||||
(p for p in placements if p.get("ad_cue_index") == cue_index),
|
||||
|
|
@ -563,7 +564,7 @@ class VideoRendererService:
|
|||
logger.info(f"Source Properties: {video_props}, Duration: {source_duration:.2f}s")
|
||||
|
||||
# Create a mapping of cue_index to mp3_path
|
||||
cue_to_mp3 = dict(ad_segments)
|
||||
cue_to_mp3 = {cue_index: mp3_path for cue_index, mp3_path in ad_segments}
|
||||
|
||||
# Pre-process placements and validate
|
||||
valid_placements = []
|
||||
|
|
@ -883,6 +884,9 @@ class VideoRendererService:
|
|||
# Pause point is at the START of the freeze frame in the rendered timeline
|
||||
pause_ms = freeze_frame_starts.get(cue_index, p["pause_point"] * 1000)
|
||||
|
||||
# Find the freeze segment for this cue to get its end position
|
||||
freeze_seg = next((s for s in segment_metadata_list if s.is_freeze_frame and s.cue_index == cue_index), None)
|
||||
|
||||
# Compute min bound: end of previous AD segment (or 0 for first)
|
||||
if idx == 0:
|
||||
min_bound_ms = 0.0
|
||||
|
|
|
|||
|
|
@ -190,7 +190,7 @@ async def transcribe(request: TranscribeRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Transcription failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
# Clean up temp file
|
||||
if os.path.exists(tmp_path):
|
||||
|
|
@ -252,7 +252,7 @@ async def transcribe_with_gaps(request: TranscribeWithGapsRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Transcription with gaps failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
# Clean up temp file
|
||||
if os.path.exists(tmp_path):
|
||||
|
|
@ -297,7 +297,7 @@ async def refine_pause_points(request: RefinePausePointsRequest):
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Pause point refinement failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
# Startup event to pre-load Whisper model
|
||||
|
|
|
|||
|
|
@ -1,15 +1,5 @@
|
|||
import threading
|
||||
import time
|
||||
|
||||
from celery import Celery
|
||||
from celery.signals import (
|
||||
task_failure,
|
||||
task_prerun,
|
||||
task_received,
|
||||
task_retry,
|
||||
task_success,
|
||||
worker_ready,
|
||||
)
|
||||
from celery.signals import task_failure, task_retry, task_success
|
||||
|
||||
from ..core.config import settings
|
||||
from ..core.logging import get_logger
|
||||
|
|
@ -59,6 +49,13 @@ def test_task(message="test"):
|
|||
return f"Test task completed: {message}"
|
||||
|
||||
|
||||
# Add task received handler for debugging
|
||||
import threading
|
||||
import time
|
||||
|
||||
from celery.signals import task_prerun, task_received, worker_ready
|
||||
|
||||
|
||||
@worker_ready.connect
|
||||
def worker_ready_handler(sender=None, **kwargs):
|
||||
"""Log when worker is ready and start heartbeat"""
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ from . import celery_app
|
|||
logger = get_logger(__name__)
|
||||
|
||||
_BATCH_SIZE = 250
|
||||
_CONCURRENCY = 2
|
||||
_CONCURRENCY = 5
|
||||
|
||||
|
||||
@celery_app.task(name="embed_glossary_version", bind=True, max_retries=3)
|
||||
|
|
|
|||
|
|
@ -1,25 +1,20 @@
|
|||
import asyncio
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
|
||||
import ffmpeg
|
||||
from celery import Task
|
||||
from celery.result import allow_join_result
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
|
||||
from ..core.config import settings
|
||||
from ..core.logging import get_logger
|
||||
from ..lib.vtt import VTTEditor
|
||||
from ..models.job import JobStatus
|
||||
from ..services import caption_aligner, cost_tracker
|
||||
from ..services import cost_tracker
|
||||
from ..services.gcs import gcs_path, gcs_service, upload_vtt_to_gcs
|
||||
from ..services.gemini import gemini_service
|
||||
from ..services.whisper_service import WordTimestamp
|
||||
from . import celery_app
|
||||
from ._websocket_bridge import broadcast_status_update
|
||||
from .whisper_transcribe import transcribe_video_audio_task
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
|
@ -158,7 +153,6 @@ async def ingest_and_ai_task_impl(job_id: str):
|
|||
# Process with Gemini
|
||||
brand_context = job_doc.get("brand_context")
|
||||
sdh_requested = job_doc.get("requested_outputs", {}).get("sdh_vtt", False)
|
||||
source_has_ad = job_doc.get("source", {}).get("source_has_ad", False)
|
||||
_cost_ctx = {
|
||||
"user_id": job_doc.get("client_id", "system"),
|
||||
"job_id": job_id,
|
||||
|
|
@ -169,25 +163,12 @@ async def ingest_and_ai_task_impl(job_id: str):
|
|||
user_external_id=_cost_ctx["user_id"],
|
||||
project_id=_cost_ctx["project_id"],
|
||||
)
|
||||
# Load glossary for source language — use title + brand context for term matching
|
||||
from ..services.glossary_service import get_glossary_block_for_job
|
||||
_source_lang = job_doc.get("source", {}).get("language", "en")
|
||||
_job_title = job_doc.get("title") or ""
|
||||
_source_for_glossary = " ".join(filter(None, [_job_title, brand_context]))
|
||||
_job_for_glossary = {**job_doc, "_glossary_source_text": _source_for_glossary}
|
||||
glossary_block = await get_glossary_block_for_job(_job_for_glossary, _source_lang, db)
|
||||
ai_result = await gemini_service.extract_accessibility(
|
||||
temp_path,
|
||||
brand_context=brand_context,
|
||||
sdh_requested=sdh_requested,
|
||||
source_has_ad=source_has_ad,
|
||||
glossary_block=glossary_block,
|
||||
_cost_ctx=_cost_ctx,
|
||||
)
|
||||
# Enforce: if source already has AD, discard any AI-generated AD
|
||||
if source_has_ad:
|
||||
ai_result["audio_description_vtt"] = "WEBVTT\n"
|
||||
logger.info(f"source_has_ad=True for job {job_id}: skipping AD generation")
|
||||
|
||||
# Final safety check for required fields
|
||||
required_fields = ["captions_vtt", "audio_description_vtt"]
|
||||
|
|
@ -221,20 +202,6 @@ async def ingest_and_ai_task_impl(job_id: str):
|
|||
source_language = detected_language
|
||||
logger.info(f"Using detected language '{source_language}' for job {job_id}")
|
||||
|
||||
# Post-process: remove filler words per DCMP §6.01
|
||||
captions_vtt = VTTEditor.clean_disfluencies(ai_result["captions_vtt"], source_language)
|
||||
|
||||
# Align caption timings with Whisper word-level timestamps (Bug 5)
|
||||
captions_vtt = await _align_captions_with_whisper(captions_vtt, temp_path, job_id)
|
||||
# Fix overlapping cues that Gemini occasionally produces
|
||||
captions_vtt = VTTEditor.fix_overlapping_cues(captions_vtt)
|
||||
ai_result["captions_vtt"] = captions_vtt
|
||||
|
||||
# Fix overlapping cues in AD VTT as well
|
||||
ai_result["audio_description_vtt"] = VTTEditor.fix_overlapping_cues(
|
||||
ai_result["audio_description_vtt"]
|
||||
)
|
||||
|
||||
# Upload VTT files to GCS using detected language
|
||||
captions_gcs_uri = await upload_vtt_to_gcs(
|
||||
ai_result["captions_vtt"],
|
||||
|
|
@ -366,47 +333,3 @@ async def _get_video_duration(video_path: str) -> float:
|
|||
except Exception as e:
|
||||
logger.warning(f"Could not determine video duration: {e}")
|
||||
return 0.0
|
||||
|
||||
|
||||
async def _align_captions_with_whisper(captions_vtt: str, video_path: str, job_id: str) -> str:
|
||||
"""Align caption VTT timings with Whisper word timestamps. Returns original VTT on failure."""
|
||||
audio_path = video_path.replace(".mp4", "_captions_align.mp3")
|
||||
try:
|
||||
# Extract audio at 16kHz mono (optimal for Whisper)
|
||||
def _extract():
|
||||
result = subprocess.run(
|
||||
["ffmpeg", "-y", "-i", video_path, "-vn", "-acodec", "libmp3lame",
|
||||
"-ar", "16000", "-ac", "1", "-q:a", "5", audio_path],
|
||||
capture_output=True, text=True
|
||||
)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"FFmpeg failed: {result.stderr}")
|
||||
|
||||
await asyncio.to_thread(_extract)
|
||||
|
||||
task_result = transcribe_video_audio_task.apply_async(
|
||||
args=[job_id, audio_path], queue="whisper"
|
||||
)
|
||||
poll_count = 0
|
||||
while not task_result.ready():
|
||||
await asyncio.sleep(1.0)
|
||||
poll_count += 1
|
||||
if poll_count > 600:
|
||||
logger.warning(f"Whisper timeout for job {job_id}, skipping alignment")
|
||||
return captions_vtt
|
||||
|
||||
with allow_join_result():
|
||||
result_data = task_result.get(timeout=10)
|
||||
|
||||
words = [
|
||||
WordTimestamp(word=w["word"], start=w["start"], end=w["end"])
|
||||
for w in result_data.get("words", [])
|
||||
]
|
||||
return caption_aligner.align(captions_vtt, words)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Whisper caption alignment failed for job {job_id}: {e} — using Gemini timestamps")
|
||||
return captions_vtt
|
||||
finally:
|
||||
if os.path.exists(audio_path):
|
||||
os.unlink(audio_path)
|
||||
|
|
|
|||
|
|
@ -135,15 +135,6 @@ async def _async_render_accessible_video(job_id: str, language: str):
|
|||
if not lang_output:
|
||||
raise ValueError(f"No outputs found for language {language}")
|
||||
|
||||
# When source already has professional AD, render captions-only accessible video
|
||||
source_has_ad = job_doc.get("source", {}).get("source_has_ad", False)
|
||||
if source_has_ad:
|
||||
await _render_source_has_ad_video(
|
||||
job_id, job_doc, language, lang_output,
|
||||
source_video_path, temp_dir, db, job_title
|
||||
)
|
||||
return
|
||||
|
||||
# 3. Download AD VTT content
|
||||
ad_vtt_gcs = lang_output.get("ad_vtt_gcs")
|
||||
if not ad_vtt_gcs:
|
||||
|
|
@ -376,83 +367,6 @@ async def _async_render_accessible_video(job_id: str, language: str):
|
|||
client.close()
|
||||
|
||||
|
||||
async def _render_source_has_ad_video(
|
||||
job_id: str,
|
||||
job_doc: dict,
|
||||
language: str,
|
||||
lang_output: dict,
|
||||
source_video_path: str,
|
||||
temp_dir: str,
|
||||
db,
|
||||
job_title: str,
|
||||
) -> None:
|
||||
"""Render accessible video for jobs where the source already has professional AD.
|
||||
|
||||
Embeds the captions VTT as a soft subtitle track — no AD audio injection needed
|
||||
since the original audio track already contains the AD narration.
|
||||
"""
|
||||
captions_vtt_gcs = lang_output.get("captions_vtt_gcs")
|
||||
if not captions_vtt_gcs:
|
||||
raise ValueError(f"No captions VTT found for language {language}")
|
||||
|
||||
# Download captions VTT
|
||||
captions_blob_path = captions_vtt_gcs.replace(f"gs://{settings.gcs_bucket}/", "")
|
||||
captions_vtt_content = gcs_service.bucket.blob(captions_blob_path).download_as_text()
|
||||
|
||||
# Write VTT to temp file
|
||||
vtt_path = os.path.join(temp_dir, "captions.vtt")
|
||||
with open(vtt_path, "w", encoding="utf-8") as f:
|
||||
f.write(captions_vtt_content)
|
||||
|
||||
# Embed captions as soft subtitle track — no re-encode needed
|
||||
output_video_path = os.path.join(temp_dir, "accessible_video.mp4")
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-i", source_video_path,
|
||||
"-i", vtt_path,
|
||||
"-c", "copy",
|
||||
"-c:s", "webvtt",
|
||||
"-metadata:s:s:0", f"language={language}",
|
||||
output_video_path,
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"ffmpeg caption embed failed: {result.stderr[-500:]}")
|
||||
|
||||
# Upload rendered video
|
||||
video_blob_path = gcs_path(job_doc, language, "accessible_video.mp4")
|
||||
video_blob = gcs_service.bucket.blob(video_blob_path)
|
||||
video_blob.content_type = "video/mp4"
|
||||
video_blob.upload_from_filename(output_video_path)
|
||||
video_gcs_uri = f"gs://{settings.gcs_bucket}/{video_blob_path}"
|
||||
logger.info(f"Uploaded source-has-ad accessible video to {video_gcs_uri}")
|
||||
|
||||
# Update job document
|
||||
await db.jobs.update_one(
|
||||
{"_id": job_id},
|
||||
{
|
||||
"$set": {
|
||||
f"outputs.{language}.accessible_video_gcs": video_gcs_uri,
|
||||
f"outputs.{language}.accessible_video_method": "caption_embed",
|
||||
f"accessible_video_progress.{language}": {
|
||||
"status": "completed",
|
||||
"method": "caption_embed",
|
||||
"started_at": job_doc.get("accessible_video_progress", {}).get(language, {}).get("started_at"),
|
||||
"completed_at": datetime.utcnow(),
|
||||
},
|
||||
"updated_at": datetime.utcnow(),
|
||||
}
|
||||
},
|
||||
)
|
||||
broadcast_status_update(
|
||||
job_id,
|
||||
"asset_ready",
|
||||
job_title=job_title,
|
||||
message=f"Accessible video ready for {language.upper()} (caption embed)",
|
||||
)
|
||||
await _check_accessible_video_completion(job_id, db)
|
||||
|
||||
|
||||
def _build_placements_from_ad_vtt(ad_vtt_content: str, cue_durations: list[float]) -> list[dict]:
|
||||
"""
|
||||
Build placement instructions from AD VTT cues and TTS durations.
|
||||
|
|
|
|||
|
|
@ -272,7 +272,7 @@ async def _async_rerender_accessible_video(
|
|||
|
||||
# Validate VTT cue count matches MP3 count
|
||||
vtt_cues = VTTParser.parse(ad_vtt_content)
|
||||
downloaded_indices = {idx for idx, _ in ad_segments}
|
||||
downloaded_indices = set(idx for idx, _ in ad_segments)
|
||||
if len(vtt_cues) != len(ad_segments):
|
||||
missing_indices = set(range(len(vtt_cues))) - downloaded_indices
|
||||
logger.warning(
|
||||
|
|
|
|||
|
|
@ -189,7 +189,6 @@ async def _async_translate_and_synthesize(job_id: str, languages: list[str] | No
|
|||
|
||||
updated_outputs = job_doc.get("outputs", {})
|
||||
_source_text_for_glossary = " ".join(filter(None, [source_captions_vtt, source_ad_vtt]))
|
||||
_failed_languages: list[str] = []
|
||||
|
||||
try:
|
||||
target_languages = [lang for lang in requested_languages if lang != source_language]
|
||||
|
|
@ -255,9 +254,7 @@ async def _async_translate_and_synthesize(job_id: str, languages: list[str] | No
|
|||
lang_out["sdh_captions_vtt_gcs"] = sdh_gcs_uri
|
||||
|
||||
try:
|
||||
from ..services.descriptive_transcript import (
|
||||
generate_descriptive_transcript,
|
||||
)
|
||||
from ..services.descriptive_transcript import generate_descriptive_transcript
|
||||
transcript_text = generate_descriptive_transcript(translated_captions, translated_ad)
|
||||
if transcript_text:
|
||||
transcript_gcs_uri = await upload_vtt_to_gcs(
|
||||
|
|
@ -271,39 +268,24 @@ async def _async_translate_and_synthesize(job_id: str, languages: list[str] | No
|
|||
logger.info(f"Processed language: {language} (origin: {origin})")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process language {language}: {e}", exc_info=True)
|
||||
_failed_languages.append(language)
|
||||
# Preserve existing GCS URIs and origin so retranslation failure
|
||||
# doesn't destroy captions the user can still view
|
||||
existing = updated_outputs.get(language, {})
|
||||
logger.error(f"Failed to process language {language}: {e}")
|
||||
updated_outputs[language] = {
|
||||
**existing,
|
||||
"qa_notes": f"Translation failed: {str(e)[:200]}",
|
||||
"origin": "transcreate" if _style == "transcreate" else "gemini_translate",
|
||||
"qa_notes": f"Translation failed: {str(e)}",
|
||||
}
|
||||
|
||||
finally:
|
||||
pass
|
||||
|
||||
# Update status to TTS generating.
|
||||
# Use per-language dot-notation so concurrent single-language retranslation tasks
|
||||
# don't overwrite each other's results (concurrency:2 race condition).
|
||||
per_lang_updates = {
|
||||
f"outputs.{lang}": updated_outputs[lang]
|
||||
for lang in target_languages
|
||||
if lang in updated_outputs
|
||||
}
|
||||
_status_update: dict = {
|
||||
"status": JobStatus.TTS_GENERATING.value,
|
||||
"updated_at": datetime.utcnow(),
|
||||
**per_lang_updates,
|
||||
}
|
||||
if _failed_languages:
|
||||
_status_update["translation_errors"] = _failed_languages
|
||||
logger.warning(f"Job {job_id}: translation failed for languages: {_failed_languages}")
|
||||
# Update status to TTS generating
|
||||
await db.jobs.update_one(
|
||||
{"_id": job_id},
|
||||
{
|
||||
"$set": _status_update,
|
||||
"$set": {
|
||||
"status": JobStatus.TTS_GENERATING.value,
|
||||
"outputs": updated_outputs,
|
||||
"updated_at": datetime.utcnow()
|
||||
},
|
||||
"$push": {
|
||||
"review.history": {
|
||||
"at": datetime.utcnow(),
|
||||
|
|
@ -314,23 +296,14 @@ async def _async_translate_and_synthesize(job_id: str, languages: list[str] | No
|
|||
}
|
||||
)
|
||||
|
||||
# Generate TTS when MP3 output or accessible video is requested.
|
||||
# accessible_video rendering requires per-cue MP3s (ad_cue_manifest) produced by TTS,
|
||||
# so TTS must run even when the final assembled ad.mp3 is not requested as a download.
|
||||
# Generate TTS for languages that need MP3
|
||||
accessible_video_requested = job_doc["requested_outputs"].get("accessible_video_mp4", False)
|
||||
|
||||
if job_doc["requested_outputs"]["audio_description_mp3"] or accessible_video_requested:
|
||||
if job_doc["requested_outputs"]["audio_description_mp3"]:
|
||||
# Get TTS preferences from job
|
||||
tts_preferences = job_doc["requested_outputs"].get("tts_preferences", {})
|
||||
# For retranslation, only regenerate TTS for the specific target languages —
|
||||
# not all languages. Regenerating all is wasteful and triggers unwanted renders.
|
||||
tts_outputs = (
|
||||
{lang: updated_outputs[lang] for lang in target_languages if lang in updated_outputs}
|
||||
if retranslate
|
||||
else updated_outputs
|
||||
)
|
||||
await _generate_tts_for_languages(
|
||||
job_id, tts_outputs, db, source_language, tts_preferences, accessible_video_requested,
|
||||
job_id, updated_outputs, db, source_language, tts_preferences, accessible_video_requested,
|
||||
user_id=_cost_ctx["user_id"], cost_project_id=_cost_ctx["project_id"],
|
||||
)
|
||||
|
||||
|
|
@ -390,11 +363,10 @@ async def _async_translate_and_synthesize(job_id: str, languages: list[str] | No
|
|||
message=f"{job_title} has finished translation and audio generation - ready for QC Review"
|
||||
)
|
||||
else:
|
||||
# accessible_video_mp4=True: render tasks were dispatched from within
|
||||
# _generate_language_tts for each language. Stay in TTS_GENERATING;
|
||||
# render_accessible_video_task will transition to PENDING_QC when all videos complete.
|
||||
# When accessible video is requested, stay in TTS_GENERATING
|
||||
# The render_accessible_video task will transition to PENDING_QC when all videos complete
|
||||
logger.info(
|
||||
f"Accessible video rendering dispatched for job {job_id}. "
|
||||
f"Accessible video rendering triggered for job {job_id}. "
|
||||
f"Staying in TTS_GENERATING until all videos are complete."
|
||||
)
|
||||
|
||||
|
|
@ -585,6 +557,7 @@ async def _generate_language_tts(job_id: str, language: str, lang_output: dict,
|
|||
)
|
||||
|
||||
# Preflight budget check before dispatching TTS
|
||||
tts_provider = tts_preferences.get("provider", "gemini")
|
||||
from .tts_synthesis import _TTS_MODEL_STRINGS
|
||||
tts_model_key = tts_preferences.get("model", "flash")
|
||||
await cost_tracker.aio_preflight(
|
||||
|
|
@ -647,7 +620,7 @@ async def _generate_language_tts(job_id: str, language: str, lang_output: dict,
|
|||
cue_index=0,
|
||||
cue_text="",
|
||||
api_response_info=str(e)
|
||||
) from e
|
||||
)
|
||||
|
||||
# Handle case where results contain exceptions (shouldn't happen with new task design, but safety net)
|
||||
processed_results = []
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ in parallel using a dedicated TTS worker with concurrency=8.
|
|||
import asyncio
|
||||
import hashlib
|
||||
import io
|
||||
import re
|
||||
import time
|
||||
|
||||
from celery import group
|
||||
|
|
@ -24,21 +23,6 @@ from . import celery_app
|
|||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _extract_retry_after(error: Exception) -> float | None:
|
||||
"""Return seconds to wait from a Google API 429 retryDelay, or None."""
|
||||
msg = str(error)
|
||||
# "Please retry in 37.65s" pattern from the message text
|
||||
m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
# 'retryDelay': '37s' pattern in the JSON body
|
||||
m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
return None
|
||||
|
||||
|
||||
_TTS_PROVIDER_MODEL_MAP = {
|
||||
# (provider, model) → cost-tracker provider + model strings
|
||||
"gemini": "google",
|
||||
|
|
@ -185,15 +169,14 @@ def synthesize_cue_task(
|
|||
|
||||
# Check if we have retries left
|
||||
if self.request.retries < self.max_retries:
|
||||
# Calculate backoff delay with jitter
|
||||
import random
|
||||
# Honour the API-provided retry delay on 429; fall back to exponential backoff
|
||||
api_delay = _extract_retry_after(e)
|
||||
delay = api_delay if api_delay else (2 ** self.request.retries) + random.uniform(0, 1)
|
||||
delay = (2 ** self.request.retries) + random.uniform(0, 1)
|
||||
logger.info(
|
||||
f"Retrying TTS cue {cue_index} in {delay:.1f}s "
|
||||
f"(attempt {self.request.retries + 2}/{self.max_retries + 1})"
|
||||
)
|
||||
raise self.retry(exc=e, countdown=delay) from e
|
||||
raise self.retry(exc=e, countdown=delay)
|
||||
else:
|
||||
# Max retries exhausted - return failure result instead of raising
|
||||
logger.error(
|
||||
|
|
@ -454,8 +437,7 @@ def parse_ad_cues(vtt_content: str) -> list[dict]:
|
|||
if " --> " in line:
|
||||
timing_parts = line.split(" --> ")
|
||||
start_time = _parse_timestamp(timing_parts[0].strip())
|
||||
# Strip cue settings (e.g. "line:0% align:start") from end timestamp
|
||||
end_time = _parse_timestamp(timing_parts[1].strip().split()[0])
|
||||
end_time = _parse_timestamp(timing_parts[1].strip())
|
||||
|
||||
# Get text from next line(s)
|
||||
i += 1
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
"""Celery task for Whisper transcription with Cloud Run fallback."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
|
||||
import google.auth.transport.requests
|
||||
|
|
@ -10,11 +8,9 @@ import httpx
|
|||
from google.auth import default
|
||||
from google.cloud import storage
|
||||
from google.oauth2 import id_token
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
|
||||
from ..core.config import settings
|
||||
from ..core.logging import get_logger
|
||||
from ..services import cost_tracker
|
||||
from ..services.whisper_service import whisper_service
|
||||
from . import celery_app
|
||||
|
||||
|
|
@ -184,15 +180,14 @@ def transcribe_video_audio_task(self, job_id: str, audio_path: str) -> dict:
|
|||
"""
|
||||
logger.info(f"Starting Whisper transcription task for job {job_id}")
|
||||
|
||||
t_start = time.monotonic()
|
||||
try:
|
||||
# Use Cloud Run if configured, otherwise local
|
||||
if settings.whisper_service_url:
|
||||
logger.info(f"Using Cloud Run Whisper service: {settings.whisper_service_url}")
|
||||
result = _transcribe_via_cloud_run(job_id, audio_path)
|
||||
return _transcribe_via_cloud_run(job_id, audio_path)
|
||||
else:
|
||||
logger.info("Using local Whisper service")
|
||||
result = _transcribe_locally(job_id, audio_path)
|
||||
return _transcribe_locally(job_id, audio_path)
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Cloud Run transcription failed for job {job_id}: {e.response.status_code} - {e.response.text}")
|
||||
|
|
@ -200,36 +195,3 @@ def transcribe_video_audio_task(self, job_id: str, audio_path: str) -> dict:
|
|||
except Exception as e:
|
||||
logger.error(f"Whisper transcription failed for job {job_id}: {e}")
|
||||
raise
|
||||
|
||||
latency_ms = int((time.monotonic() - t_start) * 1000)
|
||||
audio_duration = result.get("audio_duration", 0.0)
|
||||
if audio_duration:
|
||||
try:
|
||||
async def _fetch_job():
|
||||
client = AsyncIOMotorClient(settings.mongodb_uri)
|
||||
try:
|
||||
return await client[settings.mongodb_db].jobs.find_one({"_id": job_id})
|
||||
finally:
|
||||
client.close()
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
job_doc = loop.run_until_complete(_fetch_job())
|
||||
finally:
|
||||
loop.close()
|
||||
user_id = str(job_doc.get("created_by", "")) if job_doc else ""
|
||||
project_id = str(job_doc.get("cost_tracker_project_id", "")) if job_doc else ""
|
||||
cost_tracker.record(
|
||||
model="whisper-1",
|
||||
provider="openai",
|
||||
user_external_id=user_id,
|
||||
project_id=project_id or None,
|
||||
job_external_id=job_id,
|
||||
chars=int(audio_duration),
|
||||
latency_ms=latency_ms,
|
||||
status="success",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Cost tracking failed for job {job_id} (non-fatal): {e}")
|
||||
|
||||
return result
|
||||
|
|
|
|||
169
backend/poetry.lock
generated
169
backend/poetry.lock
generated
|
|
@ -1,4 +1,4 @@
|
|||
# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 2.3.4 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
|
|
@ -1392,11 +1392,11 @@ files = [
|
|||
]
|
||||
|
||||
[package.dependencies]
|
||||
google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev"
|
||||
google-auth = ">=1.25.0,<3.0dev"
|
||||
google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0.dev0"
|
||||
google-auth = ">=1.25.0,<3.0.dev0"
|
||||
|
||||
[package.extras]
|
||||
grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"]
|
||||
grpc = ["grpcio (>=1.38.0,<2.0.dev0)", "grpcio-status (>=1.38.0,<2.0.dev0)"]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-secret-manager"
|
||||
|
|
@ -1433,41 +1433,37 @@ files = [
|
|||
]
|
||||
|
||||
[package.dependencies]
|
||||
google-api-core = ">=2.15.0,<3.0.0dev"
|
||||
google-auth = ">=2.26.1,<3.0dev"
|
||||
google-cloud-core = ">=2.3.0,<3.0dev"
|
||||
google-crc32c = ">=1.0,<2.0dev"
|
||||
google-api-core = ">=2.15.0,<3.0.0.dev0"
|
||||
google-auth = ">=2.26.1,<3.0.dev0"
|
||||
google-cloud-core = ">=2.3.0,<3.0.dev0"
|
||||
google-crc32c = ">=1.0,<2.0.dev0"
|
||||
google-resumable-media = ">=2.7.2"
|
||||
requests = ">=2.18.0,<3.0.0dev"
|
||||
requests = ">=2.18.0,<3.0.0.dev0"
|
||||
|
||||
[package.extras]
|
||||
protobuf = ["protobuf (<6.0.0dev)"]
|
||||
protobuf = ["protobuf (<6.0.0.dev0)"]
|
||||
tracing = ["opentelemetry-api (>=1.1.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-texttospeech"
|
||||
version = "2.36.0"
|
||||
version = "2.27.0"
|
||||
description = "Google Cloud Texttospeech API client library"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "google_cloud_texttospeech-2.36.0-py3-none-any.whl", hash = "sha256:03f76162543e9d77ecbab823c1cc3728c42ef40547353bcfdbd9ac0e71cb8121"},
|
||||
{file = "google_cloud_texttospeech-2.36.0.tar.gz", hash = "sha256:6c605af7e4774c1bac99fcaaf4538f152b10bba7738a23f42184557f444dc6b7"},
|
||||
{file = "google_cloud_texttospeech-2.27.0-py3-none-any.whl", hash = "sha256:0f7c5fe05281beb6d005ea191f61c913085e8439e5ffd2d5d21e29d106150b54"},
|
||||
{file = "google_cloud_texttospeech-2.27.0.tar.gz", hash = "sha256:94a382c95b7cc58efd2505a24c2968e2614fc6bdf9d76fb9a819d4ed29ae188e"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
google-api-core = {version = ">=2.11.0,<3.0.0", extras = ["grpc"]}
|
||||
google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras = ["grpc"]}
|
||||
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0"
|
||||
grpcio = [
|
||||
{version = ">=1.75.1,<2.0.0", markers = "python_version >= \"3.14\""},
|
||||
{version = ">=1.33.2,<2.0.0", markers = "python_version < \"3.14\""},
|
||||
]
|
||||
proto-plus = [
|
||||
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
|
||||
{version = ">=1.22.3,<2.0.0", markers = "python_version < \"3.13\""},
|
||||
{version = ">=1.22.3,<2.0.0"},
|
||||
]
|
||||
protobuf = ">=4.25.8,<8.0.0"
|
||||
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-trace"
|
||||
|
|
@ -1509,7 +1505,7 @@ google-cloud-core = ">=1.4.4,<3.0.0"
|
|||
grpc-google-iam-v1 = ">=0.14.0,<1.0.0"
|
||||
proto-plus = [
|
||||
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
|
||||
{version = ">=1.22.3,<2.0.0"},
|
||||
{version = ">=1.22.3,<2.0.0", markers = "python_version < \"3.13\""},
|
||||
]
|
||||
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
|
||||
|
||||
|
|
@ -1601,11 +1597,11 @@ files = [
|
|||
]
|
||||
|
||||
[package.dependencies]
|
||||
google-crc32c = ">=1.0,<2.0dev"
|
||||
google-crc32c = ">=1.0,<2.0.dev0"
|
||||
|
||||
[package.extras]
|
||||
aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "google-auth (>=1.22.0,<2.0dev)"]
|
||||
requests = ["requests (>=2.18.0,<3.0.0dev)"]
|
||||
aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "google-auth (>=1.22.0,<2.0.dev0)"]
|
||||
requests = ["requests (>=2.18.0,<3.0.0.dev0)"]
|
||||
|
||||
[[package]]
|
||||
name = "googleapis-common-protos"
|
||||
|
|
@ -1645,80 +1641,67 @@ protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4
|
|||
|
||||
[[package]]
|
||||
name = "grpcio"
|
||||
version = "1.80.0"
|
||||
version = "1.74.0"
|
||||
description = "HTTP/2-based RPC framework"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "grpcio-1.80.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:886457a7768e408cdce226ad1ca67d2958917d306523a0e21e1a2fdaa75c9c9c"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:7b641fc3f1dc647bfd80bd713addc68f6d145956f64677e56d9ebafc0bd72388"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:33eb763f18f006dc7fee1e69831d38d23f5eccd15b2e0f92a13ee1d9242e5e02"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:52d143637e3872633fc7dd7c3c6a1c84e396b359f3a72e215f8bf69fd82084fc"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c51bf8ac4575af2e0678bccfb07e47321fc7acb5049b4482832c5c195e04e13a"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:50a9871536d71c4fba24ee856abc03a87764570f0c457dd8db0b4018f379fed9"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a72d84ad0514db063e21887fbacd1fd7acb4d494a564cae22227cd45c7fbf199"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f7691a6788ad9196872f95716df5bc643ebba13c97140b7a5ee5c8e75d1dea81"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-win32.whl", hash = "sha256:46c2390b59d67f84e882694d489f5b45707c657832d7934859ceb8c33f467069"},
|
||||
{file = "grpcio-1.80.0-cp310-cp310-win_amd64.whl", hash = "sha256:dc053420fc75749c961e2a4c906398d7c15725d36ccc04ae6d16093167223b58"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5c07e82e822e1161354e32da2662f741a4944ea955f9f580ec8fb409dd6f6060"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3cb8130ba457d2aa09fa6b7c3ed6b6e4e6a2685fce63cb803d479576c4d80e21"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e5e478b3d14afd23f12e49e8b44c8684ac3c5f08561c43a5b9691c54d136ab"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:00168469238b022500e486c1c33916acf2f2a9b2c022202cf8a1885d2e3073c1"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8502122a3cc1714038e39a0b071acb1207ca7844208d5ea0d091317555ee7106"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-win32.whl", hash = "sha256:51b4a7189b0bef2aa30adce3c78f09c83526cf3dddb24c6a96555e3b97340440"},
|
||||
{file = "grpcio-1.80.0-cp311-cp311-win_amd64.whl", hash = "sha256:02e64bb0bb2da14d947a49e6f120a75e947250aebe65f9629b62bb1f5c14e6e9"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193"},
|
||||
{file = "grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294"},
|
||||
{file = "grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae"},
|
||||
{file = "grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:aacdfb4ed3eb919ca997504d27e03d5dba403c85130b8ed450308590a738f7a4"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:a361c20ec1ccd3c3953d20fb6d7b4125093bdd10dff44c5e2bbb39e58917cedc"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:43168871f170d1e4ed16ae03d10cd21efa29f190e710a624cee7e5ae07da6f4f"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:1b97cd29a8eda100b559b455331c487a80915b6ea6bd91cf3e89836c4ee8d957"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bac1d573dfa84ce59a5547073e28fa7326d53352adda6912e362da0b917fcef4"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4560cf0e86514595dbbd330cd65b7afad4b5c4b8c4905c041cfffa138d45e6fd"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ec0a592e926071b4abad50c1495cd0d0d513324b3ff5e7267067c33ba27506e4"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:deb10a1528473c11f72a0939eed36d83e847d7cbb63e8cc5611fb7a912d38614"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-win32.whl", hash = "sha256:627fb7312171cdc52828bd6fac8d7028ff2a64b89f1957b6f3416caa2218d141"},
|
||||
{file = "grpcio-1.80.0-cp39-cp39-win_amd64.whl", hash = "sha256:05d55e1798756282cddd52d56c896b3e7d673e3a8798c2f1cd05ba249a3bb4de"},
|
||||
{file = "grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:85bd5cdf4ed7b2d6438871adf6afff9af7096486fcf51818a81b77ef4dd30907"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:68c8ebcca945efff9d86d8d6d7bfb0841cf0071024417e2d7f45c5e46b5b08eb"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:e154d230dc1bbbd78ad2fdc3039fa50ad7ffcf438e4eb2fa30bce223a70c7486"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8978003816c7b9eabe217f88c78bc26adc8f9304bf6a594b02e5a49b2ef9c11"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3d7bd6e3929fd2ea7fbc3f562e4987229ead70c9ae5f01501a46701e08f1ad9"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:136b53c91ac1d02c8c24201bfdeb56f8b3ac3278668cbb8e0ba49c88069e1bdc"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fe0f540750a13fd8e5da4b3eaba91a785eea8dca5ccd2bc2ffe978caa403090e"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4e4181bfc24413d1e3a37a0b7889bea68d973d4b45dd2bc68bb766c140718f82"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-win32.whl", hash = "sha256:1733969040989f7acc3d94c22f55b4a9501a30f6aaacdbccfaba0a3ffb255ab7"},
|
||||
{file = "grpcio-1.74.0-cp310-cp310-win_amd64.whl", hash = "sha256:9e912d3c993a29df6c627459af58975b2e5c897d93287939b9d5065f000249b5"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:69e1a8180868a2576f02356565f16635b99088da7df3d45aaa7e24e73a054e31"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:8efe72fde5500f47aca1ef59495cb59c885afe04ac89dd11d810f2de87d935d4"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:a8f0302f9ac4e9923f98d8e243939a6fb627cd048f5cd38595c97e38020dffce"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f609a39f62a6f6f05c7512746798282546358a37ea93c1fcbadf8b2fed162e3"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98e0b7434a7fa4e3e63f250456eaef52499fba5ae661c58cc5b5477d11e7182"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:662456c4513e298db6d7bd9c3b8df6f75f8752f0ba01fb653e252ed4a59b5a5d"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3d14e3c4d65e19d8430a4e28ceb71ace4728776fd6c3ce34016947474479683f"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bf949792cee20d2078323a9b02bacbbae002b9e3b9e2433f2741c15bdeba1c4"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-win32.whl", hash = "sha256:55b453812fa7c7ce2f5c88be3018fb4a490519b6ce80788d5913f3f9d7da8c7b"},
|
||||
{file = "grpcio-1.74.0-cp311-cp311-win_amd64.whl", hash = "sha256:86ad489db097141a907c559988c29718719aa3e13370d40e20506f11b4de0d11"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:8533e6e9c5bd630ca98062e3a1326249e6ada07d05acf191a77bc33f8948f3d8"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:2918948864fec2a11721d91568effffbe0a02b23ecd57f281391d986847982f6"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:60d2d48b0580e70d2e1954d0d19fa3c2e60dd7cbed826aca104fff518310d1c5"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3601274bc0523f6dc07666c0e01682c94472402ac2fd1226fd96e079863bfa49"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:176d60a5168d7948539def20b2a3adcce67d72454d9ae05969a2e73f3a0feee7"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e759f9e8bc908aaae0412642afe5416c9f983a80499448fcc7fab8692ae044c3"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e7c4389771855a92934b2846bd807fc25a3dfa820fd912fe6bd8136026b2707"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cce634b10aeab37010449124814b05a62fb5f18928ca878f1bf4750d1f0c815b"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-win32.whl", hash = "sha256:885912559974df35d92219e2dc98f51a16a48395f37b92865ad45186f294096c"},
|
||||
{file = "grpcio-1.74.0-cp312-cp312-win_amd64.whl", hash = "sha256:42f8fee287427b94be63d916c90399ed310ed10aadbf9e2e5538b3e497d269bc"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2bc2d7d8d184e2362b53905cb1708c84cb16354771c04b490485fa07ce3a1d89"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:c14e803037e572c177ba54a3e090d6eb12efd795d49327c5ee2b3bddb836bf01"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f6ec94f0e50eb8fa1744a731088b966427575e40c2944a980049798b127a687e"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:566b9395b90cc3d0d0c6404bc8572c7c18786ede549cdb540ae27b58afe0fb91"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1ea6176d7dfd5b941ea01c2ec34de9531ba494d541fe2057c904e601879f249"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:64229c1e9cea079420527fa8ac45d80fc1e8d3f94deaa35643c381fa8d98f362"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:0f87bddd6e27fc776aacf7ebfec367b6d49cad0455123951e4488ea99d9b9b8f"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3b03d8f2a07f0fea8c8f74deb59f8352b770e3900d143b3d1475effcb08eec20"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-win32.whl", hash = "sha256:b6a73b2ba83e663b2480a90b82fdae6a7aa6427f62bf43b29912c0cfd1aa2bfa"},
|
||||
{file = "grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:4bc5fca10aaf74779081e16c2bcc3d5ec643ffd528d9e7b1c9039000ead73bae"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:6bab67d15ad617aff094c382c882e0177637da73cbc5532d52c07b4ee887a87b"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:655726919b75ab3c34cdad39da5c530ac6fa32696fb23119e36b64adcfca174a"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a2b06afe2e50ebfd46247ac3ba60cac523f54ec7792ae9ba6073c12daf26f0a"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f251c355167b2360537cf17bea2cf0197995e551ab9da6a0a59b3da5e8704f9"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8f7b5882fb50632ab1e48cb3122d6df55b9afabc265582808036b6e51b9fd6b7"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:834988b6c34515545b3edd13e902c1acdd9f2465d386ea5143fb558f153a7176"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:22b834cef33429ca6cc28303c9c327ba9a3fafecbf62fae17e9a7b7163cc43ac"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-win32.whl", hash = "sha256:7d95d71ff35291bab3f1c52f52f474c632db26ea12700c2ff0ea0532cb0b5854"},
|
||||
{file = "grpcio-1.74.0-cp39-cp39-win_amd64.whl", hash = "sha256:ecde9ab49f58433abe02f9ed076c7b5be839cf0153883a6d23995937a82392fa"},
|
||||
{file = "grpcio-1.74.0.tar.gz", hash = "sha256:80d1f4fbb35b0742d3e3d3bb654b7381cd5f015f8497279a1e9c21ba623e01b1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
typing-extensions = ">=4.12,<5.0"
|
||||
|
||||
[package.extras]
|
||||
protobuf = ["grpcio-tools (>=1.80.0)"]
|
||||
protobuf = ["grpcio-tools (>=1.74.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-status"
|
||||
|
|
@ -2654,7 +2637,7 @@ files = [
|
|||
[package.dependencies]
|
||||
google-cloud-trace = ">=1.1,<2.0"
|
||||
opentelemetry-api = ">=1.0,<2.0"
|
||||
opentelemetry-resourcedetector-gcp = ">=1.5.0dev0,<2.dev0"
|
||||
opentelemetry-resourcedetector-gcp = ">=1.5.0.dev0,<2.dev0"
|
||||
opentelemetry-sdk = ">=1.0,<2.0"
|
||||
|
||||
[[package]]
|
||||
|
|
@ -4657,4 +4640,4 @@ type = ["pytest-mypy"]
|
|||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "9dabfc4908c03691dcd8c0daeeaf57ab8b2e7eeb15544102418b08f5962fa586"
|
||||
content-hash = "1c0e7b5869ed6f8f2da355c78e783891a60da45d5129887c890c80bba63200f9"
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ redis = "^5.0.1"
|
|||
celery = {extras = ["redis"], version = "^5.3.4"}
|
||||
google-cloud-storage = "^2.10.0"
|
||||
google-cloud-translate = "^3.12.1"
|
||||
google-cloud-texttospeech = "^2.36.0"
|
||||
google-cloud-texttospeech = "^2.16.3"
|
||||
google-cloud-secret-manager = "^2.18.1"
|
||||
google-genai = "^1.56.0"
|
||||
python-jose = {extras = ["cryptography"], version = "^3.3.0"}
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 197 KiB |
|
|
@ -53,20 +53,6 @@ export function VideoReviewPlayer({ job, downloads }: VideoReviewPlayerProps) {
|
|||
}
|
||||
}, [assetTabs, activeTabKey]);
|
||||
|
||||
// Disable browser-native text tracks so they don't compete with our React overlay
|
||||
useEffect(() => {
|
||||
const video = videoRef.current;
|
||||
if (!video) return;
|
||||
const disableTracks = () => {
|
||||
for (let i = 0; i < video.textTracks.length; i++) {
|
||||
video.textTracks[i].mode = 'disabled';
|
||||
}
|
||||
};
|
||||
disableTracks();
|
||||
video.addEventListener('loadedmetadata', disableTracks);
|
||||
return () => video.removeEventListener('loadedmetadata', disableTracks);
|
||||
}, [videoRef.current]);
|
||||
|
||||
// Get current tab
|
||||
const activeTab = assetTabs.find((t) => t.key === activeTabKey);
|
||||
|
||||
|
|
@ -319,9 +305,9 @@ export function VideoReviewPlayer({ job, downloads }: VideoReviewPlayerProps) {
|
|||
</div>
|
||||
)}
|
||||
|
||||
{/* Caption Overlay — position at top when cue has line:0% setting */}
|
||||
{/* Caption Overlay — always at the bottom, above native controls */}
|
||||
{showCaptions && currentCaption && (
|
||||
<div className={`absolute ${currentCaption.positionTop ? 'top-4' : 'bottom-14'} left-1/2 transform -translate-x-1/2 bg-black bg-opacity-80 text-white px-4 py-2 rounded max-w-[90%]`}>
|
||||
<div className="absolute bottom-14 left-1/2 transform -translate-x-1/2 bg-black bg-opacity-80 text-white px-4 py-2 rounded max-w-[90%]">
|
||||
<div className="text-center whitespace-pre-wrap">
|
||||
{currentCaption.text}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -184,8 +184,6 @@ All translations are generated from the **approved English master VTT** after En
|
|||
|
||||
If a language shows the red **⚠ video-native** badge in QC Detail, its translation was generated directly from the video (legacy behaviour) and its cue structure may differ from English. Production or Admin can use the **↺ Re-translate from EN** button to regenerate it from the approved English master.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## 8. Uploading on Behalf of a Client
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@ export interface VTTCue {
|
|||
endTime: number; // seconds
|
||||
text: string;
|
||||
identifier?: string;
|
||||
/** Raw cue settings string from the VTT timing line (e.g. "line:0% align:start") */
|
||||
settings?: string;
|
||||
/** When true, caption should be rendered at the top of the video (line:0% cue setting) */
|
||||
positionTop?: boolean;
|
||||
}
|
||||
|
|
@ -56,7 +54,6 @@ export class VTTParser {
|
|||
endTime,
|
||||
text: textLines.join('\n'),
|
||||
identifier,
|
||||
settings: cueSettings.trim() || undefined,
|
||||
...(positionTop ? { positionTop: true } : {})
|
||||
});
|
||||
}
|
||||
|
|
@ -78,13 +75,10 @@ export class VTTParser {
|
|||
lines.push(cue.identifier);
|
||||
}
|
||||
|
||||
// Add timing line (preserve cue settings like line:0%)
|
||||
// Add timing line
|
||||
const startTimestamp = this.formatTimestamp(cue.startTime);
|
||||
const endTimestamp = this.formatTimestamp(cue.endTime);
|
||||
const timingLine = cue.settings
|
||||
? `${startTimestamp} --> ${endTimestamp} ${cue.settings}`
|
||||
: `${startTimestamp} --> ${endTimestamp}`;
|
||||
lines.push(timingLine);
|
||||
lines.push(`${startTimestamp} --> ${endTimestamp}`);
|
||||
|
||||
// Add text (can be multi-line)
|
||||
lines.push(cue.text);
|
||||
|
|
|
|||
|
|
@ -163,8 +163,6 @@ export function QCDetail() {
|
|||
const [retranslateLang, setRetranslateLang] = useState<string | null>(null);
|
||||
const [retranslateReason, setRetranslateReason] = useState('');
|
||||
const [retranslateLoading, setRetranslateLoading] = useState(false);
|
||||
const [showBulkRetranslateConfirm, setShowBulkRetranslateConfirm] = useState(false);
|
||||
const [bulkRetranslateLoading, setBulkRetranslateLoading] = useState(false);
|
||||
|
||||
const canAssign = authUser?.role === 'project_manager' || authUser?.role === 'production' || authUser?.role === 'admin';
|
||||
const canApproveAll = authUser?.role === 'production' || authUser?.role === 'admin';
|
||||
|
|
@ -307,11 +305,6 @@ export function QCDetail() {
|
|||
// Total QC progress
|
||||
const totalLangs = availableLanguages.length;
|
||||
const approvedLangs = availableLanguages.filter(l => langQcMap[l]?.status === 'approved').length;
|
||||
const brokenLanguages = availableLanguages.filter(lang => {
|
||||
if (lang === sourceLanguage) return false;
|
||||
const out = job?.outputs?.[lang];
|
||||
return out?.origin === 'video_native' || !out?.captions_vtt_gcs;
|
||||
});
|
||||
const [costProjectIdSaved, setCostProjectIdSaved] = useState(false);
|
||||
const [showRejectForm, setShowRejectForm] = useState(false);
|
||||
const [captionsVtt, setCaptionsVtt] = useState('');
|
||||
|
|
@ -613,25 +606,6 @@ export function QCDetail() {
|
|||
await _doSaveVtt(false, captionsVtt || undefined, adVtt || undefined);
|
||||
};
|
||||
|
||||
const handleBulkRetranslate = async () => {
|
||||
if (!id || brokenLanguages.length === 0) return;
|
||||
setBulkRetranslateLoading(true);
|
||||
setShowBulkRetranslateConfirm(false);
|
||||
let succeeded = 0;
|
||||
for (const lang of brokenLanguages) {
|
||||
try {
|
||||
await apiClient.retranslateLanguage(id, lang, 'Bulk retranslate broken languages');
|
||||
succeeded++;
|
||||
} catch {
|
||||
toast.toastOnly.error(`Failed to queue retranslation for ${lang.toUpperCase()}`);
|
||||
}
|
||||
}
|
||||
setBulkRetranslateLoading(false);
|
||||
queryClient.invalidateQueries({ queryKey: ['jobs', id] });
|
||||
queryClient.invalidateQueries({ queryKey: ['language-qc', id] });
|
||||
if (succeeded > 0) toast.toastOnly.success(`Queued retranslation for ${succeeded} language(s) — check back in a few minutes`);
|
||||
};
|
||||
|
||||
// Immediate save handlers for individual cue edits
|
||||
const handleCaptionsCueSave = async (cueIndex: number, vttContent: string) => {
|
||||
if (!id) return;
|
||||
|
|
@ -1065,25 +1039,14 @@ export function QCDetail() {
|
|||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{canApproveAll && isSourceApproved && brokenLanguages.length > 0 && (
|
||||
<button
|
||||
onClick={() => setShowBulkRetranslateConfirm(true)}
|
||||
disabled={bulkRetranslateLoading}
|
||||
className="text-xs px-3 py-1.5 bg-orange-50 text-orange-700 border border-orange-200 rounded-lg hover:bg-orange-100 disabled:opacity-50"
|
||||
>
|
||||
{bulkRetranslateLoading ? 'Queuing…' : `↺ Retranslate broken (${brokenLanguages.length})`}
|
||||
</button>
|
||||
)}
|
||||
{canAssign && totalLangs > 1 && (
|
||||
<button
|
||||
onClick={() => { setBulkLinguistId(''); setBulkReviewerId(''); setBulkDeadline(''); setBulkOnlyUnassigned(true); setShowBulkAssignModal(true); }}
|
||||
className="text-xs px-3 py-1.5 bg-indigo-50 text-indigo-700 border border-indigo-200 rounded-lg hover:bg-indigo-100"
|
||||
>
|
||||
Assign all languages
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{canAssign && totalLangs > 1 && (
|
||||
<button
|
||||
onClick={() => { setBulkLinguistId(''); setBulkReviewerId(''); setBulkDeadline(''); setBulkOnlyUnassigned(true); setShowBulkAssignModal(true); }}
|
||||
className="text-xs px-3 py-1.5 bg-indigo-50 text-indigo-700 border border-indigo-200 rounded-lg hover:bg-indigo-100"
|
||||
>
|
||||
Assign all languages
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Progress bar */}
|
||||
|
|
@ -1559,43 +1522,6 @@ export function QCDetail() {
|
|||
</div>
|
||||
)}
|
||||
|
||||
{/* Bulk retranslate broken languages confirmation modal */}
|
||||
{showBulkRetranslateConfirm && (
|
||||
<div className="fixed inset-0 bg-black/40 flex items-center justify-center z-50">
|
||||
<div className="bg-white rounded-lg shadow-xl p-6 max-w-md w-full mx-4 space-y-4">
|
||||
<h3 className="text-lg font-semibold text-gray-900">Retranslate {brokenLanguages.length} broken language(s)?</h3>
|
||||
<p className="text-sm text-gray-600">
|
||||
The following languages will be regenerated from the approved EN master:
|
||||
</p>
|
||||
<div className="flex flex-wrap gap-1.5">
|
||||
{brokenLanguages.map(l => {
|
||||
const origin = job?.outputs?.[l]?.origin;
|
||||
return (
|
||||
<span key={l} className={`text-xs px-2 py-0.5 rounded-full font-medium ${origin === 'video_native' ? 'bg-red-100 text-red-700' : 'bg-orange-100 text-orange-700'}`}>
|
||||
{l.toUpperCase()} {origin === 'video_native' ? '(video-native)' : '(missing VTT)'}
|
||||
</span>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
<p className="text-xs text-gray-400">Each language is queued separately. Translation may take a few minutes.</p>
|
||||
<div className="flex gap-3 justify-end pt-2">
|
||||
<button
|
||||
onClick={() => setShowBulkRetranslateConfirm(false)}
|
||||
className="px-4 py-2 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded-md hover:bg-gray-50"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
<button
|
||||
onClick={handleBulkRetranslate}
|
||||
className="px-4 py-2 text-sm font-medium text-white bg-orange-600 rounded-md hover:bg-orange-700"
|
||||
>
|
||||
Yes, retranslate all
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Per-language retranslate confirmation modal */}
|
||||
{retranslateLang && (
|
||||
<div className="fixed inset-0 bg-black/40 flex items-center justify-center z-50">
|
||||
|
|
|
|||
|
|
@ -11,17 +11,12 @@ function statusBadge(status: string) {
|
|||
: 'bg-gray-100 text-gray-500';
|
||||
}
|
||||
|
||||
function embeddingBadge(g: import('../../../types/api').Glossary) {
|
||||
const status = g.current_version_embedding_status;
|
||||
const pct = g.current_version_term_count && g.current_version_term_count > 0
|
||||
? Math.round(((g.current_version_embedded_count ?? 0) / g.current_version_term_count) * 100)
|
||||
: 0;
|
||||
function embeddingBadge(status: string) {
|
||||
switch (status) {
|
||||
case 'done': return <span className="text-xs text-green-600">Embedded ✓ ({g.current_version_term_count?.toLocaleString()})</span>;
|
||||
case 'in_progress': return <span className="text-xs text-blue-600 animate-pulse">Embedding… {pct}%</span>;
|
||||
case 'done': return <span className="text-xs text-green-600">Embedded ✓</span>;
|
||||
case 'in_progress': return <span className="text-xs text-blue-600 animate-pulse">Embedding…</span>;
|
||||
case 'failed': return <span className="text-xs text-red-500">Embed failed</span>;
|
||||
case 'pending': return <span className="text-xs text-gray-400">Pending embed</span>;
|
||||
default: return null;
|
||||
default: return <span className="text-xs text-gray-400">Pending embed</span>;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +107,7 @@ export function GlossaryList() {
|
|||
</div>
|
||||
<div className="flex items-center gap-4 shrink-0">
|
||||
<div className="text-right text-xs text-gray-400">
|
||||
{g.current_version_id ? embeddingBadge(g) : null}
|
||||
{g.current_version_id ? embeddingBadge('') : null}
|
||||
</div>
|
||||
{isAdmin && g.status === 'active' && (
|
||||
<button
|
||||
|
|
|
|||
|
|
@ -77,7 +77,6 @@ export function NewBrief() {
|
|||
const [accessibleMethod, setAccessibleMethod] = useState<'overlay' | 'pause_insert'>('pause_insert');
|
||||
const [sdhVtt, setSdhVtt] = useState(false);
|
||||
const [descriptiveTranscript, setDescriptiveTranscript] = useState(false);
|
||||
const [sourceHasAd, setSourceHasAd] = useState(false);
|
||||
|
||||
const { data: projects = [] } = useAllProjects();
|
||||
const { data: assignees = [] } = useBriefAssignees();
|
||||
|
|
@ -114,7 +113,6 @@ export function NewBrief() {
|
|||
deadline: deadline || undefined,
|
||||
project_id: projectId || undefined,
|
||||
assignee_id: assigneeId || undefined,
|
||||
source_has_ad: sourceHasAd,
|
||||
});
|
||||
toast.toastOnly.success('Brief created');
|
||||
navigate(`/briefs/${brief.id}`);
|
||||
|
|
@ -242,22 +240,6 @@ export function NewBrief() {
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">Source Video</label>
|
||||
<label className="flex items-start gap-2 text-sm text-gray-700 cursor-pointer">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={sourceHasAd}
|
||||
onChange={e => setSourceHasAd(e.target.checked)}
|
||||
className="rounded mt-0.5 flex-shrink-0"
|
||||
/>
|
||||
<span>
|
||||
<span className="font-medium">Source video already contains audio descriptions</span>
|
||||
<span className="text-gray-400 ml-1">— AI will not generate new AD for this job</span>
|
||||
</span>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
Languages
|
||||
|
|
|
|||
|
|
@ -291,8 +291,6 @@ export function JobDetail() {
|
|||
{job.requested_outputs?.captions_vtt && <div>✓ Captions (VTT)</div>}
|
||||
{job.requested_outputs?.audio_description_vtt && <div>✓ Audio Descriptions (VTT)</div>}
|
||||
{job.requested_outputs?.audio_description_mp3 && <div>✓ Audio Descriptions (MP3)</div>}
|
||||
{job.requested_outputs?.accessible_video_mp4 && <div>✓ Accessible Video (MP4)</div>}
|
||||
{job.requested_outputs?.sdh_vtt && <div>✓ SDH Captions (VTT)</div>}
|
||||
</div>
|
||||
</dd>
|
||||
</div>
|
||||
|
|
@ -701,7 +699,7 @@ export function JobDetail() {
|
|||
)}
|
||||
|
||||
{/* Error Display */}
|
||||
{job.error && isFailedStatus && (
|
||||
{job.error && (
|
||||
<div className="bg-red-50 border border-red-200 rounded-lg p-4">
|
||||
<div className="flex items-start gap-2 mb-2">
|
||||
<svg className="w-5 h-5 text-red-600 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
|
|
|
|||
|
|
@ -822,9 +822,6 @@ export interface Glossary {
|
|||
source: string;
|
||||
status: GlossaryStatus;
|
||||
current_version_id?: string;
|
||||
current_version_embedding_status?: EmbeddingStatus;
|
||||
current_version_embedded_count?: number;
|
||||
current_version_term_count?: number;
|
||||
created_at: string;
|
||||
created_by: string;
|
||||
}
|
||||
|
|
@ -883,5 +880,4 @@ export interface JobBriefCreate {
|
|||
deadline?: string;
|
||||
project_id?: string;
|
||||
assignee_id?: string;
|
||||
source_has_ad?: boolean;
|
||||
}
|
||||
|
|
@ -6,11 +6,10 @@
|
|||
"http://localhost:5173",
|
||||
"http://localhost:3000"
|
||||
],
|
||||
"method": ["GET", "HEAD", "PUT"],
|
||||
"method": ["PUT"],
|
||||
"responseHeader": [
|
||||
"Content-Type",
|
||||
"Content-Range",
|
||||
"Content-Disposition",
|
||||
"X-Goog-Resumable"
|
||||
],
|
||||
"maxAgeSeconds": 3600
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue