feat: add QC accessible video review and editing capabilities

- Reorder workflow: translations now happen BEFORE QC Review step
- Add language tabs to switch between translated languages in QC
- Add video mode tabs (Original Video / Accessible Video)
- Add interactive timeline preview showing video segments and AD cues
- Enable pause point adjustment with millisecond precision
- Add TTS regeneration queue for selective cue re-synthesis
- Add re-render controls with optional Whisper refinement
- Persist video segments and TTS MP3s to GCS for editability
- Add new RENDERING_QC job status for re-render operations
- Create 5 new API endpoints for accessible video editing
- Add rerender_accessible_video.py Celery task

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2026-01-11 08:32:27 -06:00
parent c5f59b1079
commit aa6777d2c2
16 changed files with 2277 additions and 59 deletions

View file

@ -32,6 +32,17 @@ from ...schemas.job import (
VttTimingAdjustRequest,
VttUpdateRequest,
)
from ...schemas.accessible_video import (
AccessibleVideoEditStateResponse,
PausePointResponse,
PausePointUpdateRequest,
RerenderAccessibleVideoRequest,
TTSRegenerationItem,
TTSRegenerationQueueRequest,
TTSRegenerationRemoveRequest,
VideoSegmentResponse,
)
from ...models.job import TTSRegenerationRequest
from ...services.websocket import connection_manager
from ...services.gcs import (
gcs_service,
@ -1455,3 +1466,404 @@ async def validate_job_assets(
errors=errors,
warnings=[] # Can be extended for non-blocking warnings
)
# ==============================================================================
# Accessible Video QC Editing Endpoints
# ==============================================================================
def _edit_state_timestamp_to_iso(value):
    """Return a stored edit-state timestamp as an ISO-8601 string.

    ``None`` and values that are already strings are returned unchanged;
    any other value is expected to expose ``isoformat()`` (e.g. ``datetime``).
    """
    if value is None or isinstance(value, str):
        return value
    return value.isoformat()


@router.get("/{job_id}/accessible-video/{language}/edit-state", response_model=AccessibleVideoEditStateResponse)
async def get_accessible_video_edit_state(
    job_id: str,
    language: str,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Get current pause points, segment metadata, and TTS regeneration queue for QC editing.

    Args:
        job_id: ``_id`` of the job document.
        language: Key into the job's ``outputs`` mapping.
        current_user: Authenticated reviewer / production / admin user.
        db: Database handle.

    Returns:
        AccessibleVideoEditStateResponse built from the stored edit state,
        plus a signed preview URL when an accessible video has been uploaded.

    Raises:
        HTTPException: 404 if the job, the language outputs or the edit state
            is missing; 400 if the job is not in ``pending_qc`` or
            ``rendering_qc`` status.
    """
    # Local import: only ``datetime`` itself is known to be imported at file level.
    from datetime import timedelta

    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Reading is allowed both while editing and while a QC re-render is running
    if job_doc["status"] not in [JobStatus.PENDING_QC.value, JobStatus.RENDERING_QC.value]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc or rendering_qc status (current: {job_doc['status']})"
        )

    # Get language outputs
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No outputs found for language {language}"
        )

    # Get edit state
    edit_state = lang_output.get("accessible_video_edit_state")
    if not edit_state:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No accessible video edit state found for language {language}"
        )

    # Calculate total duration from segments
    segments = edit_state.get("video_segments", [])
    total_duration_ms = sum(s.get("duration_ms", 0) for s in segments)

    # Get signed URL for accessible video
    accessible_video_gcs = lang_output.get("accessible_video_gcs")
    accessible_video_url = None
    if accessible_video_gcs:
        blob_path = accessible_video_gcs.removeprefix(f"gs://{settings.gcs_bucket}/")
        blob = gcs_service.bucket.blob(blob_path)
        # A bare int is read as an absolute epoch timestamp by the default (v2)
        # signer, which would yield an already-expired URL. A timedelta is
        # relative to "now" under both v2 and v4 signing.
        accessible_video_url = blob.generate_signed_url(expiration=timedelta(hours=1))

    # Convert to response format
    return AccessibleVideoEditStateResponse(
        pause_points=[
            PausePointResponse(
                cue_index=pp.get("cue_index"),
                original_ms=pp.get("original_ms"),
                adjusted_ms=pp.get("adjusted_ms"),
                min_bound_ms=pp.get("min_bound_ms"),
                max_bound_ms=pp.get("max_bound_ms")
            )
            for pp in edit_state.get("pause_points", [])
        ],
        video_segments=[
            VideoSegmentResponse(
                segment_index=seg.get("segment_index"),
                start_ms=seg.get("start_ms"),
                end_ms=seg.get("end_ms"),
                gcs_uri=seg.get("gcs_uri"),
                duration_ms=seg.get("duration_ms"),
                is_freeze_frame=seg.get("is_freeze_frame", False),
                cue_index=seg.get("cue_index")
            )
            for seg in segments
        ],
        tts_regeneration_queue=[
            TTSRegenerationItem(
                cue_index=req.get("cue_index"),
                # The response schema declares a string; normalise in case a
                # datetime was persisted instead of an ISO string.
                requested_at=_edit_state_timestamp_to_iso(req.get("requested_at")),
                new_text=req.get("new_text"),
                status=req.get("status", "pending"),
                error_message=req.get("error_message")
            )
            for req in edit_state.get("tts_regeneration_queue", [])
        ],
        # The render task stores ``last_render_at`` as a datetime while the
        # response field is a string, so convert explicitly.
        last_render_at=_edit_state_timestamp_to_iso(edit_state.get("last_render_at")),
        total_duration_ms=total_duration_ms,
        accessible_video_url=accessible_video_url
    )
@router.patch("/{job_id}/accessible-video/{language}/pause-points/{cue_index}", response_model=PausePointResponse)
async def update_pause_point(
    job_id: str,
    language: str,
    cue_index: int,
    request: PausePointUpdateRequest,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Update a single pause point timing with millisecond precision.

    Args:
        job_id: ``_id`` of the job document.
        language: Key into the job's ``outputs`` mapping.
        cue_index: Cue index identifying the pause point to adjust.
        request: Body carrying the new ``adjusted_ms`` value.
        current_user: Authenticated reviewer / production / admin user.
        db: Database handle.

    Returns:
        PausePointResponse describing the pause point after the update.

    Raises:
        HTTPException: 404 if the job, language outputs, edit state or pause
            point is missing; 400 if the job is not in ``pending_qc`` status
            or ``adjusted_ms`` is non-finite or outside the stored bounds.
    """
    import math

    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Check job is in QC status
    if job_doc["status"] not in [JobStatus.PENDING_QC.value]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc status for editing (current: {job_doc['status']})"
        )

    # Get edit state
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No outputs found for language {language}"
        )
    edit_state = lang_output.get("accessible_video_edit_state")
    if not edit_state:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No edit state found for language {language}"
        )

    # Find the pause point
    pause_points = edit_state.get("pause_points", [])
    pause_point = next((pp for pp in pause_points if pp.get("cue_index") == cue_index), None)
    if not pause_point:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Pause point for cue {cue_index} not found"
        )

    # NaN compares False against every bound, so it would slip through the
    # range checks below and be persisted; reject non-finite values up front.
    if not math.isfinite(request.adjusted_ms):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Adjusted value must be a finite number of milliseconds"
        )

    # Validate bounds
    min_bound = pause_point.get("min_bound_ms", 0)
    max_bound = pause_point.get("max_bound_ms", float("inf"))
    if request.adjusted_ms < min_bound:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Adjusted value {request.adjusted_ms}ms is below minimum bound {min_bound}ms"
        )
    if request.adjusted_ms > max_bound:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Adjusted value {request.adjusted_ms}ms is above maximum bound {max_bound}ms"
        )

    # Update the pause point (mutates the dict inside ``pause_points``)
    pause_point["adjusted_ms"] = request.adjusted_ms

    # Save back to database
    await db.jobs.update_one(
        {"_id": job_id},
        {
            "$set": {
                f"outputs.{language}.accessible_video_edit_state.pause_points": pause_points,
                "updated_at": datetime.utcnow()
            }
        }
    )
    logger.info(f"Updated pause point for cue {cue_index} in job {job_id}/{language}: {request.adjusted_ms}ms")

    return PausePointResponse(
        cue_index=pause_point["cue_index"],
        original_ms=pause_point["original_ms"],
        adjusted_ms=pause_point["adjusted_ms"],
        min_bound_ms=pause_point["min_bound_ms"],
        max_bound_ms=pause_point["max_bound_ms"]
    )
@router.post("/{job_id}/accessible-video/{language}/tts-regeneration")
async def queue_tts_regeneration(
    job_id: str,
    language: str,
    request: TTSRegenerationQueueRequest,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Queue TTS regeneration for specific cues (uses current AD VTT text).

    Cue indices already present in the queue, or repeated within the same
    request, are queued only once.

    Args:
        job_id: ``_id`` of the job document.
        language: Key into the job's ``outputs`` mapping.
        request: Body carrying the ``cue_indices`` to queue.
        current_user: Authenticated reviewer / production / admin user.
        db: Database handle.

    Returns:
        Dict with a human-readable ``message`` and the ``queued_cues`` that
        were actually added.

    Raises:
        HTTPException: 404 if the job, language outputs or edit state is
            missing; 400 if the job is not in ``pending_qc`` status.
    """
    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Check job is in QC status
    if job_doc["status"] not in [JobStatus.PENDING_QC.value]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc status for editing (current: {job_doc['status']})"
        )

    # Get edit state
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No outputs found for language {language}"
        )
    edit_state = lang_output.get("accessible_video_edit_state")
    if not edit_state:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No edit state found for language {language}"
        )

    # Get current queue (tolerate an explicit null stored under the key)
    current_queue = edit_state.get("tts_regeneration_queue") or []
    existing_cues = {req.get("cue_index") for req in current_queue}

    # Add new requests (avoid duplicates)
    added = []
    for cue_idx in request.cue_indices:
        if cue_idx in existing_cues:
            continue
        current_queue.append({
            "cue_index": cue_idx,
            "requested_at": datetime.utcnow().isoformat(),
            "new_text": None,  # Will use current VTT text
            "status": "pending",
            "error_message": None
        })
        # Record the index so a repeat later in the same request is skipped too.
        existing_cues.add(cue_idx)
        added.append(cue_idx)

    # Save back to database
    await db.jobs.update_one(
        {"_id": job_id},
        {
            "$set": {
                f"outputs.{language}.accessible_video_edit_state.tts_regeneration_queue": current_queue,
                "updated_at": datetime.utcnow()
            }
        }
    )
    logger.info(f"Queued TTS regeneration for cues {added} in job {job_id}/{language}")

    return {"message": f"Queued {len(added)} cue(s) for regeneration", "queued_cues": added}
@router.delete("/{job_id}/accessible-video/{language}/tts-regeneration/{cue_index}")
async def remove_tts_regeneration(
    job_id: str,
    language: str,
    cue_index: int,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Remove a cue from the TTS regeneration queue.

    Args:
        job_id: ``_id`` of the job document.
        language: Key into the job's ``outputs`` mapping.
        cue_index: Cue index to drop from the queue.
        current_user: Authenticated reviewer / production / admin user.
        db: Database handle.

    Returns:
        Dict with a human-readable confirmation ``message``.

    Raises:
        HTTPException: 404 if the job, the edit state or the queued cue is
            missing; 400 if the job is not in ``pending_qc`` status.
    """
    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Same guard as the other mutating edit endpoints: the queue must not be
    # changed while the job is outside QC editing (e.g. during rendering_qc).
    if job_doc["status"] not in [JobStatus.PENDING_QC.value]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc status for editing (current: {job_doc['status']})"
        )

    # Get edit state
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output or not lang_output.get("accessible_video_edit_state"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No edit state found for language {language}"
        )
    edit_state = lang_output["accessible_video_edit_state"]
    current_queue = edit_state.get("tts_regeneration_queue", [])

    # Remove the cue from queue
    new_queue = [req for req in current_queue if req.get("cue_index") != cue_index]
    if len(new_queue) == len(current_queue):
        # Nothing was filtered out, so the cue was never queued
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Cue {cue_index} not in regeneration queue"
        )

    # Save back to database
    await db.jobs.update_one(
        {"_id": job_id},
        {
            "$set": {
                f"outputs.{language}.accessible_video_edit_state.tts_regeneration_queue": new_queue,
                "updated_at": datetime.utcnow()
            }
        }
    )
    logger.info(f"Removed cue {cue_index} from TTS regeneration queue for job {job_id}/{language}")

    return {"message": f"Removed cue {cue_index} from regeneration queue"}
@router.post("/{job_id}/accessible-video/{language}/re-render", response_model=JobResponse)
async def trigger_accessible_video_rerender(
    job_id: str,
    language: str,
    request: RerenderAccessibleVideoRequest,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """
    Trigger re-synthesis of accessible video with QC changes.

    - Regenerates only queued TTS segments (others reuse existing MP3s)
    - Optionally runs Whisper pause point refinement

    The job is moved from ``pending_qc`` to ``rendering_qc`` with a single
    conditional update, so only one of several concurrent requests dispatches
    the render task. If enqueueing the task fails, the status is moved back
    to ``pending_qc`` and the original error is re-raised.

    Args:
        job_id: ``_id`` of the job document.
        language: Key into the job's ``outputs`` mapping.
        request: Body carrying the ``whisper_refine`` flag.
        current_user: Authenticated reviewer / production / admin user.
        db: Database handle.

    Returns:
        JobResponse built from the job document after the status change.

    Raises:
        HTTPException: 404 if the job or its edit state is missing; 400 if the
            job is not in ``pending_qc`` status; 409 if another request changed
            the status between the check and the update.
    """
    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Check job is in QC status
    if job_doc["status"] not in [JobStatus.PENDING_QC.value]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc status to re-render (current: {job_doc['status']})"
        )

    # Get edit state
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output or not lang_output.get("accessible_video_edit_state"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No edit state found for language {language}"
        )
    edit_state = lang_output["accessible_video_edit_state"]

    # Get cues to regenerate
    regenerate_cues = [
        req.get("cue_index")
        for req in edit_state.get("tts_regeneration_queue", [])
        if req.get("status") == "pending"
    ]

    # Import before touching the job so an import failure cannot strand it
    # in rendering_qc.
    from ...tasks.rerender_accessible_video import rerender_accessible_video_task

    # Update job status to RENDERING_QC. Filtering on the current status makes
    # the transition atomic: a concurrent request that already moved the job
    # matches nothing here and therefore does not dispatch a second task.
    transition = await db.jobs.update_one(
        {"_id": job_id, "status": JobStatus.PENDING_QC.value},
        {
            "$set": {
                "status": JobStatus.RENDERING_QC.value,
                "updated_at": datetime.utcnow()
            },
            "$push": {
                "review.history": {
                    "at": datetime.utcnow(),
                    "status": JobStatus.RENDERING_QC.value,
                    "by": str(current_user.id),
                    "notes": f"Re-rendering {language} with {len(regenerate_cues)} TTS regeneration(s), whisper_refine={request.whisper_refine}"
                }
            }
        }
    )
    if transition.matched_count == 0:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Job status changed before the re-render could be started"
        )

    # Trigger re-render task
    try:
        rerender_accessible_video_task.delay(
            job_id=job_id,
            language=language,
            regenerate_cue_indices=regenerate_cues,
            whisper_refine=request.whisper_refine
        )
    except Exception:
        # No task was queued, so nothing would ever move the job out of
        # rendering_qc; hand it back to the reviewer before propagating.
        logger.exception(f"Failed to enqueue accessible video re-render for job {job_id}/{language}")
        await db.jobs.update_one(
            {"_id": job_id, "status": JobStatus.RENDERING_QC.value},
            {
                "$set": {
                    "status": JobStatus.PENDING_QC.value,
                    "updated_at": datetime.utcnow()
                },
                "$push": {
                    "review.history": {
                        "at": datetime.utcnow(),
                        "status": JobStatus.PENDING_QC.value,
                        "by": "system",
                        "notes": f"Re-render of {language} could not be queued; status reverted"
                    }
                }
            }
        )
        raise

    logger.info(
        f"Triggered accessible video re-render for job {job_id}/{language}: "
        f"regenerate_cues={regenerate_cues}, whisper_refine={request.whisper_refine}"
    )

    # Get updated job
    result = await db.jobs.find_one({"_id": job_id})
    return JobResponse(
        id=str(result["_id"]),
        title=result["title"],
        status=result["status"],
        source=result["source"],
        requested_outputs=RequestedOutputs(**result["requested_outputs"]),
        review=result.get("review", {"notes": "", "history": []}),
        outputs=result.get("outputs"),
        created_at=result["created_at"].isoformat(),
        updated_at=result["updated_at"].isoformat()
    )

View file

@ -19,6 +19,7 @@ class JobStatus(str, Enum):
TTS_FAILED = "tts_failed" # TTS synthesis failed after retries, requires reprocessing
RENDERING_VIDEO = "rendering_video" # Accessible video rendering in progress
RENDER_FAILED = "render_failed" # Accessible video rendering failed, requires reprocessing
RENDERING_QC = "rendering_qc" # Re-rendering accessible video during QC review
PENDING_FINAL_REVIEW = "pending_final_review"
COMPLETED = "completed"
@ -64,6 +65,44 @@ class RequestedOutputs(BaseModel):
translation_mode: Literal["traditional", "video_native"] = "video_native"
class PausePointData(BaseModel):
    """Timing record for one pause point, editable during QC of the accessible video."""

    # Index of the AD cue that this pause point belongs to.
    cue_index: int
    # Pause point timestamp as originally placed, in milliseconds.
    original_ms: float
    # Timestamp chosen by the user, in milliseconds; None means "use original_ms".
    adjusted_ms: Optional[float] = None
    # Lowest value the pause point may take (end of the previous AD segment).
    min_bound_ms: float
    # Highest value the pause point may take (start of the next AD segment).
    max_bound_ms: float
class VideoSegmentMetadata(BaseModel):
    """Description of one video segment lying between pause points."""

    # Position of the segment in the sequence, counted from 0.
    segment_index: int
    # Start timestamp in the source video, in milliseconds.
    start_ms: float
    # End timestamp in the source video, in milliseconds.
    end_ms: float
    # GCS path of the segment's MP4 file.
    gcs_uri: str
    # Actual duration of the segment, in milliseconds.
    duration_ms: float
    # True when the segment is a freeze frame carrying AD audio.
    is_freeze_frame: bool = False
    # AD cue index; only set for freeze frame segments.
    cue_index: Optional[int] = None
class TTSRegenerationRequest(BaseModel):
    """One queued request, made during QC, to regenerate the TTS audio of a cue."""

    # Cue whose audio should be regenerated.
    cue_index: int
    # When the regeneration was requested.
    requested_at: datetime
    # Replacement text; when provided it is used instead of the current VTT text.
    new_text: Optional[str] = None
    # Lifecycle state of the request.
    status: Literal["pending", "processing", "completed", "failed"] = "pending"
    # Optional error detail for the request.
    error_message: Optional[str] = None
class AccessibleVideoEditState(BaseModel):
    """State of an accessible video that can be edited while the job is in QC review."""

    # Pause points of the accessible video.
    pause_points: list[PausePointData] = []
    # Metadata of the video segments.
    video_segments: list[VideoSegmentMetadata] = []
    # TTS regeneration requests that have been queued.
    tts_regeneration_queue: list[TTSRegenerationRequest] = []
    # Time of the last render, if any.
    last_render_at: Optional[datetime] = None
    # Off by default; the user enables it if cue positions changed.
    whisper_refine_enabled: bool = False
class LangOutput(BaseModel):
captions_vtt_gcs: Optional[str] = None
ad_vtt_gcs: Optional[str] = None
@ -73,6 +112,9 @@ class LangOutput(BaseModel):
accessible_video_method: Optional[Literal["overlay", "pause_insert"]] = None
retimed_captions_vtt_gcs: Optional[str] = None # Re-timed captions for pause-insert method
ad_cues_gcs_prefix: Optional[str] = None # GCS path prefix for per-cue MP3 segments
# QC editing state for accessible video
video_segments_gcs_prefix: Optional[str] = None # GCS prefix for persisted video segments
accessible_video_edit_state: Optional[AccessibleVideoEditState] = None
origin: Optional[Literal["translate", "transcreate", "gemini_translate", "video_native"]] = None
qa_notes: Optional[str] = None

View file

@ -122,3 +122,89 @@ class AccessibleVideoProgress(BaseModel):
error_message: Optional[str] = None
started_at: Optional[str] = None
completed_at: Optional[str] = None
# === QC Review Accessible Video Editing Schemas ===
class PausePointResponse(BaseModel):
    """API representation of a pause point's timing for QC editing."""

    # Fields declared without a default are required.
    cue_index: int = Field(description="AD cue index this pause point belongs to")
    original_ms: float = Field(description="Original pause point timestamp (ms)")
    adjusted_ms: Optional[float] = Field(default=None, description="User-adjusted timestamp (ms)")
    min_bound_ms: float = Field(description="Minimum allowed value (ms)")
    max_bound_ms: float = Field(description="Maximum allowed value (ms)")
class VideoSegmentResponse(BaseModel):
    """API representation of a single video segment's metadata."""

    # Fields declared without a default are required.
    segment_index: int = Field(description="0-based segment index")
    start_ms: float = Field(description="Start timestamp in source video (ms)")
    end_ms: float = Field(description="End timestamp in source video (ms)")
    gcs_uri: str = Field(description="GCS path to segment MP4")
    duration_ms: float = Field(description="Actual segment duration (ms)")
    is_freeze_frame: bool = Field(default=False, description="True if freeze frame with AD audio")
    cue_index: Optional[int] = Field(default=None, description="AD cue index (freeze frames only)")
class TTSRegenerationItem(BaseModel):
    """API representation of one entry in the TTS regeneration queue."""

    # Fields declared without a default are required.
    cue_index: int = Field(description="AD cue index to regenerate")
    requested_at: str = Field(description="ISO timestamp when requested")
    new_text: Optional[str] = Field(default=None, description="Override text (if provided)")
    status: str = Field(default="pending", description="pending | processing | completed | failed")
    error_message: Optional[str] = None
class AccessibleVideoEditStateResponse(BaseModel):
    """Response payload describing the editable accessible-video state during QC review."""

    pause_points: list[PausePointResponse] = Field(
        description="All pause points with original and adjusted values",
        default_factory=list,
    )
    video_segments: list[VideoSegmentResponse] = Field(
        description="Video segment metadata for timeline display",
        default_factory=list,
    )
    tts_regeneration_queue: list[TTSRegenerationItem] = Field(
        description="Queued TTS regeneration requests",
        default_factory=list,
    )
    last_render_at: Optional[str] = Field(
        default=None,
        description="ISO timestamp of last accessible video render",
    )
    # Required: declared without a default.
    total_duration_ms: float = Field(description="Total accessible video duration (ms)")
    accessible_video_url: Optional[str] = Field(
        default=None,
        description="Signed URL for accessible video preview",
    )
class PausePointUpdateRequest(BaseModel):
    """Request to update a pause point's adjusted timing.

    NaN and +/-Infinity are rejected at validation time: NaN compares False
    against any bound, so a range check of the form ``value < min`` /
    ``value > max`` cannot catch it.
    """

    adjusted_ms: float = Field(
        ...,
        allow_inf_nan=False,
        description="New pause point timestamp in milliseconds"
    )
class TTSRegenerationQueueRequest(BaseModel):
    """Request body listing the cues whose TTS should be queued for regeneration."""

    # Required: declared without a default.
    cue_indices: list[int] = Field(description="List of AD cue indices to regenerate")
class TTSRegenerationRemoveRequest(BaseModel):
    """Request body naming the cue to drop from the TTS regeneration queue."""

    # Required: declared without a default.
    cue_index: int = Field(description="AD cue index to remove from queue")
class RerenderAccessibleVideoRequest(BaseModel):
    """Request body for re-rendering the accessible video with the QC changes applied."""

    # Optional flag; defaults to False when omitted.
    whisper_refine: bool = Field(
        default=False,
        description="Run Whisper pause point refinement (enable if cue count/position changed)",
    )

View file

@ -22,6 +22,7 @@ from google.oauth2 import id_token
from ..core.config import settings
from ..core.logging import get_logger
from ..models.job import PausePointData, VideoSegmentMetadata
from ..schemas.accessible_video import AccessibleVideoMethod, GeminiAccessibleVideoAnalysis
logger = get_logger(__name__)
@ -150,6 +151,27 @@ class VideoRendererService:
# Log but don't fail on cleanup errors
logger.warning(f"Failed to delete GCS temp file {gcs_uri}: {e}")
def _upload_to_gcs_permanent(self, local_path: str, gcs_path: str) -> str:
    """
    Store a local file at a permanent (non-temp) location in the configured bucket.

    Args:
        local_path: Path of the file on local disk
        gcs_path: Object path inside the bucket (e.g., "job_id/en/segments/seg_0.mp4")

    Returns:
        The ``gs://`` URI of the uploaded object
    """
    # Resolve the destination blob in one chain, then push the file to it.
    target_blob = self._get_gcs_client().bucket(settings.gcs_bucket).blob(gcs_path)
    target_blob.upload_from_filename(local_path)

    gcs_uri = f"gs://{settings.gcs_bucket}/{gcs_path}"
    logger.debug(f"Uploaded {local_path} to {gcs_uri} (permanent)")
    return gcs_uri
async def _call_cloud_run_probe(self, gcs_uri: str) -> dict[str, Any]:
"""
Call Cloud Run FFmpeg service /probe endpoint.
@ -300,7 +322,9 @@ class VideoRendererService:
ad_segments: list[tuple[int, str]], # [(cue_index, mp3_path), ...]
analysis: dict[str, Any],
output_path: str,
) -> tuple[str, list[dict] | None]:
persist_segments: bool = False,
gcs_segment_prefix: str | None = None,
) -> tuple[str, list[dict] | None, list[VideoSegmentMetadata] | None, list[PausePointData] | None]:
"""
Render accessible video based on Gemini analysis.
@ -309,11 +333,15 @@ class VideoRendererService:
ad_segments: List of (cue_index, mp3_path) tuples for each AD segment
analysis: Gemini analysis dict with method and placements
output_path: Where to save the output MP4
persist_segments: If True, upload video segments to GCS for QC editing
gcs_segment_prefix: GCS path prefix for segments (e.g., "job_id/en/segments/")
Returns:
Tuple of (output_path, updated_placements)
Tuple of (output_path, updated_placements, segment_metadata, pause_points)
- output_path: Path to rendered accessible video
- updated_placements: Placements with actual_freeze_duration added (pause-insert only)
- segment_metadata: List of VideoSegmentMetadata if persist_segments=True, else None
- pause_points: List of PausePointData if persist_segments=True, else None
"""
method = analysis.get("method", "pause_insert")
@ -328,10 +356,12 @@ class VideoRendererService:
result_path = await self._render_overlay_method(
source_video_path, ad_segments, analysis, output_path
)
return (result_path, None)
return (result_path, None, None, None)
else:
return await self._render_pause_insert_method(
source_video_path, ad_segments, analysis, output_path
source_video_path, ad_segments, analysis, output_path,
persist_segments=persist_segments,
gcs_segment_prefix=gcs_segment_prefix
)
finally:
# Clean up cached source video from GCS
@ -460,13 +490,23 @@ class VideoRendererService:
ad_segments: list[tuple[int, str]],
analysis: dict[str, Any],
output_path: str,
) -> tuple[str, list[dict]]:
persist_segments: bool = False,
gcs_segment_prefix: str | None = None,
) -> tuple[str, list[dict], list[VideoSegmentMetadata] | None, list[PausePointData] | None]:
"""
Render with pause-insert method:
1. Split video at each pause point
2. Extract freeze frame at each pause point
3. Create freeze-frame segment with AD audio
4. Concatenate all segments
5. Optionally persist segments to GCS for QC editing
Args:
persist_segments: If True, upload segments to GCS and return metadata
gcs_segment_prefix: GCS path prefix (e.g., "job_id/en/segments/")
Returns:
Tuple of (output_path, updated_placements, segment_metadata, pause_points)
"""
logger.info(f"Starting pause-insert render for {source_video_path}")
placements = analysis.get("placements", [])
@ -489,7 +529,7 @@ class VideoRendererService:
if not sorted_placements:
logger.warning("No pause points found, copying source video")
await self._copy_video(source_video_path, output_path)
return (output_path, [])
return (output_path, [], None, None)
with tempfile.TemporaryDirectory() as temp_dir:
temp_dir_path = Path(temp_dir)
@ -725,7 +765,106 @@ class VideoRendererService:
updated["actual_freeze_duration"] = actual_durations[cue_index]
updated_placements.append(updated)
return (output_path, updated_placements)
# ============================================================
# PHASE 5: Persist segments to GCS for QC editing (optional)
# ============================================================
segment_metadata_list: list[VideoSegmentMetadata] | None = None
pause_point_data_list: list[PausePointData] | None = None
if persist_segments and gcs_segment_prefix:
logger.info(f"Persisting {len(segment_files)} segments to GCS at {gcs_segment_prefix}")
segment_metadata_list = []
segment_idx = 0
cumulative_time_ms = 0.0
for p in valid_placements:
i = p["index"]
# Upload video segment if it exists
if i in video_segment_paths:
local_path = video_segment_paths[i]
gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}.mp4"
gcs_uri = self._upload_to_gcs_permanent(local_path, gcs_path)
segment_duration_ms = (p["pause_point"] - p["segment_start"]) * 1000
segment_metadata_list.append(VideoSegmentMetadata(
segment_index=segment_idx,
start_ms=cumulative_time_ms,
end_ms=cumulative_time_ms + segment_duration_ms,
gcs_uri=gcs_uri,
duration_ms=segment_duration_ms,
is_freeze_frame=False,
cue_index=None
))
cumulative_time_ms += segment_duration_ms
segment_idx += 1
# Upload freeze segment
freeze_local_path = freeze_segment_paths[i]
gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}_freeze.mp4"
gcs_uri = self._upload_to_gcs_permanent(freeze_local_path, gcs_path)
freeze_duration_ms = p["actual_freeze_duration"] * 1000
segment_metadata_list.append(VideoSegmentMetadata(
segment_index=segment_idx,
start_ms=cumulative_time_ms,
end_ms=cumulative_time_ms + freeze_duration_ms,
gcs_uri=gcs_uri,
duration_ms=freeze_duration_ms,
is_freeze_frame=True,
cue_index=p["cue_index"]
))
cumulative_time_ms += freeze_duration_ms
segment_idx += 1
# Upload final segment if exists
if final_segment_path:
gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}.mp4"
gcs_uri = self._upload_to_gcs_permanent(str(final_segment_path), gcs_path)
final_duration_ms = (source_duration - final_segment_start) * 1000
segment_metadata_list.append(VideoSegmentMetadata(
segment_index=segment_idx,
start_ms=cumulative_time_ms,
end_ms=cumulative_time_ms + final_duration_ms,
gcs_uri=gcs_uri,
duration_ms=final_duration_ms,
is_freeze_frame=False,
cue_index=None
))
logger.info(f"Persisted {len(segment_metadata_list)} segments to GCS")
# Build PausePointData list with bounds
pause_point_data_list = []
for idx, p in enumerate(valid_placements):
pause_ms = p["pause_point"] * 1000
# Compute min bound: end of previous AD segment (or 0 for first)
if idx == 0:
min_bound_ms = 0.0
else:
prev_p = valid_placements[idx - 1]
# End of previous freeze = pause_point + freeze_duration
min_bound_ms = (prev_p["pause_point"] + prev_p["actual_freeze_duration"]) * 1000
# Compute max bound: start of next pause point (or video end for last)
if idx == len(valid_placements) - 1:
max_bound_ms = source_duration * 1000
else:
next_p = valid_placements[idx + 1]
max_bound_ms = next_p["pause_point"] * 1000
pause_point_data_list.append(PausePointData(
cue_index=p["cue_index"],
original_ms=pause_ms,
adjusted_ms=None,
min_bound_ms=min_bound_ms,
max_bound_ms=max_bound_ms
))
logger.info(f"Built {len(pause_point_data_list)} pause point data entries")
return (output_path, updated_placements, segment_metadata_list, pause_point_data_list)
async def _get_video_duration(self, video_path: str) -> float:
"""Get video duration in seconds using ffprobe."""

View file

@ -249,11 +249,12 @@ async def ingest_and_ai_task_impl(job_id: str):
)
# Update job with AI results, detected language, and outputs
# Set status to TRANSLATING to trigger translation pipeline before QC
await db.jobs.update_one(
{"_id": job_id},
{
"$set": {
"status": JobStatus.PENDING_QC.value,
"status": JobStatus.TRANSLATING.value,
"source.language": source_language, # Update with detected language
"source.detected_language": detected_language,
"ai.ingestion_json": ai_result,
@ -267,22 +268,27 @@ async def ingest_and_ai_task_impl(job_id: str):
"$push": {
"review.history": {
"at": datetime.utcnow(),
"status": JobStatus.PENDING_QC.value,
"status": JobStatus.TRANSLATING.value,
"by": "system"
}
}
}
)
# Broadcast status update
broadcast_status_update(
job_id,
JobStatus.PENDING_QC.value,
job_id,
JobStatus.TRANSLATING.value,
job_title=job_title,
message=f"{job_title} has completed AI processing and is ready for QC review"
message=f"{job_title} AI processing complete, starting translation pipeline"
)
logger.info(f"Successfully completed ingestion and AI processing for job {job_id}")
logger.info(f"AI processing complete for job {job_id}, triggering translation pipeline")
# Trigger translation and synthesis pipeline
# This will process all translations, TTS, and accessible video BEFORE QC review
from .translate_and_synthesize import translate_and_synthesize_task
translate_and_synthesize_task.delay(job_id)
finally:
# Clean up temp file

View file

@ -11,7 +11,7 @@ from motor.motor_asyncio import AsyncIOMotorClient
from ..core.config import settings
from ..core.logging import get_logger
from ..lib.vtt import VTTParser
from ..models.job import JobStatus
from ..models.job import AccessibleVideoEditState, JobStatus, PausePointData, VideoSegmentMetadata
from ..schemas.whisper import CachedWhisperTranscript, CachedWordTimestamp
from ..services.gcs import gcs_service
from ..services.video_renderer import video_renderer_service
@ -198,15 +198,18 @@ async def _async_render_accessible_video(job_id: str, language: str):
analysis["warnings"] = existing_warnings + whisper_warnings
logger.info(f"Whisper refinement complete with {len(whisper_warnings)} warnings")
# 6. Render accessible video
# 6. Render accessible video with segment persistence for QC editing
output_video_path = os.path.join(temp_dir, "accessible_video.mp4")
gcs_segment_prefix = f"{job_id}/{language}/segments/"
logger.info(f"Rendering accessible video using {method} method...")
rendered_path, updated_placements = await video_renderer_service.render_accessible_video(
logger.info(f"Rendering accessible video using {method} method with segment persistence...")
rendered_path, updated_placements, segment_metadata, pause_points = await video_renderer_service.render_accessible_video(
source_video_path,
ad_segments,
analysis,
output_video_path
output_video_path,
persist_segments=True,
gcs_segment_prefix=gcs_segment_prefix
)
# Update analysis with actual freeze durations for VTT retiming
@ -214,6 +217,18 @@ async def _async_render_accessible_video(job_id: str, language: str):
analysis["placements"] = updated_placements
logger.info(f"Updated {len(updated_placements)} placements with actual freeze durations")
# Build edit state for QC review if segment metadata was returned
edit_state = None
if segment_metadata and pause_points:
edit_state = AccessibleVideoEditState(
pause_points=pause_points,
video_segments=segment_metadata,
tts_regeneration_queue=[],
last_render_at=datetime.utcnow(),
whisper_refine_enabled=False
)
logger.info(f"Built edit state with {len(segment_metadata)} segments and {len(pause_points)} pause points")
# 7. Upload rendered video to GCS
video_blob_path = f"{job_id}/{language}/accessible_video.mp4"
video_blob = gcs_service.bucket.blob(video_blob_path)
@ -248,10 +263,11 @@ async def _async_render_accessible_video(job_id: str, language: str):
retimed_captions_gcs_uri = f"gs://{settings.gcs_bucket}/{retimed_blob_path}"
logger.info(f"Uploaded re-timed captions to {retimed_captions_gcs_uri}")
# 9. Update job document with results
# 9. Update job document with results (including edit state for QC review)
update_fields = {
f"outputs.{language}.accessible_video_gcs": video_gcs_uri,
f"outputs.{language}.accessible_video_method": method,
f"outputs.{language}.video_segments_gcs_prefix": f"gs://{settings.gcs_bucket}/{gcs_segment_prefix}",
f"accessible_video_progress.{language}": {
"status": "completed",
"method": method,
@ -264,6 +280,10 @@ async def _async_render_accessible_video(job_id: str, language: str):
if retimed_captions_gcs_uri:
update_fields[f"outputs.{language}.retimed_captions_vtt_gcs"] = retimed_captions_gcs_uri
# Store edit state for QC review accessible video editing
if edit_state:
update_fields[f"outputs.{language}.accessible_video_edit_state"] = edit_state.model_dump()
await db.jobs.update_one(
{"_id": job_id},
{"$set": update_fields}
@ -423,6 +443,7 @@ async def _check_accessible_video_completion(job_id: str, db):
)
else:
# All videos completed successfully
# NEW WORKFLOW: Go to PENDING_QC for QC review (not PENDING_FINAL_REVIEW)
logger.info(f"All accessible videos complete for job {job_id}")
if job_doc["status"] in [JobStatus.TTS_GENERATING.value, JobStatus.RENDERING_VIDEO.value]:
@ -430,13 +451,13 @@ async def _check_accessible_video_completion(job_id: str, db):
{"_id": job_id},
{
"$set": {
"status": JobStatus.PENDING_FINAL_REVIEW.value,
"status": JobStatus.PENDING_QC.value,
"updated_at": datetime.utcnow()
},
"$push": {
"review.history": {
"at": datetime.utcnow(),
"status": JobStatus.PENDING_FINAL_REVIEW.value,
"status": JobStatus.PENDING_QC.value,
"by": "system"
}
}
@ -445,9 +466,9 @@ async def _check_accessible_video_completion(job_id: str, db):
broadcast_status_update(
job_id,
JobStatus.PENDING_FINAL_REVIEW.value,
JobStatus.PENDING_QC.value,
job_title=job_title,
message=f"{job_title} has all accessible videos complete - ready for Final Review"
message=f"{job_title} has all accessible videos complete - ready for QC Review"
)

View file

@ -0,0 +1,497 @@
"""Celery task for re-rendering accessible video with QC changes."""
import asyncio
import io
import os
import tempfile
from datetime import datetime
from celery.result import allow_join_result
from motor.motor_asyncio import AsyncIOMotorClient
from pydub import AudioSegment
from ..core.config import settings
from ..core.logging import get_logger
from ..lib.vtt import VTTParser
from ..models.job import AccessibleVideoEditState, JobStatus, PausePointData, VideoSegmentMetadata
from ..services.gcs import gcs_service
from ..services.video_renderer import video_renderer_service
from ..services.vtt_retimer import vtt_retimer_service
from ..services.whisper_service import WordTimestamp, whisper_service
from . import celery_app
from .render_accessible_video import _extract_audio_for_whisper, _dispatch_whisper_transcription
from .translate_and_synthesize import broadcast_status_update
from .tts_synthesis import dispatch_language_tts, parse_ad_cues, synthesize_cue_task
logger = get_logger(__name__)
@celery_app.task(bind=True, time_limit=7200, soft_time_limit=7000)
def rerender_accessible_video_task(
    self,
    job_id: str,
    language: str,
    regenerate_cue_indices: list[int],
    whisper_refine: bool = False
):
    """
    Re-render accessible video during QC review with selective TTS regeneration.

    This task:
    1. If regenerate_cue_indices not empty: synthesize new TTS for those cues
    2. Download source video and existing segments/MP3s
    3. If whisper_refine: run Whisper pause point refinement
    4. Re-render video using updated pause points and new/existing TTS
    5. Update job status back to PENDING_QC

    On failure the job is also returned to PENDING_QC (with the error recorded)
    and the original exception is re-raised so Celery marks the task as failed.

    Args:
        job_id: Job ID
        language: Language being re-rendered
        regenerate_cue_indices: List of cue indices to regenerate TTS for
        whisper_refine: Whether to run Whisper pause point refinement

    Returns:
        Whatever ``_async_rerender_accessible_video`` returns.

    Raises:
        Exception: Any error raised by the async re-render implementation.
    """
    logger.info(
        f"Starting accessible video re-render for job {job_id}/{language}: "
        f"regenerate={regenerate_cue_indices}, whisper_refine={whisper_refine}"
    )
    try:
        result = asyncio.run(_async_rerender_accessible_video(
            job_id, language, regenerate_cue_indices, whisper_refine
        ))
        logger.info(f"Accessible video re-render completed for job {job_id}/{language}")
        return result
    except Exception as e:
        logger.error(f"Accessible video re-render failed for job {job_id}/{language}: {e}")
        import traceback
        logger.error(f"Full traceback: {traceback.format_exc()}")
        # Update job status back to PENDING_QC with error. This is best-effort:
        # if the status reset itself fails (e.g. database unreachable) we log it
        # but still surface the ORIGINAL render error, not the secondary one.
        try:
            asyncio.run(_mark_rerender_failed(job_id, language, str(e)))
        except Exception as mark_error:
            logger.error(
                f"Could not return job {job_id}/{language} to PENDING_QC "
                f"after failed re-render: {mark_error}"
            )
        raise
async def _mark_rerender_failed(job_id: str, language: str, error_message: str):
    """
    Record a failed re-render on the job and hand it back to QC.

    Sets the job status to PENDING_QC, stores the error text under the
    language's edit state, appends a review-history entry, and broadcasts
    the status change.

    Args:
        job_id: Job ID
        language: Language whose re-render failed
        error_message: Error text to persist (truncated in history and broadcast)
    """
    mongo = AsyncIOMotorClient(settings.mongodb_uri)
    database = mongo[settings.mongodb_db]
    try:
        job_filter = {"_id": job_id}
        error_field = f"outputs.{language}.accessible_video_edit_state.last_render_error"

        status_changes = {
            "status": JobStatus.PENDING_QC.value,
            error_field: error_message,
            "updated_at": datetime.utcnow()
        }
        history_entry = {
            "at": datetime.utcnow(),
            "status": JobStatus.PENDING_QC.value,
            "by": "system",
            # History keeps a longer excerpt of the error than the broadcast does
            "notes": f"Re-render failed for {language}: {error_message[:200]}"
        }
        await database.jobs.update_one(
            job_filter,
            {"$set": status_changes, "$push": {"review.history": history_entry}}
        )

        # Re-read the job only to obtain its title for the notification
        stored_job = await database.jobs.find_one(job_filter)
        title = stored_job.get("title") if stored_job else None
        broadcast_status_update(
            job_id,
            JobStatus.PENDING_QC.value,
            job_title=title,
            message=f"Re-render failed: {error_message[:100]}"
        )
    finally:
        mongo.close()
async def _async_rerender_accessible_video(
    job_id: str,
    language: str,
    regenerate_cue_indices: list[int],
    whisper_refine: bool
):
    """
    Async implementation of accessible video re-rendering.

    Loads the job, optionally regenerates TTS for the requested cues, downloads
    the source video and per-cue MP3s, rebuilds placements from the stored
    pause points, optionally refines them with Whisper, renders and uploads the
    video (plus re-timed captions for the pause-insert method), then writes the
    results back to the job and returns it to PENDING_QC.

    Args:
        job_id: Job ID
        language: Language being re-rendered
        regenerate_cue_indices: Cue indices whose TTS must be re-synthesized first
        whisper_refine: Whether to run Whisper pause point refinement

    Raises:
        ValueError: If the job, its language outputs, edit state, AD VTT or
            AD cue prefix is missing.
    """
    logger.info(f"Async re-render started for job {job_id}/{language}")
    client = AsyncIOMotorClient(settings.mongodb_uri)
    db = client[settings.mongodb_db]
    try:
        # Get job details
        job_doc = await db.jobs.find_one({"_id": job_id})
        if not job_doc:
            raise ValueError(f"Job {job_id} not found")
        job_title = job_doc.get("title", "Untitled Job")
        lang_output = job_doc.get("outputs", {}).get(language)
        if not lang_output:
            raise ValueError(f"No outputs found for language {language}")
        edit_state = lang_output.get("accessible_video_edit_state")
        if not edit_state:
            raise ValueError(f"No edit state found for language {language}")

        # Use TMPDIR env var if set
        temp_base = os.environ.get('TMPDIR', None)
        with tempfile.TemporaryDirectory(dir=temp_base) as temp_dir:
            # 1. Download source video
            source_video_gcs = job_doc["source"]["gcs_uri"]
            source_blob_path = source_video_gcs.replace(f"gs://{settings.gcs_bucket}/", "")
            source_video_path = os.path.join(temp_dir, "source.mp4")
            logger.info(f"Downloading source video from {source_blob_path}")
            source_blob = gcs_service.bucket.blob(source_blob_path)
            source_blob.download_to_filename(source_video_path)

            # 2. Regenerate TTS for queued cues (if any)
            if regenerate_cue_indices:
                logger.info(f"Regenerating TTS for cues: {regenerate_cue_indices}")
                await _regenerate_tts_cues(
                    job_id, language, regenerate_cue_indices, job_doc, db, temp_dir
                )
                # Clear regeneration queue after successful synthesis
                await db.jobs.update_one(
                    {"_id": job_id},
                    {
                        "$set": {
                            f"outputs.{language}.accessible_video_edit_state.tts_regeneration_queue": [],
                            "updated_at": datetime.utcnow()
                        }
                    }
                )

            # 3. Download AD VTT and per-cue MP3s
            ad_vtt_gcs = lang_output.get("ad_vtt_gcs")
            if not ad_vtt_gcs:
                raise ValueError(f"No AD VTT found for language {language}")
            ad_blob_path = ad_vtt_gcs.replace(f"gs://{settings.gcs_bucket}/", "")
            ad_blob = gcs_service.bucket.blob(ad_blob_path)
            ad_vtt_content = ad_blob.download_as_text()

            # Download per-cue MP3s
            ad_cues_prefix = lang_output.get("ad_cues_gcs_prefix")
            if not ad_cues_prefix:
                raise ValueError(f"No AD cue segments found for language {language}")
            ad_segments = []
            cue_durations = []
            prefix_path = ad_cues_prefix.replace(f"gs://{settings.gcs_bucket}/", "")

            # Collect (blob, cue_index) pairs. An .mp3 whose name does not end in
            # "_<int>.mp3" is skipped with a warning instead of aborting the
            # whole re-render with a ValueError.
            cue_blobs = []
            for blob in gcs_service.bucket.list_blobs(prefix=prefix_path):
                if not blob.name.endswith(".mp3"):
                    continue
                parsed_index = _cue_index_from_blob_name(blob.name)
                if parsed_index is None:
                    logger.warning(f"Skipping MP3 without a numeric cue index: {blob.name}")
                    continue
                cue_blobs.append((blob, parsed_index))
            cue_blobs.sort(key=lambda x: x[1])

            for blob, cue_index in cue_blobs:
                local_path = os.path.join(temp_dir, f"cue_{cue_index}.mp3")
                blob.download_to_filename(local_path)
                ad_segments.append((cue_index, local_path))
                audio = AudioSegment.from_mp3(local_path)
                duration = len(audio) / 1000.0  # pydub reports length in milliseconds
                cue_durations.append(duration)
            logger.info(f"Downloaded {len(ad_segments)} AD cue segments")

            # Durations are consumed positionally when placements are built, so a
            # gap or offset in the cue indices would pair cues with the wrong audio.
            found_indices = [cue_index for cue_index, _ in ad_segments]
            if found_indices != list(range(len(found_indices))):
                logger.warning(
                    f"AD cue MP3 indices are not contiguous from 0 ({found_indices}); "
                    f"cue durations may be misaligned with VTT cues"
                )

            # 4. Build placements with adjusted pause points
            method = lang_output.get("accessible_video_method", "pause_insert")
            pause_points = edit_state.get("pause_points", [])
            placements = _build_placements_with_adjustments(
                ad_vtt_content, cue_durations, pause_points
            )
            logger.info(f"Built {len(placements)} placements with adjusted pause points")
            analysis = {
                "method": method,
                "method_rationale": "QC re-render with user adjustments",
                "placements": placements,
                "total_added_duration": sum(cue_durations) if method == "pause_insert" else 0,
                "warnings": []
            }

            # 5. Optionally run Whisper refinement
            if whisper_refine and method == "pause_insert":
                logger.info("Running Whisper pause point refinement...")
                analysis, whisper_warnings = await _refine_pause_points_for_rerender(
                    job_id, source_video_path, analysis, db, temp_dir
                )
                if whisper_warnings:
                    analysis["warnings"] = analysis.get("warnings", []) + whisper_warnings
                logger.info(f"Whisper refinement complete with {len(whisper_warnings)} warnings")

            # 6. Render accessible video (persist segments again for future edits)
            output_video_path = os.path.join(temp_dir, "accessible_video.mp4")
            gcs_segment_prefix = f"{job_id}/{language}/segments/"
            logger.info(f"Re-rendering accessible video using {method} method...")
            rendered_path, updated_placements, segment_metadata, new_pause_points = await video_renderer_service.render_accessible_video(
                source_video_path,
                ad_segments,
                analysis,
                output_video_path,
                persist_segments=True,
                gcs_segment_prefix=gcs_segment_prefix
            )
            if updated_placements:
                analysis["placements"] = updated_placements

            # 7. Upload rendered video
            video_blob_path = f"{job_id}/{language}/accessible_video.mp4"
            video_blob = gcs_service.bucket.blob(video_blob_path)
            video_blob.content_type = "video/mp4"
            video_blob.upload_from_filename(output_video_path)
            video_gcs_uri = f"gs://{settings.gcs_bucket}/{video_blob_path}"
            logger.info(f"Uploaded re-rendered accessible video to {video_gcs_uri}")

            # 8. Generate re-timed captions if pause-insert
            retimed_captions_gcs_uri = None
            if method == "pause_insert":
                captions_vtt_gcs = lang_output.get("captions_vtt_gcs")
                if captions_vtt_gcs:
                    captions_blob_path = captions_vtt_gcs.replace(f"gs://{settings.gcs_bucket}/", "")
                    captions_blob = gcs_service.bucket.blob(captions_blob_path)
                    original_captions_vtt = captions_blob.download_as_text()
                    retimed_captions = vtt_retimer_service.retime_for_pause_insert(
                        original_captions_vtt, analysis
                    )
                    retimed_blob_path = f"{job_id}/{language}/accessible_captions.vtt"
                    retimed_blob = gcs_service.bucket.blob(retimed_blob_path)
                    retimed_blob.content_type = "text/vtt"
                    retimed_blob.upload_from_string(retimed_captions, content_type="text/vtt")
                    retimed_captions_gcs_uri = f"gs://{settings.gcs_bucket}/{retimed_blob_path}"
                    logger.info(f"Uploaded re-timed captions to {retimed_captions_gcs_uri}")

            # 9. Build new edit state (only when the renderer returned both pieces)
            new_edit_state = None
            if segment_metadata and new_pause_points:
                new_edit_state = AccessibleVideoEditState(
                    pause_points=new_pause_points,
                    video_segments=segment_metadata,
                    tts_regeneration_queue=[],
                    last_render_at=datetime.utcnow(),
                    whisper_refine_enabled=whisper_refine
                )

            # 10. Update job document
            update_fields = {
                f"outputs.{language}.accessible_video_gcs": video_gcs_uri,
                f"outputs.{language}.video_segments_gcs_prefix": f"gs://{settings.gcs_bucket}/{gcs_segment_prefix}",
                "status": JobStatus.PENDING_QC.value,
                "updated_at": datetime.utcnow()
            }
            if retimed_captions_gcs_uri:
                update_fields[f"outputs.{language}.retimed_captions_vtt_gcs"] = retimed_captions_gcs_uri
            if new_edit_state:
                update_fields[f"outputs.{language}.accessible_video_edit_state"] = new_edit_state.model_dump()
            await db.jobs.update_one(
                {"_id": job_id},
                {
                    "$set": update_fields,
                    "$push": {
                        "review.history": {
                            "at": datetime.utcnow(),
                            "status": JobStatus.PENDING_QC.value,
                            "by": "system",
                            "notes": f"Re-render complete for {language}"
                        }
                    }
                }
            )

            # Broadcast completion
            broadcast_status_update(
                job_id,
                JobStatus.PENDING_QC.value,
                job_title=job_title,
                message=f"Accessible video re-render complete for {language.upper()}"
            )
            logger.info(f"Accessible video re-render complete for job {job_id}/{language}")
    finally:
        client.close()


def _cue_index_from_blob_name(blob_name: str) -> "int | None":
    """Return the integer after the last underscore of an MP3 blob name, or None if it is not an integer."""
    try:
        return int(blob_name.split("_")[-1].replace(".mp3", ""))
    except ValueError:
        return None
async def _regenerate_tts_cues(
    job_id: str,
    language: str,
    cue_indices: list[int],
    job_doc: dict,
    db,
    temp_dir: str
):
    """
    Regenerate TTS for specific cues using current VTT text.

    Cues are synthesized one at a time: each is dispatched to the "tts" Celery
    queue and awaited before the next one is sent.

    Args:
        job_id: Job ID
        language: Language whose AD cues are re-synthesized
        cue_indices: Zero-based positions of cues in the parsed AD VTT;
            duplicates are processed once, out-of-range values are skipped
        job_doc: Job document (read for outputs and TTS preferences)
        db: Database handle (not used by this function)
        temp_dir: Scratch directory (not used by this function)

    Raises:
        ValueError: If the language has no AD VTT recorded on the job.
        Exception: If a synthesis task reports failure.
    """
    logger.info(f"Regenerating TTS for {len(cue_indices)} cues")

    # Get AD VTT content
    lang_output = job_doc.get("outputs", {}).get(language) or {}
    ad_vtt_gcs = lang_output.get("ad_vtt_gcs")
    if not ad_vtt_gcs:
        # Fail with a clear message instead of an AttributeError on None
        raise ValueError(f"No AD VTT found for language {language}")
    ad_blob_path = ad_vtt_gcs.replace(f"gs://{settings.gcs_bucket}/", "")
    ad_blob = gcs_service.bucket.blob(ad_blob_path)
    ad_vtt_content = ad_blob.download_as_text()

    # Parse cues
    cues = parse_ad_cues(ad_vtt_content)

    # Get TTS preferences
    tts_preferences = job_doc["requested_outputs"].get("tts_preferences", {})
    voices_per_language = tts_preferences.get("voices_per_language", {})
    voice_name = voices_per_language.get(language, tts_preferences.get("default_voice"))
    provider = tts_preferences.get("provider", "gemini")
    model = tts_preferences.get("model", "flash")
    speed = tts_preferences.get("speed", 1.0)
    style_preset = tts_preferences.get("style_preset", "neutral")
    custom_style_prompt = tts_preferences.get("custom_style_prompt")
    if style_preset == "custom" and custom_style_prompt:
        style_prompt = custom_style_prompt
    else:
        style_prompt = settings.gemini_tts_style_prompts.get(style_preset, "")

    # Synthesize each cue (dict.fromkeys de-duplicates while keeping request order)
    for cue_idx in dict.fromkeys(cue_indices):
        # Reject negatives too: cues[-1] would silently re-synthesize the last cue
        if not 0 <= cue_idx < len(cues):
            logger.warning(f"Cue index {cue_idx} out of range, skipping")
            continue
        cue = cues[cue_idx]
        logger.info(f"Synthesizing TTS for cue {cue_idx}: '{cue['text'][:50]}...'")

        # Dispatch synthesis task
        task_result = synthesize_cue_task.apply_async(
            kwargs={
                "job_id": job_id,
                "language": language,
                "cue_index": cue_idx,
                "text": cue["text"],
                "start_time": cue["start_time"],
                "end_time": cue["end_time"],
                "voice_name": voice_name,
                "provider": provider,
                "model": model,
                "speed": speed,
                "style_prompt": style_prompt
            },
            queue="tts"
        )

        # Wait for completion without blocking the event loop
        poll_count = 0
        while not task_result.ready():
            await asyncio.sleep(1.0)
            poll_count += 1
            if poll_count % 30 == 0:
                logger.info(f"Still waiting for TTS cue {cue_idx}...")

        # Reading a result from inside another task requires explicit opt-in
        with allow_join_result():
            result = task_result.get(timeout=120)
        if not result.get("success"):
            raise Exception(f"TTS synthesis failed for cue {cue_idx}: {result.get('error_message')}")
        logger.info(f"TTS synthesis complete for cue {cue_idx}")

    logger.info(f"All {len(cue_indices)} TTS cues regenerated")
def _build_placements_with_adjustments(
    ad_vtt_content: str,
    cue_durations: list[float],
    pause_points: list[dict]
) -> list[dict]:
    """
    Build placement instructions using adjusted pause points from QC edits.

    Each AD cue is paired positionally with its TTS duration; cues beyond the
    end of ``cue_durations`` are dropped. The pause point for a cue is, in order
    of preference: the stored ``adjusted_ms``, the stored ``original_ms``, or
    the cue's own VTT start time.

    Args:
        ad_vtt_content: AD VTT content
        cue_durations: TTS durations per cue, in seconds, ordered by cue position
        pause_points: Pause point dicts with ``cue_index``, ``original_ms`` and
            optional ``adjusted_ms``; entries lacking a cue index or any timing
            are ignored

    Returns:
        List of placement dicts
    """
    cues = VTTParser.parse(ad_vtt_content)

    # Build lookup of effective pause time (seconds) by cue index, and remember
    # which of them were actually changed by a reviewer.
    pause_seconds_by_cue = {}
    user_adjusted_cues = set()
    for pp in pause_points:
        cue_idx = pp.get("cue_index")
        adjusted = pp.get("adjusted_ms")
        pause_ms = adjusted if adjusted is not None else pp.get("original_ms")
        if cue_idx is None or pause_ms is None:
            # Nothing usable stored; the cue falls back to its VTT start time
            continue
        pause_seconds_by_cue[cue_idx] = pause_ms / 1000.0
        if adjusted is not None:
            user_adjusted_cues.add(cue_idx)

    placements = []
    # zip() stops at the shorter input, i.e. cues without a duration are skipped
    for i, (cue, ad_duration) in enumerate(zip(cues, cue_durations)):
        pause_point = pause_seconds_by_cue.get(i, cue.start_time)
        if i in user_adjusted_cues:
            rationale = "User-adjusted during QC"
        elif i in pause_seconds_by_cue:
            rationale = "Unchanged from previous render"
        else:
            rationale = "Original from VTT"
        placements.append({
            "ad_cue_index": i,
            "original_start_time": cue.start_time,
            "original_end_time": cue.end_time,
            "target_start_time": cue.start_time,
            "ad_duration": ad_duration,
            "pause_point": pause_point,
            "resume_from": pause_point,
            "pause_point_rationale": rationale
        })
    return placements
async def _refine_pause_points_for_rerender(
    job_id: str,
    video_path: str,
    analysis: dict,
    db,
    temp_dir: str
) -> tuple[dict, list[str]]:
    """
    Run Whisper pause point refinement for re-render.

    Extracts the video's audio into ``temp_dir``, transcribes it, and asks the
    Whisper service to refine every placement in ``analysis``. If transcription
    fails or yields no words, the input analysis is returned untouched together
    with a single explanatory warning.

    Args:
        job_id: Job ID
        video_path: Local path of the source video
        analysis: Current analysis dict (its ``placements`` are refined)
        db: Database handle (not used by this function)
        temp_dir: Scratch directory for the extracted audio

    Returns:
        Tuple of (analysis dict, list of warning strings)
    """
    logger.info(f"Refining pause points with Whisper for re-render of job {job_id}")

    extracted_audio = os.path.join(temp_dir, "source_audio.mp3")
    await _extract_audio_for_whisper(video_path, extracted_audio)

    try:
        words = await _dispatch_whisper_transcription(job_id, extracted_audio)
    except Exception as e:
        logger.error(f"Whisper transcription failed: {e}")
        return analysis, [f"Whisper failed: {str(e)} - using current timestamps"]

    if not words:
        return analysis, ["No speech detected - using current timestamps"]

    speech_gaps = whisper_service.identify_speech_gaps(words)
    current_placements = analysis.get("placements", [])
    refined_placements, warnings = whisper_service.refine_all_pause_points(
        current_placements,
        words,
        speech_gaps
    )

    # Shallow copy so the caller's analysis dict is not mutated
    refined_analysis = analysis.copy()
    refined_analysis.update(placements=refined_placements, whisper_refined=True)
    return refined_analysis, warnings

View file

@ -139,9 +139,16 @@ async def _async_translate_and_synthesize(job_id: str):
job_title = job_doc.get("title", "Untitled Job")
logger.info(f"✅ Found job document for {job_id} ({job_title}), status: {job_doc.get('status', 'UNKNOWN')}")
# Check for any approved status (English or non-English source)
if not JobStatus.is_approved(job_doc["status"]):
logger.warning(f"⚠️ Job {job_id} not in approved status (current: {job_doc['status']}), skipping")
# Check for valid status to process translation
# Valid statuses: approved_english, approved_source (legacy), or translating (new workflow)
current_status = job_doc["status"]
valid_statuses = [
JobStatus.APPROVED_ENGLISH.value,
JobStatus.APPROVED_SOURCE.value,
JobStatus.TRANSLATING.value,
]
if current_status not in valid_statuses:
logger.warning(f"⚠️ Job {job_id} not in valid status for translation (current: {current_status}), skipping")
return
# Get source language from job
@ -389,20 +396,21 @@ async def _async_translate_and_synthesize(job_id: str):
)
# Update final status
# NEW WORKFLOW: Translation pipeline now ends at PENDING_QC for QC review
# If accessible video is requested, the render task will handle the transition
# to PENDING_FINAL_REVIEW when all videos are complete
# to PENDING_QC when all videos are complete
if not accessible_video_requested:
await db.jobs.update_one(
{"_id": job_id},
{
"$set": {
"status": JobStatus.PENDING_FINAL_REVIEW.value,
"status": JobStatus.PENDING_QC.value,
"updated_at": datetime.utcnow()
},
"$push": {
"review.history": {
"at": datetime.utcnow(),
"status": JobStatus.PENDING_FINAL_REVIEW.value,
"status": JobStatus.PENDING_QC.value,
"by": "system"
}
}
@ -412,13 +420,13 @@ async def _async_translate_and_synthesize(job_id: str):
# Broadcast status update
broadcast_status_update(
job_id,
JobStatus.PENDING_FINAL_REVIEW.value,
JobStatus.PENDING_QC.value,
job_title=job_title,
message=f"{job_title} has finished translation and audio generation - ready for Final Review"
message=f"{job_title} has finished translation and audio generation - ready for QC Review"
)
else:
# When accessible video is requested, stay in TTS_GENERATING
# The render_accessible_video task will transition to PENDING_FINAL_REVIEW
# The render_accessible_video task will transition to PENDING_QC when all videos complete
logger.info(
f"Accessible video rendering triggered for job {job_id}. "
f"Staying in TTS_GENERATING until all videos are complete."

View file

@ -0,0 +1,112 @@
import { useState } from 'react';
/** Props accepted by RerenderControls. */
interface RerenderControlsProps {
  /** Entries queued for TTS regeneration; only the count is displayed here. */
  pendingRegenerations: number[];
  /** Whether any pause point has been adjusted since the last render. */
  pausePointsModified: boolean;
  /** While true the controls are disabled and a spinner is shown. */
  isRendering: boolean;
  /** Invoked with the chosen options when "Render Changes" is pressed. */
  onRender: (options: { whisperRefine: boolean }) => void;
  /** Invoked when "Clear queue" is pressed. */
  onClearQueue: () => void;
}

/**
 * Summary panel and trigger for re-rendering the accessible video.
 *
 * Shows what has changed (queued TTS regenerations, adjusted pause points),
 * offers an optional Whisper refinement toggle, and enables the render button
 * only when there is at least one change and no render is in progress.
 */
export function RerenderControls({
  pendingRegenerations,
  pausePointsModified,
  isRendering,
  onRender,
  onClearQueue,
}: RerenderControlsProps) {
  const [whisperRefine, setWhisperRefine] = useState(false);
  const hasChanges = pendingRegenerations.length > 0 || pausePointsModified;

  return (
    <div className="bg-purple-50 border border-purple-200 rounded-lg p-4">
      <div className="flex items-start justify-between gap-4">
        <div className="flex-1">
          <h4 className="text-sm font-semibold text-purple-900">
            Render Accessible Video Changes
          </h4>
          {pendingRegenerations.length > 0 && (
            <div className="mt-2 flex items-center gap-2">
              <span className="text-sm text-purple-700">
                {pendingRegenerations.length} TTS cue(s) queued for regeneration
              </span>
              {/* type="button" so the control never submits an enclosing form */}
              <button
                type="button"
                onClick={onClearQueue}
                className="text-xs text-purple-600 hover:text-purple-800 underline"
              >
                Clear queue
              </button>
            </div>
          )}
          {pausePointsModified && (
            <p className="mt-1 text-sm text-purple-700">
              Pause points have been adjusted
            </p>
          )}
          {!hasChanges && (
            <p className="mt-1 text-sm text-gray-500">
              No changes to render. Adjust pause points or queue TTS regenerations.
            </p>
          )}
        </div>
        <div className="flex flex-col items-end gap-3">
          <div className="flex items-center gap-2">
            <label className="flex items-center text-sm">
              <input
                type="checkbox"
                checked={whisperRefine}
                onChange={(e) => setWhisperRefine(e.target.checked)}
                className="mr-2 rounded"
                disabled={isRendering}
              />
              Run Whisper pause refinement
            </label>
          </div>
          <p className="text-xs text-gray-500 max-w-xs text-right">
            Enable if you changed the number or position of AD cues. Skip if you only adjusted pause timing.
          </p>
          <button
            type="button"
            onClick={() => onRender({ whisperRefine })}
            disabled={!hasChanges || isRendering}
            className={`px-4 py-2 text-sm font-medium rounded transition-colors ${
              !hasChanges || isRendering
                ? 'bg-gray-200 text-gray-500 cursor-not-allowed'
                : 'bg-purple-600 text-white hover:bg-purple-700'
            }`}
          >
            {isRendering ? (
              <span className="flex items-center gap-2">
                <svg className="animate-spin h-4 w-4" viewBox="0 0 24 24">
                  <circle
                    className="opacity-25"
                    cx="12"
                    cy="12"
                    r="10"
                    stroke="currentColor"
                    strokeWidth="4"
                    fill="none"
                  />
                  <path
                    className="opacity-75"
                    fill="currentColor"
                    d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
                  />
                </svg>
                Rendering...
              </span>
            ) : (
              'Render Changes'
            )}
          </button>
        </div>
      </div>
    </div>
  );
}

View file

@ -0,0 +1,165 @@
import { useState, useEffect, useRef } from 'react';
import type { PausePointData } from '../../types/api';
/** Props accepted by PausePointEditor. */
interface PausePointEditorProps {
  /** Pause point being edited. */
  pausePoint: PausePointData;
  /** Viewport coordinates (CSS `left`/`top` of a fixed-position popover). */
  position: { x: number; y: number };
  /** Called with the validated pause time in milliseconds. */
  onSave: (adjustedMs: number) => void;
  /** Called when the editor is dismissed without saving. */
  onCancel: () => void;
  /** Called when the user queues TTS regeneration for this cue. */
  onRegenerateTTS: () => void;
  /** Disables the regeneration button when the cue is already queued. */
  isRegenerationQueued: boolean;
}

/**
 * Popover for editing a single pause point with millisecond precision.
 *
 * The value must be a whole number of milliseconds within
 * [min_bound_ms, max_bound_ms]. Enter saves, Escape cancels.
 */
export function PausePointEditor({
  pausePoint,
  position,
  onSave,
  onCancel,
  onRegenerateTTS,
  isRegenerationQueued,
}: PausePointEditorProps) {
  const effectiveMs = pausePoint.adjusted_ms ?? pausePoint.original_ms;
  const [value, setValue] = useState(effectiveMs);
  const [error, setError] = useState<string | null>(null);
  const inputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    setValue(pausePoint.adjusted_ms ?? pausePoint.original_ms);
    // A validation error belongs to the previous pause point's input; clear it
    // so the Save button is not left disabled for a valid new value.
    setError(null);
  }, [pausePoint]);

  useEffect(() => {
    // Focus input on mount
    inputRef.current?.focus();
    inputRef.current?.select();
  }, []);

  /** Format milliseconds as m:ss.mmm. */
  const formatMs = (ms: number) => {
    const totalSeconds = Math.floor(ms / 1000);
    const minutes = Math.floor(totalSeconds / 60);
    const seconds = totalSeconds % 60;
    const milliseconds = Math.floor(ms % 1000);
    return `${minutes}:${seconds.toString().padStart(2, '0')}.${milliseconds.toString().padStart(3, '0')}`;
  };

  /** Return an error message for an unacceptable value, or null when valid. */
  const validateValue = (ms: number): string | null => {
    // NaN/Infinity would slip through the range comparisons below
    if (!Number.isFinite(ms)) {
      return 'Enter a valid number';
    }
    if (!Number.isInteger(ms)) {
      return 'Must be a whole number of milliseconds';
    }
    if (ms < pausePoint.min_bound_ms) {
      return `Cannot be earlier than ${formatMs(pausePoint.min_bound_ms)}`;
    }
    if (ms > pausePoint.max_bound_ms) {
      return `Cannot be later than ${formatMs(pausePoint.max_bound_ms)}`;
    }
    return null;
  };

  const handleValueChange = (newValue: number) => {
    setValue(newValue);
    setError(validateValue(newValue));
  };

  const handleSave = () => {
    // Re-validate: Enter can trigger a save without a preceding change event
    const validationError = validateValue(value);
    if (validationError) {
      setError(validationError);
      return;
    }
    onSave(value);
  };

  const handleKeyDown = (e: React.KeyboardEvent) => {
    if (e.key === 'Enter') {
      handleSave();
    } else if (e.key === 'Escape') {
      onCancel();
    }
  };

  return (
    <div
      className="fixed z-50 bg-white border border-gray-200 rounded-lg shadow-lg p-4 min-w-64"
      style={{ left: position.x, top: position.y }}
    >
      <div className="flex justify-between items-center mb-3">
        <h4 className="text-sm font-semibold text-gray-900">
          Pause Point {pausePoint.cue_index}
        </h4>
        {/* Icon-only button: needs an explicit accessible name */}
        <button
          type="button"
          aria-label="Close"
          onClick={onCancel}
          className="text-gray-400 hover:text-gray-600"
        >
          <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
          </svg>
        </button>
      </div>
      <div className="space-y-3">
        {/* Current value input */}
        <div>
          <label className="block text-xs text-gray-500 mb-1">
            Pause time (milliseconds)
          </label>
          <div className="flex items-center gap-2">
            <input
              ref={inputRef}
              type="number"
              value={value}
              onChange={(e) => handleValueChange(Number(e.target.value))}
              onKeyDown={handleKeyDown}
              step="1"
              className={`flex-1 px-2 py-1 text-sm border rounded ${
                error ? 'border-red-500' : 'border-gray-300'
              } focus:outline-none focus:ring-2 focus:ring-blue-500`}
            />
            <span className="text-xs text-gray-500">{formatMs(value)}</span>
          </div>
          {error && (
            <p className="text-xs text-red-500 mt-1">{error}</p>
          )}
        </div>
        {/* Bounds info */}
        <div className="text-xs text-gray-500 bg-gray-50 rounded p-2">
          <div>Min: {formatMs(pausePoint.min_bound_ms)}</div>
          <div>Max: {formatMs(pausePoint.max_bound_ms)}</div>
          <div>Original: {formatMs(pausePoint.original_ms)}</div>
        </div>
        {/* TTS Regeneration */}
        <div className="border-t pt-3">
          <button
            type="button"
            onClick={onRegenerateTTS}
            disabled={isRegenerationQueued}
            className={`w-full text-sm py-1.5 rounded ${
              isRegenerationQueued
                ? 'bg-amber-100 text-amber-700 cursor-not-allowed'
                : 'bg-orange-100 text-orange-700 hover:bg-orange-200'
            }`}
          >
            {isRegenerationQueued ? 'Regeneration Queued' : 'Queue TTS Regeneration'}
          </button>
        </div>
        {/* Action buttons */}
        <div className="flex gap-2">
          <button
            type="button"
            onClick={onCancel}
            className="flex-1 px-3 py-1.5 text-sm border border-gray-300 rounded hover:bg-gray-50"
          >
            Cancel
          </button>
          <button
            type="button"
            onClick={handleSave}
            disabled={!!error}
            className={`flex-1 px-3 py-1.5 text-sm text-white rounded ${
              error
                ? 'bg-gray-300 cursor-not-allowed'
                : 'bg-blue-600 hover:bg-blue-700'
            }`}
          >
            Save
          </button>
        </div>
      </div>
    </div>
  );
}

View file

@ -0,0 +1,201 @@
import { useState, useRef, useCallback } from 'react';
import type { VideoSegmentMetadata, PausePointData } from '../../types/api';
import { PausePointEditor } from './PausePointEditor';
/** Props accepted by TimelinePreview. */
interface TimelinePreviewProps {
  /** Segments drawn as coloured bars, positioned by start_ms / duration_ms. */
  segments: VideoSegmentMetadata[];
  /** Pause points drawn as vertical markers. */
  pausePoints: PausePointData[];
  /** Duration represented by the full width of the timeline, in milliseconds. */
  totalDurationMs: number;
  /** Position of the current-time indicator, in milliseconds. */
  currentTimeMs: number;
  /** Called when any segment bar is clicked. */
  onSegmentClick: (segment: VideoSegmentMetadata) => void;
  /** Called when a marker, or a freeze-frame segment with a matching pause point, is clicked. */
  onPausePointClick: (pausePoint: PausePointData) => void;
  /** Called when the editor saves a new pause time. */
  onPausePointUpdate: (cueIndex: number, adjustedMs: number) => void;
  /** Called when the editor queues TTS regeneration for a cue. */
  onRegenerateTTS: (cueIndex: number) => void;
  /** Cue indices currently queued for TTS regeneration. */
  regenerationQueue: number[];
}

/**
 * Horizontal timeline of video segments, AD freeze frames and pause points.
 *
 * Clicking a pause point marker opens a PausePointEditor popover anchored
 * below the marker.
 */
export function TimelinePreview({
  segments,
  pausePoints,
  totalDurationMs,
  currentTimeMs,
  onSegmentClick,
  onPausePointClick,
  onPausePointUpdate,
  onRegenerateTTS,
  regenerationQueue,
}: TimelinePreviewProps) {
  const [selectedPausePoint, setSelectedPausePoint] = useState<PausePointData | null>(null);
  const [editorPosition, setEditorPosition] = useState({ x: 0, y: 0 });
  const timelineRef = useRef<HTMLDivElement>(null);

  // Convert a millisecond value to a percentage of the timeline width
  // (0 when the total duration is not positive, avoiding division by zero).
  const getPositionPercent = useCallback(
    (ms: number) => (totalDurationMs > 0 ? (ms / totalDurationMs) * 100 : 0),
    [totalDurationMs]
  );

  const handlePausePointMarkerClick = (
    e: React.MouseEvent,
    pausePoint: PausePointData
  ) => {
    // Keep the click from also reaching the segment underneath the marker
    e.stopPropagation();
    const rect = (e.target as HTMLElement).getBoundingClientRect();
    setEditorPosition({ x: rect.left, y: rect.bottom + 8 });
    setSelectedPausePoint(pausePoint);
    onPausePointClick(pausePoint);
  };

  const handleSegmentClick = (segment: VideoSegmentMetadata) => {
    onSegmentClick(segment);
    // `!= null` treats null and undefined alike, matching the `??` used below
    if (segment.is_freeze_frame && segment.cue_index != null) {
      // Highlight the AD cue
      const pausePoint = pausePoints.find(pp => pp.cue_index === segment.cue_index);
      if (pausePoint) {
        onPausePointClick(pausePoint);
      }
    }
  };

  const handleEditorSave = (adjustedMs: number) => {
    if (selectedPausePoint) {
      onPausePointUpdate(selectedPausePoint.cue_index, adjustedMs);
      setSelectedPausePoint(null);
    }
  };

  const handleEditorClose = () => {
    setSelectedPausePoint(null);
  };

  /** Format milliseconds as m:ss.mmm. */
  const formatTime = (ms: number) => {
    const totalSeconds = Math.floor(ms / 1000);
    const minutes = Math.floor(totalSeconds / 60);
    const seconds = totalSeconds % 60;
    const milliseconds = Math.floor(ms % 1000);
    return `${minutes}:${seconds.toString().padStart(2, '0')}.${milliseconds.toString().padStart(3, '0')}`;
  };

  return (
    <div className="relative">
      {/* Timeline container */}
      <div
        ref={timelineRef}
        className="relative h-16 bg-gray-100 rounded-lg overflow-hidden"
      >
        {/* Segments */}
        {segments.map((segment) => {
          const leftPercent = getPositionPercent(segment.start_ms);
          const widthPercent = getPositionPercent(segment.duration_ms);
          const isRegenerationQueued =
            segment.is_freeze_frame &&
            segment.cue_index != null &&
            regenerationQueue.includes(segment.cue_index);
          return (
            <div
              key={segment.segment_index}
              className={`absolute top-0 h-full cursor-pointer transition-all hover:opacity-90 ${
                segment.is_freeze_frame
                  ? isRegenerationQueued
                    ? 'bg-amber-400'
                    : 'bg-orange-400'
                  : 'bg-blue-400'
              }`}
              style={{
                left: `${leftPercent}%`,
                // Keep very short segments visible and clickable
                width: `${Math.max(widthPercent, 0.5)}%`,
              }}
              onClick={() => handleSegmentClick(segment)}
              title={
                segment.is_freeze_frame
                  ? `AD Cue ${segment.cue_index}${isRegenerationQueued ? ' (Regenerate queued)' : ''}`
                  : `Video segment ${segment.segment_index}`
              }
            >
              {/* Cue index label for freeze frames */}
              {segment.is_freeze_frame && segment.cue_index != null && widthPercent > 2 && (
                <div className="absolute inset-0 flex items-center justify-center">
                  <span className="text-xs font-bold text-white drop-shadow">
                    {segment.cue_index}
                  </span>
                </div>
              )}
            </div>
          );
        })}
        {/* Pause point markers */}
        {pausePoints.map((pausePoint) => {
          const effectiveMs = pausePoint.adjusted_ms ?? pausePoint.original_ms;
          const leftPercent = getPositionPercent(effectiveMs);
          // Must agree with `??` above: an absent (undefined) adjusted_ms is
          // not an adjustment.
          const isAdjusted = pausePoint.adjusted_ms != null;
          return (
            <div
              key={`pause-${pausePoint.cue_index}`}
              className={`absolute top-0 w-1 h-full cursor-pointer z-10 ${
                isAdjusted ? 'bg-purple-600' : 'bg-red-600'
              } hover:w-2 transition-all`}
              style={{ left: `${leftPercent}%` }}
              onClick={(e) => handlePausePointMarkerClick(e, pausePoint)}
              title={`Pause point ${pausePoint.cue_index}: ${formatTime(effectiveMs)}${
                isAdjusted ? ' (adjusted)' : ''
              }`}
            />
          );
        })}
        {/* Current time indicator */}
        <div
          className="absolute top-0 w-0.5 h-full bg-green-500 z-20 pointer-events-none"
          style={{ left: `${getPositionPercent(currentTimeMs)}%` }}
        />
      </div>
      {/* Time labels */}
      <div className="flex justify-between mt-1 text-xs text-gray-500">
        <span>0:00</span>
        <span>{formatTime(totalDurationMs)}</span>
      </div>
      {/* Legend */}
      <div className="flex gap-4 mt-2 text-xs">
        <div className="flex items-center gap-1">
          <div className="w-3 h-3 bg-blue-400 rounded" />
          <span>Video</span>
        </div>
        <div className="flex items-center gap-1">
          <div className="w-3 h-3 bg-orange-400 rounded" />
          <span>AD Audio</span>
        </div>
        <div className="flex items-center gap-1">
          <div className="w-3 h-3 bg-amber-400 rounded" />
          <span>Regenerate Queued</span>
        </div>
        <div className="flex items-center gap-1">
          <div className="w-1 h-3 bg-red-600" />
          <span>Pause Point</span>
        </div>
        <div className="flex items-center gap-1">
          <div className="w-1 h-3 bg-purple-600" />
          <span>Adjusted</span>
        </div>
      </div>
      {/* Pause point editor popover */}
      {selectedPausePoint && (
        <PausePointEditor
          pausePoint={selectedPausePoint}
          position={editorPosition}
          onSave={handleEditorSave}
          onCancel={handleEditorClose}
          onRegenerateTTS={() => {
            onRegenerateTTS(selectedPausePoint.cue_index);
            setSelectedPausePoint(null);
          }}
          isRegenerationQueued={regenerationQueue.includes(selectedPausePoint.cue_index)}
        />
      )}
    </div>
  );
}

View file

@ -0,0 +1,2 @@
export { TimelinePreview } from './TimelinePreview';
export { PausePointEditor } from './PausePointEditor';

View file

@ -0,0 +1,116 @@
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { apiClient } from '../lib/api';
/**
 * Query hook that loads the accessible-video edit state for one job/language
 * pair during QC review. The query stays disabled until both identifiers are
 * non-empty.
 */
export function useAccessibleVideoEditState(jobId: string, language: string) {
  const hasIdentifiers = Boolean(jobId) && Boolean(language);
  const loadEditState = () => apiClient.getAccessibleVideoEditState(jobId, language);

  return useQuery({
    queryKey: ['jobs', jobId, 'accessible-video', language, 'edit-state'],
    queryFn: loadEditState,
    enabled: hasIdentifiers,
    staleTime: 30000, // 30 seconds
    refetchOnWindowFocus: false,
  });
}
/**
 * Mutation hook that saves an adjusted timing for a single pause point.
 *
 * On success the cached edit state of the affected job/language is
 * invalidated so it is refetched with the new value.
 */
export function useUpdatePausePoint() {
  const queryClient = useQueryClient();

  type UpdateVariables = {
    jobId: string;
    language: string;
    cueIndex: number;
    adjustedMs: number;
  };

  const savePausePoint = (vars: UpdateVariables) =>
    apiClient.updatePausePoint(vars.jobId, vars.language, vars.cueIndex, vars.adjustedMs);

  return useMutation({
    mutationFn: savePausePoint,
    onSuccess: (_result, vars) => {
      const editStateKey = ['jobs', vars.jobId, 'accessible-video', vars.language, 'edit-state'];
      queryClient.invalidateQueries({ queryKey: editStateKey });
    },
  });
}
/**
 * Mutation hook that adds one or more cues to the TTS regeneration queue.
 *
 * On success the cached edit state of the affected job/language is
 * invalidated so the queue is reloaded from the server.
 */
export function useQueueTTSRegeneration() {
  const queryClient = useQueryClient();

  type QueueVariables = {
    jobId: string;
    language: string;
    cueIndices: number[];
  };

  const queueCues = (vars: QueueVariables) =>
    apiClient.queueTTSRegeneration(vars.jobId, vars.language, vars.cueIndices);

  return useMutation({
    mutationFn: queueCues,
    onSuccess: (_result, vars) => {
      const editStateKey = ['jobs', vars.jobId, 'accessible-video', vars.language, 'edit-state'];
      queryClient.invalidateQueries({ queryKey: editStateKey });
    },
  });
}
/**
 * Mutation hook that takes a single cue out of the TTS regeneration queue.
 *
 * On success the cached edit state of the affected job/language is
 * invalidated so the queue is reloaded from the server.
 */
export function useRemoveTTSRegeneration() {
  const queryClient = useQueryClient();

  type RemoveVariables = {
    jobId: string;
    language: string;
    cueIndex: number;
  };

  const removeCue = (vars: RemoveVariables) =>
    apiClient.removeTTSRegeneration(vars.jobId, vars.language, vars.cueIndex);

  return useMutation({
    mutationFn: removeCue,
    onSuccess: (_result, vars) => {
      const editStateKey = ['jobs', vars.jobId, 'accessible-video', vars.language, 'edit-state'];
      queryClient.invalidateQueries({ queryKey: editStateKey });
    },
  });
}
/**
 * Mutation hook that triggers an accessible-video re-render with the QC changes.
 *
 * Variables:
 * - `jobId` / `language`: which accessible video to re-render.
 * - `whisperRefine`: optional flag forwarded to the API; defaults to `false`.
 *
 * On success every query whose key starts with `['jobs']` is invalidated.
 */
export function useRerenderAccessibleVideo() {
  const queryClient = useQueryClient();

  return useMutation({
    mutationFn: ({
      jobId,
      language,
      whisperRefine = false,
    }: {
      jobId: string;
      language: string;
      whisperRefine?: boolean;
    }) => apiClient.rerenderAccessibleVideo(jobId, language, whisperRefine),
    onSuccess: () => {
      // Query keys are matched by prefix, so this single call already covers
      // ['jobs', jobId] and ['jobs', jobId, 'accessible-video', language,
      // 'edit-state']. Invalidating those separately only repeated the same
      // invalidation (and refetch) for the active queries.
      queryClient.invalidateQueries({ queryKey: ['jobs'] });
    },
  });
}

View file

@ -33,6 +33,8 @@ import type {
ReviewNoteCreateRequest,
ReviewNoteUpdateRequest,
ReviewNotesListResponse,
AccessibleVideoEditState,
PausePointData,
} from '../types/api';
const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000';
@ -386,6 +388,60 @@ class ApiClient {
async deleteReviewNote(jobId: string, noteId: string): Promise<void> {
await this.client.delete(`/jobs/${jobId}/review-notes/${noteId}`);
}
// Accessible Video QC Editing endpoints
/** GET the accessible-video edit state for one job and language; resolves with the response body. */
async getAccessibleVideoEditState(jobId: string, language: string): Promise<AccessibleVideoEditState> {
  const endpoint = `/jobs/${jobId}/accessible-video/${language}/edit-state`;
  const { data } = await this.client.get(endpoint);
  return data;
}
/**
 * PATCH the adjusted timing of a single pause point.
 * Sends `{ adjusted_ms }` as the request body and resolves with the response body.
 */
async updatePausePoint(
  jobId: string,
  language: string,
  cueIndex: number,
  adjustedMs: number
): Promise<PausePointData> {
  const endpoint = `/jobs/${jobId}/accessible-video/${language}/pause-points/${cueIndex}`;
  const payload = { adjusted_ms: adjustedMs };
  const { data } = await this.client.patch(endpoint, payload);
  return data;
}
/**
 * POST a list of cue indices to the TTS regeneration queue.
 * Sends `{ cue_indices }` as the request body and resolves with the response body.
 */
async queueTTSRegeneration(
  jobId: string,
  language: string,
  cueIndices: number[]
): Promise<{ message: string; queued_cues: number[] }> {
  const endpoint = `/jobs/${jobId}/accessible-video/${language}/tts-regeneration`;
  const payload = { cue_indices: cueIndices };
  const { data } = await this.client.post(endpoint, payload);
  return data;
}
/** DELETE a single cue from the TTS regeneration queue; resolves with the response body. */
async removeTTSRegeneration(
  jobId: string,
  language: string,
  cueIndex: number
): Promise<{ message: string }> {
  const endpoint = `/jobs/${jobId}/accessible-video/${language}/tts-regeneration/${cueIndex}`;
  const { data } = await this.client.delete(endpoint);
  return data;
}
/**
 * POST a re-render request for the accessible video of one job and language.
 * Sends `{ whisper_refine }` (false unless the caller passes true) and
 * resolves with the response body.
 */
async rerenderAccessibleVideo(
  jobId: string,
  language: string,
  whisperRefine: boolean = false
): Promise<Job> {
  const endpoint = `/jobs/${jobId}/accessible-video/${language}/re-render`;
  const payload = { whisper_refine: whisperRefine };
  const { data } = await this.client.post(endpoint, payload);
  return data;
}
}
export const apiClient = new ApiClient();

View file

@ -1,12 +1,21 @@
import { useState, useEffect } from 'react';
import { useState, useEffect, useRef } from 'react';
import { useParams, useNavigate } from 'react-router-dom';
import { useJob, useApproveEnglish, useRejectJob, useJobVttContent, useUpdateJobVtt, useJobDownloads, useAdjustVttTiming } from '../../hooks/useJob';
import {
useAccessibleVideoEditState,
useUpdatePausePoint,
useQueueTTSRegeneration,
useRemoveTTSRegeneration,
useRerenderAccessibleVideo,
} from '../../hooks/useAccessibleVideoEdit';
import { StatusBadge } from '../../components/StatusBadge';
import { VttEditor } from '../../components/VttEditor/VttEditor';
import { VideoWithCaptions } from '../../components/VideoWithCaptions';
import { VoiceSelector } from '../../components/VoiceSelector';
import { TimelinePreview } from '../../components/TimelinePreview';
import { RerenderControls } from '../../components/RerenderControls';
import { useToastContext } from '../../contexts/ToastContext';
import type { TTSPreferences, AccessibleVideoMethod } from '../../types/api';
import type { TTSPreferences, AccessibleVideoMethod, VideoSegmentMetadata, PausePointData } from '../../types/api';
export function QCDetail() {
const { id } = useParams<{ id: string }>();
@ -15,13 +24,42 @@ export function QCDetail() {
const { data: job, isLoading, error } = useJob(id!);
// Get source language from job (default to 'en' for backwards compatibility)
const sourceLanguage = job?.source?.language || 'en';
const { data: vttContent, isLoading: vttLoading } = useJobVttContent(id!, sourceLanguage);
// Get all available languages from outputs (after workflow change, translations happen before QC)
const availableLanguages = job?.outputs ? Object.keys(job.outputs) : [sourceLanguage];
// Language selection for QC review
const [selectedLanguage, setSelectedLanguage] = useState<string>(sourceLanguage);
// Video mode: original video or accessible video with AD
const [videoMode, setVideoMode] = useState<'original' | 'accessible'>('original');
// Track video current time for timeline
const [currentVideoTimeMs, setCurrentVideoTimeMs] = useState(0);
const videoRef = useRef<HTMLVideoElement | null>(null);
// Local state for pending regenerations (cleared on successful re-render)
const [pendingRegenerations, setPendingRegenerations] = useState<number[]>([]);
const [pausePointsModified, setPausePointsModified] = useState(false);
// Fetch VTT content for selected language
const { data: vttContent, isLoading: vttLoading } = useJobVttContent(id!, selectedLanguage);
const { data: downloads } = useJobDownloads(id!);
// Fetch accessible video edit state for selected language
const { data: editState, isLoading: editStateLoading } = useAccessibleVideoEditState(id!, selectedLanguage);
// Mutations for accessible video editing
const updatePausePointMutation = useUpdatePausePoint();
const queueTTSRegenerationMutation = useQueueTTSRegeneration();
const removeTTSRegenerationMutation = useRemoveTTSRegeneration();
const rerenderMutation = useRerenderAccessibleVideo();
const approveEnglishMutation = useApproveEnglish();
const rejectJobMutation = useRejectJob();
const updateVttMutation = useUpdateJobVtt();
const adjustTimingMutation = useAdjustVttTiming();
const [reviewNotes, setReviewNotes] = useState('');
const [showRejectForm, setShowRejectForm] = useState(false);
const [captionsVtt, setCaptionsVtt] = useState('');
@ -44,13 +82,44 @@ export function QCDetail() {
});
const [accessibleVideoMethod, setAccessibleVideoMethod] = useState<AccessibleVideoMethod>('pause_insert');
const isProcessing = approveEnglishMutation.isPending || rejectJobMutation.isPending || updateVttMutation.isPending || adjustTimingMutation.isPending;
const isProcessing = approveEnglishMutation.isPending || rejectJobMutation.isPending || updateVttMutation.isPending || adjustTimingMutation.isPending || rerenderMutation.isPending;
const isRendering = rerenderMutation.isPending || job?.status === 'rendering_qc';
// Get video URL from downloads
// Initialize selected language from source language when job loads
useEffect(() => {
if (sourceLanguage) {
setSelectedLanguage(sourceLanguage);
}
}, [sourceLanguage]);
// Reset states when language changes
useEffect(() => {
setPendingRegenerations([]);
setPausePointsModified(false);
}, [selectedLanguage]);
// Sync pending regenerations from server edit state
useEffect(() => {
if (editState?.tts_regeneration_queue) {
const queuedIndices = editState.tts_regeneration_queue
.filter(item => item.status === 'pending')
.map(item => item.cue_index);
setPendingRegenerations(queuedIndices);
}
}, [editState?.tts_regeneration_queue]);
// Get video URL from downloads - support both original and accessible video
const videoUrl = typeof downloads?.downloads?.source_video === 'string'
? downloads.downloads.source_video
: '';
// Get accessible video URL for selected language
const accessibleVideoUrl = editState?.accessible_video_url || (
downloads?.downloads?.[selectedLanguage] &&
typeof downloads.downloads[selectedLanguage] === 'object' &&
(downloads.downloads[selectedLanguage] as { accessible_video_mp4?: string }).accessible_video_mp4
) || '';
// Load VTT content when fetched
useEffect(() => {
if (vttContent) {
@ -128,7 +197,7 @@ export function QCDetail() {
data: {
captions_vtt: captionsVtt,
audio_description_vtt: adVtt,
language: sourceLanguage // Use source language instead of hardcoded 'en'
language: selectedLanguage // Use selected language for multi-language support
}
});
setHasUnsavedChanges(false);
@ -201,7 +270,7 @@ export function QCDetail() {
await adjustTimingMutation.mutateAsync({
id,
offsetSeconds: timingOffset,
language: sourceLanguage, // Use source language instead of hardcoded 'en'
language: selectedLanguage, // Use selected language for multi-language support
adjustCaptions,
adjustAudioDescription,
});
@ -214,6 +283,99 @@ export function QCDetail() {
}
};
// Accessible video editing handlers
// Timeline segment click: jump the accessible-video player to the segment start.
const handleSegmentClick = (segment: VideoSegmentMetadata) => {
  const player = videoRef.current;
  if (player) {
    // start_ms is in milliseconds; the video element's currentTime is in seconds.
    player.currentTime = segment.start_ms / 1000;
  }

  const isAdCueSegment = segment.is_freeze_frame && segment.cue_index !== null;
  if (!isAdCueSegment) {
    return;
  }
  // Placeholder: this is where the matching cue could be highlighted in the
  // VTT editor (e.g. by dispatching an event or setting state).
  console.log(`AD cue ${segment.cue_index} clicked`);
};
// Pause point click: jump the player to the pause point's effective time,
// i.e. the adjusted time when one is set, otherwise the original time.
const handlePausePointClick = (pausePoint: PausePointData) => {
  const targetMs = pausePoint.adjusted_ms ?? pausePoint.original_ms;
  const player = videoRef.current;
  if (!player) {
    return;
  }
  player.currentTime = targetMs / 1000;
};
// Save a new timing for one pause point of the selected language and flag
// that pause points were modified; shows a toast for success or failure.
const handlePausePointUpdate = async (cueIndex: number, adjustedMs: number) => {
  if (!id) return;

  const variables = {
    jobId: id,
    language: selectedLanguage,
    cueIndex,
    adjustedMs,
  };

  try {
    await updatePausePointMutation.mutateAsync(variables);
    setPausePointsModified(true);
    toast.toastOnly.success(`Pause point ${cueIndex} updated to ${adjustedMs}ms`);
  } catch (err) {
    console.error('Failed to update pause point:', err);
    toast.toastOnly.error('Failed to update pause point. Please try again.');
  }
};
// Queue TTS regeneration for one cue of the selected language and mirror it in
// the local pending list; shows a toast for success or failure.
const handleRegenerateTTS = async (cueIndex: number) => {
  if (!id) return;

  try {
    await queueTTSRegenerationMutation.mutateAsync({
      jobId: id,
      language: selectedLanguage,
      cueIndices: [cueIndex],
    });
    // Only add the cue if it is not already listed. A duplicate entry would
    // inflate the pending count and make "clear queue" issue the same removal
    // request twice.
    setPendingRegenerations(prev => (prev.includes(cueIndex) ? prev : [...prev, cueIndex]));
    toast.toastOnly.success(`TTS regeneration queued for cue ${cueIndex}`);
  } catch (error) {
    console.error('Failed to queue TTS regeneration:', error);
    toast.toastOnly.error('Failed to queue TTS regeneration. Please try again.');
  }
};
// Remove every locally pending cue from the server-side TTS regeneration queue.
// Removals run one at a time; a failed removal does not stop the remaining ones.
const handleClearRegenerationQueue = async () => {
  if (!id) return;

  // Cues whose removal request failed; they are still queued on the server.
  const failedCues: number[] = [];

  for (const cueIndex of pendingRegenerations) {
    try {
      await removeTTSRegenerationMutation.mutateAsync({
        jobId: id,
        language: selectedLanguage,
        cueIndex,
      });
    } catch (error) {
      console.error(`Failed to remove cue ${cueIndex} from queue:`, error);
      failedCues.push(cueIndex);
    }
  }

  // Keep failed cues in the local list so the UI does not claim an empty
  // queue while the server still holds them.
  setPendingRegenerations(failedCues);

  if (failedCues.length === 0) {
    toast.toastOnly.success('Regeneration queue cleared');
  } else {
    toast.toastOnly.error(
      `Failed to remove ${failedCues.length} cue(s) from the regeneration queue. Please try again.`
    );
  }
};
// Start a re-render of the accessible video for the selected language. Once the
// request succeeds, the local pending-regeneration list and the
// "pause points modified" flag are reset.
const handleRender = async ({ whisperRefine }: { whisperRefine: boolean }) => {
  if (!id) return;

  const variables = {
    jobId: id,
    language: selectedLanguage,
    whisperRefine,
  };

  try {
    await rerenderMutation.mutateAsync(variables);
    toast.toastOnly.success('Accessible video rendering started');
    setPendingRegenerations([]);
    setPausePointsModified(false);
  } catch (err) {
    console.error('Failed to start re-render:', err);
    toast.toastOnly.error('Failed to start re-render. Please try again.');
  }
};
if (isLoading || vttLoading) {
return (
<div className="container mx-auto px-4 py-8">
@ -275,6 +437,49 @@ export function QCDetail() {
</div>
)}
{/* Language Tabs - shown when multiple languages are available */}
{availableLanguages.length > 1 && (
<div className="mb-6">
<div className="flex items-center gap-2 mb-2">
<span className="text-sm font-medium text-gray-700">Language:</span>
<span className="text-xs text-gray-500">({availableLanguages.length} translations available)</span>
</div>
<div className="flex flex-wrap gap-2">
{availableLanguages.map((lang) => (
<button
key={lang}
onClick={() => setSelectedLanguage(lang)}
className={`px-4 py-2 text-sm font-medium rounded-lg transition-colors ${
selectedLanguage === lang
? 'bg-indigo-600 text-white'
: 'bg-gray-100 text-gray-700 hover:bg-gray-200'
}`}
>
{lang.toUpperCase()}
{lang === sourceLanguage && (
<span className="ml-1 text-xs opacity-75">(Source)</span>
)}
</button>
))}
</div>
</div>
)}
{/* Rendering Status Banner */}
{isRendering && (
<div className="mb-6 p-4 bg-purple-50 border border-purple-200 rounded-md">
<div className="flex items-center gap-3">
<svg className="animate-spin h-5 w-5 text-purple-600" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
<span className="font-medium text-purple-900">
Accessible video is being re-rendered with your changes...
</span>
</div>
</div>
)}
{/* View Mode Toggle and Shortcuts */}
<div className="mb-6 flex items-center justify-between">
<div className="flex items-center space-x-4">
@ -322,21 +527,119 @@ export function QCDetail() {
{(viewMode === 'side-by-side' || viewMode === 'video-only') && (
<div className="mb-8">
<div className="bg-white border border-gray-200 rounded-lg p-6">
<h3 className="text-lg font-medium text-gray-900 mb-4">Video Preview</h3>
{videoUrl ? (
<VideoWithCaptions
videoUrl={videoUrl}
captionsVtt={captionsVtt}
audioDescriptionVtt={adVtt}
sourceLanguage={sourceLanguage}
/>
) : (
<div className="text-center py-8 text-gray-500">
<svg className="w-12 h-12 mx-auto mb-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
</svg>
<p>Video preview will be available once processing begins</p>
</div>
<div className="flex items-center justify-between mb-4">
<h3 className="text-lg font-medium text-gray-900">Video Preview</h3>
{/* Video Mode Tabs - only show if accessible video is available */}
{(accessibleVideoUrl || editState?.video_segments?.length) && (
<div className="flex rounded-lg border border-gray-300 overflow-hidden">
<button
onClick={() => setVideoMode('original')}
className={`px-4 py-1.5 text-sm font-medium transition-colors ${
videoMode === 'original'
? 'bg-blue-600 text-white'
: 'bg-white text-gray-700 hover:bg-gray-50'
}`}
>
Original Video
</button>
<button
onClick={() => setVideoMode('accessible')}
className={`px-4 py-1.5 text-sm font-medium border-l border-gray-300 transition-colors ${
videoMode === 'accessible'
? 'bg-purple-600 text-white'
: 'bg-white text-gray-700 hover:bg-gray-50'
}`}
>
Accessible Video
</button>
</div>
)}
</div>
{/* Original Video Mode */}
{videoMode === 'original' && (
<>
{videoUrl ? (
<VideoWithCaptions
videoUrl={videoUrl}
captionsVtt={captionsVtt}
audioDescriptionVtt={adVtt}
sourceLanguage={selectedLanguage}
/>
) : (
<div className="text-center py-8 text-gray-500">
<svg className="w-12 h-12 mx-auto mb-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
</svg>
<p>Video preview will be available once processing begins</p>
</div>
)}
</>
)}
{/* Accessible Video Mode */}
{videoMode === 'accessible' && (
<>
{accessibleVideoUrl ? (
<div className="space-y-4">
<video
ref={videoRef}
src={accessibleVideoUrl}
controls
className="w-full rounded-lg"
onTimeUpdate={(e) => setCurrentVideoTimeMs(e.currentTarget.currentTime * 1000)}
>
Your browser does not support the video tag.
</video>
{/* Timeline Preview */}
{editState && editState.video_segments && editState.video_segments.length > 0 && (
<div className="mt-4 p-4 bg-gray-50 rounded-lg">
<h4 className="text-sm font-medium text-gray-700 mb-3">Timeline Preview</h4>
<TimelinePreview
segments={editState.video_segments}
pausePoints={editState.pause_points || []}
totalDurationMs={editState.total_duration_ms || 0}
currentTimeMs={currentVideoTimeMs}
onSegmentClick={handleSegmentClick}
onPausePointClick={handlePausePointClick}
onPausePointUpdate={handlePausePointUpdate}
onRegenerateTTS={handleRegenerateTTS}
regenerationQueue={pendingRegenerations}
/>
</div>
)}
{/* Re-render Controls */}
{editState && (
<RerenderControls
pendingRegenerations={pendingRegenerations}
pausePointsModified={pausePointsModified}
isRendering={isRendering}
onRender={handleRender}
onClearQueue={handleClearRegenerationQueue}
/>
)}
</div>
) : editStateLoading ? (
<div className="text-center py-8 text-gray-500">
<svg className="animate-spin h-8 w-8 mx-auto mb-4 text-purple-600" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
<p>Loading accessible video...</p>
</div>
) : (
<div className="text-center py-8 text-gray-500">
<svg className="w-12 h-12 mx-auto mb-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
</svg>
<p>Accessible video not available for this language yet.</p>
<p className="text-sm mt-1">Translation and TTS synthesis may still be in progress.</p>
</div>
)}
</>
)}
</div>
</div>
@ -349,16 +652,16 @@ export function QCDetail() {
<VttEditor
vttContent={captionsVtt}
onChange={handleCaptionsChange}
title="Closed Captions"
title={`Closed Captions (${selectedLanguage.toUpperCase()})`}
readOnly={isProcessing}
/>
)}
{job.requested_outputs.audio_description_vtt && (
<VttEditor
vttContent={adVtt}
onChange={handleAdChange}
title="Audio Description"
title={`Audio Description (${selectedLanguage.toUpperCase()})`}
readOnly={isProcessing}
/>
)}

View file

@ -12,6 +12,7 @@ export type JobStatus =
| "tts_failed" // TTS synthesis failed after retries, requires reprocessing
| "rendering_video" // Accessible video rendering in progress
| "render_failed" // Accessible video rendering failed, requires reprocessing
| "rendering_qc" // Re-rendering accessible video during QC review
| "pending_final_review"
| "completed";
@ -329,4 +330,55 @@ export interface ReviewNoteUpdateRequest {
/** Response shape carrying a list of review notes together with a count. */
export interface ReviewNotesListResponse {
notes: ReviewNote[];
// NOTE(review): presumably the number of notes; confirm whether this can exceed notes.length (pagination).
total: number;
}
// Accessible Video QC Editing Types
/** One pause point of an accessible video, as returned in the edit state. */
export interface PausePointData {
// Index of the cue this pause point belongs to; used to address the pause point in API calls.
cue_index: number;
// Pause time in milliseconds; used whenever adjusted_ms is null.
original_ms: number;
// Adjusted time in milliseconds; takes precedence over original_ms when not null.
adjusted_ms: number | null;
// NOTE(review): presumably the allowed range (in ms) for adjusted_ms; confirm against the backend validation.
min_bound_ms: number;
max_bound_ms: number;
}
/** One segment of the rendered accessible video, shown on the QC timeline. */
export interface VideoSegmentMetadata {
segment_index: number;
// Segment start in milliseconds; the QC page seeks the player to start_ms / 1000 seconds.
start_ms: number;
end_ms: number;
// NOTE(review): presumably the storage location of the persisted segment file; confirm format.
gcs_uri: string;
duration_ms: number;
// True together with a non-null cue_index marks an audio-description segment.
is_freeze_frame: boolean;
cue_index: number | null; // AD cue index for freeze frames
}
// Status of one TTS regeneration queue item; the QC page lists only "pending" items as queued.
export type TTSRegenerationStatus = "pending" | "processing" | "completed" | "failed";
/** One entry of the TTS regeneration queue in the edit state. */
export interface TTSRegenerationItem {
// Index of the cue whose TTS audio should be regenerated.
cue_index: number;
// NOTE(review): presumably a timestamp string (ISO 8601?); confirm the format.
requested_at: string;
// NOTE(review): presumably replacement text to synthesize instead of the current cue text; confirm.
new_text?: string;
status: TTSRegenerationStatus;
// NOTE(review): presumably only set when status is "failed"; confirm.
error_message?: string;
}
/** Edit state of an accessible video for one job and language, as fetched for QC review. */
export interface AccessibleVideoEditState {
pause_points: PausePointData[];
// Segments drawn on the timeline preview.
video_segments: VideoSegmentMetadata[];
// Queue entries; the QC page mirrors the "pending" ones into its local list.
tts_regeneration_queue: TTSRegenerationItem[];
// NOTE(review): presumably a timestamp string, null when never rendered; confirm.
last_render_at: string | null;
// Total duration passed to the timeline preview, in milliseconds.
total_duration_ms: number;
// Playable URL of the accessible video; when null/empty the QC page falls back to the downloads entry.
accessible_video_url: string | null;
}
/** Request body sent when updating a pause point's timing. */
export interface PausePointUpdateRequest {
// New pause time in milliseconds.
adjusted_ms: number;
}
/** Request body sent when queuing cues for TTS regeneration. */
export interface TTSRegenerationQueueRequest {
// Indices of the cues to queue.
cue_indices: number[];
}
/** Request body sent when triggering an accessible-video re-render. */
export interface RerenderAccessibleVideoRequest {
// NOTE(review): presumably enables the optional Whisper refinement pass during re-render; confirm with the backend.
whisper_refine: boolean;
}