feat: add QC accessible video review and editing capabilities
- Reorder workflow: translations now happen BEFORE QC Review step
- Add language tabs to switch between translated languages in QC
- Add video mode tabs (Original Video / Accessible Video)
- Add interactive timeline preview showing video segments and AD cues
- Enable pause point adjustment with millisecond precision
- Add TTS regeneration queue for selective cue re-synthesis
- Add re-render controls with optional Whisper refinement
- Persist video segments and TTS MP3s to GCS for editability
- Add new RENDERING_QC job status for re-render operations
- Create 5 new API endpoints for accessible video editing
- Add rerender_accessible_video.py Celery task

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
c5f59b1079
commit
aa6777d2c2
16 changed files with 2277 additions and 59 deletions
|
|
@ -32,6 +32,17 @@ from ...schemas.job import (
|
|||
VttTimingAdjustRequest,
|
||||
VttUpdateRequest,
|
||||
)
|
||||
from ...schemas.accessible_video import (
|
||||
AccessibleVideoEditStateResponse,
|
||||
PausePointResponse,
|
||||
PausePointUpdateRequest,
|
||||
RerenderAccessibleVideoRequest,
|
||||
TTSRegenerationItem,
|
||||
TTSRegenerationQueueRequest,
|
||||
TTSRegenerationRemoveRequest,
|
||||
VideoSegmentResponse,
|
||||
)
|
||||
from ...models.job import TTSRegenerationRequest
|
||||
from ...services.websocket import connection_manager
|
||||
from ...services.gcs import (
|
||||
gcs_service,
|
||||
|
|
@ -1455,3 +1466,404 @@ async def validate_job_assets(
|
|||
errors=errors,
|
||||
warnings=[] # Can be extended for non-blocking warnings
|
||||
)
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# Accessible Video QC Editing Endpoints
|
||||
# ==============================================================================
|
||||
|
||||
|
||||
@router.get("/{job_id}/accessible-video/{language}/edit-state", response_model=AccessibleVideoEditStateResponse)
async def get_accessible_video_edit_state(
    job_id: str,
    language: str,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Get current pause points, segment metadata, and TTS regeneration queue for QC editing.

    Args:
        job_id: ``_id`` of the job document to read.
        language: Key into the job's ``outputs`` mapping.
        current_user: Authenticated user; must hold one of the listed roles.
        db: Database handle used to load the job.

    Returns:
        AccessibleVideoEditStateResponse built from the stored edit state, with
        the total duration summed over the stored segments and, when an
        accessible video is recorded, a signed URL valid for one hour.

    Raises:
        HTTPException: 404 if the job, the language outputs or the edit state
            are missing; 400 if the job is not in pending_qc or rendering_qc.
    """
    # Function-scope import: only the signed-URL lifetime below needs it.
    from datetime import timedelta

    def _iso(value):
        """Return datetimes as ISO-8601 strings; pass anything else through."""
        return value.isoformat() if isinstance(value, datetime) else value

    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Check job is in QC status
    if job_doc["status"] not in [JobStatus.PENDING_QC.value, JobStatus.RENDERING_QC.value]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc or rendering_qc status (current: {job_doc['status']})"
        )

    # Get language outputs
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No outputs found for language {language}"
        )

    # Get edit state
    edit_state = lang_output.get("accessible_video_edit_state")
    if not edit_state:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No accessible video edit state found for language {language}"
        )

    # `or []` / `or 0` tolerate keys that are present but stored as null.
    segments = edit_state.get("video_segments") or []
    total_duration_ms = sum((s.get("duration_ms") or 0) for s in segments)

    # Get signed URL for accessible video
    accessible_video_gcs = lang_output.get("accessible_video_gcs")
    accessible_video_url = None
    if accessible_video_gcs:
        blob_path = accessible_video_gcs.replace(f"gs://{settings.gcs_bucket}/", "")
        blob = gcs_service.bucket.blob(blob_path)
        # A bare int is read as an absolute epoch timestamp under the library's
        # default (v2) signing, which makes the URL already expired. A timedelta
        # is relative to "now" under both signing versions.
        accessible_video_url = blob.generate_signed_url(expiration=timedelta(hours=1))

    # Convert to response format
    return AccessibleVideoEditStateResponse(
        pause_points=[
            PausePointResponse(
                cue_index=pp.get("cue_index"),
                original_ms=pp.get("original_ms"),
                adjusted_ms=pp.get("adjusted_ms"),
                min_bound_ms=pp.get("min_bound_ms"),
                max_bound_ms=pp.get("max_bound_ms")
            )
            for pp in edit_state.get("pause_points") or []
        ],
        video_segments=[
            VideoSegmentResponse(
                segment_index=seg.get("segment_index"),
                start_ms=seg.get("start_ms"),
                end_ms=seg.get("end_ms"),
                gcs_uri=seg.get("gcs_uri"),
                duration_ms=seg.get("duration_ms"),
                is_freeze_frame=seg.get("is_freeze_frame", False),
                cue_index=seg.get("cue_index")
            )
            for seg in segments
        ],
        tts_regeneration_queue=[
            TTSRegenerationItem(
                cue_index=req.get("cue_index"),
                # The response field is a string; the stored value may be a datetime.
                requested_at=_iso(req.get("requested_at")),
                new_text=req.get("new_text"),
                status=req.get("status", "pending"),
                error_message=req.get("error_message")
            )
            for req in edit_state.get("tts_regeneration_queue") or []
        ],
        last_render_at=_iso(edit_state.get("last_render_at")),
        total_duration_ms=total_duration_ms,
        accessible_video_url=accessible_video_url
    )
|
||||
|
||||
|
||||
@router.patch("/{job_id}/accessible-video/{language}/pause-points/{cue_index}", response_model=PausePointResponse)
async def update_pause_point(
    job_id: str,
    language: str,
    cue_index: int,
    request: PausePointUpdateRequest,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Update a single pause point timing with millisecond precision.

    Args:
        job_id: ``_id`` of the job document to edit.
        language: Key into the job's ``outputs`` mapping.
        cue_index: ``cue_index`` of the pause point to adjust.
        request: Body carrying the new ``adjusted_ms`` value.
        current_user: Authenticated user; must hold one of the listed roles.
        db: Database handle used to load and update the job.

    Returns:
        PausePointResponse describing the pause point after the update.

    Raises:
        HTTPException: 404 if the job, language outputs, edit state or pause
            point are missing; 400 if the job is not in pending_qc or the value
            is non-finite or outside the stored bounds; 409 if the job changed
            between validation and the write.
    """
    # Function-scope import: only the finiteness check below needs it.
    import math

    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Check job is in QC status
    if job_doc["status"] != JobStatus.PENDING_QC.value:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc status for editing (current: {job_doc['status']})"
        )

    # Get edit state
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No outputs found for language {language}"
        )

    edit_state = lang_output.get("accessible_video_edit_state")
    if not edit_state:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No edit state found for language {language}"
        )

    # Find the pause point
    pause_points = edit_state.get("pause_points", [])
    pause_point = next((pp for pp in pause_points if pp.get("cue_index") == cue_index), None)
    if pause_point is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Pause point for cue {cue_index} not found"
        )

    # NaN compares False against every bound, so it would slip through the
    # range checks below and be persisted; reject non-finite values explicitly.
    if not math.isfinite(request.adjusted_ms):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Adjusted value must be a finite number of milliseconds"
        )

    # Validate bounds
    min_bound = pause_point.get("min_bound_ms", 0)
    max_bound = pause_point.get("max_bound_ms", float("inf"))

    if request.adjusted_ms < min_bound:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Adjusted value {request.adjusted_ms}ms is below minimum bound {min_bound}ms"
        )
    if request.adjusted_ms > max_bound:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Adjusted value {request.adjusted_ms}ms is above maximum bound {max_bound}ms"
        )

    # Write only the matched array element (positional `$`) instead of replacing
    # the whole array, so concurrent edits to other pause points are not lost.
    # Repeating the status in the filter closes the check-then-write gap.
    points_path = f"outputs.{language}.accessible_video_edit_state.pause_points"
    write_result = await db.jobs.update_one(
        {
            "_id": job_id,
            "status": JobStatus.PENDING_QC.value,
            f"{points_path}.cue_index": cue_index,
        },
        {
            "$set": {
                f"{points_path}.$.adjusted_ms": request.adjusted_ms,
                "updated_at": datetime.utcnow()
            }
        }
    )
    if write_result.matched_count == 0:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Job was modified while updating the pause point; reload and retry"
        )

    logger.info(f"Updated pause point for cue {cue_index} in job {job_id}/{language}: {request.adjusted_ms}ms")

    return PausePointResponse(
        cue_index=pause_point["cue_index"],
        original_ms=pause_point["original_ms"],
        adjusted_ms=request.adjusted_ms,
        min_bound_ms=pause_point["min_bound_ms"],
        max_bound_ms=pause_point["max_bound_ms"]
    )
|
||||
|
||||
|
||||
@router.post("/{job_id}/accessible-video/{language}/tts-regeneration")
async def queue_tts_regeneration(
    job_id: str,
    language: str,
    request: TTSRegenerationQueueRequest,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Queue TTS regeneration for specific cues (uses current AD VTT text).

    Cue indices already in the queue, or repeated within the same request, are
    queued only once.

    Args:
        job_id: ``_id`` of the job document to edit.
        language: Key into the job's ``outputs`` mapping.
        request: Body carrying the ``cue_indices`` to queue.
        current_user: Authenticated user; must hold one of the listed roles.
        db: Database handle used to load and update the job.

    Returns:
        Dict with a human-readable ``message`` and the ``queued_cues`` that
        were actually added.

    Raises:
        HTTPException: 404 if the job, language outputs or edit state are
            missing; 400 if the job is not in pending_qc.
    """
    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Check job is in QC status
    if job_doc["status"] != JobStatus.PENDING_QC.value:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc status for editing (current: {job_doc['status']})"
        )

    # Get edit state
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No outputs found for language {language}"
        )

    edit_state = lang_output.get("accessible_video_edit_state")
    if not edit_state:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No edit state found for language {language}"
        )

    # Cue indices that must not be queued again.
    seen_cues = {req.get("cue_index") for req in edit_state.get("tts_regeneration_queue") or []}

    requested_at = datetime.utcnow().isoformat()
    new_requests = []
    added = []
    for cue_idx in request.cue_indices:
        if cue_idx in seen_cues:
            continue
        # Record the index immediately so a repeat inside this same request
        # is also treated as a duplicate.
        seen_cues.add(cue_idx)
        new_requests.append({
            "cue_index": cue_idx,
            "requested_at": requested_at,
            "new_text": None,  # Will use current VTT text
            "status": "pending",
            "error_message": None
        })
        added.append(cue_idx)

    # Append with $push rather than overwriting the whole queue, so entries
    # written concurrently by another request are preserved.
    update_ops = {"$set": {"updated_at": datetime.utcnow()}}
    if new_requests:
        update_ops["$push"] = {
            f"outputs.{language}.accessible_video_edit_state.tts_regeneration_queue": {
                "$each": new_requests
            }
        }
    await db.jobs.update_one({"_id": job_id}, update_ops)

    logger.info(f"Queued TTS regeneration for cues {added} in job {job_id}/{language}")

    return {"message": f"Queued {len(added)} cue(s) for regeneration", "queued_cues": added}
|
||||
|
||||
|
||||
@router.delete("/{job_id}/accessible-video/{language}/tts-regeneration/{cue_index}")
async def remove_tts_regeneration(
    job_id: str,
    language: str,
    cue_index: int,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Remove a cue from the TTS regeneration queue.

    Args:
        job_id: ``_id`` of the job document to edit.
        language: Key into the job's ``outputs`` mapping.
        cue_index: ``cue_index`` of the queue entry to remove.
        current_user: Authenticated user; must hold one of the listed roles.
        db: Database handle used to load and update the job.

    Returns:
        Dict with a human-readable ``message``.

    Raises:
        HTTPException: 404 if the job or edit state is missing or the cue is
            not queued; 400 if the job is not in pending_qc.
    """
    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Same guard as the other editing endpoints: the queue must not change
    # while the job is in any status other than pending_qc.
    if job_doc["status"] != JobStatus.PENDING_QC.value:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc status for editing (current: {job_doc['status']})"
        )

    # Get edit state
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output or not lang_output.get("accessible_video_edit_state"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No edit state found for language {language}"
        )

    edit_state = lang_output["accessible_video_edit_state"]
    current_queue = edit_state.get("tts_regeneration_queue") or []

    if not any(req.get("cue_index") == cue_index for req in current_queue):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Cue {cue_index} not in regeneration queue"
        )

    # $pull removes only the matching entries, so queue entries added
    # concurrently by another request are not overwritten.
    await db.jobs.update_one(
        {"_id": job_id},
        {
            "$pull": {
                f"outputs.{language}.accessible_video_edit_state.tts_regeneration_queue": {
                    "cue_index": cue_index
                }
            },
            "$set": {
                "updated_at": datetime.utcnow()
            }
        }
    )

    logger.info(f"Removed cue {cue_index} from TTS regeneration queue for job {job_id}/{language}")

    return {"message": f"Removed cue {cue_index} from regeneration queue"}
|
||||
|
||||
|
||||
@router.post("/{job_id}/accessible-video/{language}/re-render", response_model=JobResponse)
async def trigger_accessible_video_rerender(
    job_id: str,
    language: str,
    request: RerenderAccessibleVideoRequest,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """
    Trigger re-synthesis of accessible video with QC changes.

    - Regenerates only queued TTS segments (others reuse existing MP3s)
    - Optionally runs Whisper pause point refinement

    The job is moved from pending_qc to rendering_qc in one conditional update,
    so only one of several simultaneous requests enqueues a render. If the task
    cannot be enqueued, the status is put back to pending_qc.

    Args:
        job_id: ``_id`` of the job document to re-render.
        language: Key into the job's ``outputs`` mapping.
        request: Body carrying the ``whisper_refine`` flag.
        current_user: Authenticated user; recorded in the review history.
        db: Database handle used to load and update the job.

    Returns:
        JobResponse built from the job document after the status change.

    Raises:
        HTTPException: 404 if the job or edit state is missing; 400 if the job
            is not in pending_qc; 409 if another request changed the status
            first; 503 if the render task could not be enqueued.
    """
    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Job not found"
        )

    # Check job is in QC status
    if job_doc["status"] != JobStatus.PENDING_QC.value:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Job must be in pending_qc status to re-render (current: {job_doc['status']})"
        )

    # Get edit state
    lang_output = job_doc.get("outputs", {}).get(language)
    if not lang_output or not lang_output.get("accessible_video_edit_state"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"No edit state found for language {language}"
        )

    edit_state = lang_output["accessible_video_edit_state"]

    # Get cues to regenerate
    regenerate_cues = [
        req.get("cue_index")
        for req in edit_state.get("tts_regeneration_queue") or []
        if req.get("status") == "pending"
    ]

    # Import before touching the database so an import failure cannot leave
    # the job in rendering_qc with no task behind it.
    from ...tasks.rerender_accessible_video import rerender_accessible_video_task

    # Conditional transition: the filter repeats the status so only one
    # concurrent caller can move the job to RENDERING_QC.
    now = datetime.utcnow()
    transition = await db.jobs.update_one(
        {"_id": job_id, "status": JobStatus.PENDING_QC.value},
        {
            "$set": {
                "status": JobStatus.RENDERING_QC.value,
                "updated_at": now
            },
            "$push": {
                "review.history": {
                    "at": now,
                    "status": JobStatus.RENDERING_QC.value,
                    "by": str(current_user.id),
                    "notes": f"Re-rendering {language} with {len(regenerate_cues)} TTS regeneration(s), whisper_refine={request.whisper_refine}"
                }
            }
        }
    )
    if transition.matched_count == 0:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Job is no longer in pending_qc status; a re-render may already be in progress"
        )

    # Trigger re-render task
    try:
        rerender_accessible_video_task.delay(
            job_id=job_id,
            language=language,
            regenerate_cue_indices=regenerate_cues,
            whisper_refine=request.whisper_refine
        )
    except Exception as exc:
        # Enqueueing failed, so nothing will ever finish the render: put the
        # job back so the reviewer can retry.
        logger.exception(f"Failed to enqueue accessible video re-render for job {job_id}/{language}")
        await db.jobs.update_one(
            {"_id": job_id, "status": JobStatus.RENDERING_QC.value},
            {"$set": {"status": JobStatus.PENDING_QC.value, "updated_at": datetime.utcnow()}}
        )
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Failed to queue re-render task"
        ) from exc

    logger.info(
        f"Triggered accessible video re-render for job {job_id}/{language}: "
        f"regenerate_cues={regenerate_cues}, whisper_refine={request.whisper_refine}"
    )

    # Get updated job
    result = await db.jobs.find_one({"_id": job_id})

    return JobResponse(
        id=str(result["_id"]),
        title=result["title"],
        status=result["status"],
        source=result["source"],
        requested_outputs=RequestedOutputs(**result["requested_outputs"]),
        review=result.get("review", {"notes": "", "history": []}),
        outputs=result.get("outputs"),
        created_at=result["created_at"].isoformat(),
        updated_at=result["updated_at"].isoformat()
    )
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ class JobStatus(str, Enum):
|
|||
TTS_FAILED = "tts_failed" # TTS synthesis failed after retries, requires reprocessing
|
||||
RENDERING_VIDEO = "rendering_video" # Accessible video rendering in progress
|
||||
RENDER_FAILED = "render_failed" # Accessible video rendering failed, requires reprocessing
|
||||
RENDERING_QC = "rendering_qc" # Re-rendering accessible video during QC review
|
||||
PENDING_FINAL_REVIEW = "pending_final_review"
|
||||
COMPLETED = "completed"
|
||||
|
||||
|
|
@ -64,6 +65,44 @@ class RequestedOutputs(BaseModel):
|
|||
translation_mode: Literal["traditional", "video_native"] = "video_native"
|
||||
|
||||
|
||||
class PausePointData(BaseModel):
    """Pause point timing data for accessible video editing during QC.

    Fields:
        cue_index: AD cue index this pause point belongs to.
        original_ms: Original pause point timestamp (ms).
        adjusted_ms: User-adjusted timestamp (ms); None means use the original.
        min_bound_ms: Minimum allowed value (end of previous AD segment).
        max_bound_ms: Maximum allowed value (start of next AD segment).
    """

    cue_index: int
    original_ms: float
    adjusted_ms: Optional[float] = None
    min_bound_ms: float
    max_bound_ms: float
|
||||
|
||||
|
||||
class VideoSegmentMetadata(BaseModel):
    """Metadata for a video segment between pause points.

    Fields:
        segment_index: 0-based segment index.
        start_ms: Start timestamp in source video (ms).
        end_ms: End timestamp in source video (ms).
        gcs_uri: GCS path to segment MP4.
        duration_ms: Actual segment duration (ms).
        is_freeze_frame: True if this is a freeze frame segment with AD audio.
        cue_index: AD cue index (only for freeze frame segments).
    """

    segment_index: int
    start_ms: float
    end_ms: float
    gcs_uri: str
    duration_ms: float
    is_freeze_frame: bool = False
    cue_index: Optional[int] = None
|
||||
|
||||
|
||||
class TTSRegenerationRequest(BaseModel):
    """Request to regenerate TTS for a specific cue during QC.

    Fields:
        cue_index: Cue to regenerate.
        requested_at: When the regeneration was requested.
        new_text: If provided, use this text instead of current VTT.
        status: One of "pending", "processing", "completed", "failed".
        error_message: Optional error text.
    """

    cue_index: int
    requested_at: datetime
    new_text: Optional[str] = None
    status: Literal["pending", "processing", "completed", "failed"] = "pending"
    error_message: Optional[str] = None
|
||||
|
||||
|
||||
class AccessibleVideoEditState(BaseModel):
    """Editable state for accessible video during QC review.

    Fields:
        pause_points: Pause point records (empty by default).
        video_segments: Segment metadata records (empty by default).
        tts_regeneration_queue: Regeneration requests (empty by default).
        last_render_at: Timestamp of the last render, if any.
        whisper_refine_enabled: Off by default; the user enables it if cue
            positions changed.
    """

    pause_points: list[PausePointData] = []
    video_segments: list[VideoSegmentMetadata] = []
    tts_regeneration_queue: list[TTSRegenerationRequest] = []
    last_render_at: Optional[datetime] = None
    whisper_refine_enabled: bool = False
|
||||
|
||||
|
||||
class LangOutput(BaseModel):
|
||||
captions_vtt_gcs: Optional[str] = None
|
||||
ad_vtt_gcs: Optional[str] = None
|
||||
|
|
@ -73,6 +112,9 @@ class LangOutput(BaseModel):
|
|||
accessible_video_method: Optional[Literal["overlay", "pause_insert"]] = None
|
||||
retimed_captions_vtt_gcs: Optional[str] = None # Re-timed captions for pause-insert method
|
||||
ad_cues_gcs_prefix: Optional[str] = None # GCS path prefix for per-cue MP3 segments
|
||||
# QC editing state for accessible video
|
||||
video_segments_gcs_prefix: Optional[str] = None # GCS prefix for persisted video segments
|
||||
accessible_video_edit_state: Optional[AccessibleVideoEditState] = None
|
||||
origin: Optional[Literal["translate", "transcreate", "gemini_translate", "video_native"]] = None
|
||||
qa_notes: Optional[str] = None
|
||||
|
||||
|
|
|
|||
|
|
@ -122,3 +122,89 @@ class AccessibleVideoProgress(BaseModel):
|
|||
error_message: Optional[str] = None
|
||||
started_at: Optional[str] = None
|
||||
completed_at: Optional[str] = None
|
||||
|
||||
|
||||
# === QC Review Accessible Video Editing Schemas ===
|
||||
|
||||
|
||||
class PausePointResponse(BaseModel):
    """Pause point timing data for QC editing."""

    # All timestamps and bounds are expressed in milliseconds.
    cue_index: int = Field(
        ...,
        description="AD cue index this pause point belongs to",
    )
    original_ms: float = Field(
        ...,
        description="Original pause point timestamp (ms)",
    )
    adjusted_ms: Optional[float] = Field(
        default=None,
        description="User-adjusted timestamp (ms)",
    )
    min_bound_ms: float = Field(
        ...,
        description="Minimum allowed value (ms)",
    )
    max_bound_ms: float = Field(
        ...,
        description="Maximum allowed value (ms)",
    )
|
||||
|
||||
|
||||
class VideoSegmentResponse(BaseModel):
    """Metadata for a video segment."""

    segment_index: int = Field(
        ...,
        description="0-based segment index",
    )
    start_ms: float = Field(
        ...,
        description="Start timestamp in source video (ms)",
    )
    end_ms: float = Field(
        ...,
        description="End timestamp in source video (ms)",
    )
    gcs_uri: str = Field(
        ...,
        description="GCS path to segment MP4",
    )
    duration_ms: float = Field(
        ...,
        description="Actual segment duration (ms)",
    )
    # The two fields below are optional and describe freeze-frame segments.
    is_freeze_frame: bool = Field(
        default=False,
        description="True if freeze frame with AD audio",
    )
    cue_index: Optional[int] = Field(
        default=None,
        description="AD cue index (freeze frames only)",
    )
|
||||
|
||||
|
||||
class TTSRegenerationItem(BaseModel):
    """A queued TTS regeneration request."""

    cue_index: int = Field(
        ...,
        description="AD cue index to regenerate",
    )
    requested_at: str = Field(
        ...,
        description="ISO timestamp when requested",
    )
    new_text: Optional[str] = Field(
        default=None,
        description="Override text (if provided)",
    )
    # Free-form string; the description lists the values in use.
    status: str = Field(
        default="pending",
        description="pending | processing | completed | failed",
    )
    error_message: Optional[str] = None
|
||||
|
||||
|
||||
class AccessibleVideoEditStateResponse(BaseModel):
    """Current editable state for accessible video during QC review."""

    # Collections default to a fresh empty list per instance.
    pause_points: list[PausePointResponse] = Field(
        description="All pause points with original and adjusted values",
        default_factory=list,
    )
    video_segments: list[VideoSegmentResponse] = Field(
        description="Video segment metadata for timeline display",
        default_factory=list,
    )
    tts_regeneration_queue: list[TTSRegenerationItem] = Field(
        description="Queued TTS regeneration requests",
        default_factory=list,
    )
    last_render_at: Optional[str] = Field(
        default=None,
        description="ISO timestamp of last accessible video render",
    )
    # Required: the caller supplies the total.
    total_duration_ms: float = Field(
        ...,
        description="Total accessible video duration (ms)",
    )
    accessible_video_url: Optional[str] = Field(
        default=None,
        description="Signed URL for accessible video preview",
    )
|
||||
|
||||
|
||||
class PausePointUpdateRequest(BaseModel):
    """Request to update a pause point's adjusted timing."""

    # Required field; no range constraint is declared on the schema itself.
    adjusted_ms: float = Field(..., description="New pause point timestamp in milliseconds")
|
||||
|
||||
|
||||
class TTSRegenerationQueueRequest(BaseModel):
    """Request to queue TTS regeneration for specific cues."""

    # Required field; the schema does not constrain length or uniqueness.
    cue_indices: list[int] = Field(..., description="List of AD cue indices to regenerate")
|
||||
|
||||
|
||||
class TTSRegenerationRemoveRequest(BaseModel):
    """Request to remove a cue from the TTS regeneration queue."""

    cue_index: int = Field(
        ...,
        description="AD cue index to remove from queue",
    )
|
||||
|
||||
|
||||
class RerenderAccessibleVideoRequest(BaseModel):
    """Request to re-render accessible video with QC changes."""

    # Refinement is opt-in: the flag defaults to False.
    whisper_refine: bool = Field(
        default=False,
        description="Run Whisper pause point refinement (enable if cue count/position changed)",
    )
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from google.oauth2 import id_token
|
|||
|
||||
from ..core.config import settings
|
||||
from ..core.logging import get_logger
|
||||
from ..models.job import PausePointData, VideoSegmentMetadata
|
||||
from ..schemas.accessible_video import AccessibleVideoMethod, GeminiAccessibleVideoAnalysis
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
|
@ -150,6 +151,27 @@ class VideoRendererService:
|
|||
# Log but don't fail on cleanup errors
|
||||
logger.warning(f"Failed to delete GCS temp file {gcs_uri}: {e}")
|
||||
|
||||
def _upload_to_gcs_permanent(self, local_path: str, gcs_path: str) -> str:
    """
    Upload a local file to a permanent (non-temp) location in the configured bucket.

    Args:
        local_path: Path to local file
        gcs_path: Full GCS path within the bucket (e.g., "job_id/en/segments/seg_0.mp4")

    Returns:
        GCS URI (gs://bucket/gcs_path)
    """
    # Resolve client -> bucket -> blob in one chain, then upload the file.
    target_blob = self._get_gcs_client().bucket(settings.gcs_bucket).blob(gcs_path)
    target_blob.upload_from_filename(local_path)

    uploaded_uri = f"gs://{settings.gcs_bucket}/{gcs_path}"
    logger.debug(f"Uploaded {local_path} to {uploaded_uri} (permanent)")
    return uploaded_uri
|
||||
|
||||
async def _call_cloud_run_probe(self, gcs_uri: str) -> dict[str, Any]:
|
||||
"""
|
||||
Call Cloud Run FFmpeg service /probe endpoint.
|
||||
|
|
@ -300,7 +322,9 @@ class VideoRendererService:
|
|||
ad_segments: list[tuple[int, str]], # [(cue_index, mp3_path), ...]
|
||||
analysis: dict[str, Any],
|
||||
output_path: str,
|
||||
) -> tuple[str, list[dict] | None]:
|
||||
persist_segments: bool = False,
|
||||
gcs_segment_prefix: str | None = None,
|
||||
) -> tuple[str, list[dict] | None, list[VideoSegmentMetadata] | None, list[PausePointData] | None]:
|
||||
"""
|
||||
Render accessible video based on Gemini analysis.
|
||||
|
||||
|
|
@ -309,11 +333,15 @@ class VideoRendererService:
|
|||
ad_segments: List of (cue_index, mp3_path) tuples for each AD segment
|
||||
analysis: Gemini analysis dict with method and placements
|
||||
output_path: Where to save the output MP4
|
||||
persist_segments: If True, upload video segments to GCS for QC editing
|
||||
gcs_segment_prefix: GCS path prefix for segments (e.g., "job_id/en/segments/")
|
||||
|
||||
Returns:
|
||||
Tuple of (output_path, updated_placements)
|
||||
Tuple of (output_path, updated_placements, segment_metadata, pause_points)
|
||||
- output_path: Path to rendered accessible video
|
||||
- updated_placements: Placements with actual_freeze_duration added (pause-insert only)
|
||||
- segment_metadata: List of VideoSegmentMetadata if persist_segments=True, else None
|
||||
- pause_points: List of PausePointData if persist_segments=True, else None
|
||||
"""
|
||||
method = analysis.get("method", "pause_insert")
|
||||
|
||||
|
|
@ -328,10 +356,12 @@ class VideoRendererService:
|
|||
result_path = await self._render_overlay_method(
|
||||
source_video_path, ad_segments, analysis, output_path
|
||||
)
|
||||
return (result_path, None)
|
||||
return (result_path, None, None, None)
|
||||
else:
|
||||
return await self._render_pause_insert_method(
|
||||
source_video_path, ad_segments, analysis, output_path
|
||||
source_video_path, ad_segments, analysis, output_path,
|
||||
persist_segments=persist_segments,
|
||||
gcs_segment_prefix=gcs_segment_prefix
|
||||
)
|
||||
finally:
|
||||
# Clean up cached source video from GCS
|
||||
|
|
@ -460,13 +490,23 @@ class VideoRendererService:
|
|||
ad_segments: list[tuple[int, str]],
|
||||
analysis: dict[str, Any],
|
||||
output_path: str,
|
||||
) -> tuple[str, list[dict]]:
|
||||
persist_segments: bool = False,
|
||||
gcs_segment_prefix: str | None = None,
|
||||
) -> tuple[str, list[dict], list[VideoSegmentMetadata] | None, list[PausePointData] | None]:
|
||||
"""
|
||||
Render with pause-insert method:
|
||||
1. Split video at each pause point
|
||||
2. Extract freeze frame at each pause point
|
||||
3. Create freeze-frame segment with AD audio
|
||||
4. Concatenate all segments
|
||||
5. Optionally persist segments to GCS for QC editing
|
||||
|
||||
Args:
|
||||
persist_segments: If True, upload segments to GCS and return metadata
|
||||
gcs_segment_prefix: GCS path prefix (e.g., "job_id/en/segments/")
|
||||
|
||||
Returns:
|
||||
Tuple of (output_path, updated_placements, segment_metadata, pause_points)
|
||||
"""
|
||||
logger.info(f"Starting pause-insert render for {source_video_path}")
|
||||
placements = analysis.get("placements", [])
|
||||
|
|
@ -489,7 +529,7 @@ class VideoRendererService:
|
|||
if not sorted_placements:
|
||||
logger.warning("No pause points found, copying source video")
|
||||
await self._copy_video(source_video_path, output_path)
|
||||
return (output_path, [])
|
||||
return (output_path, [], None, None)
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
temp_dir_path = Path(temp_dir)
|
||||
|
|
@ -725,7 +765,106 @@ class VideoRendererService:
|
|||
updated["actual_freeze_duration"] = actual_durations[cue_index]
|
||||
updated_placements.append(updated)
|
||||
|
||||
return (output_path, updated_placements)
|
||||
# ============================================================
|
||||
# PHASE 5: Persist segments to GCS for QC editing (optional)
|
||||
# ============================================================
|
||||
segment_metadata_list: list[VideoSegmentMetadata] | None = None
|
||||
pause_point_data_list: list[PausePointData] | None = None
|
||||
|
||||
if persist_segments and gcs_segment_prefix:
|
||||
logger.info(f"Persisting {len(segment_files)} segments to GCS at {gcs_segment_prefix}")
|
||||
segment_metadata_list = []
|
||||
segment_idx = 0
|
||||
cumulative_time_ms = 0.0
|
||||
|
||||
for p in valid_placements:
|
||||
i = p["index"]
|
||||
# Upload video segment if it exists
|
||||
if i in video_segment_paths:
|
||||
local_path = video_segment_paths[i]
|
||||
gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}.mp4"
|
||||
gcs_uri = self._upload_to_gcs_permanent(local_path, gcs_path)
|
||||
segment_duration_ms = (p["pause_point"] - p["segment_start"]) * 1000
|
||||
|
||||
segment_metadata_list.append(VideoSegmentMetadata(
|
||||
segment_index=segment_idx,
|
||||
start_ms=cumulative_time_ms,
|
||||
end_ms=cumulative_time_ms + segment_duration_ms,
|
||||
gcs_uri=gcs_uri,
|
||||
duration_ms=segment_duration_ms,
|
||||
is_freeze_frame=False,
|
||||
cue_index=None
|
||||
))
|
||||
cumulative_time_ms += segment_duration_ms
|
||||
segment_idx += 1
|
||||
|
||||
# Upload freeze segment
|
||||
freeze_local_path = freeze_segment_paths[i]
|
||||
gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}_freeze.mp4"
|
||||
gcs_uri = self._upload_to_gcs_permanent(freeze_local_path, gcs_path)
|
||||
freeze_duration_ms = p["actual_freeze_duration"] * 1000
|
||||
|
||||
segment_metadata_list.append(VideoSegmentMetadata(
|
||||
segment_index=segment_idx,
|
||||
start_ms=cumulative_time_ms,
|
||||
end_ms=cumulative_time_ms + freeze_duration_ms,
|
||||
gcs_uri=gcs_uri,
|
||||
duration_ms=freeze_duration_ms,
|
||||
is_freeze_frame=True,
|
||||
cue_index=p["cue_index"]
|
||||
))
|
||||
cumulative_time_ms += freeze_duration_ms
|
||||
segment_idx += 1
|
||||
|
||||
# Upload final segment if exists
|
||||
if final_segment_path:
|
||||
gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}.mp4"
|
||||
gcs_uri = self._upload_to_gcs_permanent(str(final_segment_path), gcs_path)
|
||||
final_duration_ms = (source_duration - final_segment_start) * 1000
|
||||
|
||||
segment_metadata_list.append(VideoSegmentMetadata(
|
||||
segment_index=segment_idx,
|
||||
start_ms=cumulative_time_ms,
|
||||
end_ms=cumulative_time_ms + final_duration_ms,
|
||||
gcs_uri=gcs_uri,
|
||||
duration_ms=final_duration_ms,
|
||||
is_freeze_frame=False,
|
||||
cue_index=None
|
||||
))
|
||||
|
||||
logger.info(f"Persisted {len(segment_metadata_list)} segments to GCS")
|
||||
|
||||
# Build PausePointData list with bounds
|
||||
pause_point_data_list = []
|
||||
for idx, p in enumerate(valid_placements):
|
||||
pause_ms = p["pause_point"] * 1000
|
||||
|
||||
# Compute min bound: end of previous AD segment (or 0 for first)
|
||||
if idx == 0:
|
||||
min_bound_ms = 0.0
|
||||
else:
|
||||
prev_p = valid_placements[idx - 1]
|
||||
# End of previous freeze = pause_point + freeze_duration
|
||||
min_bound_ms = (prev_p["pause_point"] + prev_p["actual_freeze_duration"]) * 1000
|
||||
|
||||
# Compute max bound: start of next pause point (or video end for last)
|
||||
if idx == len(valid_placements) - 1:
|
||||
max_bound_ms = source_duration * 1000
|
||||
else:
|
||||
next_p = valid_placements[idx + 1]
|
||||
max_bound_ms = next_p["pause_point"] * 1000
|
||||
|
||||
pause_point_data_list.append(PausePointData(
|
||||
cue_index=p["cue_index"],
|
||||
original_ms=pause_ms,
|
||||
adjusted_ms=None,
|
||||
min_bound_ms=min_bound_ms,
|
||||
max_bound_ms=max_bound_ms
|
||||
))
|
||||
|
||||
logger.info(f"Built {len(pause_point_data_list)} pause point data entries")
|
||||
|
||||
return (output_path, updated_placements, segment_metadata_list, pause_point_data_list)
|
||||
|
||||
async def _get_video_duration(self, video_path: str) -> float:
|
||||
"""Get video duration in seconds using ffprobe."""
|
||||
|
|
|
|||
|
|
@ -249,11 +249,12 @@ async def ingest_and_ai_task_impl(job_id: str):
|
|||
)
|
||||
|
||||
# Update job with AI results, detected language, and outputs
|
||||
# Set status to TRANSLATING to trigger translation pipeline before QC
|
||||
await db.jobs.update_one(
|
||||
{"_id": job_id},
|
||||
{
|
||||
"$set": {
|
||||
"status": JobStatus.PENDING_QC.value,
|
||||
"status": JobStatus.TRANSLATING.value,
|
||||
"source.language": source_language, # Update with detected language
|
||||
"source.detected_language": detected_language,
|
||||
"ai.ingestion_json": ai_result,
|
||||
|
|
@ -267,22 +268,27 @@ async def ingest_and_ai_task_impl(job_id: str):
|
|||
"$push": {
|
||||
"review.history": {
|
||||
"at": datetime.utcnow(),
|
||||
"status": JobStatus.PENDING_QC.value,
|
||||
"status": JobStatus.TRANSLATING.value,
|
||||
"by": "system"
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# Broadcast status update
|
||||
broadcast_status_update(
|
||||
job_id,
|
||||
JobStatus.PENDING_QC.value,
|
||||
job_id,
|
||||
JobStatus.TRANSLATING.value,
|
||||
job_title=job_title,
|
||||
message=f"{job_title} has completed AI processing and is ready for QC review"
|
||||
message=f"{job_title} AI processing complete, starting translation pipeline"
|
||||
)
|
||||
|
||||
logger.info(f"Successfully completed ingestion and AI processing for job {job_id}")
|
||||
logger.info(f"AI processing complete for job {job_id}, triggering translation pipeline")
|
||||
|
||||
# Trigger translation and synthesis pipeline
|
||||
# This will process all translations, TTS, and accessible video BEFORE QC review
|
||||
from .translate_and_synthesize import translate_and_synthesize_task
|
||||
translate_and_synthesize_task.delay(job_id)
|
||||
|
||||
finally:
|
||||
# Clean up temp file
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from motor.motor_asyncio import AsyncIOMotorClient
|
|||
from ..core.config import settings
|
||||
from ..core.logging import get_logger
|
||||
from ..lib.vtt import VTTParser
|
||||
from ..models.job import JobStatus
|
||||
from ..models.job import AccessibleVideoEditState, JobStatus, PausePointData, VideoSegmentMetadata
|
||||
from ..schemas.whisper import CachedWhisperTranscript, CachedWordTimestamp
|
||||
from ..services.gcs import gcs_service
|
||||
from ..services.video_renderer import video_renderer_service
|
||||
|
|
@ -198,15 +198,18 @@ async def _async_render_accessible_video(job_id: str, language: str):
|
|||
analysis["warnings"] = existing_warnings + whisper_warnings
|
||||
logger.info(f"Whisper refinement complete with {len(whisper_warnings)} warnings")
|
||||
|
||||
# 6. Render accessible video
|
||||
# 6. Render accessible video with segment persistence for QC editing
|
||||
output_video_path = os.path.join(temp_dir, "accessible_video.mp4")
|
||||
gcs_segment_prefix = f"{job_id}/{language}/segments/"
|
||||
|
||||
logger.info(f"Rendering accessible video using {method} method...")
|
||||
rendered_path, updated_placements = await video_renderer_service.render_accessible_video(
|
||||
logger.info(f"Rendering accessible video using {method} method with segment persistence...")
|
||||
rendered_path, updated_placements, segment_metadata, pause_points = await video_renderer_service.render_accessible_video(
|
||||
source_video_path,
|
||||
ad_segments,
|
||||
analysis,
|
||||
output_video_path
|
||||
output_video_path,
|
||||
persist_segments=True,
|
||||
gcs_segment_prefix=gcs_segment_prefix
|
||||
)
|
||||
|
||||
# Update analysis with actual freeze durations for VTT retiming
|
||||
|
|
@ -214,6 +217,18 @@ async def _async_render_accessible_video(job_id: str, language: str):
|
|||
analysis["placements"] = updated_placements
|
||||
logger.info(f"Updated {len(updated_placements)} placements with actual freeze durations")
|
||||
|
||||
# Build edit state for QC review if segment metadata was returned
|
||||
edit_state = None
|
||||
if segment_metadata and pause_points:
|
||||
edit_state = AccessibleVideoEditState(
|
||||
pause_points=pause_points,
|
||||
video_segments=segment_metadata,
|
||||
tts_regeneration_queue=[],
|
||||
last_render_at=datetime.utcnow(),
|
||||
whisper_refine_enabled=False
|
||||
)
|
||||
logger.info(f"Built edit state with {len(segment_metadata)} segments and {len(pause_points)} pause points")
|
||||
|
||||
# 7. Upload rendered video to GCS
|
||||
video_blob_path = f"{job_id}/{language}/accessible_video.mp4"
|
||||
video_blob = gcs_service.bucket.blob(video_blob_path)
|
||||
|
|
@ -248,10 +263,11 @@ async def _async_render_accessible_video(job_id: str, language: str):
|
|||
retimed_captions_gcs_uri = f"gs://{settings.gcs_bucket}/{retimed_blob_path}"
|
||||
logger.info(f"Uploaded re-timed captions to {retimed_captions_gcs_uri}")
|
||||
|
||||
# 9. Update job document with results
|
||||
# 9. Update job document with results (including edit state for QC review)
|
||||
update_fields = {
|
||||
f"outputs.{language}.accessible_video_gcs": video_gcs_uri,
|
||||
f"outputs.{language}.accessible_video_method": method,
|
||||
f"outputs.{language}.video_segments_gcs_prefix": f"gs://{settings.gcs_bucket}/{gcs_segment_prefix}",
|
||||
f"accessible_video_progress.{language}": {
|
||||
"status": "completed",
|
||||
"method": method,
|
||||
|
|
@ -264,6 +280,10 @@ async def _async_render_accessible_video(job_id: str, language: str):
|
|||
if retimed_captions_gcs_uri:
|
||||
update_fields[f"outputs.{language}.retimed_captions_vtt_gcs"] = retimed_captions_gcs_uri
|
||||
|
||||
# Store edit state for QC review accessible video editing
|
||||
if edit_state:
|
||||
update_fields[f"outputs.{language}.accessible_video_edit_state"] = edit_state.model_dump()
|
||||
|
||||
await db.jobs.update_one(
|
||||
{"_id": job_id},
|
||||
{"$set": update_fields}
|
||||
|
|
@ -423,6 +443,7 @@ async def _check_accessible_video_completion(job_id: str, db):
|
|||
)
|
||||
else:
|
||||
# All videos completed successfully
|
||||
# NEW WORKFLOW: Go to PENDING_QC for QC review (not PENDING_FINAL_REVIEW)
|
||||
logger.info(f"All accessible videos complete for job {job_id}")
|
||||
|
||||
if job_doc["status"] in [JobStatus.TTS_GENERATING.value, JobStatus.RENDERING_VIDEO.value]:
|
||||
|
|
@ -430,13 +451,13 @@ async def _check_accessible_video_completion(job_id: str, db):
|
|||
{"_id": job_id},
|
||||
{
|
||||
"$set": {
|
||||
"status": JobStatus.PENDING_FINAL_REVIEW.value,
|
||||
"status": JobStatus.PENDING_QC.value,
|
||||
"updated_at": datetime.utcnow()
|
||||
},
|
||||
"$push": {
|
||||
"review.history": {
|
||||
"at": datetime.utcnow(),
|
||||
"status": JobStatus.PENDING_FINAL_REVIEW.value,
|
||||
"status": JobStatus.PENDING_QC.value,
|
||||
"by": "system"
|
||||
}
|
||||
}
|
||||
|
|
@ -445,9 +466,9 @@ async def _check_accessible_video_completion(job_id: str, db):
|
|||
|
||||
broadcast_status_update(
|
||||
job_id,
|
||||
JobStatus.PENDING_FINAL_REVIEW.value,
|
||||
JobStatus.PENDING_QC.value,
|
||||
job_title=job_title,
|
||||
message=f"{job_title} has all accessible videos complete - ready for Final Review"
|
||||
message=f"{job_title} has all accessible videos complete - ready for QC Review"
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
497
backend/app/tasks/rerender_accessible_video.py
Normal file
497
backend/app/tasks/rerender_accessible_video.py
Normal file
|
|
@ -0,0 +1,497 @@
|
|||
"""Celery task for re-rendering accessible video with QC changes."""
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
|
||||
from celery.result import allow_join_result
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
from pydub import AudioSegment
|
||||
|
||||
from ..core.config import settings
|
||||
from ..core.logging import get_logger
|
||||
from ..lib.vtt import VTTParser
|
||||
from ..models.job import AccessibleVideoEditState, JobStatus, PausePointData, VideoSegmentMetadata
|
||||
from ..services.gcs import gcs_service
|
||||
from ..services.video_renderer import video_renderer_service
|
||||
from ..services.vtt_retimer import vtt_retimer_service
|
||||
from ..services.whisper_service import WordTimestamp, whisper_service
|
||||
from . import celery_app
|
||||
from .render_accessible_video import _extract_audio_for_whisper, _dispatch_whisper_transcription
|
||||
from .translate_and_synthesize import broadcast_status_update
|
||||
from .tts_synthesis import dispatch_language_tts, parse_ad_cues, synthesize_cue_task
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@celery_app.task(bind=True, time_limit=7200, soft_time_limit=7000)
def rerender_accessible_video_task(
    self,
    job_id: str,
    language: str,
    regenerate_cue_indices: list[int],
    whisper_refine: bool = False
):
    """
    Re-render accessible video during QC review with selective TTS regeneration.

    This task:
    1. If regenerate_cue_indices not empty: synthesize new TTS for those cues
    2. Download source video and existing per-cue MP3s
    3. If whisper_refine: run Whisper pause point refinement
    4. Re-render video using updated pause points and new/existing TTS
    5. Update job status back to PENDING_QC

    On failure the job is handed back to PENDING_QC with the error recorded
    (via ``_mark_rerender_failed``) and the original exception is re-raised.

    Args:
        job_id: Job ID
        language: Language being re-rendered
        regenerate_cue_indices: List of cue indices to regenerate TTS for
        whisper_refine: Whether to run Whisper pause point refinement

    Returns:
        Whatever ``_async_rerender_accessible_video`` returns.

    Raises:
        Exception: Any error raised by the re-render pipeline, unchanged.
    """
    logger.info(
        f"Starting accessible video re-render for job {job_id}/{language}: "
        f"regenerate={regenerate_cue_indices}, whisper_refine={whisper_refine}"
    )

    try:
        result = asyncio.run(_async_rerender_accessible_video(
            job_id, language, regenerate_cue_indices, whisper_refine
        ))
    except Exception as e:
        logger.error(f"Accessible video re-render failed for job {job_id}/{language}: {e}")
        import traceback
        logger.error(f"Full traceback: {traceback.format_exc()}")

        # Update job status back to PENDING_QC with error.
        # The bookkeeping is best-effort: if it fails too (e.g. the database is
        # unreachable), log that and still surface the ORIGINAL failure below
        # instead of letting the secondary error replace it.
        try:
            asyncio.run(_mark_rerender_failed(job_id, language, str(e)))
        except Exception as mark_error:
            logger.error(
                f"Could not mark re-render as failed for job {job_id}/{language}: {mark_error}"
            )
        raise
    else:
        logger.info(f"Accessible video re-render completed for job {job_id}/{language}")
        return result
|
||||
|
||||
|
||||
async def _mark_rerender_failed(job_id: str, language: str, error_message: str):
    """
    Record a failed re-render on the job and hand it back to QC.

    Sets the job status to PENDING_QC, stores the full error text under the
    language's ``accessible_video_edit_state.last_render_error``, appends a
    review-history entry (error truncated to 200 characters) and broadcasts
    the status change (error truncated to 100 characters).

    Args:
        job_id: Job ID
        language: Language whose re-render failed
        error_message: Text of the error that aborted the re-render
    """
    mongo_client = AsyncIOMotorClient(settings.mongodb_uri)
    database = mongo_client[settings.mongodb_db]

    try:
        job_filter = {"_id": job_id}

        # Fields overwritten on the job document.
        set_fields = {
            "status": JobStatus.PENDING_QC.value,
            f"outputs.{language}.accessible_video_edit_state.last_render_error": error_message,
            "updated_at": datetime.utcnow()
        }
        # Audit-trail entry appended to the review history.
        history_entry = {
            "at": datetime.utcnow(),
            "status": JobStatus.PENDING_QC.value,
            "by": "system",
            "notes": f"Re-render failed for {language}: {error_message[:200]}"
        }

        await database.jobs.update_one(
            job_filter,
            {"$set": set_fields, "$push": {"review.history": history_entry}}
        )

        # Re-read the job only to pick up its title for the broadcast.
        failed_job = await database.jobs.find_one(job_filter)
        title = failed_job.get("title") if failed_job else None

        broadcast_status_update(
            job_id,
            JobStatus.PENDING_QC.value,
            job_title=title,
            message=f"Re-render failed: {error_message[:100]}"
        )
    finally:
        mongo_client.close()
|
||||
|
||||
|
||||
async def _async_rerender_accessible_video(
    job_id: str,
    language: str,
    regenerate_cue_indices: list[int],
    whisper_refine: bool
):
    """
    Async implementation of accessible video re-rendering.

    Loads the job, optionally regenerates TTS for the requested cues,
    downloads the source video, the AD VTT and the per-cue MP3s, rebuilds the
    placements from the stored pause points, optionally runs Whisper
    refinement (pause_insert method only), renders and uploads the video,
    re-times the captions (pause_insert method only) and finally writes the
    results back to the job with status PENDING_QC.

    Args:
        job_id: Job ID
        language: Language being re-rendered
        regenerate_cue_indices: Cue indices whose TTS is re-synthesized first
        whisper_refine: Whether to run Whisper pause point refinement

    Raises:
        ValueError: If the job, the language outputs, the edit state, the AD
            VTT or the AD cue prefix is missing.
    """
    logger.info(f"Async re-render started for job {job_id}/{language}")

    def _blob_path(gcs_uri: str) -> str:
        # Turn a gs:// URI inside the configured bucket into a bucket-relative path.
        return gcs_uri.replace(f"gs://{settings.gcs_bucket}/", "")

    client = AsyncIOMotorClient(settings.mongodb_uri)
    db = client[settings.mongodb_db]

    try:
        # Get job details
        job_doc = await db.jobs.find_one({"_id": job_id})
        if not job_doc:
            raise ValueError(f"Job {job_id} not found")

        job_title = job_doc.get("title", "Untitled Job")
        lang_output = job_doc.get("outputs", {}).get(language)
        if not lang_output:
            raise ValueError(f"No outputs found for language {language}")

        edit_state = lang_output.get("accessible_video_edit_state")
        if not edit_state:
            raise ValueError(f"No edit state found for language {language}")

        # Use TMPDIR env var if set
        temp_base = os.environ.get('TMPDIR', None)
        with tempfile.TemporaryDirectory(dir=temp_base) as temp_dir:
            # 1. Download source video
            source_video_gcs = job_doc["source"]["gcs_uri"]
            source_blob_path = _blob_path(source_video_gcs)
            source_video_path = os.path.join(temp_dir, "source.mp4")

            logger.info(f"Downloading source video from {source_blob_path}")
            source_blob = gcs_service.bucket.blob(source_blob_path)
            source_blob.download_to_filename(source_video_path)

            # 2. Regenerate TTS for queued cues (if any)
            if regenerate_cue_indices:
                logger.info(f"Regenerating TTS for cues: {regenerate_cue_indices}")
                await _regenerate_tts_cues(
                    job_id, language, regenerate_cue_indices, job_doc, db, temp_dir
                )

                # Clear regeneration queue after successful synthesis
                await db.jobs.update_one(
                    {"_id": job_id},
                    {
                        "$set": {
                            f"outputs.{language}.accessible_video_edit_state.tts_regeneration_queue": [],
                            "updated_at": datetime.utcnow()
                        }
                    }
                )

            # 3. Download AD VTT and per-cue MP3s
            ad_vtt_gcs = lang_output.get("ad_vtt_gcs")
            if not ad_vtt_gcs:
                raise ValueError(f"No AD VTT found for language {language}")

            ad_blob = gcs_service.bucket.blob(_blob_path(ad_vtt_gcs))
            ad_vtt_content = ad_blob.download_as_text()

            # Download per-cue MP3s
            ad_cues_prefix = lang_output.get("ad_cues_gcs_prefix")
            if not ad_cues_prefix:
                raise ValueError(f"No AD cue segments found for language {language}")

            ad_segments = []
            cue_durations = []

            prefix_path = _blob_path(ad_cues_prefix)

            # The cue index is the trailing "_<n>.mp3" part of the blob name.
            # An MP3 under the prefix that does not follow that pattern is
            # skipped with a warning instead of aborting the whole re-render.
            cue_blobs = []
            for blob in gcs_service.bucket.list_blobs(prefix=prefix_path):
                if not blob.name.endswith(".mp3"):
                    continue
                index_text = blob.name.split("_")[-1].replace(".mp3", "")
                try:
                    cue_blobs.append((blob, int(index_text)))
                except ValueError:
                    logger.warning(f"Skipping MP3 without a numeric cue index: {blob.name}")
            cue_blobs.sort(key=lambda item: item[1])

            # NOTE(review): durations are collected in sorted cue-index order and
            # _build_placements_with_adjustments looks them up by position, so a
            # missing cue MP3 would shift every later duration - confirm the
            # prefix always holds a contiguous set of cues.
            for blob, cue_index in cue_blobs:
                local_path = os.path.join(temp_dir, f"cue_{cue_index}.mp3")
                blob.download_to_filename(local_path)
                ad_segments.append((cue_index, local_path))

                audio = AudioSegment.from_mp3(local_path)
                # len() of a pydub segment is in milliseconds; durations are kept in seconds.
                cue_durations.append(len(audio) / 1000.0)

            logger.info(f"Downloaded {len(ad_segments)} AD cue segments")

            # 4. Build placements with adjusted pause points
            method = lang_output.get("accessible_video_method", "pause_insert")
            pause_points = edit_state.get("pause_points", [])

            placements = _build_placements_with_adjustments(
                ad_vtt_content, cue_durations, pause_points
            )
            logger.info(f"Built {len(placements)} placements with adjusted pause points")

            analysis = {
                "method": method,
                "method_rationale": "QC re-render with user adjustments",
                "placements": placements,
                "total_added_duration": sum(cue_durations) if method == "pause_insert" else 0,
                "warnings": []
            }

            # 5. Optionally run Whisper refinement
            if whisper_refine and method == "pause_insert":
                logger.info("Running Whisper pause point refinement...")
                analysis, whisper_warnings = await _refine_pause_points_for_rerender(
                    job_id, source_video_path, analysis, db, temp_dir
                )
                if whisper_warnings:
                    analysis["warnings"] = analysis.get("warnings", []) + whisper_warnings
                logger.info(f"Whisper refinement complete with {len(whisper_warnings)} warnings")

            # 6. Render accessible video (persist segments again for future edits)
            output_video_path = os.path.join(temp_dir, "accessible_video.mp4")
            gcs_segment_prefix = f"{job_id}/{language}/segments/"

            logger.info(f"Re-rendering accessible video using {method} method...")
            _rendered_path, updated_placements, segment_metadata, new_pause_points = (
                await video_renderer_service.render_accessible_video(
                    source_video_path,
                    ad_segments,
                    analysis,
                    output_video_path,
                    persist_segments=True,
                    gcs_segment_prefix=gcs_segment_prefix
                )
            )

            if updated_placements:
                analysis["placements"] = updated_placements

            # 7. Upload rendered video
            video_blob_path = f"{job_id}/{language}/accessible_video.mp4"
            video_blob = gcs_service.bucket.blob(video_blob_path)
            video_blob.content_type = "video/mp4"
            video_blob.upload_from_filename(output_video_path)

            video_gcs_uri = f"gs://{settings.gcs_bucket}/{video_blob_path}"
            logger.info(f"Uploaded re-rendered accessible video to {video_gcs_uri}")

            # 8. Generate re-timed captions if pause-insert
            retimed_captions_gcs_uri = None
            if method == "pause_insert":
                captions_vtt_gcs = lang_output.get("captions_vtt_gcs")
                if captions_vtt_gcs:
                    captions_blob = gcs_service.bucket.blob(_blob_path(captions_vtt_gcs))
                    original_captions_vtt = captions_blob.download_as_text()

                    retimed_captions = vtt_retimer_service.retime_for_pause_insert(
                        original_captions_vtt, analysis
                    )

                    retimed_blob_path = f"{job_id}/{language}/accessible_captions.vtt"
                    retimed_blob = gcs_service.bucket.blob(retimed_blob_path)
                    retimed_blob.content_type = "text/vtt"
                    retimed_blob.upload_from_string(retimed_captions, content_type="text/vtt")

                    retimed_captions_gcs_uri = f"gs://{settings.gcs_bucket}/{retimed_blob_path}"
                    logger.info(f"Uploaded re-timed captions to {retimed_captions_gcs_uri}")

            # 9. Build new edit state (only when the renderer returned segment data)
            new_edit_state = None
            if segment_metadata and new_pause_points:
                new_edit_state = AccessibleVideoEditState(
                    pause_points=new_pause_points,
                    video_segments=segment_metadata,
                    tts_regeneration_queue=[],
                    last_render_at=datetime.utcnow(),
                    whisper_refine_enabled=whisper_refine
                )

            # 10. Update job document
            update_fields = {
                f"outputs.{language}.accessible_video_gcs": video_gcs_uri,
                f"outputs.{language}.video_segments_gcs_prefix": f"gs://{settings.gcs_bucket}/{gcs_segment_prefix}",
                "status": JobStatus.PENDING_QC.value,
                "updated_at": datetime.utcnow()
            }

            if retimed_captions_gcs_uri:
                update_fields[f"outputs.{language}.retimed_captions_vtt_gcs"] = retimed_captions_gcs_uri

            if new_edit_state:
                update_fields[f"outputs.{language}.accessible_video_edit_state"] = new_edit_state.model_dump()

            await db.jobs.update_one(
                {"_id": job_id},
                {
                    "$set": update_fields,
                    "$push": {
                        "review.history": {
                            "at": datetime.utcnow(),
                            "status": JobStatus.PENDING_QC.value,
                            "by": "system",
                            "notes": f"Re-render complete for {language}"
                        }
                    }
                }
            )

            # Broadcast completion
            broadcast_status_update(
                job_id,
                JobStatus.PENDING_QC.value,
                job_title=job_title,
                message=f"Accessible video re-render complete for {language.upper()}"
            )

            logger.info(f"Accessible video re-render complete for job {job_id}/{language}")

    finally:
        client.close()
|
||||
|
||||
|
||||
async def _regenerate_tts_cues(
    job_id: str,
    language: str,
    cue_indices: list[int],
    job_doc: dict,
    db,
    temp_dir: str
):
    """
    Regenerate TTS for specific cues using current VTT text.

    Downloads the language's AD VTT, parses its cues and, for every requested
    index, dispatches ``synthesize_cue_task`` on the "tts" queue and waits for
    it to finish before moving on to the next cue.

    Args:
        job_id: Job ID
        language: Language whose cues are re-synthesized
        cue_indices: Zero-based positions into the parsed AD cues; indices
            outside the parsed range are skipped with a warning
        job_doc: Job document (provides outputs and TTS preferences)
        db: Database handle (not used by this function)
        temp_dir: Working directory (not used by this function)

    Raises:
        ValueError: If the language has no AD VTT.
        RuntimeError: If synthesis of a cue reports failure.
    """
    logger.info(f"Regenerating TTS for {len(cue_indices)} cues")

    # Get AD VTT content. Fail with a clear message rather than an
    # AttributeError when the outputs or the VTT URI are missing.
    lang_output = job_doc.get("outputs", {}).get(language) or {}
    ad_vtt_gcs = lang_output.get("ad_vtt_gcs")
    if not ad_vtt_gcs:
        raise ValueError(f"No AD VTT found for language {language}")

    ad_blob_path = ad_vtt_gcs.replace(f"gs://{settings.gcs_bucket}/", "")
    ad_blob = gcs_service.bucket.blob(ad_blob_path)
    ad_vtt_content = ad_blob.download_as_text()

    # Parse cues
    cues = parse_ad_cues(ad_vtt_content)

    # Get TTS preferences
    tts_preferences = job_doc["requested_outputs"].get("tts_preferences", {})
    voices_per_language = tts_preferences.get("voices_per_language", {})
    voice_name = voices_per_language.get(language, tts_preferences.get("default_voice"))
    provider = tts_preferences.get("provider", "gemini")
    model = tts_preferences.get("model", "flash")
    speed = tts_preferences.get("speed", 1.0)
    style_preset = tts_preferences.get("style_preset", "neutral")
    custom_style_prompt = tts_preferences.get("custom_style_prompt")

    if style_preset == "custom" and custom_style_prompt:
        style_prompt = custom_style_prompt
    else:
        style_prompt = settings.gemini_tts_style_prompts.get(style_preset, "")

    # Synthesize each cue
    for cue_idx in cue_indices:
        # Reject negative indices too: cues[-1] would silently re-synthesize
        # the last cue under the wrong cue index.
        if not 0 <= cue_idx < len(cues):
            logger.warning(f"Cue index {cue_idx} out of range, skipping")
            continue

        cue = cues[cue_idx]

        logger.info(f"Synthesizing TTS for cue {cue_idx}: '{cue['text'][:50]}...'")

        # Dispatch synthesis task
        task_result = synthesize_cue_task.apply_async(
            kwargs={
                "job_id": job_id,
                "language": language,
                "cue_index": cue_idx,
                "text": cue["text"],
                "start_time": cue["start_time"],
                "end_time": cue["end_time"],
                "voice_name": voice_name,
                "provider": provider,
                "model": model,
                "speed": speed,
                "style_prompt": style_prompt
            },
            queue="tts"
        )

        # Wait for completion, yielding to the event loop between polls
        poll_count = 0
        while not task_result.ready():
            await asyncio.sleep(1.0)
            poll_count += 1
            if poll_count % 30 == 0:
                logger.info(f"Still waiting for TTS cue {cue_idx}...")

        # Reading a result from inside another task needs this explicit opt-in.
        with allow_join_result():
            result = task_result.get(timeout=120)

        if not result.get("success"):
            raise RuntimeError(f"TTS synthesis failed for cue {cue_idx}: {result.get('error_message')}")

        logger.info(f"TTS synthesis complete for cue {cue_idx}")

    logger.info(f"All {len(cue_indices)} TTS cues regenerated")
|
||||
|
||||
|
||||
def _build_placements_with_adjustments(
    ad_vtt_content: str,
    cue_durations: list[float],
    pause_points: list[dict]
) -> list[dict]:
    """
    Build placement instructions using adjusted pause points from QC edits.

    For each parsed AD cue that has a TTS duration, the pause point is taken
    from the stored pause-point data (``adjusted_ms`` when set, otherwise
    ``original_ms``) and falls back to the cue's own start time when no usable
    entry exists for that cue.

    Args:
        ad_vtt_content: AD VTT content
        cue_durations: TTS durations per cue, looked up by cue position
        pause_points: Pause point data with original and adjusted values
            (milliseconds); entries carrying neither value are ignored

    Returns:
        List of placement dicts, one per cue that has a duration
    """
    cues = VTTParser.parse(ad_vtt_content)

    # Effective pause time (seconds) per cue index, plus which of those were
    # actually moved by a reviewer.
    pause_by_cue = {}
    user_adjusted = set()
    for pp in pause_points:
        cue_idx = pp.get("cue_index")
        adjusted = pp.get("adjusted_ms")
        effective_ms = adjusted if adjusted is not None else pp.get("original_ms")
        if effective_ms is None:
            # Nothing usable stored for this cue; the VTT start time is used instead.
            continue

        pause_by_cue[cue_idx] = effective_ms / 1000.0
        # A later entry for the same cue replaces an earlier one entirely.
        if adjusted is not None:
            user_adjusted.add(cue_idx)
        else:
            user_adjusted.discard(cue_idx)

    placements = []
    # zip() stops at the shorter input, so cues without a duration are left out.
    for i, (cue, duration) in enumerate(zip(cues, cue_durations)):
        pause_point = pause_by_cue.get(i, cue.start_time)

        if i in user_adjusted:
            rationale = "User-adjusted during QC"
        elif i in pause_by_cue:
            rationale = "Unchanged pause point from previous render"
        else:
            rationale = "Original from VTT"

        placements.append({
            "ad_cue_index": i,
            "original_start_time": cue.start_time,
            "original_end_time": cue.end_time,
            "target_start_time": cue.start_time,
            "ad_duration": duration,
            "pause_point": pause_point,
            "resume_from": pause_point,
            "pause_point_rationale": rationale
        })

    return placements
|
||||
|
||||
|
||||
async def _refine_pause_points_for_rerender(
    job_id: str,
    video_path: str,
    analysis: dict,
    db,
    temp_dir: str
) -> tuple[dict, list[str]]:
    """
    Run Whisper pause point refinement for re-render.

    Extracts the audio track of the video into ``temp_dir``, transcribes it,
    and lets the Whisper service refine the placements in ``analysis``. When
    transcription fails or yields no words, the analysis is returned untouched
    together with a single explanatory warning.

    Args:
        job_id: Job ID
        video_path: Local path of the source video
        analysis: Current analysis dict (its "placements" are refined)
        db: Database handle (not used by this function)
        temp_dir: Directory that receives the extracted audio file

    Returns:
        Tuple of (analysis, warnings); on success the analysis is a shallow
        copy with refined "placements" and "whisper_refined" set to True
    """
    logger.info(f"Refining pause points with Whisper for re-render of job {job_id}")

    extracted_audio = os.path.join(temp_dir, "source_audio.mp3")
    await _extract_audio_for_whisper(video_path, extracted_audio)

    try:
        transcript_words = await _dispatch_whisper_transcription(job_id, extracted_audio)
    except Exception as exc:
        # Transcription is optional: keep the current timestamps and report why.
        logger.error(f"Whisper transcription failed: {exc}")
        return analysis, [f"Whisper failed: {exc!s} - using current timestamps"]

    if not transcript_words:
        return analysis, ["No speech detected - using current timestamps"]

    speech_gaps = whisper_service.identify_speech_gaps(transcript_words)
    current_placements = analysis.get("placements", [])
    new_placements, refinement_warnings = whisper_service.refine_all_pause_points(
        current_placements,
        transcript_words,
        speech_gaps
    )

    # Shallow copy so the caller's dict is not modified in place.
    result = {**analysis, "placements": new_placements, "whisper_refined": True}
    return result, refinement_warnings
|
||||
|
|
@ -139,9 +139,16 @@ async def _async_translate_and_synthesize(job_id: str):
|
|||
job_title = job_doc.get("title", "Untitled Job")
|
||||
logger.info(f"✅ Found job document for {job_id} ({job_title}), status: {job_doc.get('status', 'UNKNOWN')}")
|
||||
|
||||
# Check for any approved status (English or non-English source)
|
||||
if not JobStatus.is_approved(job_doc["status"]):
|
||||
logger.warning(f"⚠️ Job {job_id} not in approved status (current: {job_doc['status']}), skipping")
|
||||
# Check for valid status to process translation
|
||||
# Valid statuses: approved_english, approved_source (legacy), or translating (new workflow)
|
||||
current_status = job_doc["status"]
|
||||
valid_statuses = [
|
||||
JobStatus.APPROVED_ENGLISH.value,
|
||||
JobStatus.APPROVED_SOURCE.value,
|
||||
JobStatus.TRANSLATING.value,
|
||||
]
|
||||
if current_status not in valid_statuses:
|
||||
logger.warning(f"⚠️ Job {job_id} not in valid status for translation (current: {current_status}), skipping")
|
||||
return
|
||||
|
||||
# Get source language from job
|
||||
|
|
@ -389,20 +396,21 @@ async def _async_translate_and_synthesize(job_id: str):
|
|||
)
|
||||
|
||||
# Update final status
|
||||
# NEW WORKFLOW: Translation pipeline now ends at PENDING_QC for QC review
|
||||
# If accessible video is requested, the render task will handle the transition
|
||||
# to PENDING_FINAL_REVIEW when all videos are complete
|
||||
# to PENDING_QC when all videos are complete
|
||||
if not accessible_video_requested:
|
||||
await db.jobs.update_one(
|
||||
{"_id": job_id},
|
||||
{
|
||||
"$set": {
|
||||
"status": JobStatus.PENDING_FINAL_REVIEW.value,
|
||||
"status": JobStatus.PENDING_QC.value,
|
||||
"updated_at": datetime.utcnow()
|
||||
},
|
||||
"$push": {
|
||||
"review.history": {
|
||||
"at": datetime.utcnow(),
|
||||
"status": JobStatus.PENDING_FINAL_REVIEW.value,
|
||||
"status": JobStatus.PENDING_QC.value,
|
||||
"by": "system"
|
||||
}
|
||||
}
|
||||
|
|
@ -412,13 +420,13 @@ async def _async_translate_and_synthesize(job_id: str):
|
|||
# Broadcast status update
|
||||
broadcast_status_update(
|
||||
job_id,
|
||||
JobStatus.PENDING_FINAL_REVIEW.value,
|
||||
JobStatus.PENDING_QC.value,
|
||||
job_title=job_title,
|
||||
message=f"{job_title} has finished translation and audio generation - ready for Final Review"
|
||||
message=f"{job_title} has finished translation and audio generation - ready for QC Review"
|
||||
)
|
||||
else:
|
||||
# When accessible video is requested, stay in TTS_GENERATING
|
||||
# The render_accessible_video task will transition to PENDING_FINAL_REVIEW
|
||||
# The render_accessible_video task will transition to PENDING_QC when all videos complete
|
||||
logger.info(
|
||||
f"Accessible video rendering triggered for job {job_id}. "
|
||||
f"Staying in TTS_GENERATING until all videos are complete."
|
||||
|
|
|
|||
112
frontend/src/components/RerenderControls.tsx
Normal file
112
frontend/src/components/RerenderControls.tsx
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
import { useState } from 'react';
|
||||
|
||||
/** Props for {@link RerenderControls}. */
interface RerenderControlsProps {
  /** TTS regenerations currently queued; only the count is displayed here. */
  pendingRegenerations: number[];
  /** Whether any pause point has been adjusted since the last render. */
  pausePointsModified: boolean;
  /** True while a render is in progress; disables the checkbox and the render button. */
  isRendering: boolean;
  /** Called when the user clicks the render button, with the chosen Whisper option. */
  onRender: (options: { whisperRefine: boolean }) => void;
  /** Called when the user clicks "Clear queue". */
  onClearQueue: () => void;
}

/**
 * Panel that summarises pending accessible-video changes (queued TTS
 * regenerations, adjusted pause points) and lets the reviewer trigger a
 * re-render, optionally with Whisper pause refinement.
 *
 * The render button is disabled while rendering or when there is nothing to
 * render.
 */
export function RerenderControls({
  pendingRegenerations,
  pausePointsModified,
  isRendering,
  onRender,
  onClearQueue,
}: RerenderControlsProps) {
  const [whisperRefine, setWhisperRefine] = useState(false);

  const hasChanges = pendingRegenerations.length > 0 || pausePointsModified;
  const renderDisabled = !hasChanges || isRendering;

  return (
    <div className="bg-purple-50 border border-purple-200 rounded-lg p-4">
      <div className="flex items-start justify-between gap-4">
        <div className="flex-1">
          <h4 className="text-sm font-semibold text-purple-900">
            Render Accessible Video Changes
          </h4>

          {pendingRegenerations.length > 0 && (
            <div className="mt-2 flex items-center gap-2">
              <span className="text-sm text-purple-700">
                {pendingRegenerations.length} TTS cue(s) queued for regeneration
              </span>
              {/* type="button": never act as a submit button inside an enclosing form */}
              <button
                type="button"
                onClick={onClearQueue}
                className="text-xs text-purple-600 hover:text-purple-800 underline"
              >
                Clear queue
              </button>
            </div>
          )}

          {pausePointsModified && (
            <p className="mt-1 text-sm text-purple-700">
              Pause points have been adjusted
            </p>
          )}

          {!hasChanges && (
            <p className="mt-1 text-sm text-gray-500">
              No changes to render. Adjust pause points or queue TTS regenerations.
            </p>
          )}
        </div>

        <div className="flex flex-col items-end gap-3">
          <div className="flex items-center gap-2">
            <label className="flex items-center text-sm">
              <input
                type="checkbox"
                checked={whisperRefine}
                onChange={(e) => setWhisperRefine(e.target.checked)}
                className="mr-2 rounded"
                disabled={isRendering}
              />
              Run Whisper pause refinement
            </label>
          </div>

          <p className="text-xs text-gray-500 max-w-xs text-right">
            Enable if you changed the number or position of AD cues. Skip if you only adjusted pause timing.
          </p>

          <button
            type="button"
            onClick={() => onRender({ whisperRefine })}
            disabled={renderDisabled}
            className={`px-4 py-2 text-sm font-medium rounded transition-colors ${
              renderDisabled
                ? 'bg-gray-200 text-gray-500 cursor-not-allowed'
                : 'bg-purple-600 text-white hover:bg-purple-700'
            }`}
          >
            {isRendering ? (
              <span className="flex items-center gap-2">
                {/* Decorative spinner; the adjacent text carries the meaning */}
                <svg className="animate-spin h-4 w-4" viewBox="0 0 24 24" aria-hidden="true">
                  <circle
                    className="opacity-25"
                    cx="12"
                    cy="12"
                    r="10"
                    stroke="currentColor"
                    strokeWidth="4"
                    fill="none"
                  />
                  <path
                    className="opacity-75"
                    fill="currentColor"
                    d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
                  />
                </svg>
                Rendering...
              </span>
            ) : (
              'Render Changes'
            )}
          </button>
        </div>
      </div>
    </div>
  );
}
|
||||
165
frontend/src/components/TimelinePreview/PausePointEditor.tsx
Normal file
165
frontend/src/components/TimelinePreview/PausePointEditor.tsx
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
import { useState, useEffect, useRef } from 'react';
|
||||
import type { PausePointData } from '../../types/api';
|
||||
|
||||
/** Props accepted by PausePointEditor. */
interface PausePointEditorProps {
  /** Pause point being edited; supplies the starting value and the allowed bounds. */
  pausePoint: PausePointData;
  /** Coordinates applied as `left` / `top` of the fixed-position popover. */
  position: { x: number; y: number };
  /** Called with the validated value (milliseconds) when the user saves. */
  onSave: (adjustedMs: number) => void;
  /** Called when the user dismisses the popover (close icon, Cancel, or Escape). */
  onCancel: () => void;
  /** Called when the user clicks the TTS regeneration button. */
  onRegenerateTTS: () => void;
  /** When true, the regeneration button is disabled and labelled as already queued. */
  isRegenerationQueued: boolean;
}

/**
 * Popover for editing a single pause point.
 *
 * Shows a numeric input (milliseconds) seeded with the pause point's adjusted
 * value, falling back to its original value, validates the input against
 * `min_bound_ms` / `max_bound_ms`, and offers a button to queue TTS
 * regeneration. Enter saves, Escape cancels.
 */
export function PausePointEditor({
  pausePoint,
  position,
  onSave,
  onCancel,
  onRegenerateTTS,
  isRegenerationQueued,
}: PausePointEditorProps) {
  const effectiveMs = pausePoint.adjusted_ms ?? pausePoint.original_ms;
  const [value, setValue] = useState(effectiveMs);
  const [error, setError] = useState<string | null>(null);
  const inputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    // A different pause point was handed to this editor instance: restart from
    // its effective value and drop any validation error left over from the
    // previous one, otherwise Save could stay disabled for a valid value.
    setValue(pausePoint.adjusted_ms ?? pausePoint.original_ms);
    setError(null);
  }, [pausePoint]);

  useEffect(() => {
    // Focus input on mount
    inputRef.current?.focus();
    inputRef.current?.select();
  }, []);

  /** Format a millisecond count as `m:ss.mmm`. */
  const formatMs = (ms: number) => {
    const totalSeconds = Math.floor(ms / 1000);
    const minutes = Math.floor(totalSeconds / 60);
    const seconds = totalSeconds % 60;
    const milliseconds = Math.floor(ms % 1000);
    return `${minutes}:${seconds.toString().padStart(2, '0')}.${milliseconds.toString().padStart(3, '0')}`;
  };

  /** Return a user-facing error message for `ms`, or null when it is acceptable. */
  const validateValue = (ms: number): string | null => {
    // NaN compares false against both bounds, so it must be rejected explicitly.
    if (!Number.isFinite(ms)) {
      return 'Enter a valid number of milliseconds';
    }
    if (ms < pausePoint.min_bound_ms) {
      return `Cannot be earlier than ${formatMs(pausePoint.min_bound_ms)}`;
    }
    if (ms > pausePoint.max_bound_ms) {
      return `Cannot be later than ${formatMs(pausePoint.max_bound_ms)}`;
    }
    return null;
  };

  const handleValueChange = (newValue: number) => {
    setValue(newValue);
    setError(validateValue(newValue));
  };

  const handleSave = () => {
    // Re-validate here because Enter can trigger a save without a prior change event.
    const validationError = validateValue(value);
    if (validationError) {
      setError(validationError);
      return;
    }
    onSave(value);
  };

  const handleKeyDown = (e: React.KeyboardEvent) => {
    if (e.key === 'Enter') {
      handleSave();
    } else if (e.key === 'Escape') {
      onCancel();
    }
  };

  return (
    <div
      className="fixed z-50 bg-white border border-gray-200 rounded-lg shadow-lg p-4 min-w-64"
      style={{ left: position.x, top: position.y }}
    >
      <div className="flex justify-between items-center mb-3">
        <h4 className="text-sm font-semibold text-gray-900">
          Pause Point {pausePoint.cue_index}
        </h4>
        <button
          type="button"
          onClick={onCancel}
          aria-label="Close"
          className="text-gray-400 hover:text-gray-600"
        >
          <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
          </svg>
        </button>
      </div>

      <div className="space-y-3">
        {/* Current value input */}
        <div>
          <label className="block text-xs text-gray-500 mb-1">
            Pause time (milliseconds)
          </label>
          <div className="flex items-center gap-2">
            <input
              ref={inputRef}
              type="number"
              value={value}
              onChange={(e) => handleValueChange(Number(e.target.value))}
              onKeyDown={handleKeyDown}
              step="1"
              className={`flex-1 px-2 py-1 text-sm border rounded ${
                error ? 'border-red-500' : 'border-gray-300'
              } focus:outline-none focus:ring-2 focus:ring-blue-500`}
            />
            <span className="text-xs text-gray-500">{formatMs(value)}</span>
          </div>
          {error && (
            <p className="text-xs text-red-500 mt-1">{error}</p>
          )}
        </div>

        {/* Bounds info */}
        <div className="text-xs text-gray-500 bg-gray-50 rounded p-2">
          <div>Min: {formatMs(pausePoint.min_bound_ms)}</div>
          <div>Max: {formatMs(pausePoint.max_bound_ms)}</div>
          <div>Original: {formatMs(pausePoint.original_ms)}</div>
        </div>

        {/* TTS Regeneration */}
        <div className="border-t pt-3">
          <button
            type="button"
            onClick={onRegenerateTTS}
            disabled={isRegenerationQueued}
            className={`w-full text-sm py-1.5 rounded ${
              isRegenerationQueued
                ? 'bg-amber-100 text-amber-700 cursor-not-allowed'
                : 'bg-orange-100 text-orange-700 hover:bg-orange-200'
            }`}
          >
            {isRegenerationQueued ? 'Regeneration Queued' : 'Queue TTS Regeneration'}
          </button>
        </div>

        {/* Action buttons */}
        <div className="flex gap-2">
          <button
            type="button"
            onClick={onCancel}
            className="flex-1 px-3 py-1.5 text-sm border border-gray-300 rounded hover:bg-gray-50"
          >
            Cancel
          </button>
          <button
            type="button"
            onClick={handleSave}
            disabled={!!error}
            className={`flex-1 px-3 py-1.5 text-sm text-white rounded ${
              error
                ? 'bg-gray-300 cursor-not-allowed'
                : 'bg-blue-600 hover:bg-blue-700'
            }`}
          >
            Save
          </button>
        </div>
      </div>
    </div>
  );
}
|
||||
201
frontend/src/components/TimelinePreview/TimelinePreview.tsx
Normal file
201
frontend/src/components/TimelinePreview/TimelinePreview.tsx
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
import { useState, useRef, useCallback } from 'react';
|
||||
import type { VideoSegmentMetadata, PausePointData } from '../../types/api';
|
||||
import { PausePointEditor } from './PausePointEditor';
|
||||
|
||||
/** Props accepted by TimelinePreview. */
interface TimelinePreviewProps {
  /** Segments drawn as coloured bars, positioned by `start_ms` / `duration_ms`. */
  segments: VideoSegmentMetadata[];
  /** Pause points drawn as thin vertical markers on top of the segments. */
  pausePoints: PausePointData[];
  /** Duration represented by the full width of the timeline, in milliseconds. */
  totalDurationMs: number;
  /** Position of the current-time indicator, in milliseconds. */
  currentTimeMs: number;
  /** Called whenever a segment bar is clicked. */
  onSegmentClick: (segment: VideoSegmentMetadata) => void;
  /** Called when a marker is clicked, or a freeze-frame segment with a matching pause point. */
  onPausePointClick: (pausePoint: PausePointData) => void;
  /** Called when the editor popover saves a new value for a pause point. */
  onPausePointUpdate: (cueIndex: number, adjustedMs: number) => void;
  /** Called when the editor popover requests TTS regeneration for a cue. */
  onRegenerateTTS: (cueIndex: number) => void;
  /** Cue indices currently queued for TTS regeneration. */
  regenerationQueue: number[];
}

/**
 * Horizontal timeline of video segments and pause points.
 *
 * Freeze-frame segments are drawn orange (amber when their cue is queued for
 * regeneration), other segments blue. Pause point markers are purple when an
 * adjusted value is present and red otherwise. Clicking a marker opens a
 * PausePointEditor popover anchored just below the marker.
 */
export function TimelinePreview({
  segments,
  pausePoints,
  totalDurationMs,
  currentTimeMs,
  onSegmentClick,
  onPausePointClick,
  onPausePointUpdate,
  onRegenerateTTS,
  regenerationQueue,
}: TimelinePreviewProps) {
  const [selectedPausePoint, setSelectedPausePoint] = useState<PausePointData | null>(null);
  const [editorPosition, setEditorPosition] = useState({ x: 0, y: 0 });
  const timelineRef = useRef<HTMLDivElement>(null);

  // One set lookup per segment instead of scanning the queue array each time.
  const queuedCues = new Set(regenerationQueue);

  /** Convert a millisecond value to a percentage of the total duration (0 when the duration is not positive). */
  const getPositionPercent = useCallback(
    (ms: number) => (totalDurationMs > 0 ? (ms / totalDurationMs) * 100 : 0),
    [totalDurationMs]
  );

  const handlePausePointMarkerClick = (
    e: React.MouseEvent,
    pausePoint: PausePointData
  ) => {
    // Keep the click from reaching elements underneath the marker.
    e.stopPropagation();
    // currentTarget is the marker the handler is attached to, regardless of
    // which descendant (if any) actually received the click.
    const rect = e.currentTarget.getBoundingClientRect();
    setEditorPosition({ x: rect.left, y: rect.bottom + 8 });
    setSelectedPausePoint(pausePoint);
    onPausePointClick(pausePoint);
  };

  const handleSegmentClick = (segment: VideoSegmentMetadata) => {
    onSegmentClick(segment);
    if (segment.is_freeze_frame && segment.cue_index !== null) {
      // Highlight the AD cue
      const pausePoint = pausePoints.find(pp => pp.cue_index === segment.cue_index);
      if (pausePoint) {
        onPausePointClick(pausePoint);
      }
    }
  };

  const handleEditorSave = (adjustedMs: number) => {
    if (selectedPausePoint) {
      onPausePointUpdate(selectedPausePoint.cue_index, adjustedMs);
      setSelectedPausePoint(null);
    }
  };

  const handleEditorClose = () => {
    setSelectedPausePoint(null);
  };

  /** Format a millisecond count as `m:ss.mmm`. */
  const formatTime = (ms: number) => {
    const totalSeconds = Math.floor(ms / 1000);
    const minutes = Math.floor(totalSeconds / 60);
    const seconds = totalSeconds % 60;
    const milliseconds = Math.floor(ms % 1000);
    return `${minutes}:${seconds.toString().padStart(2, '0')}.${milliseconds.toString().padStart(3, '0')}`;
  };

  return (
    <div className="relative">
      {/* Timeline container */}
      <div
        ref={timelineRef}
        className="relative h-16 bg-gray-100 rounded-lg overflow-hidden"
      >
        {/* Segments */}
        {segments.map((segment) => {
          const leftPercent = getPositionPercent(segment.start_ms);
          const widthPercent = getPositionPercent(segment.duration_ms);
          const isRegenerationQueued =
            segment.is_freeze_frame &&
            segment.cue_index !== null &&
            queuedCues.has(segment.cue_index);

          return (
            <div
              key={segment.segment_index}
              className={`absolute top-0 h-full cursor-pointer transition-all hover:opacity-90 ${
                segment.is_freeze_frame
                  ? isRegenerationQueued
                    ? 'bg-amber-400'
                    : 'bg-orange-400'
                  : 'bg-blue-400'
              }`}
              style={{
                left: `${leftPercent}%`,
                // Enforce a minimum width so very short segments stay visible and clickable.
                width: `${Math.max(widthPercent, 0.5)}%`,
              }}
              onClick={() => handleSegmentClick(segment)}
              title={
                segment.is_freeze_frame
                  ? `AD Cue ${segment.cue_index}${isRegenerationQueued ? ' (Regenerate queued)' : ''}`
                  : `Video segment ${segment.segment_index}`
              }
            >
              {/* Cue index label for freeze frames */}
              {segment.is_freeze_frame && segment.cue_index !== null && widthPercent > 2 && (
                <div className="absolute inset-0 flex items-center justify-center">
                  <span className="text-xs font-bold text-white drop-shadow">
                    {segment.cue_index}
                  </span>
                </div>
              )}
            </div>
          );
        })}

        {/* Pause point markers */}
        {pausePoints.map((pausePoint) => {
          const effectiveMs = pausePoint.adjusted_ms ?? pausePoint.original_ms;
          const leftPercent = getPositionPercent(effectiveMs);
          // Loose comparison so that a missing (undefined) adjusted_ms is treated
          // like null, matching the `??` fallback used for effectiveMs above.
          const isAdjusted = pausePoint.adjusted_ms != null;

          return (
            <div
              key={`pause-${pausePoint.cue_index}`}
              className={`absolute top-0 w-1 h-full cursor-pointer z-10 ${
                isAdjusted ? 'bg-purple-600' : 'bg-red-600'
              } hover:w-2 transition-all`}
              style={{ left: `${leftPercent}%` }}
              onClick={(e) => handlePausePointMarkerClick(e, pausePoint)}
              title={`Pause point ${pausePoint.cue_index}: ${formatTime(effectiveMs)}${
                isAdjusted ? ' (adjusted)' : ''
              }`}
            />
          );
        })}

        {/* Current time indicator */}
        <div
          className="absolute top-0 w-0.5 h-full bg-green-500 z-20 pointer-events-none"
          style={{ left: `${getPositionPercent(currentTimeMs)}%` }}
        />
      </div>

      {/* Time labels */}
      <div className="flex justify-between mt-1 text-xs text-gray-500">
        <span>0:00</span>
        <span>{formatTime(totalDurationMs)}</span>
      </div>

      {/* Legend */}
      <div className="flex gap-4 mt-2 text-xs">
        <div className="flex items-center gap-1">
          <div className="w-3 h-3 bg-blue-400 rounded" />
          <span>Video</span>
        </div>
        <div className="flex items-center gap-1">
          <div className="w-3 h-3 bg-orange-400 rounded" />
          <span>AD Audio</span>
        </div>
        <div className="flex items-center gap-1">
          <div className="w-3 h-3 bg-amber-400 rounded" />
          <span>Regenerate Queued</span>
        </div>
        <div className="flex items-center gap-1">
          <div className="w-1 h-3 bg-red-600" />
          <span>Pause Point</span>
        </div>
        <div className="flex items-center gap-1">
          <div className="w-1 h-3 bg-purple-600" />
          <span>Adjusted</span>
        </div>
      </div>

      {/* Pause point editor popover */}
      {selectedPausePoint && (
        <PausePointEditor
          pausePoint={selectedPausePoint}
          position={editorPosition}
          onSave={handleEditorSave}
          onCancel={handleEditorClose}
          onRegenerateTTS={() => {
            onRegenerateTTS(selectedPausePoint.cue_index);
            setSelectedPausePoint(null);
          }}
          isRegenerationQueued={queuedCues.has(selectedPausePoint.cue_index)}
        />
      )}
    </div>
  );
}
|
||||
2
frontend/src/components/TimelinePreview/index.ts
Normal file
2
frontend/src/components/TimelinePreview/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export { TimelinePreview } from './TimelinePreview';
|
||||
export { PausePointEditor } from './PausePointEditor';
|
||||
116
frontend/src/hooks/useAccessibleVideoEdit.ts
Normal file
116
frontend/src/hooks/useAccessibleVideoEdit.ts
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { apiClient } from '../lib/api';
|
||||
|
||||
/**
 * Query hook that loads the accessible-video edit state for one job/language
 * pair (used by QC review).
 *
 * The query stays disabled until both `jobId` and `language` are non-empty.
 * Results are considered fresh for 30 seconds and are not refetched on window
 * focus.
 */
export function useAccessibleVideoEditState(jobId: string, language: string) {
  const hasTarget = Boolean(jobId) && Boolean(language);
  const loadEditState = () => apiClient.getAccessibleVideoEditState(jobId, language);

  return useQuery({
    queryKey: ['jobs', jobId, 'accessible-video', language, 'edit-state'],
    queryFn: loadEditState,
    enabled: hasTarget,
    staleTime: 30 * 1000,
    refetchOnWindowFocus: false,
  });
}
|
||||
|
||||
/**
 * Mutation hook that saves an adjusted timing for a single pause point.
 *
 * On success the edit-state query of the same job/language is invalidated.
 */
export function useUpdatePausePoint() {
  const queryClient = useQueryClient();

  const savePausePoint = (vars: {
    jobId: string;
    language: string;
    cueIndex: number;
    adjustedMs: number;
  }) => apiClient.updatePausePoint(vars.jobId, vars.language, vars.cueIndex, vars.adjustedMs);

  return useMutation({
    mutationFn: savePausePoint,
    onSuccess: (_result, vars) => {
      const editStateKey = ['jobs', vars.jobId, 'accessible-video', vars.language, 'edit-state'];
      queryClient.invalidateQueries({ queryKey: editStateKey });
    },
  });
}
|
||||
|
||||
/**
 * Mutation hook that queues TTS regeneration for a list of cue indices.
 *
 * On success the edit-state query of the same job/language is invalidated.
 */
export function useQueueTTSRegeneration() {
  const queryClient = useQueryClient();

  const queueCues = (vars: {
    jobId: string;
    language: string;
    cueIndices: number[];
  }) => apiClient.queueTTSRegeneration(vars.jobId, vars.language, vars.cueIndices);

  return useMutation({
    mutationFn: queueCues,
    onSuccess: (_result, vars) => {
      const editStateKey = ['jobs', vars.jobId, 'accessible-video', vars.language, 'edit-state'];
      queryClient.invalidateQueries({ queryKey: editStateKey });
    },
  });
}
|
||||
|
||||
/**
 * Mutation hook that removes one cue from the TTS regeneration queue.
 *
 * On success the edit-state query of the same job/language is invalidated.
 */
export function useRemoveTTSRegeneration() {
  const queryClient = useQueryClient();

  const dequeueCue = (vars: {
    jobId: string;
    language: string;
    cueIndex: number;
  }) => apiClient.removeTTSRegeneration(vars.jobId, vars.language, vars.cueIndex);

  return useMutation({
    mutationFn: dequeueCue,
    onSuccess: (_result, vars) => {
      const editStateKey = ['jobs', vars.jobId, 'accessible-video', vars.language, 'edit-state'];
      queryClient.invalidateQueries({ queryKey: editStateKey });
    },
  });
}
|
||||
|
||||
/**
 * Mutation hook that starts a re-render of the accessible video with the
 * current QC changes.
 *
 * `whisperRefine` defaults to false when omitted. On success the job query,
 * the edit-state query for the same job/language, and the top-level jobs
 * query key are invalidated.
 */
export function useRerenderAccessibleVideo() {
  const queryClient = useQueryClient();

  const startRerender = ({
    jobId,
    language,
    whisperRefine = false,
  }: {
    jobId: string;
    language: string;
    whisperRefine?: boolean;
  }) => apiClient.rerenderAccessibleVideo(jobId, language, whisperRefine);

  return useMutation({
    mutationFn: startRerender,
    onSuccess: (_result, vars) => {
      const jobKey = ['jobs', vars.jobId];
      const editStateKey = [...jobKey, 'accessible-video', vars.language, 'edit-state'];

      // Invalidate job and edit state queries
      queryClient.invalidateQueries({ queryKey: jobKey });
      queryClient.invalidateQueries({ queryKey: editStateKey });
      queryClient.invalidateQueries({ queryKey: ['jobs'] });
    },
  });
}
|
||||
|
|
@ -33,6 +33,8 @@ import type {
|
|||
ReviewNoteCreateRequest,
|
||||
ReviewNoteUpdateRequest,
|
||||
ReviewNotesListResponse,
|
||||
AccessibleVideoEditState,
|
||||
PausePointData,
|
||||
} from '../types/api';
|
||||
|
||||
const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000';
|
||||
|
|
@ -386,6 +388,60 @@ class ApiClient {
|
|||
async deleteReviewNote(jobId: string, noteId: string): Promise<void> {
|
||||
await this.client.delete(`/jobs/${jobId}/review-notes/${noteId}`);
|
||||
}
|
||||
|
||||
// Accessible Video QC Editing endpoints
|
||||
/** GET the accessible-video edit state for a job/language and return the response body. */
async getAccessibleVideoEditState(jobId: string, language: string): Promise<AccessibleVideoEditState> {
  const url = `/jobs/${jobId}/accessible-video/${language}/edit-state`;
  const { data } = await this.client.get(url);
  return data;
}
|
||||
|
||||
/** PATCH a pause point with `{ adjusted_ms }` and return the response body. */
async updatePausePoint(
  jobId: string,
  language: string,
  cueIndex: number,
  adjustedMs: number
): Promise<PausePointData> {
  const url = `/jobs/${jobId}/accessible-video/${language}/pause-points/${cueIndex}`;
  const payload = { adjusted_ms: adjustedMs };
  const { data } = await this.client.patch(url, payload);
  return data;
}
|
||||
|
||||
/** POST `{ cue_indices }` to the TTS regeneration endpoint and return the response body. */
async queueTTSRegeneration(
  jobId: string,
  language: string,
  cueIndices: number[]
): Promise<{ message: string; queued_cues: number[] }> {
  const url = `/jobs/${jobId}/accessible-video/${language}/tts-regeneration`;
  const payload = { cue_indices: cueIndices };
  const { data } = await this.client.post(url, payload);
  return data;
}
|
||||
|
||||
/** DELETE one cue from the TTS regeneration endpoint and return the response body. */
async removeTTSRegeneration(
  jobId: string,
  language: string,
  cueIndex: number
): Promise<{ message: string }> {
  const url = `/jobs/${jobId}/accessible-video/${language}/tts-regeneration/${cueIndex}`;
  const { data } = await this.client.delete(url);
  return data;
}
|
||||
|
||||
/** POST `{ whisper_refine }` to the re-render endpoint and return the response body. */
async rerenderAccessibleVideo(
  jobId: string,
  language: string,
  whisperRefine: boolean = false
): Promise<Job> {
  const url = `/jobs/${jobId}/accessible-video/${language}/re-render`;
  const payload = { whisper_refine: whisperRefine };
  const { data } = await this.client.post(url, payload);
  return data;
}
|
||||
}
|
||||
|
||||
export const apiClient = new ApiClient();
|
||||
|
|
|
|||
|
|
@ -1,12 +1,21 @@
|
|||
import { useState, useEffect } from 'react';
|
||||
import { useState, useEffect, useRef } from 'react';
|
||||
import { useParams, useNavigate } from 'react-router-dom';
|
||||
import { useJob, useApproveEnglish, useRejectJob, useJobVttContent, useUpdateJobVtt, useJobDownloads, useAdjustVttTiming } from '../../hooks/useJob';
|
||||
import {
|
||||
useAccessibleVideoEditState,
|
||||
useUpdatePausePoint,
|
||||
useQueueTTSRegeneration,
|
||||
useRemoveTTSRegeneration,
|
||||
useRerenderAccessibleVideo,
|
||||
} from '../../hooks/useAccessibleVideoEdit';
|
||||
import { StatusBadge } from '../../components/StatusBadge';
|
||||
import { VttEditor } from '../../components/VttEditor/VttEditor';
|
||||
import { VideoWithCaptions } from '../../components/VideoWithCaptions';
|
||||
import { VoiceSelector } from '../../components/VoiceSelector';
|
||||
import { TimelinePreview } from '../../components/TimelinePreview';
|
||||
import { RerenderControls } from '../../components/RerenderControls';
|
||||
import { useToastContext } from '../../contexts/ToastContext';
|
||||
import type { TTSPreferences, AccessibleVideoMethod } from '../../types/api';
|
||||
import type { TTSPreferences, AccessibleVideoMethod, VideoSegmentMetadata, PausePointData } from '../../types/api';
|
||||
|
||||
export function QCDetail() {
|
||||
const { id } = useParams<{ id: string }>();
|
||||
|
|
@ -15,13 +24,42 @@ export function QCDetail() {
|
|||
const { data: job, isLoading, error } = useJob(id!);
|
||||
// Get source language from job (default to 'en' for backwards compatibility)
|
||||
const sourceLanguage = job?.source?.language || 'en';
|
||||
const { data: vttContent, isLoading: vttLoading } = useJobVttContent(id!, sourceLanguage);
|
||||
|
||||
// Get all available languages from outputs (after workflow change, translations happen before QC)
|
||||
const availableLanguages = job?.outputs ? Object.keys(job.outputs) : [sourceLanguage];
|
||||
|
||||
// Language selection for QC review
|
||||
const [selectedLanguage, setSelectedLanguage] = useState<string>(sourceLanguage);
|
||||
|
||||
// Video mode: original video or accessible video with AD
|
||||
const [videoMode, setVideoMode] = useState<'original' | 'accessible'>('original');
|
||||
|
||||
// Track video current time for timeline
|
||||
const [currentVideoTimeMs, setCurrentVideoTimeMs] = useState(0);
|
||||
const videoRef = useRef<HTMLVideoElement | null>(null);
|
||||
|
||||
// Local state for pending regenerations (cleared on successful re-render)
|
||||
const [pendingRegenerations, setPendingRegenerations] = useState<number[]>([]);
|
||||
const [pausePointsModified, setPausePointsModified] = useState(false);
|
||||
|
||||
// Fetch VTT content for selected language
|
||||
const { data: vttContent, isLoading: vttLoading } = useJobVttContent(id!, selectedLanguage);
|
||||
const { data: downloads } = useJobDownloads(id!);
|
||||
|
||||
// Fetch accessible video edit state for selected language
|
||||
const { data: editState, isLoading: editStateLoading } = useAccessibleVideoEditState(id!, selectedLanguage);
|
||||
|
||||
// Mutations for accessible video editing
|
||||
const updatePausePointMutation = useUpdatePausePoint();
|
||||
const queueTTSRegenerationMutation = useQueueTTSRegeneration();
|
||||
const removeTTSRegenerationMutation = useRemoveTTSRegeneration();
|
||||
const rerenderMutation = useRerenderAccessibleVideo();
|
||||
|
||||
const approveEnglishMutation = useApproveEnglish();
|
||||
const rejectJobMutation = useRejectJob();
|
||||
const updateVttMutation = useUpdateJobVtt();
|
||||
const adjustTimingMutation = useAdjustVttTiming();
|
||||
|
||||
|
||||
const [reviewNotes, setReviewNotes] = useState('');
|
||||
const [showRejectForm, setShowRejectForm] = useState(false);
|
||||
const [captionsVtt, setCaptionsVtt] = useState('');
|
||||
|
|
@ -44,13 +82,44 @@ export function QCDetail() {
|
|||
});
|
||||
const [accessibleVideoMethod, setAccessibleVideoMethod] = useState<AccessibleVideoMethod>('pause_insert');
|
||||
|
||||
const isProcessing = approveEnglishMutation.isPending || rejectJobMutation.isPending || updateVttMutation.isPending || adjustTimingMutation.isPending;
|
||||
const isProcessing = approveEnglishMutation.isPending || rejectJobMutation.isPending || updateVttMutation.isPending || adjustTimingMutation.isPending || rerenderMutation.isPending;
|
||||
const isRendering = rerenderMutation.isPending || job?.status === 'rendering_qc';
|
||||
|
||||
// Get video URL from downloads
|
||||
// Initialize selected language from source language when job loads
|
||||
useEffect(() => {
|
||||
if (sourceLanguage) {
|
||||
setSelectedLanguage(sourceLanguage);
|
||||
}
|
||||
}, [sourceLanguage]);
|
||||
|
||||
// Reset states when language changes
|
||||
useEffect(() => {
|
||||
setPendingRegenerations([]);
|
||||
setPausePointsModified(false);
|
||||
}, [selectedLanguage]);
|
||||
|
||||
// Sync pending regenerations from server edit state
|
||||
useEffect(() => {
|
||||
if (editState?.tts_regeneration_queue) {
|
||||
const queuedIndices = editState.tts_regeneration_queue
|
||||
.filter(item => item.status === 'pending')
|
||||
.map(item => item.cue_index);
|
||||
setPendingRegenerations(queuedIndices);
|
||||
}
|
||||
}, [editState?.tts_regeneration_queue]);
|
||||
|
||||
// Get video URL from downloads - support both original and accessible video
|
||||
const videoUrl = typeof downloads?.downloads?.source_video === 'string'
|
||||
? downloads.downloads.source_video
|
||||
: '';
|
||||
|
||||
// Get accessible video URL for selected language
|
||||
const accessibleVideoUrl = editState?.accessible_video_url || (
|
||||
downloads?.downloads?.[selectedLanguage] &&
|
||||
typeof downloads.downloads[selectedLanguage] === 'object' &&
|
||||
(downloads.downloads[selectedLanguage] as { accessible_video_mp4?: string }).accessible_video_mp4
|
||||
) || '';
|
||||
|
||||
// Load VTT content when fetched
|
||||
useEffect(() => {
|
||||
if (vttContent) {
|
||||
|
|
@ -128,7 +197,7 @@ export function QCDetail() {
|
|||
data: {
|
||||
captions_vtt: captionsVtt,
|
||||
audio_description_vtt: adVtt,
|
||||
language: sourceLanguage // Use source language instead of hardcoded 'en'
|
||||
language: selectedLanguage // Use selected language for multi-language support
|
||||
}
|
||||
});
|
||||
setHasUnsavedChanges(false);
|
||||
|
|
@ -201,7 +270,7 @@ export function QCDetail() {
|
|||
await adjustTimingMutation.mutateAsync({
|
||||
id,
|
||||
offsetSeconds: timingOffset,
|
||||
language: sourceLanguage, // Use source language instead of hardcoded 'en'
|
||||
language: selectedLanguage, // Use selected language for multi-language support
|
||||
adjustCaptions,
|
||||
adjustAudioDescription,
|
||||
});
|
||||
|
|
@ -214,6 +283,99 @@ export function QCDetail() {
|
|||
}
|
||||
};
|
||||
|
||||
// Accessible video editing handlers
|
||||
// Timeline segment click: move the player to the start of the segment and,
// for freeze-frame segments that carry a cue index, log the cue.
const handleSegmentClick = (segment: VideoSegmentMetadata) => {
  const player = videoRef.current;
  if (player) {
    // start_ms is in milliseconds; currentTime takes seconds.
    player.currentTime = segment.start_ms / 1000;
  }

  const isCueSegment = segment.is_freeze_frame && segment.cue_index !== null;
  if (!isCueSegment) {
    return;
  }
  // Could dispatch an event or set state to highlight the cue
  console.log(`AD cue ${segment.cue_index} clicked`);
};
|
||||
|
||||
// Pause point click: move the player to the pause point's adjusted time,
// falling back to its original time when no adjustment is present.
const handlePausePointClick = (pausePoint: PausePointData) => {
  const player = videoRef.current;
  const targetMs = pausePoint.adjusted_ms ?? pausePoint.original_ms;
  if (!player) {
    return;
  }
  player.currentTime = targetMs / 1000;
};
|
||||
|
||||
// Save a new timing for one pause point in the selected language, flag that
// pause points were modified, and report the outcome with a toast.
const handlePausePointUpdate = async (cueIndex: number, adjustedMs: number) => {
  if (!id) {
    return;
  }

  const request = {
    jobId: id,
    language: selectedLanguage,
    cueIndex,
    adjustedMs,
  };

  try {
    await updatePausePointMutation.mutateAsync(request);
    setPausePointsModified(true);
    toast.toastOnly.success(`Pause point ${cueIndex} updated to ${adjustedMs}ms`);
  } catch (error) {
    console.error('Failed to update pause point:', error);
    toast.toastOnly.error('Failed to update pause point. Please try again.');
  }
};
|
||||
|
||||
// Queue TTS regeneration for one cue in the selected language and mirror the
// change in the local pending list.
const handleRegenerateTTS = async (cueIndex: number) => {
  if (!id) return;

  try {
    await queueTTSRegenerationMutation.mutateAsync({
      jobId: id,
      language: selectedLanguage,
      cueIndices: [cueIndex],
    });
    // Avoid duplicate entries when the same cue is queued more than once.
    setPendingRegenerations(prev => (prev.includes(cueIndex) ? prev : [...prev, cueIndex]));
    toast.toastOnly.success(`TTS regeneration queued for cue ${cueIndex}`);
  } catch (error) {
    console.error('Failed to queue TTS regeneration:', error);
    toast.toastOnly.error('Failed to queue TTS regeneration. Please try again.');
  }
};
|
||||
|
||||
// Remove every locally pending cue from the TTS regeneration queue.
// Cues whose removal request fails stay in the local pending list and an error
// toast is shown, so the UI never reports a cleared queue that was not cleared.
const handleClearRegenerationQueue = async () => {
  if (!id) return;

  const failedCues: number[] = [];

  // Remove all pending regenerations one by one
  for (const cueIndex of pendingRegenerations) {
    try {
      await removeTTSRegenerationMutation.mutateAsync({
        jobId: id,
        language: selectedLanguage,
        cueIndex,
      });
    } catch (error) {
      console.error(`Failed to remove cue ${cueIndex} from queue:`, error);
      failedCues.push(cueIndex);
    }
  }

  // Only the cues that could not be removed remain pending.
  setPendingRegenerations(failedCues);

  if (failedCues.length > 0) {
    toast.toastOnly.error(
      `Failed to remove ${failedCues.length} cue(s) from the regeneration queue. Please try again.`
    );
    return;
  }
  toast.toastOnly.success('Regeneration queue cleared');
};
|
||||
|
||||
// Start a re-render of the accessible video for the selected language.
// On success the local pending-regeneration list and the "pause points
// modified" flag are reset; on failure an error toast is shown.
const handleRender = async ({ whisperRefine }: { whisperRefine: boolean }) => {
  if (!id) {
    return;
  }

  const request = {
    jobId: id,
    language: selectedLanguage,
    whisperRefine,
  };

  try {
    await rerenderMutation.mutateAsync(request);
    toast.toastOnly.success('Accessible video rendering started');
    setPendingRegenerations([]);
    setPausePointsModified(false);
  } catch (error) {
    console.error('Failed to start re-render:', error);
    toast.toastOnly.error('Failed to start re-render. Please try again.');
  }
};
|
||||
|
||||
if (isLoading || vttLoading) {
|
||||
return (
|
||||
<div className="container mx-auto px-4 py-8">
|
||||
|
|
@ -275,6 +437,49 @@ export function QCDetail() {
|
|||
</div>
|
||||
)}
|
||||
|
||||
{/* Language Tabs - shown when multiple languages are available */}
|
||||
{availableLanguages.length > 1 && (
|
||||
<div className="mb-6">
|
||||
<div className="flex items-center gap-2 mb-2">
|
||||
<span className="text-sm font-medium text-gray-700">Language:</span>
|
||||
<span className="text-xs text-gray-500">({availableLanguages.length} translations available)</span>
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{availableLanguages.map((lang) => (
|
||||
<button
|
||||
key={lang}
|
||||
onClick={() => setSelectedLanguage(lang)}
|
||||
className={`px-4 py-2 text-sm font-medium rounded-lg transition-colors ${
|
||||
selectedLanguage === lang
|
||||
? 'bg-indigo-600 text-white'
|
||||
: 'bg-gray-100 text-gray-700 hover:bg-gray-200'
|
||||
}`}
|
||||
>
|
||||
{lang.toUpperCase()}
|
||||
{lang === sourceLanguage && (
|
||||
<span className="ml-1 text-xs opacity-75">(Source)</span>
|
||||
)}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Rendering Status Banner */}
|
||||
{isRendering && (
|
||||
<div className="mb-6 p-4 bg-purple-50 border border-purple-200 rounded-md">
|
||||
<div className="flex items-center gap-3">
|
||||
<svg className="animate-spin h-5 w-5 text-purple-600" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
<span className="font-medium text-purple-900">
|
||||
Accessible video is being re-rendered with your changes...
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* View Mode Toggle and Shortcuts */}
|
||||
<div className="mb-6 flex items-center justify-between">
|
||||
<div className="flex items-center space-x-4">
|
||||
|
|
@ -322,21 +527,119 @@ export function QCDetail() {
|
|||
{(viewMode === 'side-by-side' || viewMode === 'video-only') && (
|
||||
<div className="mb-8">
|
||||
<div className="bg-white border border-gray-200 rounded-lg p-6">
|
||||
<h3 className="text-lg font-medium text-gray-900 mb-4">Video Preview</h3>
|
||||
{videoUrl ? (
|
||||
<VideoWithCaptions
|
||||
videoUrl={videoUrl}
|
||||
captionsVtt={captionsVtt}
|
||||
audioDescriptionVtt={adVtt}
|
||||
sourceLanguage={sourceLanguage}
|
||||
/>
|
||||
) : (
|
||||
<div className="text-center py-8 text-gray-500">
|
||||
<svg className="w-12 h-12 mx-auto mb-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
|
||||
</svg>
|
||||
<p>Video preview will be available once processing begins</p>
|
||||
</div>
|
||||
<div className="flex items-center justify-between mb-4">
|
||||
<h3 className="text-lg font-medium text-gray-900">Video Preview</h3>
|
||||
|
||||
{/* Video Mode Tabs - only show if accessible video is available */}
|
||||
{(accessibleVideoUrl || editState?.video_segments?.length) && (
|
||||
<div className="flex rounded-lg border border-gray-300 overflow-hidden">
|
||||
<button
|
||||
onClick={() => setVideoMode('original')}
|
||||
className={`px-4 py-1.5 text-sm font-medium transition-colors ${
|
||||
videoMode === 'original'
|
||||
? 'bg-blue-600 text-white'
|
||||
: 'bg-white text-gray-700 hover:bg-gray-50'
|
||||
}`}
|
||||
>
|
||||
Original Video
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setVideoMode('accessible')}
|
||||
className={`px-4 py-1.5 text-sm font-medium border-l border-gray-300 transition-colors ${
|
||||
videoMode === 'accessible'
|
||||
? 'bg-purple-600 text-white'
|
||||
: 'bg-white text-gray-700 hover:bg-gray-50'
|
||||
}`}
|
||||
>
|
||||
Accessible Video
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Original Video Mode */}
|
||||
{videoMode === 'original' && (
|
||||
<>
|
||||
{videoUrl ? (
|
||||
<VideoWithCaptions
|
||||
videoUrl={videoUrl}
|
||||
captionsVtt={captionsVtt}
|
||||
audioDescriptionVtt={adVtt}
|
||||
sourceLanguage={selectedLanguage}
|
||||
/>
|
||||
) : (
|
||||
<div className="text-center py-8 text-gray-500">
|
||||
<svg className="w-12 h-12 mx-auto mb-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
|
||||
</svg>
|
||||
<p>Video preview will be available once processing begins</p>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Accessible Video Mode */}
|
||||
{videoMode === 'accessible' && (
|
||||
<>
|
||||
{accessibleVideoUrl ? (
|
||||
<div className="space-y-4">
|
||||
<video
|
||||
ref={videoRef}
|
||||
src={accessibleVideoUrl}
|
||||
controls
|
||||
className="w-full rounded-lg"
|
||||
onTimeUpdate={(e) => setCurrentVideoTimeMs(e.currentTarget.currentTime * 1000)}
|
||||
>
|
||||
Your browser does not support the video tag.
|
||||
</video>
|
||||
|
||||
{/* Timeline Preview */}
|
||||
{editState && editState.video_segments && editState.video_segments.length > 0 && (
|
||||
<div className="mt-4 p-4 bg-gray-50 rounded-lg">
|
||||
<h4 className="text-sm font-medium text-gray-700 mb-3">Timeline Preview</h4>
|
||||
<TimelinePreview
|
||||
segments={editState.video_segments}
|
||||
pausePoints={editState.pause_points || []}
|
||||
totalDurationMs={editState.total_duration_ms || 0}
|
||||
currentTimeMs={currentVideoTimeMs}
|
||||
onSegmentClick={handleSegmentClick}
|
||||
onPausePointClick={handlePausePointClick}
|
||||
onPausePointUpdate={handlePausePointUpdate}
|
||||
onRegenerateTTS={handleRegenerateTTS}
|
||||
regenerationQueue={pendingRegenerations}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Re-render Controls */}
|
||||
{editState && (
|
||||
<RerenderControls
|
||||
pendingRegenerations={pendingRegenerations}
|
||||
pausePointsModified={pausePointsModified}
|
||||
isRendering={isRendering}
|
||||
onRender={handleRender}
|
||||
onClearQueue={handleClearRegenerationQueue}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
) : editStateLoading ? (
|
||||
<div className="text-center py-8 text-gray-500">
|
||||
<svg className="animate-spin h-8 w-8 mx-auto mb-4 text-purple-600" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
<p>Loading accessible video...</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-center py-8 text-gray-500">
|
||||
<svg className="w-12 h-12 mx-auto mb-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
|
||||
</svg>
|
||||
<p>Accessible video not available for this language yet.</p>
|
||||
<p className="text-sm mt-1">Translation and TTS synthesis may still be in progress.</p>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -349,16 +652,16 @@ export function QCDetail() {
|
|||
<VttEditor
|
||||
vttContent={captionsVtt}
|
||||
onChange={handleCaptionsChange}
|
||||
title="Closed Captions"
|
||||
title={`Closed Captions (${selectedLanguage.toUpperCase()})`}
|
||||
readOnly={isProcessing}
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
{job.requested_outputs.audio_description_vtt && (
|
||||
<VttEditor
|
||||
vttContent={adVtt}
|
||||
onChange={handleAdChange}
|
||||
title="Audio Description"
|
||||
title={`Audio Description (${selectedLanguage.toUpperCase()})`}
|
||||
readOnly={isProcessing}
|
||||
/>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ export type JobStatus =
|
|||
| "tts_failed" // TTS synthesis failed after retries, requires reprocessing
|
||||
| "rendering_video" // Accessible video rendering in progress
|
||||
| "render_failed" // Accessible video rendering failed, requires reprocessing
|
||||
| "rendering_qc" // Re-rendering accessible video during QC review
|
||||
| "pending_final_review"
|
||||
| "completed";
|
||||
|
||||
|
|
@ -329,4 +330,55 @@ export interface ReviewNoteUpdateRequest {
|
|||
// List payload for review notes: the notes themselves plus a numeric total.
// NOTE(review): whether `total` counts only the returned notes or all matching notes is not visible here — confirm against the API.
export interface ReviewNotesListResponse {
|
||||
notes: ReviewNote[];
|
||||
total: number;
|
||||
}
|
||||
|
||||
// Accessible Video QC Editing Types
|
||||
|
||||
// One pause point, keyed by cue index, as consumed by the QC timeline (passed as `pausePoints` to TimelinePreview).
// NOTE(review): field meanings are inferred from names; presumably mirrors the backend PausePointResponse schema — confirm.
export interface PausePointData {
|
||||
cue_index: number;
|
||||
original_ms: number; // presumably milliseconds (per the `_ms` suffix) — confirm
|
||||
adjusted_ms: number | null; // nullable by type; presumably null means "not adjusted" — confirm
|
||||
min_bound_ms: number; // presumably the lower limit allowed for adjusted_ms — confirm
|
||||
max_bound_ms: number; // presumably the upper limit allowed for adjusted_ms — confirm
|
||||
}
|
||||
|
||||
// Metadata for one segment of the accessible video (passed as `segments` to TimelinePreview).
// NOTE(review): presumably mirrors the backend VideoSegmentResponse schema; time fields look like milliseconds per the `_ms` suffix — confirm.
export interface VideoSegmentMetadata {
|
||||
segment_index: number;
|
||||
start_ms: number;
|
||||
end_ms: number;
|
||||
gcs_uri: string; // presumably a Google Cloud Storage URI for the persisted segment — confirm
|
||||
duration_ms: number;
|
||||
is_freeze_frame: boolean;
|
||||
cue_index: number | null; // AD cue index for freeze frames
|
||||
}
|
||||
|
||||
// Closed set of string states used by TTSRegenerationItem.status.
export type TTSRegenerationStatus = "pending" | "processing" | "completed" | "failed";
|
||||
|
||||
// One entry in the TTS regeneration queue, identified by the cue it targets.
export interface TTSRegenerationItem {
|
||||
cue_index: number;
|
||||
requested_at: string; // presumably an ISO-8601 timestamp string — confirm
|
||||
new_text?: string; // optional; presumably replacement cue text to synthesize — confirm
|
||||
status: TTSRegenerationStatus;
|
||||
error_message?: string; // optional; presumably populated when status is "failed" — confirm
|
||||
}
|
||||
|
||||
// Edit state for an accessible video in QC: pause points, video segments, the TTS regeneration queue and render info.
// QCDetail reads video_segments, pause_points and total_duration_ms from this object to drive TimelinePreview.
// NOTE(review): presumably the client-side shape of the edit-state endpoint response — confirm.
export interface AccessibleVideoEditState {
|
||||
pause_points: PausePointData[];
|
||||
video_segments: VideoSegmentMetadata[];
|
||||
tts_regeneration_queue: TTSRegenerationItem[];
|
||||
last_render_at: string | null; // nullable by type; presumably null until a render has happened — confirm
|
||||
total_duration_ms: number;
|
||||
accessible_video_url: string | null; // nullable by type; presumably null when no rendered video exists — confirm
|
||||
}
|
||||
|
||||
// Request body carrying a new adjusted position for a pause point.
// NOTE(review): the target cue is not part of this body; presumably it is identified elsewhere (e.g. the URL) — confirm.
export interface PausePointUpdateRequest {
|
||||
adjusted_ms: number;
|
||||
}
|
||||
|
||||
// Request body listing cue indices for the TTS regeneration queue.
export interface TTSRegenerationQueueRequest {
|
||||
cue_indices: number[];
|
||||
}
|
||||
|
||||
// Request body for re-rendering the accessible video.
export interface RerenderAccessibleVideoRequest {
|
||||
whisper_refine: boolean; // presumably toggles the optional Whisper refinement pass during re-render — confirm
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue