# video-accessibility/backend/app/schemas/whisper.py

"""Schemas for Whisper transcript caching."""

from pydantic import BaseModel, Field


class CachedWordTimestamp(BaseModel):
    """Word timestamp for MongoDB storage.

    A single transcribed word together with the time span it occupies.
    All three fields are required; no ordering or range checks are applied
    here, so ``end`` is not guaranteed to be greater than ``start``.
    """
    # The transcribed token text, stored as-is.
    word: str
    # Start of the word; presumably seconds from the beginning of the
    # audio -- TODO confirm the unit against the producer.
    start: float
    # End of the word, in the same unit as ``start``.
    end: float


class CachedWhisperTranscript(BaseModel):
    """Cached Whisper transcript stored in job document.

    Bundles the word-level timestamps with the metadata recorded alongside
    them. ``words`` defaults to an empty list when omitted; ``model_name``,
    ``audio_duration`` and ``created_at`` are required.
    """
    # default_factory gives every instance its own fresh list.
    words: list[CachedWordTimestamp] = Field(
        default_factory=list,
        description="Word-level timestamps from Whisper"
    )
    model_name: str = Field(..., description="Whisper model used")
    audio_duration: float = Field(..., description="Source audio duration in seconds")
    # Kept as a plain string: the model only checks that it is a str and does
    # not parse or validate the ISO format.
    created_at: str = Field(..., description="ISO timestamp when transcript was created")