- Upload form now has an "English / Different language" radio button with an optional language hint
- Gemini auto-detects language and saves outputs to outputs.{detected_language}
- QC review dynamically loads/saves VTT for source language
- New APPROVED_SOURCE status for non-English videos (APPROVED_ENGLISH kept for backward compatibility)
- Translation pipeline reads from source language and passes source_language to Google Translate
- All existing English jobs continue to work unchanged
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
117 lines
3.9 KiB
Python
117 lines
3.9 KiB
Python
|
|
from google.cloud import translate_v2 as translate
|
|
|
|
from ..core.config import settings
|
|
from ..core.logging import get_logger
|
|
|
|
# Module-level logger, obtained from the project's logging helper.
logger = get_logger(__name__)
|
|
|
|
class TranslateService:
    """Translate WebVTT subtitle files with the Google Translate v2 client.

    The service parses a VTT document into cues, sends the cue texts to
    Google Translate and rebuilds a VTT document that keeps the original
    timings.
    """

    # The Translation v2 API documents a maximum of 128 strings per request,
    # so longer subtitle files are sent in several requests.
    _MAX_SEGMENTS_PER_REQUEST = 128

    def __init__(self):
        """Create the Google Translate client when an API key is configured.

        The configured key is only used as an on/off switch here; the client
        itself is constructed without arguments. When no key is configured
        ``self.client`` is ``None`` and ``translate_vtt`` raises.
        """
        if settings.translate_api_key:
            self.client = translate.Client()
        else:
            logger.warning("Google Translate API key not configured")
            self.client = None

    async def translate_vtt(
        self, vtt_content: str, target_language: str, source_language: str = "en"
    ) -> str:
        """
        Translate VTT content while preserving timing and structure.

        Args:
            vtt_content: The VTT file content to translate
            target_language: The language code to translate to (e.g., 'es', 'fr')
            source_language: The source language code (default: 'en')

        Returns:
            A new VTT document with the same cue timings and translated text.
            If no cues with text are found, ``vtt_content`` is returned as is.

        Raises:
            ValueError: If the client is not configured, or if the number of
                translations returned does not match the number of cues.
            Exception: Any error raised by the client is logged and re-raised.
        """
        if not self.client:
            raise ValueError("Google Translate not configured")

        cues = self._parse_vtt_cues(vtt_content)
        if not cues:
            return vtt_content

        texts_to_translate = [cue["text"] for cue in cues]

        try:
            translated_texts: list[str] = []
            # NOTE(review): client.translate is called without ``await``, so
            # it appears to run synchronously on the event loop - confirm
            # whether it should be moved to a worker thread.
            for batch in self._batched(
                texts_to_translate, self._MAX_SEGMENTS_PER_REQUEST
            ):
                results = self.client.translate(
                    batch,
                    target_language=target_language,
                    source_language=source_language,
                )
                # A list input yields a list of results; tolerate a single
                # dict so that one-element batches cannot break the rebuild.
                if isinstance(results, dict):
                    results = [results]
                translated_texts.extend(
                    result["translatedText"] for result in results
                )

            if len(translated_texts) != len(cues):
                raise ValueError(
                    f"Expected {len(cues)} translations, "
                    f"got {len(translated_texts)}"
                )

            translated_cues = [
                {"start": cue["start"], "end": cue["end"], "text": text}
                for cue, text in zip(cues, translated_texts)
            ]
            return self._build_vtt(translated_cues)

        except Exception as e:
            logger.error(f"Translation failed: {e}")
            raise

    @staticmethod
    def _batched(items: list[str], size: int) -> list[list[str]]:
        """Split ``items`` into consecutive slices of at most ``size`` elements."""
        return [items[i:i + size] for i in range(0, len(items), size)]

    def _parse_vtt_cues(self, vtt_content: str) -> list[dict[str, str]]:
        """Parse VTT content and extract timing and text cues.

        Every cue becomes a dict with ``start``, ``end`` and ``text`` keys.
        All text lines that belong to one cue are joined with a single space.
        Lines outside a cue (the WEBVTT header, NOTE blocks, cue identifiers)
        are ignored, and cues without any text are dropped.
        """
        cues: list[dict[str, str]] = []
        current_cue = None

        for raw_line in vtt_content.strip().splitlines():
            line = raw_line.strip()

            if " --> " in line:
                # A timing line starts a new cue and closes the previous one.
                if current_cue and current_cue["text"]:
                    cues.append(current_cue)
                timing_parts = line.split(" --> ")
                current_cue = {
                    "start": timing_parts[0].strip(),
                    "end": timing_parts[1].strip(),
                    "text": "",
                }
            elif not line:
                # A blank line ends a cue once it has text; a cue that is
                # still empty keeps waiting for its first text line.
                if current_cue and current_cue["text"]:
                    cues.append(current_cue)
                    current_cue = None
            elif current_cue is not None:
                # Subtitle text; later lines of the same cue are appended.
                if current_cue["text"]:
                    current_cue["text"] += " " + line
                else:
                    current_cue["text"] = line
            # Anything else sits outside a cue and carries no subtitle text.

        # Add final cue if exists
        if current_cue and current_cue["text"]:
            cues.append(current_cue)

        return cues

    def _build_vtt(self, cues: list[dict[str, str]]) -> str:
        """Build VTT content from cues.

        The output starts with the ``WEBVTT`` header and a blank line; every
        cue is written as a timing line, its text and a blank separator line.
        """
        vtt_lines = ["WEBVTT", ""]

        for cue in cues:
            vtt_lines.append(f"{cue['start']} --> {cue['end']}")
            vtt_lines.append(cue["text"])
            vtt_lines.append("")  # Empty line between cues

        return "\n".join(vtt_lines)
|
|
|
|
|
|
# Global service instance
|
|
# Instantiated at import time; when no API key is configured, `client` is
# None and translate_vtt raises ValueError.
translate_service = TranslateService()
|