110 lines
3.5 KiB
Python
110 lines
3.5 KiB
Python
|
|
from google.cloud import translate_v2 as translate
|
|
|
|
from ..core.config import settings
|
|
from ..core.logging import get_logger
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = get_logger(__name__)
|
|
|
|
class TranslateService:
    """Translate WebVTT subtitle documents with the Google Translate client.

    Only cue timings and cue text survive a round trip: the header line, cue
    identifiers and NOTE/STYLE/REGION blocks are not carried over into the
    rebuilt document.
    """

    # Cloud Translation Basic (v2) limits how many text segments one request
    # may carry (128 per its REST reference), so cue texts are sent in
    # batches of at most this size.
    _MAX_SEGMENTS_PER_REQUEST = 128

    # Keywords that open a metadata block whose lines are never cue text.
    _NON_CUE_BLOCKS = ("NOTE", "STYLE", "REGION")

    def __init__(self):
        """Create the API client when a translate API key is configured.

        When no key is configured a warning is logged and ``self.client`` is
        set to ``None``; ``translate_vtt`` then raises ``ValueError``.
        """
        if settings.translate_api_key:
            # NOTE(review): the key only gates construction here; it is not
            # passed to Client(), which therefore has to locate credentials
            # on its own - confirm this is the intended setup.
            self.client = translate.Client()
        else:
            logger.warning("Google Translate API key not configured")
            self.client = None

    async def translate_vtt(
        self,
        vtt_content: str,
        target_language: str,
        source_language: str = "en",
    ) -> str:
        """Translate the cue text of a VTT document, keeping cue timings.

        Args:
            vtt_content: Raw WebVTT document.
            target_language: Language code to translate into.
            source_language: Language code of the cue text. Defaults to
                ``"en"``, the value that used to be hard-coded.

        Returns:
            A rebuilt VTT document with translated cue text, or
            ``vtt_content`` unchanged when it contains no cue text.

        Raises:
            ValueError: If the translate client is not configured.
            RuntimeError: If the client returns a different number of
                results than texts submitted.
            Exception: Any client error is logged and re-raised unchanged.
        """
        # Local import keeps the new dependency inside this block.
        import asyncio

        if not self.client:
            raise ValueError("Google Translate not configured")

        cues = self._parse_vtt_cues(vtt_content)
        if not cues:
            return vtt_content

        texts = [cue["text"] for cue in cues]
        step = self._MAX_SEGMENTS_PER_REQUEST
        translated: list[str] = []

        try:
            for offset in range(0, len(texts), step):
                batch = texts[offset:offset + step]
                # client.translate is a blocking call; running it in a worker
                # thread keeps the event loop free while the request is in
                # flight.
                results = await asyncio.to_thread(
                    self.client.translate,
                    batch,
                    target_language=target_language,
                    source_language=source_language,
                )
                # Defensive: tolerate a single mapping instead of a list.
                if isinstance(results, dict):
                    results = [results]
                if len(results) != len(batch):
                    raise RuntimeError(
                        f"Expected {len(batch)} translations, "
                        f"got {len(results)}"
                    )
                translated.extend(item["translatedText"] for item in results)
        except Exception as e:
            logger.error(f"Translation failed: {e}")
            raise

        translated_cues = [
            {"start": cue["start"], "end": cue["end"], "text": text}
            for cue, text in zip(cues, translated)
        ]
        return self._build_vtt(translated_cues)

    def _parse_vtt_cues(self, vtt_content: str) -> list[dict[str, str]]:
        """Extract ``start``/``end``/``text`` cues from VTT content.

        The document is read block by block, with blank lines as block
        boundaries. Inside a cue block, every line after the timing line is
        cue text; multiple text lines are joined with a single space so the
        translator receives whole sentences. The header, cue identifiers and
        NOTE/STYLE/REGION blocks are ignored, as are cues without any text.

        Because blank lines delimit cues, input that omits them between cues
        will have a following cue identifier folded into the preceding text.
        """
        cues: list[dict[str, str]] = []
        current: dict[str, str] | None = None  # cue being assembled
        in_metadata = False  # inside a NOTE/STYLE/REGION block

        def flush() -> None:
            # Keep only cues that actually carry text.
            if current is not None and current["text"]:
                cues.append(current)

        for raw_line in vtt_content.strip().splitlines():
            line = raw_line.strip()

            if not line:
                # A blank line closes whatever block was open.
                flush()
                current = None
                in_metadata = False
                continue

            if " --> " in line:
                # A timing line always opens a new cue.
                flush()
                start, _, end = line.partition(" --> ")
                current = {"start": start.strip(), "end": end.strip(), "text": ""}
                in_metadata = False
                continue

            if in_metadata:
                continue

            if current is None:
                # Outside a cue: a metadata block opener, the WEBVTT header,
                # or a cue identifier - none of which is translatable text.
                if line.startswith(self._NON_CUE_BLOCKS):
                    in_metadata = True
                continue

            current["text"] = f"{current['text']} {line}" if current["text"] else line

        flush()
        return cues

    def _build_vtt(self, cues: list[dict[str, str]]) -> str:
        """Render cues as a VTT document.

        Output is the ``WEBVTT`` header and a blank line, then for each cue
        its timing line, its text and a blank separator line.
        """
        vtt_lines = ["WEBVTT", ""]
        for cue in cues:
            vtt_lines += [f"{cue['start']} --> {cue['end']}", cue["text"], ""]
        return "\n".join(vtt_lines)
|
|
|
|
|
|
# Global service instance
|
|
# Constructed at import time. When no translate API key is configured the
# instance still exists, but its client is None.
translate_service = TranslateService()
|