chore: add one-off script to regenerate AD cue TTS with different voice

Adds a script for replacing a single cue's voice (e.g., Canadian French → a female France-French voice) without re-running the full pipeline.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 10:58:18 +00:00 · 2026-03-03 10:58:18 +00:00 · 64a3fa2bef
commit 64a3fa2bef
parent 030f1b67ee
1 changed files with 122 additions and 0 deletions
--- a/scripts/replace_cue_voice.py
+++ b/scripts/replace_cue_voice.py
@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+"""
+One-off script: Generate a new TTS MP3 for a single AD cue.
+
+Downloads the AD VTT from GCS, shows all cues, lets you pick one,
+synthesizes it with a different voice, and saves the MP3 locally.
+
+Usage (from project root on the server):
+    PYTHONPATH=backend python scripts/replace_cue_voice.py
+"""
+
+import asyncio
+import os
+import sys
+
+# Add backend to path so we can import app modules
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "backend"))
+
+# Load .env before importing app modules that read settings at import time
+from dotenv import load_dotenv
+load_dotenv(os.path.join(os.path.dirname(__file__), "..", "backend", ".env"))
+
+from app.core.config import settings
+from app.services.gemini_tts import GeminiTTSService
+from app.services.gcs import gcs_service
+from app.tasks.tts_synthesis import parse_ad_cues
+
+
+# Voice names printed at the voice prompt as a quick reference.
+FEMALE_VOICES = [
+    "Kore",
+    "Leda",
+    "Aoede",
+    "Callirrhoe",
+    "Autonoe",
+    "Erinome",
+    "Laomedeia",
+    "Achernar",
+    "Despina",
+    "Pulcherrima",
+]
+MALE_VOICES = [
+    "Puck",
+    "Charon",
+    "Fenrir",
+    "Orus",
+    "Enceladus",
+    "Iapetus",
+]
+
+# Hard-coded target of this one-off run: the job whose AD track is edited,
+# the language sub-folder of its ad.vtt, and the voice used when the prompt
+# is answered with an empty line.
+JOB_ID = "1798981-27-littmann-basics-of-auscultation-how-to-video-ms-st-fr-v2fv"
+LANGUAGE = "fr"
+DEFAULT_VOICE = "Leda"
+
+
+def format_ts(seconds: float) -> str:
+    """Format an offset in seconds as an ``HH:MM:SS.mmm`` timestamp.
+
+    The value is rounded to whole milliseconds *before* it is split into
+    hours, minutes and seconds. Rounding only the seconds field afterwards
+    would let a value such as 59.9996 render as the invalid
+    ``00:00:60.000`` instead of ``00:01:00.000``.
+
+    Args:
+        seconds: Offset in seconds; expected to be non-negative.
+
+    Returns:
+        The zero-padded timestamp. The hours field grows beyond two digits
+        for offsets of 100 hours or more.
+    """
+    # int() guards against round() returning a non-int for float subclasses.
+    total_ms = int(round(seconds * 1000))
+    h, rem_ms = divmod(total_ms, 3_600_000)
+    m, rem_ms = divmod(rem_ms, 60_000)
+    s, ms = divmod(rem_ms, 1000)
+    return f"{h:02d}:{m:02d}:{s:02d}.{ms:03d}"
+
+
+async def main():
+    """Interactively regenerate the TTS audio for a single AD cue.
+
+    Steps:
+      1. Download ``{JOB_ID}/{LANGUAGE}/ad.vtt`` from the GCS bucket.
+      2. Parse it with ``parse_ad_cues`` and list every cue with its index.
+      3. Prompt for a cue index and a voice name (empty answer selects
+         ``DEFAULT_VOICE``).
+      4. Synthesize the cue text with ``GeminiTTSService.synthesize_text``.
+      5. Write the returned bytes to ``cue_<index>_<voice>_<language>.mp3``
+         in the current working directory.
+
+    The handled failure cases (missing blob, no cues, invalid index, TTS
+    exception) print a message and return early. Errors from the GCS
+    download or from writing the output file are not caught here.
+    """
+    print("=" * 60)
+    print("  Generate AD Cue MP3")
+    print(f"  Job: {JOB_ID}")
+    print(f"  Language: {LANGUAGE}")
+    print("=" * 60)
+
+    # --- Download AD VTT ---
+    print("\nDownloading AD VTT from GCS...")
+    blob_path = f"{JOB_ID}/{LANGUAGE}/ad.vtt"
+    blob = gcs_service.bucket.blob(blob_path)
+    if not blob.exists():
+        print(f"ERROR: Not found: gs://{settings.gcs_bucket}/{blob_path}")
+        return
+    vtt_content = blob.download_as_text()
+
+    # --- Show cues ---
+    cues = parse_ad_cues(vtt_content)
+    if not cues:
+        print("No AD cues found.")
+        return
+
+    print(f"\n{len(cues)} AD cues:\n")
+    for i, cue in enumerate(cues):
+        # Long cue texts are truncated to 90 characters for the listing only.
+        text_preview = cue["text"][:90] + ("..." if len(cue["text"]) > 90 else "")
+        print(f"  [{i:2d}] {format_ts(cue['start_time'])} → {format_ts(cue['end_time'])}")
+        print(f"       {text_preview}")
+
+    # --- Select cue ---
+    print()
+    cue_input = input("Cue index to regenerate: ").strip()
+    try:
+        cue_index = int(cue_input)
+    except ValueError:
+        cue_index = -1
+    # Explicit range check: plain list indexing would accept negative values
+    # (e.g. -1 silently picking the last cue), which never matches an index
+    # shown in the listing above.
+    if not 0 <= cue_index < len(cues):
+        print("Invalid index. Exiting.")
+        return
+    selected = cues[cue_index]
+
+    print(f"\nSelected: [{cue_index}] {selected['text']}")
+
+    # --- Voice ---
+    print(f"\nFemale: {', '.join(FEMALE_VOICES)}")
+    print(f"Male:   {', '.join(MALE_VOICES)}")
+    # The entered name is passed through unchecked; the lists above are only
+    # a reference, not a whitelist.
+    voice = input(f"Voice [{DEFAULT_VOICE}]: ").strip() or DEFAULT_VOICE
+
+    # --- Synthesize ---
+    print(f"\nSynthesizing with voice={voice}, language={LANGUAGE}...")
+    tts = GeminiTTSService()
+
+    # Ensure the text ends with sentence punctuation before synthesis.
+    text = selected["text"].strip()
+    if not text.endswith((".", "!", "?")):
+        text += "."
+
+    try:
+        audio_bytes = await tts.synthesize_text(
+            text=text,
+            voice_name=voice,
+            language=LANGUAGE,
+            model="flash",
+            speed=1.0,
+            style_prompt="",
+        )
+    except Exception as e:
+        # Top-level boundary of a one-off script: report and stop.
+        print(f"ERROR: TTS failed: {e}")
+        return
+
+    # --- Save ---
+    # NOTE(review): the file is named .mp3; confirm synthesize_text really
+    # returns MP3-encoded bytes.
+    filename = f"cue_{cue_index}_{voice}_{LANGUAGE}.mp3"
+    with open(filename, "wb") as f:
+        f.write(audio_bytes)
+
+    print(f"\nSaved: {filename} ({len(audio_bytes):,} bytes)")
+    print("Done. Download this file and listen to verify.")
+
+
+# Script entry point: only when executed directly (not on import), run the
+# interactive main() coroutine to completion via asyncio.run().
+if __name__ == "__main__":
+    asyncio.run(main())