chore: add one-off script to regenerate AD cue TTS with different voice
Replaces a single cue's voice (e.g., French Canadian → France French female) without re-running the full pipeline.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
030f1b67ee
commit
64a3fa2bef
1 changed files with 122 additions and 0 deletions
122
scripts/replace_cue_voice.py
Normal file
122
scripts/replace_cue_voice.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
One-off script: Generate a new TTS MP3 for a single AD cue.
|
||||
|
||||
Downloads the AD VTT from GCS, shows all cues, lets you pick one,
|
||||
synthesizes it with a different voice, and saves the MP3 locally.
|
||||
|
||||
Usage (from project root on the server):
|
||||
PYTHONPATH=backend python scripts/replace_cue_voice.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add backend to path so we can import app modules
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "backend"))
|
||||
|
||||
# Load .env before importing app modules that read settings at import time
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(os.path.join(os.path.dirname(__file__), "..", "backend", ".env"))
|
||||
|
||||
from app.core.config import settings
|
||||
from app.services.gemini_tts import GeminiTTSService
|
||||
from app.services.gcs import gcs_service
|
||||
from app.tasks.tts_synthesis import parse_ad_cues
|
||||
|
||||
|
||||
# Voice names printed as a quick reference when prompting for a voice.
FEMALE_VOICES = [
    "Kore",
    "Leda",
    "Aoede",
    "Callirrhoe",
    "Autonoe",
    "Erinome",
    "Laomedeia",
    "Achernar",
    "Despina",
    "Pulcherrima",
]
MALE_VOICES = [
    "Puck",
    "Charon",
    "Fenrir",
    "Orus",
    "Enceladus",
    "Iapetus",
]

# Job and language whose AD VTT is fetched ("<JOB_ID>/<LANGUAGE>/ad.vtt").
JOB_ID = "1798981-27-littmann-basics-of-auscultation-how-to-video-ms-st-fr-v2fv"
LANGUAGE = "fr"

# Voice used when the operator presses Enter at the voice prompt.
DEFAULT_VOICE = "Leda"
|
||||
|
||||
|
||||
def format_ts(seconds: float) -> str:
    """Format a duration in seconds as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a value such as 59.9996 carries into the minutes field
    ("00:01:00.000") instead of rendering an invalid "60.000" seconds field.

    Args:
        seconds: Offset in seconds. Negative values are rendered with a
            leading "-" followed by the formatted magnitude.

    Returns:
        The timestamp string; hours are zero-padded to at least two digits.
    """
    total_ms = round(abs(seconds) * 1000)
    # Only show a sign when the rounded magnitude is non-zero ("-0" is noise).
    sign = "-" if seconds < 0 and total_ms else ""
    total_s, ms = divmod(total_ms, 1000)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return f"{sign}{h:02d}:{m:02d}:{s:02d}.{ms:03d}"
|
||||
|
||||
|
||||
async def main():
    """Interactively regenerate the TTS audio for a single AD cue.

    Steps:
      1. Download ``<JOB_ID>/<LANGUAGE>/ad.vtt`` from the GCS bucket.
      2. Parse it with ``parse_ad_cues`` and list every cue with its index.
      3. Prompt for a cue index and a voice name (Enter = ``DEFAULT_VOICE``).
      4. Synthesize the cue text with ``GeminiTTSService.synthesize_text``.
      5. Write the returned bytes to ``cue_<index>_<voice>_<LANGUAGE>.mp3``
         in the current working directory.

    This function only reads from GCS; the MP3 is saved locally. Every
    failure path prints a message and returns early without raising.
    """
    print("=" * 60)
    print(" Generate AD Cue MP3")
    print(f" Job: {JOB_ID}")
    print(f" Language: {LANGUAGE}")
    print("=" * 60)

    # --- Download AD VTT ---
    print("\nDownloading AD VTT from GCS...")
    blob_path = f"{JOB_ID}/{LANGUAGE}/ad.vtt"
    blob = gcs_service.bucket.blob(blob_path)
    if not blob.exists():
        print(f"ERROR: Not found: gs://{settings.gcs_bucket}/{blob_path}")
        return
    vtt_content = blob.download_as_text()

    # --- Show cues ---
    cues = parse_ad_cues(vtt_content)
    if not cues:
        print("No AD cues found.")
        return

    print(f"\n{len(cues)} AD cues:\n")
    for i, cue in enumerate(cues):
        text_preview = cue["text"][:90] + ("..." if len(cue["text"]) > 90 else "")
        print(f" [{i:2d}] {format_ts(cue['start_time'])} → {format_ts(cue['end_time'])}")
        print(f" {text_preview}")

    # --- Select cue ---
    print()
    cue_input = input("Cue index to regenerate: ").strip()
    try:
        cue_index = int(cue_input)
    except ValueError:
        print("Invalid index. Exiting.")
        return
    # Explicit range check: plain list indexing would accept negative values
    # (e.g. -1 silently selects the last cue), which never match the listing.
    if not 0 <= cue_index < len(cues):
        print("Invalid index. Exiting.")
        return
    selected = cues[cue_index]

    print(f"\nSelected: [{cue_index}] {selected['text']}")

    text = selected["text"].strip()
    if not text:
        # Without this guard an empty cue would be synthesized as a lone ".".
        print("ERROR: Selected cue has no text to synthesize.")
        return
    # Append a full stop when the cue has no terminal punctuation.
    if not text.endswith((".", "!", "?")):
        text += "."

    # --- Voice ---
    print(f"\nFemale: {', '.join(FEMALE_VOICES)}")
    print(f"Male: {', '.join(MALE_VOICES)}")
    voice = input(f"Voice [{DEFAULT_VOICE}]: ").strip() or DEFAULT_VOICE

    # --- Synthesize ---
    print(f"\nSynthesizing with voice={voice}, language={LANGUAGE}...")
    tts = GeminiTTSService()

    try:
        audio_bytes = await tts.synthesize_text(
            text=text,
            voice_name=voice,
            language=LANGUAGE,
            model="flash",
            speed=1.0,
            style_prompt="",
        )
    except Exception as e:
        # Top-level boundary of a one-off script: report and stop.
        print(f"ERROR: TTS failed: {e}")
        return

    # --- Save ---
    filename = f"cue_{cue_index}_{voice}_{LANGUAGE}.mp3"
    with open(filename, "wb") as f:
        f.write(audio_bytes)

    print(f"\nSaved: {filename} ({len(audio_bytes):,} bytes)")
    print("Done. Download this file and listen to verify.")
|
||||
|
||||
|
||||
# Start the interactive flow only when this file is executed as a script;
# asyncio.run drives the main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Loading…
Add table
Reference in a new issue