fix(tts): replace pydub MP3 export with lameenc (bundled LAME encoder, no system ffmpeg)
Gemini TTS _pcm_to_mp3 used pydub.AudioSegment.export(format='mp3'), which requires a system ffmpeg binary. Worker containers don't have ffmpeg installed (video ops run on Cloud Run). Switch to lameenc, a Python binding that ships the LAME encoder inside its prebuilt wheels and encodes PCM→MP3 without any system binary. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b0a90777ed
commit
a53cf960ae
2 changed files with 11 additions and 19 deletions
|
|
@ -1,5 +1,4 @@
|
|||
import io
|
||||
import wave
|
||||
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
|
@ -354,25 +353,17 @@ class GeminiTTSService:
|
|||
def _pcm_to_mp3(self, pcm_data: bytes) -> bytes:
|
||||
"""
|
||||
Convert raw PCM audio (24kHz, 16-bit, mono) to MP3.
|
||||
Gemini TTS outputs PCM at 24000 Hz sample rate.
|
||||
Uses lameenc (Python bindings with the LAME encoder bundled in its wheels) — no system ffmpeg required.
|
||||
"""
|
||||
# Create WAV from PCM data
|
||||
wav_buffer = io.BytesIO()
|
||||
with wave.open(wav_buffer, "wb") as wf:
|
||||
wf.setnchannels(1) # Mono
|
||||
wf.setsampwidth(2) # 16-bit (2 bytes)
|
||||
wf.setframerate(24000) # 24kHz
|
||||
wf.writeframes(pcm_data)
|
||||
|
||||
# Convert WAV to MP3 using pydub
|
||||
wav_buffer.seek(0)
|
||||
audio_segment = AudioSegment.from_wav(wav_buffer)
|
||||
|
||||
# Export as MP3
|
||||
mp3_buffer = io.BytesIO()
|
||||
audio_segment.export(mp3_buffer, format="mp3", bitrate="128k")
|
||||
|
||||
return mp3_buffer.getvalue()
|
||||
import lameenc
|
||||
encoder = lameenc.Encoder()
|
||||
encoder.set_bit_rate(128)
|
||||
encoder.set_in_sample_rate(24000)
|
||||
encoder.set_channels(1)
|
||||
encoder.set_quality(2) # 2 = high quality
|
||||
mp3_data = encoder.encode(pcm_data)
|
||||
mp3_data += encoder.flush()
|
||||
return mp3_data
|
||||
|
||||
def _parse_ad_cues(self, vtt_content: str) -> list[dict]:
|
||||
"""Parse audio description VTT and extract timing + text"""
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ prometheus-client = "^0.19.0"
|
|||
sentry-sdk = {extras = ["fastapi"], version = "^1.38.0"}
|
||||
ffmpeg-python = "^0.2.0"
|
||||
pydub = "^0.25.1"
|
||||
lameenc = "^1.7.0"
|
||||
faster-whisper = "^1.2.0"
|
||||
python-magic = "^0.4.27"
|
||||
aiohttp = "^3.12.15"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue