fix(tts): respect API retryDelay on 429 instead of short exponential backoff

Gemini TTS allows 10 RPM; with concurrency=8 the rate limit is hit quickly.
The previous backoff (1-3s) was far too short — the API returns retryDelay ~37s.
Both synthesize_cue_task (Celery retry countdown) and GeminiTTSService
(_synthesize_cue_with_retry sleep) now parse the retryDelay from the 429
error message and use it (+ 5s buffer) instead of the exponential guess.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-05-08 17:04:45 +01:00
parent 39a9d62b06
commit 95dbed03bd
2 changed files with 35 additions and 4 deletions

View file

@ -1,4 +1,5 @@
import io
import re
from google import genai
from google.genai import types
@ -30,6 +31,18 @@ class GeminiTTSService:
self.default_voice = settings.gemini_tts_default_voice
logger.info(f"Gemini TTS service initialized with model: {self.model}")
@staticmethod
def _extract_retry_after(error: Exception) -> float | None:
"""Return seconds to wait from a Google API 429 retryDelay, or None."""
msg = str(error)
m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE)
if m:
return float(m.group(1)) + 5
m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg)
if m:
return float(m.group(1)) + 5
return None
async def synthesize_text(
self,
text: str,
@ -234,8 +247,9 @@ class GeminiTTSService:
api_response_info = str(e)
if attempt < max_attempts - 1:
# Exponential backoff with jitter
delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
# Honour API-provided retry delay on 429; fall back to exponential backoff
api_delay = self._extract_retry_after(e)
delay = api_delay if api_delay else base_delay * (2 ** attempt) + random.uniform(0, 1)
logger.warning(
f"TTS synthesis attempt {attempt + 1}/{max_attempts} failed for cue {cue_index}. "
f"Retrying in {delay:.2f}s. Error: {e}"

View file

@ -8,6 +8,7 @@ in parallel using a dedicated TTS worker with concurrency=8.
import asyncio
import hashlib
import io
import re
import time
from celery import group
@ -23,6 +24,21 @@ from . import celery_app
logger = get_logger(__name__)
def _extract_retry_after(error: Exception) -> float | None:
"""Return seconds to wait from a Google API 429 retryDelay, or None."""
msg = str(error)
# "Please retry in 37.65s" pattern from the message text
m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE)
if m:
return float(m.group(1)) + 5
# 'retryDelay': '37s' pattern in the JSON body
m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg)
if m:
return float(m.group(1)) + 5
return None
_TTS_PROVIDER_MODEL_MAP = {
# (provider, model) → cost-tracker provider + model strings
"gemini": "google",
@ -169,9 +185,10 @@ def synthesize_cue_task(
# Check if we have retries left
if self.request.retries < self.max_retries:
# Calculate backoff delay with jitter
import random
delay = (2 ** self.request.retries) + random.uniform(0, 1)
# Honour the API-provided retry delay on 429; fall back to exponential backoff
api_delay = _extract_retry_after(e)
delay = api_delay if api_delay else (2 ** self.request.retries) + random.uniform(0, 1)
logger.info(
f"Retrying TTS cue {cue_index} in {delay:.1f}s "
f"(attempt {self.request.retries + 2}/{self.max_retries + 1})"