fix(tts): respect API retryDelay on 429 instead of short exponential backoff
Gemini TTS allows 10 RPM; with concurrency=8 the rate limit is hit quickly. The previous backoff (1-3s) was far too short — the API returns retryDelay ~37s. Both synthesize_cue_task (Celery retry countdown) and GeminiTTSService (_synthesize_cue_with_retry sleep) now parse the retryDelay from the 429 error message and use it (+ 5s buffer) instead of the exponential guess. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
39a9d62b06
commit
95dbed03bd
2 changed files with 35 additions and 4 deletions
|
|
@@ -1,4 +1,5 @@
|
|||
import io
|
||||
import re
|
||||
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
|
@@ -30,6 +31,18 @@ class GeminiTTSService:
|
|||
self.default_voice = settings.gemini_tts_default_voice
|
||||
logger.info(f"Gemini TTS service initialized with model: {self.model}")
|
||||
|
||||
@staticmethod
|
||||
def _extract_retry_after(error: Exception) -> float | None:
|
||||
"""Return seconds to wait from a Google API 429 retryDelay, or None."""
|
||||
msg = str(error)
|
||||
m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
return None
|
||||
|
||||
async def synthesize_text(
|
||||
self,
|
||||
text: str,
|
||||
|
|
@@ -234,8 +247,9 @@ class GeminiTTSService:
|
|||
api_response_info = str(e)
|
||||
|
||||
if attempt < max_attempts - 1:
|
||||
# Exponential backoff with jitter
|
||||
delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||
# Honour API-provided retry delay on 429; fall back to exponential backoff
|
||||
api_delay = self._extract_retry_after(e)
|
||||
delay = api_delay if api_delay else base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||
logger.warning(
|
||||
f"TTS synthesis attempt {attempt + 1}/{max_attempts} failed for cue {cue_index}. "
|
||||
f"Retrying in {delay:.2f}s. Error: {e}"
|
||||
|
|
|
|||
|
|
@@ -8,6 +8,7 @@ in parallel using a dedicated TTS worker with concurrency=8.
|
|||
import asyncio
|
||||
import hashlib
|
||||
import io
|
||||
import re
|
||||
import time
|
||||
|
||||
from celery import group
|
||||
|
|
@@ -23,6 +24,21 @@ from . import celery_app
|
|||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _extract_retry_after(error: Exception) -> float | None:
|
||||
"""Return seconds to wait from a Google API 429 retryDelay, or None."""
|
||||
msg = str(error)
|
||||
# "Please retry in 37.65s" pattern from the message text
|
||||
m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
# 'retryDelay': '37s' pattern in the JSON body
|
||||
m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
return None
|
||||
|
||||
|
||||
_TTS_PROVIDER_MODEL_MAP = {
|
||||
# (provider, model) → cost-tracker provider + model strings
|
||||
"gemini": "google",
|
||||
|
|
@@ -169,9 +185,10 @@ def synthesize_cue_task(
|
|||
|
||||
# Check if we have retries left
|
||||
if self.request.retries < self.max_retries:
|
||||
# Calculate backoff delay with jitter
|
||||
import random
|
||||
delay = (2 ** self.request.retries) + random.uniform(0, 1)
|
||||
# Honour the API-provided retry delay on 429; fall back to exponential backoff
|
||||
api_delay = _extract_retry_after(e)
|
||||
delay = api_delay if api_delay else (2 ** self.request.retries) + random.uniform(0, 1)
|
||||
logger.info(
|
||||
f"Retrying TTS cue {cue_index} in {delay:.1f}s "
|
||||
f"(attempt {self.request.retries + 2}/{self.max_retries + 1})"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue