From 95dbed03bd7bf3563eb7e5faac226c2750bb0874 Mon Sep 17 00:00:00 2001 From: Vadym Samoilenko Date: Fri, 8 May 2026 17:04:45 +0100 Subject: [PATCH] fix(tts): respect API retryDelay on 429 instead of short exponential backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gemini TTS allows 10 RPM; with concurrency=8 the rate limit is hit quickly. The previous backoff (1-3s) was far too short — the API returns retryDelay ~37s. Both synthesize_cue_task (Celery retry countdown) and GeminiTTSService (_synthesize_cue_with_retry sleep) now parse the retryDelay from the 429 error message and use it (+ 5s buffer) instead of the exponential guess. Co-Authored-By: Claude Sonnet 4.6 --- backend/app/services/gemini_tts.py | 18 ++++++++++++++++-- backend/app/tasks/tts_synthesis.py | 21 +++++++++++++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/backend/app/services/gemini_tts.py b/backend/app/services/gemini_tts.py index c30b98e..1cf3c93 100644 --- a/backend/app/services/gemini_tts.py +++ b/backend/app/services/gemini_tts.py @@ -1,4 +1,5 @@ import io +import re from google import genai from google.genai import types @@ -30,6 +31,18 @@ class GeminiTTSService: self.default_voice = settings.gemini_tts_default_voice logger.info(f"Gemini TTS service initialized with model: {self.model}") + @staticmethod + def _extract_retry_after(error: Exception) -> float | None: + """Return seconds to wait from a Google API 429 retryDelay, or None.""" + msg = str(error) + m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE) + if m: + return float(m.group(1)) + 5 + m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg) + if m: + return float(m.group(1)) + 5 + return None + async def synthesize_text( self, text: str, @@ -234,8 +247,9 @@ class GeminiTTSService: api_response_info = str(e) if attempt < max_attempts - 1: - # Exponential backoff with jitter - delay = base_delay * (2 ** attempt) + random.uniform(0, 1) + # Honour API-provided retry delay on 429; fall back to exponential backoff + api_delay = self._extract_retry_after(e) + delay = api_delay if api_delay else base_delay * (2 ** attempt) + random.uniform(0, 1) logger.warning( f"TTS synthesis attempt {attempt + 1}/{max_attempts} failed for cue {cue_index}. " f"Retrying in {delay:.2f}s. Error: {e}" diff --git a/backend/app/tasks/tts_synthesis.py b/backend/app/tasks/tts_synthesis.py index 0f97320..658b4fc 100644 --- a/backend/app/tasks/tts_synthesis.py +++ b/backend/app/tasks/tts_synthesis.py @@ -8,6 +8,7 @@ in parallel using a dedicated TTS worker with concurrency=8. import asyncio import hashlib import io +import re import time from celery import group @@ -23,6 +24,21 @@ from . import celery_app logger = get_logger(__name__) + +def _extract_retry_after(error: Exception) -> float | None: + """Return seconds to wait from a Google API 429 retryDelay, or None.""" + msg = str(error) + # "Please retry in 37.65s" pattern from the message text + m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE) + if m: + return float(m.group(1)) + 5 + # 'retryDelay': '37s' pattern in the JSON body + m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg) + if m: + return float(m.group(1)) + 5 + return None + + _TTS_PROVIDER_MODEL_MAP = { # (provider, model) → cost-tracker provider + model strings "gemini": "google", @@ -169,9 +185,10 @@ def synthesize_cue_task( # Check if we have retries left if self.request.retries < self.max_retries: - # Calculate backoff delay with jitter import random - delay = (2 ** self.request.retries) + random.uniform(0, 1) + # Honour the API-provided retry delay on 429; fall back to exponential backoff + api_delay = _extract_retry_after(e) + delay = api_delay if api_delay else (2 ** self.request.retries) + random.uniform(0, 1) logger.info( f"Retrying TTS cue {cue_index} in {delay:.1f}s " f"(attempt {self.request.retries + 2}/{self.max_retries + 1})"