fix(embed): respect Gemini 429 retryDelay and reduce concurrency
- Parse retryDelay from 429 error body and sleep for server_delay+1s instead of our own 2s/4s backoff (which was shorter than API requires) - Reduce embed concurrency 5→2 to halve burst when multiple glossary versions embed simultaneously - Increase max_retries 3→5 and initial backoff 2s→8s for headroom Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ca312d48fa
commit
7a7b6c1c12
2 changed files with 19 additions and 5 deletions
|
|
@ -8,6 +8,7 @@ for memory and retry ergonomics with large glossaries).
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
|
||||
from google import genai
|
||||
|
|
@ -20,8 +21,17 @@ logger = get_logger(__name__)
|
|||
|
||||
_EMBED_MODEL = "gemini-embedding-001"
|
||||
_BATCH_SIZE = 100
|
||||
_MAX_RETRIES = 3
|
||||
_INITIAL_BACKOFF = 2.0
|
||||
_MAX_RETRIES = 5
|
||||
_INITIAL_BACKOFF = 8.0
|
||||
|
||||
# Matches the 'retryDelay': '7s' field in Gemini 429 error bodies
|
||||
_RETRY_DELAY_RE = re.compile(r"'retryDelay':\s*'(\d+)s'")
|
||||
|
||||
|
||||
def _parse_retry_delay(exc: Exception) -> float | None:
|
||||
"""Extract the server-suggested retry delay from a Gemini 429 error."""
|
||||
m = _RETRY_DELAY_RE.search(str(exc))
|
||||
return float(m.group(1)) if m else None
|
||||
|
||||
|
||||
class EmbeddingService:
|
||||
|
|
@ -62,8 +72,12 @@ class EmbeddingService:
|
|||
if attempt == _MAX_RETRIES:
|
||||
logger.error(f"Embedding batch failed after {_MAX_RETRIES} attempts: {exc}")
|
||||
raise
|
||||
logger.warning(f"Embedding attempt {attempt} failed, retrying in {backoff}s: {exc}")
|
||||
await asyncio.sleep(backoff)
|
||||
# Honour the server-suggested retryDelay when present (e.g. 429 RESOURCE_EXHAUSTED).
|
||||
# Fall back to our own exponential backoff otherwise.
|
||||
server_delay = _parse_retry_delay(exc)
|
||||
delay = max(server_delay + 1.0, backoff) if server_delay else backoff
|
||||
logger.warning(f"Embedding attempt {attempt} failed, retrying in {delay}s: {exc}")
|
||||
await asyncio.sleep(delay)
|
||||
backoff *= 2
|
||||
|
||||
raise RuntimeError("unreachable") # makes type-checker happy
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ from . import celery_app
|
|||
logger = get_logger(__name__)
|
||||
|
||||
_BATCH_SIZE = 250
|
||||
_CONCURRENCY = 5
|
||||
_CONCURRENCY = 2
|
||||
|
||||
|
||||
@celery_app.task(name="embed_glossary_version", bind=True, max_retries=3)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue