diff --git a/backend/app/services/embedding_service.py b/backend/app/services/embedding_service.py index 8556945..ef2b1db 100644 --- a/backend/app/services/embedding_service.py +++ b/backend/app/services/embedding_service.py @@ -8,6 +8,7 @@ for memory and retry ergonomics with large glossaries). from __future__ import annotations import asyncio +import re from collections.abc import Sequence from google import genai @@ -20,8 +21,17 @@ logger = get_logger(__name__) _EMBED_MODEL = "gemini-embedding-001" _BATCH_SIZE = 100 -_MAX_RETRIES = 3 -_INITIAL_BACKOFF = 2.0 +_MAX_RETRIES = 5 +_INITIAL_BACKOFF = 8.0 + +# Matches the 'retryDelay': '7s' field in Gemini 429 error bodies +_RETRY_DELAY_RE = re.compile(r"'retryDelay':\s*'(\d+)s'") + + +def _parse_retry_delay(exc: Exception) -> float | None: + """Extract the server-suggested retry delay from a Gemini 429 error.""" + m = _RETRY_DELAY_RE.search(str(exc)) + return float(m.group(1)) if m else None class EmbeddingService: @@ -62,8 +72,12 @@ class EmbeddingService: if attempt == _MAX_RETRIES: logger.error(f"Embedding batch failed after {_MAX_RETRIES} attempts: {exc}") raise - logger.warning(f"Embedding attempt {attempt} failed, retrying in {backoff}s: {exc}") - await asyncio.sleep(backoff) + # Honour the server-suggested retryDelay when present (e.g. 429 RESOURCE_EXHAUSTED). + # Fall back to our own exponential backoff otherwise. + server_delay = _parse_retry_delay(exc) + delay = max(server_delay + 1.0, backoff) if server_delay else backoff + logger.warning(f"Embedding attempt {attempt} failed, retrying in {delay}s: {exc}") + await asyncio.sleep(delay) backoff *= 2 raise RuntimeError("unreachable") # makes type-checker happy diff --git a/backend/app/tasks/embed_glossary.py b/backend/app/tasks/embed_glossary.py index ba9941f..a2e49d0 100644 --- a/backend/app/tasks/embed_glossary.py +++ b/backend/app/tasks/embed_glossary.py @@ -20,7 +20,7 @@ from . import celery_app logger = get_logger(__name__) _BATCH_SIZE = 250 -_CONCURRENCY = 5 +_CONCURRENCY = 2 @celery_app.task(name="embed_glossary_version", bind=True, max_retries=3)