From 6bf88474ee1aeb6475fbf7d8d7e2998a38872a0d Mon Sep 17 00:00:00 2001 From: Vadym Samoilenko Date: Wed, 13 May 2026 18:41:32 +0100 Subject: [PATCH] feat(embed): switch embeddings to Vertex AI text-multilingual-embedding-002 Replace AI Studio gemini-embedding-001 with Vertex AI text-multilingual-embedding-002 via google-genai SDK (vertexai=True). Vertex AI uses ADC (already configured) and has significantly higher per-project quotas than AI Studio per-user limits. Same 768-dim output; multilingual model better suited for 50+ language glossaries. Add gcp_location config field (default us-central1). Co-Authored-By: Claude Sonnet 4.6 --- backend/app/core/config.py | 1 + backend/app/services/embedding_service.py | 24 ++++++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 9f145e0..cea818f 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -30,6 +30,7 @@ class Settings(BaseSettings): # GCP gcp_project_id: str + gcp_location: str = "us-central1" gcs_bucket: str = "accessible-video" google_application_credentials: str = "" diff --git a/backend/app/services/embedding_service.py b/backend/app/services/embedding_service.py index ef2b1db..c7b3cbe 100644 --- a/backend/app/services/embedding_service.py +++ b/backend/app/services/embedding_service.py @@ -1,9 +1,10 @@ """ -Embedding service backed by Gemini text-embedding-004. +Embedding service backed by Vertex AI text-multilingual-embedding-002. -Provides batch embedding with retry/backoff for use in glossary ingestion. -Batch size: 100 texts per API call (API limit is 2048 but we keep it conservative -for memory and retry ergonomics with large glossaries). +Uses the google-genai SDK in Vertex AI mode (Application Default Credentials) +instead of AI Studio so we get higher per-project quotas and no per-user limits. + +Batch size: 100 texts per API call. """ from __future__ import annotations @@ -19,24 +20,29 @@ from ..core.logging import get_logger logger = get_logger(__name__) -_EMBED_MODEL = "gemini-embedding-001" +# Vertex AI multilingual model — 768-dim, 50+ languages, higher quota than AI Studio +_EMBED_MODEL = "text-multilingual-embedding-002" _BATCH_SIZE = 100 _MAX_RETRIES = 5 -_INITIAL_BACKOFF = 8.0 +_INITIAL_BACKOFF = 4.0 -# Matches the 'retryDelay': '7s' field in Gemini 429 error bodies +# Matches the 'retryDelay': '7s' field in Gemini/Vertex 429 error bodies _RETRY_DELAY_RE = re.compile(r"'retryDelay':\s*'(\d+)s'") def _parse_retry_delay(exc: Exception) -> float | None: - """Extract the server-suggested retry delay from a Gemini 429 error.""" + """Extract the server-suggested retry delay from a 429 error.""" m = _RETRY_DELAY_RE.search(str(exc)) return float(m.group(1)) if m else None class EmbeddingService: def __init__(self) -> None: - self._client = genai.Client(api_key=settings.gemini_api_key) + self._client = genai.Client( + vertexai=True, + project=settings.gcp_project_id, + location=settings.gcp_location, + ) async def embed_texts(self, texts: Sequence[str]) -> list[list[float]]: """