From 6bf88474ee1aeb6475fbf7d8d7e2998a38872a0d Mon Sep 17 00:00:00 2001
From: Vadym Samoilenko <vadymsamoilenko@oliver.agency>
Date: Wed, 13 May 2026 18:41:32 +0100
Subject: [PATCH] feat(embed): switch embeddings to Vertex AI
 text-multilingual-embedding-002

Replace AI Studio gemini-embedding-001 with Vertex AI text-multilingual-embedding-002
via the google-genai SDK (vertexai=True). Vertex AI authenticates with Application
Default Credentials (ADC, already configured) and has significantly higher per-project
quotas than AI Studio's per-user limits.
Same 768-dim output; the multilingual model is better suited to 50+ language glossaries.
Add gcp_location config field (default us-central1) and halve the initial retry
backoff from 8s to 4s (_INITIAL_BACKOFF).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 backend/app/core/config.py                |  1 +
 backend/app/services/embedding_service.py | 24 ++++++++++++++---------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 9f145e0..cea818f 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -30,6 +30,7 @@ class Settings(BaseSettings):
 
     # GCP
     gcp_project_id: str
+    gcp_location: str = "us-central1"
     gcs_bucket: str = "accessible-video"
     google_application_credentials: str = ""
 
diff --git a/backend/app/services/embedding_service.py b/backend/app/services/embedding_service.py
index ef2b1db..c7b3cbe 100644
--- a/backend/app/services/embedding_service.py
+++ b/backend/app/services/embedding_service.py
@@ -1,9 +1,10 @@
 """
-Embedding service backed by Gemini text-embedding-004.
+Embedding service backed by Vertex AI text-multilingual-embedding-002.
 
-Provides batch embedding with retry/backoff for use in glossary ingestion.
-Batch size: 100 texts per API call (API limit is 2048 but we keep it conservative
-for memory and retry ergonomics with large glossaries).
+Uses the google-genai SDK in Vertex AI mode (Application Default Credentials)
+instead of AI Studio so we get higher per-project quotas and no per-user limits.
+
+Batch size: 100 texts per API call.
 """
 from __future__ import annotations
 
@@ -19,24 +20,29 @@ from ..core.logging import get_logger
 
 logger = get_logger(__name__)
 
-_EMBED_MODEL = "gemini-embedding-001"
+# Vertex AI multilingual model — 768-dim, 50+ languages, higher quota than AI Studio
+_EMBED_MODEL = "text-multilingual-embedding-002"
 _BATCH_SIZE = 100
 _MAX_RETRIES = 5
-_INITIAL_BACKOFF = 8.0
+_INITIAL_BACKOFF = 4.0
 
-# Matches the 'retryDelay': '7s' field in Gemini 429 error bodies
+# Matches 'retryDelay': '<int>s' (e.g. '7s') in Gemini/Vertex 429 error bodies
 _RETRY_DELAY_RE = re.compile(r"'retryDelay':\s*'(\d+)s'")
 
 
 def _parse_retry_delay(exc: Exception) -> float | None:
-    """Extract the server-suggested retry delay from a Gemini 429 error."""
+    """Extract the server-suggested retry delay from a 429 error."""
     m = _RETRY_DELAY_RE.search(str(exc))
     return float(m.group(1)) if m else None
 
 
 class EmbeddingService:
     def __init__(self) -> None:
-        self._client = genai.Client(api_key=settings.gemini_api_key)
+        self._client = genai.Client(
+            vertexai=True,
+            project=settings.gcp_project_id,
+            location=settings.gcp_location,
+        )
 
     async def embed_texts(self, texts: Sequence[str]) -> list[list[float]]:
         """