feat(embed): switch embeddings to Vertex AI text-multilingual-embedding-002
Replace AI Studio gemini-embedding-001 with Vertex AI text-multilingual-embedding-002 via the google-genai SDK (vertexai=True). Vertex AI authenticates with Application Default Credentials (ADC, already configured) and has significantly higher per-project quotas than AI Studio's per-user limits. Output stays 768-dimensional; the multilingual model is better suited to glossaries spanning 50+ languages. Add a gcp_location config field (default us-central1).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7a7b6c1c12
commit
6bf88474ee
2 changed files with 16 additions and 9 deletions
|
|
@ -30,6 +30,7 @@ class Settings(BaseSettings):
|
|||
|
||||
# GCP
|
||||
gcp_project_id: str
|
||||
gcp_location: str = "us-central1"
|
||||
gcs_bucket: str = "accessible-video"
|
||||
google_application_credentials: str = ""
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
"""
|
||||
Embedding service backed by Gemini text-embedding-004.
|
||||
Embedding service backed by Vertex AI text-multilingual-embedding-002.
|
||||
|
||||
Provides batch embedding with retry/backoff for use in glossary ingestion.
|
||||
Batch size: 100 texts per API call (API limit is 2048 but we keep it conservative
|
||||
for memory and retry ergonomics with large glossaries).
|
||||
Uses the google-genai SDK in Vertex AI mode (Application Default Credentials)
|
||||
instead of AI Studio so we get higher per-project quotas and no per-user limits.
|
||||
|
||||
Batch size: 100 texts per API call.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@ -19,24 +20,29 @@ from ..core.logging import get_logger
|
|||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_EMBED_MODEL = "gemini-embedding-001"
|
||||
# Vertex AI multilingual model — 768-dim, 50+ languages, higher quota than AI Studio
|
||||
_EMBED_MODEL = "text-multilingual-embedding-002"
|
||||
_BATCH_SIZE = 100
|
||||
_MAX_RETRIES = 5
|
||||
_INITIAL_BACKOFF = 8.0
|
||||
_INITIAL_BACKOFF = 4.0
|
||||
|
||||
# Matches the 'retryDelay': '7s' field in Gemini 429 error bodies
|
||||
# Matches the 'retryDelay': '7s' field in Gemini/Vertex 429 error bodies
|
||||
_RETRY_DELAY_RE = re.compile(r"'retryDelay':\s*'(\d+)s'")
|
||||
|
||||
|
||||
def _parse_retry_delay(exc: Exception) -> float | None:
|
||||
"""Extract the server-suggested retry delay from a Gemini 429 error."""
|
||||
"""Extract the server-suggested retry delay from a 429 error."""
|
||||
m = _RETRY_DELAY_RE.search(str(exc))
|
||||
return float(m.group(1)) if m else None
|
||||
|
||||
|
||||
class EmbeddingService:
|
||||
def __init__(self) -> None:
|
||||
self._client = genai.Client(api_key=settings.gemini_api_key)
|
||||
self._client = genai.Client(
|
||||
vertexai=True,
|
||||
project=settings.gcp_project_id,
|
||||
location=settings.gcp_location,
|
||||
)
|
||||
|
||||
async def embed_texts(self, texts: Sequence[str]) -> list[list[float]]:
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue