Replace AI Studio gemini-embedding-001 with Vertex AI text-multilingual-embedding-002 via google-genai SDK (vertexai=True). Vertex AI uses ADC (already configured) and has significantly higher per-project quotas than AI Studio per-user limits. Same 768-dim output; multilingual model better suited for 50+ language glossaries. Add gcp_location config field (default us-central1). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
321 lines
13 KiB
Python
321 lines
13 KiB
Python
|
|
from pydantic_settings import BaseSettings
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application configuration for the video-accessibility service.

    Values are populated by pydantic-settings from the process environment
    and from the ``.env`` file named in ``Config``. Fields declared without a
    default (``jwt_secret``, ``mongodb_uri``, ``redis_url``,
    ``gcp_project_id``, ``gemini_api_key``, ``client_base_url``) must be
    supplied externally.
    """

    # App
    app_env: str = "dev"
    api_base_url: str = "http://localhost:8000"
    app_url: str = "https://optical-dev.oliver.solutions/video-accessibility"

    # Auth
    jwt_secret: str
    jwt_alg: str = "HS256"
    jwt_access_ttl_min: int = 15
    jwt_refresh_ttl_days: int = 7
    cookie_domain: str = "localhost"
    cookie_secure: bool = False
    cookie_samesite: str = "Lax"

    # Database
    mongodb_uri: str
    mongodb_db: str = "accessible_video"

    # Redis
    redis_url: str

    # Celery
    celery_broker_url: str = ""
    celery_result_backend: str = ""

    # GCP
    gcp_project_id: str
    gcp_location: str = "us-central1"
    gcs_bucket: str = "accessible-video"
    google_application_credentials: str = ""

    # AI Services
    gemini_api_key: str
    elevenlabs_api_key: str = ""
    google_tts_credentials: str = ""

    # TTS Voice Configuration
    tts_provider: str = "gemini"  # "gemini", "google", or "elevenlabs"
    google_tts_voices: dict[str, str] = {
        "en-US": "en-US-Neural2-D",
        "es-ES": "es-ES-Neural2-A",
        "fr-FR": "fr-FR-Neural2-A",
        "de-DE": "de-DE-Neural2-B",
    }
    # Deprecated: ElevenLabs voices are now fetched dynamically via the API.
    # This fallback map is only used by _get_elevenlabs_voice() when no voice_name is provided.
    elevenlabs_voices: dict[str, str] = {}

    # Gemini TTS Configuration
    gemini_tts_model: str = "gemini-3.1-flash-tts-preview"
    gemini_tts_default_voice: str = "Kore"
    gemini_tts_voices: list[str] = [
        "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Aoede",
        "Callirrhoe", "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba",
        "Despina", "Erinome", "Algenib", "Rasalgethi", "Laomedeia", "Achernar",
        "Alnilam", "Schedar", "Gacrux", "Pulcherrima", "Achird", "Zubenelgenubi",
        "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat",
    ]
    # Maps an application language/locale key to the locale code used for TTS.
    gemini_tts_languages: dict[str, str] = {
        "en": "en-US",
        "es": "es-US",
        "fr": "fr-FR",
        "de": "de-DE",
        "it": "it-IT",
        "pt": "pt-BR",
        "ja": "ja-JP",
        "ko": "ko-KR",
        "ar": "ar-EG",
        "hi": "hi-IN",
        "id": "id-ID",
        "nl": "nl-NL",
        "pl": "pl-PL",
        "ru": "ru-RU",
        "th": "th-TH",
        "tr": "tr-TR",
        "vi": "vi-VN",
        "ro": "ro-RO",
        "uk": "uk-UA",
        "bn": "bn-BD",
        "mr": "mr-IN",
        "ta": "ta-IN",
        "te": "te-IN",
        "zh": "zh-CN",
        "cs": "cs-CZ",
        "da": "da-DK",
        "fi": "fi-FI",
        "hu": "hu-HU",
        "no": "nb-NO",
        "sk": "sk-SK",
        "sv": "sv-SE",
        "es-419": "es-US",
        "pt-BR": "pt-BR",
        "fr-CA": "fr-CA",
        # Explicit region variants (added for locale-aware glossary support)
        "de-DE": "de-DE",
        "en-US": "en-US",
        "en-GB": "en-GB",
        "en-CA": "en-CA",
        "es-ES": "es-ES",
        "es-MX": "es-US",
        "fr-FR": "fr-FR",
        "it-IT": "it-IT",
        "ja-JP": "ja-JP",
        "ko-KR": "ko-KR",
        "nl-NL": "nl-NL",
        "pl-PL": "pl-PL",
        "cs-CZ": "cs-CZ",
        "tr-TR": "tr-TR",
        "id-ID": "id-ID",
        "pt-PT": "pt-PT",
    }
    # Human-readable display names, keyed like gemini_tts_languages.
    gemini_tts_language_names: dict[str, str] = {
        "en": "English",
        "es": "Spanish",
        "fr": "French",
        "de": "German",
        "it": "Italian",
        "pt": "Portuguese",
        "ja": "Japanese",
        "ko": "Korean",
        "ar": "Arabic",
        "hi": "Hindi",
        "id": "Indonesian",
        "nl": "Dutch",
        "pl": "Polish",
        "ru": "Russian",
        "th": "Thai",
        "tr": "Turkish",
        "vi": "Vietnamese",
        "ro": "Romanian",
        "uk": "Ukrainian",
        "bn": "Bengali",
        "mr": "Marathi",
        "ta": "Tamil",
        "te": "Telugu",
        "zh": "Chinese",
        "cs": "Czech",
        "da": "Danish",
        "fi": "Finnish",
        "hu": "Hungarian",
        "no": "Norwegian",
        "sk": "Slovak",
        "sv": "Swedish",
        "es-419": "Spanish (Latin America)",
        "pt-BR": "Portuguese (Brazil)",
        "fr-CA": "French (Canada)",
        # Explicit region variants
        "de-DE": "German (Germany)",
        "en-US": "English (US)",
        "en-GB": "English (UK)",
        "en-CA": "English (Canada)",
        "es-ES": "Spanish (Spain)",
        "es-MX": "Spanish (Mexico)",
        "fr-FR": "French (France)",
        "it-IT": "Italian (Italy)",
        "ja-JP": "Japanese (Japan)",
        "ko-KR": "Korean (Korea)",
        "nl-NL": "Dutch (Netherlands)",
        "pl-PL": "Polish (Poland)",
        "cs-CZ": "Czech (Czech Republic)",
        "tr-TR": "Turkish (Turkey)",
        "id-ID": "Indonesian (Indonesia)",
        "pt-PT": "Portuguese (Portugal)",
    }
    # Sample sentences for voice previews, keyed like gemini_tts_languages.
    # The text is synthesized as written, so it must keep its diacritics:
    # base-language entries use the same spelling as their regional variants.
    gemini_tts_preview_samples: dict[str, str] = {
        "en": "This is a preview of the audio description voice.",
        "es": "Esta es una vista previa de la voz de audiodescripción.",
        "fr": "Ceci est un aperçu de la voix de l'audiodescription.",
        "de": "Dies ist eine Vorschau der Audiodeskriptionsstimme.",
        "it": "Questa è un'anteprima della voce dell'audiodescrizione.",
        "pt": "Esta é uma prévia da voz da audiodescrição.",
        "ja": "これは音声解説の声のプレビューです。",
        "ko": "이것은 오디오 설명 음성의 미리보기입니다.",
        "ar": "هذه معاينة لصوت الوصف الصوتي.",
        "hi": "यह ऑडियो विवरण आवाज का पूर्वावलोकन है।",
        "id": "Ini adalah pratinjau suara deskripsi audio.",
        "nl": "Dit is een voorbeeld van de audiodescriptiestem.",
        "pl": "To jest podgląd głosu audiodeskrypcji.",
        "ru": "Это предварительный просмотр голоса аудиоописания.",
        "th": "นี่คือตัวอย่างเสียงบรรยายภาพ",
        "tr": "Bu, sesli betimleme sesinin bir önizlemesidir.",
        "vi": "Đây là bản xem trước giọng mô tả âm thanh.",
        "ro": "Aceasta este o previzualizare a vocii descrierii audio.",
        "uk": "Це попередній перегляд голосу аудіоопису.",
        "bn": "এটি অডিও বর্ণনা ভয়েসের একটি প্রিভিউ।",
        "mr": "हे ऑडिओ वर्णन आवाजाचे पूर्वावलोकन आहे.",
        "ta": "இது ஆடியோ விளக்க குரலின் முன்னோட்டம்.",
        "te": "ఇది ఆడియో వివరణ స్వరం యొక్క ప్రివ్యూ.",
        "zh": "这是音频描述语音的预览。",
        "cs": "Toto je náhled hlasu zvukového popisu.",
        "da": "Dette er en forhåndsvisning af lydbeskrivelsesstemmen.",
        "fi": "Tämä on äänikuvauksen äänen esikatselu.",
        "hu": "Ez a hangos leírás hangjának előnézete.",
        "no": "Dette er en forhåndsvisning av lydbeskrivelsesstemmen.",
        "sk": "Toto je náhľad hlasu zvukového popisu.",
        "sv": "Det här är en förhandsgranskning av ljudbeskrivningsrösten.",
        "es-419": "Esta es una vista previa de la voz de audiodescripción.",
        "pt-BR": "Esta é uma prévia da voz da audiodescrição.",
        "fr-CA": "Ceci est un aperçu de la voix de l'audiodescription.",
        # Explicit region variants
        "de-DE": "Dies ist eine Vorschau der Audiodeskriptionsstimme.",
        "en-US": "This is a preview of the audio description voice.",
        "en-GB": "This is a preview of the audio description voice.",
        "en-CA": "This is a preview of the audio description voice.",
        "es-ES": "Esta es una vista previa de la voz de audiodescripción.",
        "es-MX": "Esta es una vista previa de la voz de audiodescripción.",
        "fr-FR": "Ceci est un aperçu de la voix de l'audiodescription.",
        "it-IT": "Questa è un'anteprima della voce dell'audiodescrizione.",
        "ja-JP": "これは音声解説の声のプレビューです。",
        "ko-KR": "이것은 오디오 설명 음성의 미리보기입니다.",
        "nl-NL": "Dit is een voorbeeld van de audiodescriptiestem.",
        "pl-PL": "To jest podgląd głosu audiodeskrypcji.",
        "cs-CZ": "Toto je náhled hlasu zvukového popisu.",
        "tr-TR": "Bu, sesli betimleme sesinin bir önizlemesidir.",
        "id-ID": "Ini adalah pratinjau suara deskripsi audio.",
        "pt-PT": "Esta é uma pré-visualização da voz da audiodescrição.",
    }

    # Gemini TTS Model Options
    gemini_tts_models: dict[str, str] = {
        "flash": "gemini-3.1-flash-tts-preview",  # Fast, cost-efficient (Preview)
        "pro": "gemini-2.5-pro-tts",  # Higher quality (GA)
    }

    # Gemini TTS Style Presets - prompts prepended to text for style control
    gemini_tts_style_prompts: dict[str, str] = {
        "neutral": "",  # No modification
        "calm": "Speak in a calm, gentle, and soothing manner with a relaxed pace. ",
        "energetic": "Speak with energy and enthusiasm, maintaining an upbeat and dynamic tone. ",
        "professional": "Speak in a clear, professional, and authoritative manner suitable for corporate content. ",
        "warm": "Speak in a warm, friendly, and approachable manner as if speaking to a friend. ",
        "documentary": "Speak in a measured, informative tone similar to a documentary narrator, with clear enunciation and appropriate pauses. ",
    }

    # TTS Speed range configuration
    gemini_tts_speed_min: float = 0.5
    gemini_tts_speed_max: float = 2.0
    gemini_tts_speed_default: float = 1.0
    gemini_tts_speed_step: float = 0.1

    # Whisper Configuration (for pause point refinement)
    whisper_model: str = "medium"  # Options: tiny, base, small, medium, large-v3
    whisper_max_search_window: float = 30.0  # Max seconds to search for speech gap after Gemini point
    whisper_sentence_gap_threshold: float = 0.5  # Gap duration to classify as sentence boundary
    whisper_phrase_gap_threshold: float = 0.3  # Gap duration to classify as phrase boundary
    whisper_min_gap_threshold: float = 0.15  # Minimum gap duration to consider
    # Forward-preferred snap windows (A2)
    whisper_snap_forward_window: float = 4.0  # Prefer boundary up to N seconds ahead of Gemini point
    whisper_snap_backward_window: float = 1.5  # Fall back to boundary up to N seconds behind
    # Adaptive silence buffer (A1)
    ad_silence_buffer_default: float = 0.5  # Base silence duration (s) before/after AD audio
    ad_silence_buffer_min_after: float = 0.1  # Minimum silence after AD audio
    # Minimum gap required at the chosen pause point (A3)
    ad_min_acceptable_gap: float = 0.2  # Seconds; points with shorter gaps trigger forward search

    # Cloud Run Service URLs (empty = use local processing)
    # When set, CPU-intensive work is offloaded to Cloud Run with autoscaling
    whisper_service_url: str = ""  # e.g., "https://whisper-service-xxx.run.app"
    ffmpeg_service_url: str = ""  # e.g., "https://ffmpeg-service-xxx.run.app"

    # Celery Worker Concurrency Settings
    # When using Cloud Run, workers just make HTTP calls so can handle more concurrent tasks
    # When running locally, concurrency is limited by CPU/RAM constraints
    #
    # Recommended settings:
    #   Cloud Run mode: WHISPER_WORKER_CONCURRENCY=10, FFMPEG_WORKER_CONCURRENCY=20
    #   Local mode: WHISPER_WORKER_CONCURRENCY=1, FFMPEG_WORKER_CONCURRENCY=2
    worker_concurrency: int = 8  # Main worker (default,ingest,notify,render)
    whisper_worker_concurrency: int = 1  # Whisper worker (default: 1 for local RAM constraints)
    ffmpeg_worker_concurrency: int = 4  # FFmpeg tasks on main worker
    tts_worker_concurrency: int = 8  # TTS worker

    # Email (Mailgun)
    mailgun_api_key: str = ""
    mailgun_domain: str = "mg.oliver.solutions"
    mailgun_from: str = "noreply@mg.oliver.solutions"
    email_from: str = "noreply@mg.oliver.solutions"
    client_base_url: str

    # Microsoft Authentication (Azure AD)
    azure_client_id: str = ""
    azure_authority: str = ""
    azure_redirect_uri: str = ""

    # Observability
    sentry_dsn: str = ""
    otel_exporter_otlp_endpoint: str = ""

    # AI Cost Tracker (direct HTTP — see services/cost_tracker.py)
    cost_tracker_base_url: str = ""
    cost_tracker_api_key: str = ""
    cost_tracker_source_app: str = "video-accessibility"
    cost_tracker_enabled: bool = True

    # Upload limits (T-14 — single source of truth)
    upload_max_video_bytes: int = 2 * 1024 * 1024 * 1024  # 2GB
    upload_signed_url_ttl_hours: int = 24  # signed URL lifetime

    # CORS - comma-separated list of allowed origins
    cors_origins: str = "http://localhost:5173,http://localhost:5174,http://localhost:3000,http://localhost:6001"

    @property
    def cors_origins_list(self) -> list[str]:
        """Return ``cors_origins`` split on commas as a list of origins.

        Each entry is stripped of surrounding whitespace; entries that are
        empty after stripping (e.g. from a trailing comma) are dropped.
        """
        return [origin.strip() for origin in self.cors_origins.split(",") if origin.strip()]

    class Config:
        # Load values from a local .env file in addition to the environment.
        env_file = ".env"
|
|
|
|
|
|
# Shared module-level instance, built once when this module is imported.
# NOTE(review): fields without defaults must be resolvable from the
# environment or .env at import time, otherwise construction should fail
# with a validation error — confirm this is the intended startup behavior.
settings = Settings()
|
|
|
|
|
|
def get_settings() -> Settings:
    """Return the shared module-level ``settings`` instance.

    Provided as a callable so the settings can be supplied through
    dependency injection. No new ``Settings`` object is created; every call
    returns the same instance.
    """
    return settings
|