Backend:
- New UserRole.PROJECT_MANAGER with pm_client_ids[] on User model
- New models: Client (slug-based), Team (member_user_ids[]), Project (client-scoped)
- Job model gains project_id field
- New GET/POST/PATCH/DELETE /clients, /clients/{id}/teams, /clients/{id}/projects,
/clients/{id}/pm routes (admin-only client CRUD; PM or admin for teams/projects)
- get_accessible_project_ids() helper: staff→all, PM→their clients' projects,
CLIENT→projects from teams they belong to (with legacy owner fallback)
- list_jobs, get_job, bulk_download, get_vtt_content, delete_job all use new isolation
Frontend:
- UserRole type gains 'project_manager'
- Job, JobCreateRequest gain project_id field
- Client, Team, Project, PMUser types added
- ApiClient: full client/team/project/PM CRUD methods
- useClients hook with all query/mutation hooks
- Admin pages: ClientList + ClientDetail (teams, members, projects, PM assignment)
- NewJob form: client + project picker (shown when clients exist)
- Sidebar: Clients nav item for admin and project_manager roles
- Routes: /admin/clients and /admin/clients/:clientId behind RoleGate
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
347 lines
11 KiB
Python
347 lines
11 KiB
Python
import asyncio
|
|
import time
|
|
from typing import Literal, Optional
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
from fastapi.responses import Response
|
|
from pydantic import BaseModel, Field
|
|
|
|
from ...core.config import settings
|
|
from ...core.logging import get_logger
|
|
from ...services.gemini_tts import gemini_tts_service
|
|
from ...services.elevenlabs_voices import elevenlabs_voice_service
|
|
from ...services.tts import tts_service
|
|
from ...services import cost_tracker
|
|
from ...core.dependencies import get_current_user
|
|
|
|
# Module-level logger, keyed by this module's dotted name.
logger = get_logger(__name__)

# Every route declared below is registered on this router, so each path is
# served under the "/tts" prefix and carries the "tts" tag.
router = APIRouter(prefix="/tts", tags=["tts"])
|
|
|
|
|
|
class VoicePreviewRequest(BaseModel):
    """Request to generate a voice preview"""

    # Voice to preview. The Gemini path checks it against
    # settings.gemini_tts_voices; the ElevenLabs path forwards it as voice_id.
    voice_name: str
    # Language code used to pick the preview sample text.
    language: str = "en"
    # Which TTS backend handles the preview.
    provider: Literal["gemini", "elevenlabs"] = "gemini"

    # ---- Gemini-specific ----
    model: Literal["flash", "pro"] = "flash"
    # Playback speed multiplier, validated to the inclusive range 0.5-2.0.
    speed: float = Field(default=1.0, ge=0.5, le=2.0)
    style_preset: Literal[
        "neutral",
        "calm",
        "energetic",
        "professional",
        "warm",
        "documentary",
        "custom",
    ] = "neutral"
    # Only consulted when style_preset is "custom" and this value is non-empty.
    custom_style_prompt: Optional[str] = None

    # ---- ElevenLabs-specific ----
    # Both are optional; the ElevenLabs preview substitutes 0.5 when omitted.
    stability: Optional[float] = Field(default=None, ge=0.0, le=1.0)
    similarity_boost: Optional[float] = Field(default=None, ge=0.0, le=1.0)
|
|
|
|
|
|
class VoiceInfo(BaseModel):
    """Structured voice information for any provider."""

    # Identifier passed back when requesting this voice. list_voices fills it
    # with the ElevenLabs voice_id, or with the voice name for Gemini.
    id: str
    # Human-readable voice name.
    name: str

    # The remaining fields are optional metadata; list_voices only populates
    # them for ElevenLabs voices and leaves them as None for Gemini.
    description: Optional[str] = None
    preview_url: Optional[str] = None
    labels: Optional[dict[str, str]] = None
    category: Optional[str] = None
|
|
|
|
|
|
class ProviderVoicesResponse(BaseModel):
    """Available TTS voices for a specific provider."""

    # Provider the listing belongs to ("gemini" or "elevenlabs" in list_voices).
    provider: str
    voices: list[VoiceInfo]
    # Id of the voice to preselect; list_voices sends "" when there is none.
    default: str
    # False when the provider is not configured or its voice lookup failed.
    available: bool = True
    # Text of the lookup failure, when there was one.
    error: Optional[str] = None
|
|
|
|
|
|
class LanguagesResponse(BaseModel):
    """Supported TTS languages"""

    # Language code -> display name.
    languages: dict[str, str]
    # Language code -> sample text spoken in previews.
    preview_samples: dict[str, str]
|
|
|
|
|
|
class TTSOptionItem(BaseModel):
    """Single option with value and label"""

    # Machine-readable value sent back in requests (e.g. "flash").
    value: str
    # Text shown to the user for this option.
    label: str
|
|
|
|
|
|
class SpeedRange(BaseModel):
    """Speed slider range configuration"""

    # Slider bounds.
    min: float
    max: float
    # Initial slider position.
    default: float
    # Increment between adjacent slider positions.
    step: float
|
|
|
|
|
|
class FloatRange(BaseModel):
    """Generic float range for sliders."""

    # Slider bounds.
    min: float
    max: float
    # Initial slider position.
    default: float
    # Increment between adjacent slider positions.
    step: float
|
|
|
|
|
|
class ProviderOptionsResponse(BaseModel):
    """Available TTS configuration options for a provider."""

    provider: str

    # ---- Gemini-specific (get_tts_options leaves these None for ElevenLabs) ----
    models: Optional[list[TTSOptionItem]] = None
    style_presets: Optional[list[TTSOptionItem]] = None
    speed_range: Optional[SpeedRange] = None

    # ---- ElevenLabs-specific (get_tts_options leaves these None for Gemini) ----
    stability_range: Optional[FloatRange] = None
    similarity_boost_range: Optional[FloatRange] = None
|
|
|
|
|
|
@router.get("/voices", response_model=ProviderVoicesResponse)
async def list_voices(
    provider: str = Query("gemini", description="TTS provider: gemini or elevenlabs"),
    current_user=Depends(get_current_user),
) -> ProviderVoicesResponse:
    """
    List available TTS voices for the specified provider.
    """
    # Any provider value other than "elevenlabs" is answered with the
    # statically configured Gemini voices.
    if provider != "elevenlabs":
        gemini_voices = [
            VoiceInfo(id=voice, name=voice) for voice in settings.gemini_tts_voices
        ]
        return ProviderVoicesResponse(
            provider="gemini",
            voices=gemini_voices,
            default=settings.gemini_tts_default_voice,
        )

    # ElevenLabs not configured: report it as unavailable instead of failing.
    if not tts_service.elevenlabs_available:
        return ProviderVoicesResponse(
            provider="elevenlabs",
            voices=[],
            default="",
            available=False,
        )

    # A failed lookup is also reported in-band (available=False plus the error
    # text) rather than as an HTTP error.
    try:
        fetched = await elevenlabs_voice_service.get_voices()
    except Exception as e:
        logger.warning(f"ElevenLabs get_voices failed: {e}")
        return ProviderVoicesResponse(
            provider="elevenlabs",
            voices=[],
            default="",
            available=False,
            error=str(e),
        )

    catalog = []
    for entry in fetched:
        catalog.append(
            VoiceInfo(
                id=entry.voice_id,
                name=entry.name,
                # Falsy metadata (e.g. empty strings or dicts) is normalised to None.
                description=entry.description or None,
                preview_url=entry.preview_url or None,
                labels=entry.labels or None,
                category=entry.category or None,
            )
        )

    # The first returned voice acts as the default; "" when nothing came back.
    first_id = catalog[0].id if catalog else ""
    return ProviderVoicesResponse(
        provider="elevenlabs",
        voices=catalog,
        default=first_id,
        available=True,
    )
|
|
|
|
|
|
@router.get("/languages", response_model=LanguagesResponse)
async def list_languages(
    current_user=Depends(get_current_user),
) -> LanguagesResponse:
    """
    List all supported TTS languages with display names and preview samples.
    """
    # Both mappings are served straight from configuration.
    display_names = settings.gemini_tts_language_names
    sample_texts = settings.gemini_tts_preview_samples
    return LanguagesResponse(languages=display_names, preview_samples=sample_texts)
|
|
|
|
|
|
@router.get("/options", response_model=ProviderOptionsResponse)
async def get_tts_options(
    provider: str = Query("gemini", description="TTS provider: gemini or elevenlabs"),
    current_user=Depends(get_current_user),
) -> ProviderOptionsResponse:
    """
    Get available TTS configuration options for the specified provider.
    """
    if provider == "elevenlabs":
        # Both ElevenLabs sliders share the same 0..1 range, default and step.
        unit_slider = {"min": 0.0, "max": 1.0, "default": 0.5, "step": 0.05}
        return ProviderOptionsResponse(
            provider="elevenlabs",
            stability_range=FloatRange(**unit_slider),
            similarity_boost_range=FloatRange(**unit_slider),
        )

    # Any other provider value gets the Gemini options.
    # (value, label) pairs, in the order they are returned.
    model_choices = (
        ("flash", "Flash (Fast, Cost-efficient)"),
        ("pro", "Pro (Higher Quality)"),
    )
    preset_choices = (
        ("neutral", "Neutral"),
        ("calm", "Calm & Soothing"),
        ("energetic", "Energetic"),
        ("professional", "Professional"),
        ("warm", "Warm & Friendly"),
        ("documentary", "Documentary"),
        ("custom", "Custom Prompt"),
    )
    return ProviderOptionsResponse(
        provider="gemini",
        models=[
            TTSOptionItem(value=value, label=label) for value, label in model_choices
        ],
        style_presets=[
            TTSOptionItem(value=value, label=label) for value, label in preset_choices
        ],
        # The speed slider bounds come from configuration.
        speed_range=SpeedRange(
            min=settings.gemini_tts_speed_min,
            max=settings.gemini_tts_speed_max,
            default=settings.gemini_tts_speed_default,
            step=settings.gemini_tts_speed_step,
        ),
    )
|
|
|
|
|
|
@router.post("/preview")
async def preview_voice(
    request: VoicePreviewRequest,
    current_user=Depends(get_current_user),
) -> Response:
    """
    Generate a voice preview audio sample with all TTS settings applied.
    Returns MP3 audio data.
    """
    # Identifier handed to cost tracking: e-mail first, then the "sub" entry,
    # then a fixed placeholder.
    user_id: str = current_user.get("email") or current_user.get("sub") or "unknown"

    # Gemini handles every provider value except "elevenlabs".
    handler = _preview_elevenlabs if request.provider == "elevenlabs" else _preview_gemini
    return await handler(request, user_id)
|
|
|
|
|
|
# Strong references to in-flight Gemini cost-tracking tasks. The event loop
# keeps only weak references to tasks, so a fire-and-forget task that nobody
# references may be garbage-collected before it completes.
_gemini_preview_cost_tasks: set[asyncio.Task] = set()


async def _preview_gemini(request: VoicePreviewRequest, user_id: str) -> Response:
    """Generate a Gemini TTS voice preview.

    Args:
        request: Preview request; voice, language, model, speed and style
            settings are read from it.
        user_id: Identifier recorded with the cost-tracking entry.

    Returns:
        A ``Response`` with media type ``audio/mpeg`` whose body is the data
        returned by ``gemini_tts_service.synthesize_preview``.

    Raises:
        HTTPException: 400 when the voice or language is not configured,
            500 when synthesis fails.
    """
    # Validate voice name
    if request.voice_name not in settings.gemini_tts_voices:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid voice name. Available voices: {', '.join(settings.gemini_tts_voices)}",
        )

    # Validate language
    if request.language not in settings.gemini_tts_languages:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language. Available languages: {', '.join(settings.gemini_tts_languages.keys())}",
        )

    # Resolve style prompt from preset or custom. A "custom" preset without a
    # prompt falls back to the configured prompt table like any other preset.
    if request.style_preset == "custom" and request.custom_style_prompt:
        style_prompt = request.custom_style_prompt
    else:
        style_prompt = settings.gemini_tts_style_prompts.get(request.style_preset, "")

    # Only the length of the sample is used here (for cost tracking); the text
    # itself is not passed to synthesize_preview.
    # NOTE(review): presumably the service picks the same sample internally - confirm.
    sample_text = settings.gemini_tts_preview_samples.get(
        request.language,
        settings.gemini_tts_preview_samples.get("en", "This is a voice preview."),
    )

    logger.info(
        f"Generating Gemini voice preview: voice={request.voice_name}, language={request.language}, "
        f"model={request.model}, speed={request.speed}x, style={request.style_preset}"
    )

    # Only the synthesis call is guarded, so the 500 below always means
    # "synthesis failed" and nothing else.
    t0 = time.monotonic()
    try:
        audio_data = await gemini_tts_service.synthesize_preview(
            voice_name=request.voice_name,
            language=request.language,
            model=request.model,
            speed=request.speed,
            style_prompt=style_prompt,
        )
    except Exception as e:
        logger.error(f"Gemini voice preview generation failed: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to generate voice preview: {str(e)}",
        ) from e
    elapsed_ms = int((time.monotonic() - t0) * 1000)

    # Record usage in the background so the response is not delayed. The task
    # is kept in the module-level set until it finishes so it cannot be
    # garbage-collected mid-flight.
    model_id = settings.gemini_tts_models.get(request.model, settings.gemini_tts_model)
    cost_task = asyncio.create_task(cost_tracker.aio_record(
        model=model_id,
        provider="google",
        user_external_id=user_id,
        chars=len(sample_text),
        latency_ms=elapsed_ms,
    ))
    _gemini_preview_cost_tasks.add(cost_task)
    cost_task.add_done_callback(_gemini_preview_cost_tasks.discard)

    # voice_name and language were validated against configuration above, so
    # they are interpolated into the header as-is.
    return Response(
        content=audio_data,
        media_type="audio/mpeg",
        headers={
            "Content-Disposition": f"inline; filename=preview_{request.voice_name}_{request.language}.mp3"
        },
    )
|
|
|
|
|
|
# Strong references to in-flight ElevenLabs cost-tracking tasks. The event
# loop keeps only weak references to tasks, so a fire-and-forget task that
# nobody references may be garbage-collected before it completes.
_elevenlabs_preview_cost_tasks: set[asyncio.Task] = set()


def _elevenlabs_filename_token(value: str) -> str:
    """Return ``value`` with every character outside ``[A-Za-z0-9_-]`` replaced by ``_``."""
    return "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "-_" else "_"
        for ch in value
    )


async def _preview_elevenlabs(request: VoicePreviewRequest, user_id: str) -> Response:
    """Generate an ElevenLabs TTS voice preview.

    Args:
        request: Preview request; ``voice_name`` is forwarded as the
            ElevenLabs voice id, and ``stability`` / ``similarity_boost``
            default to 0.5 when omitted.
        user_id: Identifier recorded with the cost-tracking entry.

    Returns:
        A ``Response`` with media type ``audio/mpeg`` whose body is the data
        returned by the ElevenLabs synthesis call.

    Raises:
        HTTPException: 400 when ElevenLabs is not configured, 500 when
            synthesis fails.
    """
    if not tts_service.elevenlabs_available:
        raise HTTPException(
            status_code=400,
            detail="ElevenLabs TTS is not configured",
        )

    # Get sample text for the language. Unknown languages fall back to the
    # English sample, then to a fixed sentence.
    sample_text = settings.gemini_tts_preview_samples.get(
        request.language,
        settings.gemini_tts_preview_samples.get("en", "This is a preview of the audio description voice."),
    )

    stability = request.stability if request.stability is not None else 0.5
    similarity_boost = request.similarity_boost if request.similarity_boost is not None else 0.5

    logger.info(
        f"Generating ElevenLabs voice preview: voice={request.voice_name}, language={request.language}, "
        f"stability={stability}, similarity_boost={similarity_boost}"
    )

    # Only the synthesis call is guarded, so the 500 below always means
    # "synthesis failed" and nothing else.
    t0 = time.monotonic()
    try:
        audio_data = await tts_service._synthesize_text_elevenlabs(
            text=sample_text,
            voice_id=request.voice_name,
            stability=stability,
            similarity_boost=similarity_boost,
        )
    except Exception as e:
        logger.error(f"ElevenLabs voice preview generation failed: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to generate voice preview: {str(e)}",
        ) from e
    elapsed_ms = int((time.monotonic() - t0) * 1000)

    # Record usage in the background so the response is not delayed. The task
    # is kept in the module-level set until it finishes so it cannot be
    # garbage-collected mid-flight.
    cost_task = asyncio.create_task(cost_tracker.aio_record(
        model="eleven_multilingual_v2",
        provider="elevenlabs",
        user_external_id=user_id,
        chars=len(sample_text),
        latency_ms=elapsed_ms,
    ))
    _elevenlabs_preview_cost_tasks.add(cost_task)
    cost_task.add_done_callback(_elevenlabs_preview_cost_tasks.discard)

    # Unlike the Gemini path, voice_name and language are not checked against
    # any allow-list here, so they are sanitised before being placed in a
    # response header. Plain alphanumeric ids and codes pass through unchanged.
    safe_voice = _elevenlabs_filename_token(request.voice_name)
    safe_language = _elevenlabs_filename_token(request.language)
    return Response(
        content=audio_data,
        media_type="audio/mpeg",
        headers={
            "Content-Disposition": f"inline; filename=preview_{safe_voice}_{safe_language}.mp3"
        },
    )
|