- conftest.py: set required env vars before app import to prevent Settings() crash
- gcs.py: lazy bucket init checks _bucket instead of _client; add @bucket.setter
- vtt.py: fix float precision in _format_timestamp; include empty-text cues in parser
- security.py: guard verify_password against empty hash (passlib UnknownHashError)
- tts.py: _parse_timestamp raises ValueError("Invalid timestamp format: …")
- emailer.py: HTML-escape job_title in _render_completion_template (XSS fix)
- test_emailer.py: rewrite for Mailgun-based service (replaced SendGrid)
- test_gcs.py: fix UploadFile constructor, MIME type, remove executor.submit mock
- test_gemini.py: patch module-level client instead of non-existent genai.upload_file;
translate_vtt tests use numbered-list mock responses matching new implementation
- test_tts.py: fix aiohttp async CM mock pattern; fix error message match
- test_models.py: update JobCreate to use source_is_english instead of language
- test_security.py: set jwt_access_ttl_min in token test
- test_cross_tenant_isolation.py: add patch to imports
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
459 lines
18 KiB
Python
459 lines
18 KiB
Python
import json
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from app.services.gemini import GeminiService
|
|
|
|
|
|
class TestGeminiService:
    """Unit tests for GeminiService with the module-level Gemini client patched out."""

    @pytest.fixture
    def gemini_service(self):
        """Provide a fresh GeminiService instance."""
        return GeminiService()

    @pytest.fixture
    def mock_uploaded_file(self):
        """Mock object used as the return value of the patched ``client.files.upload``."""
        uploaded = MagicMock()
        # ``name`` cannot be passed to the constructor as an attribute, so it is
        # configured after creation together with the other attributes.
        uploaded.configure_mock(
            name="files/test123",
            uri="gs://test-bucket/files/test123",
            mime_type="video/mp4",
        )
        return uploaded

    @pytest.fixture
    def valid_gemini_response(self):
        """Complete extraction payload containing every field these tests check."""
        captions = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello everyone, today we'll

00:00:03.000 --> 00:00:05.000
learn about accessibility features.
"""
        audio_description = """WEBVTT

00:00:00.500 --> 00:00:01.000
[Upbeat intro music plays]

00:00:05.500 --> 00:00:07.000
[Speaker gestures toward screen]
"""
        return {
            "language": "en",
            "confidence": 0.92,
            "summary": "A short video about accessibility features in web development.",
            "transcript_plaintext": "Hello everyone, today we'll learn about accessibility features.",
            "captions_vtt": captions,
            "audio_description_vtt": audio_description,
        }

    @pytest.mark.asyncio
    async def test_extract_accessibility_success(
        self, gemini_service, mock_uploaded_file, valid_gemini_response
    ):
        """A plain JSON reply comes back from extract_accessibility as the parsed dict."""
        gemini_reply = MagicMock(text=json.dumps(valid_gemini_response))

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_wait_for_file_active', return_value=True), \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt"):
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert result == valid_gemini_response
        assert result["confidence"] == 0.92
        assert result["language"] == "en"
        assert "WEBVTT" in result["captions_vtt"]
        assert "WEBVTT" in result["audio_description_vtt"]

    @pytest.mark.asyncio
    async def test_extract_accessibility_with_markdown_formatting(
        self, gemini_service, mock_uploaded_file, valid_gemini_response
    ):
        """A reply wrapped in a ```json fence yields the same parsed dict."""
        gemini_reply = MagicMock(
            text=f"```json\n{json.dumps(valid_gemini_response)}\n```"
        )

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_wait_for_file_active', return_value=True), \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt"):
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert result == valid_gemini_response

    @pytest.mark.asyncio
    async def test_extract_accessibility_invalid_json(self, gemini_service, mock_uploaded_file):
        """An unparseable reply is handed to _self_heal_response, whose result is returned."""
        healed = {"language": "en", "confidence": 0.8}
        gemini_reply = MagicMock(text="invalid json content")

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_wait_for_file_active', return_value=True), \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt"), \
                patch.object(gemini_service, '_self_heal_response') as mock_self_heal:
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = gemini_reply
            mock_self_heal.return_value = healed

            result = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert result == {"language": "en", "confidence": 0.8}
        mock_self_heal.assert_called_once()

    @pytest.mark.asyncio
    async def test_extract_accessibility_missing_fields(self, gemini_service, mock_uploaded_file):
        """A reply lacking required keys raises ValueError("Missing required field...")."""
        # Only two keys are supplied; summary, transcript and both VTT fields are absent.
        incomplete_response = {
            "language": "en",
            "confidence": 0.92,
        }
        gemini_reply = MagicMock(text=json.dumps(incomplete_response))

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_wait_for_file_active', return_value=True), \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt"):
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = gemini_reply

            with pytest.raises(ValueError, match="Missing required field"):
                await gemini_service.extract_accessibility("/tmp/test.mp4")

    @pytest.mark.asyncio
    async def test_extract_accessibility_invalid_vtt_format(self, gemini_service, mock_uploaded_file):
        """Captions that do not start with a WEBVTT header raise ValueError."""
        invalid_response = {
            "language": "en",
            "confidence": 0.92,
            "summary": "Test summary",
            "transcript_plaintext": "Test transcript",
            # No WEBVTT header on the captions; the audio description is well-formed.
            "captions_vtt": "Invalid VTT content",
            "audio_description_vtt": "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\nValid AD",
        }
        gemini_reply = MagicMock(text=json.dumps(invalid_response))

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_wait_for_file_active', return_value=True), \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt"):
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = gemini_reply

            with pytest.raises(ValueError, match="Invalid captions VTT format"):
                await gemini_service.extract_accessibility("/tmp/test.mp4")

    @pytest.mark.asyncio
    async def test_self_heal_response_success(self, gemini_service, valid_gemini_response):
        """_self_heal_response returns the parsed dict when the retry reply is valid JSON."""
        gemini_reply = MagicMock(text=json.dumps(valid_gemini_response))

        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service._self_heal_response("/tmp/test.mp4", "invalid json")

        assert result == valid_gemini_response

    @pytest.mark.asyncio
    async def test_self_heal_response_reask(self, gemini_service):
        """_self_heal_response raises ValueError when the retry reply is still not JSON."""
        # "REASK" is not valid JSON, which drives the failure path.
        gemini_reply = MagicMock(text="REASK")

        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = gemini_reply

            with pytest.raises(ValueError, match="Failed to get valid JSON"):
                await gemini_service._self_heal_response("/tmp/test.mp4", "invalid json")

    @pytest.mark.asyncio
    async def test_transcreate_content_success(self, gemini_service):
        """transcreate_content returns the parsed dict holding both translated VTT tracks."""
        spanish_captions = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hola a todos, hoy vamos a

00:00:03.000 --> 00:00:05.000
aprender sobre características de accesibilidad.
"""
        spanish_audio_description = """WEBVTT

00:00:00.500 --> 00:00:01.000
[Música de introducción alegre]

00:00:05.500 --> 00:00:07.000
[El presentador gesticula hacia la pantalla]
"""
        transcreate_response = {
            "captions_vtt": spanish_captions,
            "audio_description_vtt": spanish_audio_description,
        }
        gemini_reply = MagicMock(text=json.dumps(transcreate_response))

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt {TARGET_LANGUAGE}"):
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.transcreate_content(
                "English captions VTT",
                "English AD VTT",
                "es",
                "Brand guidelines",
            )

        assert result == transcreate_response
        assert "WEBVTT" in result["captions_vtt"]
        assert "WEBVTT" in result["audio_description_vtt"]

    @pytest.mark.asyncio
    async def test_transcreate_content_missing_fields(self, gemini_service):
        """A transcreation reply without audio_description_vtt raises ValueError."""
        # audio_description_vtt is deliberately left out.
        incomplete_response = {
            "captions_vtt": "Some content",
        }
        gemini_reply = MagicMock(text=json.dumps(incomplete_response))

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt {TARGET_LANGUAGE}"):
            mock_client.models.generate_content.return_value = gemini_reply

            with pytest.raises(ValueError, match="Missing required VTT fields"):
                await gemini_service.transcreate_content(
                    "English captions VTT",
                    "English AD VTT",
                    "es",
                )

    @pytest.mark.asyncio
    async def test_transcreate_content_invalid_json(self, gemini_service):
        """A non-JSON transcreation reply raises ValueError."""
        gemini_reply = MagicMock(text="invalid json")

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt {TARGET_LANGUAGE}"):
            mock_client.models.generate_content.return_value = gemini_reply

            with pytest.raises(ValueError, match="Invalid JSON response from transcreation"):
                await gemini_service.transcreate_content(
                    "English captions VTT",
                    "English AD VTT",
                    "es",
                )

    def test_load_prompt_success(self, gemini_service):
        """_load_prompt returns whatever Path.read_text yields."""
        prompt_content = "Test prompt content with {TARGET_LANGUAGE} placeholder"

        with patch('pathlib.Path.read_text', return_value=prompt_content):
            loaded = gemini_service._load_prompt("test_prompt.md")

        assert loaded == prompt_content

    def test_load_prompt_file_not_found(self, gemini_service):
        """A FileNotFoundError raised by Path.read_text propagates out of _load_prompt."""
        with patch('pathlib.Path.read_text', side_effect=FileNotFoundError), \
                pytest.raises(FileNotFoundError):
            gemini_service._load_prompt("nonexistent.md")

    @pytest.mark.asyncio
    async def test_transcreate_with_markdown_response(self, gemini_service):
        """A ```json-fenced transcreation reply is parsed into the expected dict."""
        transcreate_response = {
            "captions_vtt": "Test VTT",
            "audio_description_vtt": "Test AD VTT",
        }
        gemini_reply = MagicMock(
            text=f"```json\n{json.dumps(transcreate_response)}\n```"
        )

        with patch('app.services.gemini.client') as mock_client, \
                patch.object(gemini_service, '_load_prompt', return_value="Test prompt {TARGET_LANGUAGE}"):
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.transcreate_content(
                "English captions VTT",
                "English AD VTT",
                "es",
            )

        assert result == transcreate_response
|
|
|
|
|
|
class TestGeminiTranslateVtt:
    """Unit tests for GeminiService.translate_vtt() with the Gemini client patched out."""

    @pytest.fixture
    def gemini_service(self):
        """Provide a fresh GeminiService instance."""
        return GeminiService()

    @pytest.fixture
    def sample_vtt(self):
        """Three-cue English WebVTT document shared by several tests."""
        return """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello everyone

00:00:04.000 --> 00:00:06.000
Welcome to our tutorial

00:00:07.000 --> 00:00:09.000
Let's get started
"""

    @pytest.mark.asyncio
    async def test_translate_vtt_success(self, gemini_service, sample_vtt):
        """The translated output keeps all three timings and contains the translated texts."""
        # The mocked model reply is a numbered list, one item per cue.
        gemini_reply = MagicMock(
            text="1. Hola a todos\n2. Bienvenidos a nuestro tutorial\n3. Empecemos"
        )

        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.translate_vtt(sample_vtt, "es")

        assert "WEBVTT" in result
        assert "00:00:01.000 --> 00:00:03.000" in result
        assert "00:00:04.000 --> 00:00:06.000" in result
        assert "00:00:07.000 --> 00:00:09.000" in result
        assert "Hola a todos" in result
        assert "Bienvenidos a nuestro tutorial" in result
        assert "Empecemos" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_preserves_timing(self, gemini_service):
        """Millisecond-precision timestamps appear unchanged in the translated output."""
        original_vtt = """WEBVTT

00:00:01.234 --> 00:00:03.567
Original text

00:00:05.890 --> 00:00:08.123
Another line
"""
        gemini_reply = MagicMock(text="1. Texto original\n2. Otra línea")

        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.translate_vtt(original_vtt, "es")

        assert "00:00:01.234 --> 00:00:03.567" in result
        assert "00:00:05.890 --> 00:00:08.123" in result
        assert "Texto original" in result
        assert "Otra línea" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_maintains_webvtt_header(self, gemini_service, sample_vtt):
        """The translated output starts with the WEBVTT header."""
        gemini_reply = MagicMock(
            text="1. Hola a todos\n2. Bienvenidos a nuestro tutorial\n3. Empecemos"
        )

        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.translate_vtt(sample_vtt, "es")

        assert result.startswith("WEBVTT")

    @pytest.mark.asyncio
    async def test_translate_vtt_handles_multiline_cues(self, gemini_service):
        """Both lines of a two-line cue survive translation."""
        multiline_vtt = """WEBVTT

00:00:01.000 --> 00:00:03.000
First line
Second line

00:00:04.000 --> 00:00:06.000
Another cue
"""
        # The reply carries both lines of the first cue in one numbered item; per the
        # original author's note the service joins multi-line cues with a space
        # before sending them to Gemini.
        gemini_reply = MagicMock(text="1. Primera línea Segunda línea\n2. Otra señal")

        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.translate_vtt(multiline_vtt, "es")

        assert "Primera línea" in result
        assert "Segunda línea" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_with_source_language(self, gemini_service, sample_vtt):
        """The source language passed in shows up in the generate_content call."""
        gemini_reply = MagicMock(
            text="1. Hello everyone\n2. Welcome to our tutorial\n3. Let's get started"
        )

        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = gemini_reply

            await gemini_service.translate_vtt(sample_vtt, "en", source_language="fr")

        call_args = mock_client.models.generate_content.call_args
        assert call_args is not None

        # The whole call is stringified and searched for the source language.
        # NOTE(review): "fr" is a bare substring check, so it may also match
        # unrelated words in the prompt — consider tightening once the prompt
        # format is confirmed.
        prompt_content = str(call_args)
        assert "fr" in prompt_content or "French" in prompt_content

    @pytest.mark.asyncio
    async def test_translate_vtt_error_handling(self, gemini_service, sample_vtt):
        """An exception raised by generate_content propagates out of translate_vtt."""
        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.side_effect = Exception("API Error")

            with pytest.raises(Exception, match="API Error"):
                await gemini_service.translate_vtt(sample_vtt, "es")

    @pytest.mark.asyncio
    async def test_translate_vtt_with_speaker_labels(self, gemini_service):
        """Speaker labels present in the model reply are present in the output."""
        vtt_with_speakers = """WEBVTT

00:00:01.000 --> 00:00:03.000
[Speaker 1]: Hello everyone

00:00:04.000 --> 00:00:06.000
[Speaker 2]: Welcome to the show
"""
        gemini_reply = MagicMock(
            text="1. [Speaker 1]: Hola a todos\n2. [Speaker 2]: Bienvenidos al programa"
        )

        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = gemini_reply

            result = await gemini_service.translate_vtt(vtt_with_speakers, "es")

        assert "[Speaker 1]" in result
        assert "[Speaker 2]" in result
|
|
|
|
|
|
@pytest.mark.integration
class TestGeminiServiceIntegration:
    """Placeholder integration tests for the Gemini service; both are currently skipped."""

    @pytest.mark.skip(reason="Requires actual Gemini API key and video file")
    @pytest.mark.asyncio
    async def test_real_gemini_extraction(self):
        """Placeholder for an extraction test against the real Gemini API (no body yet)."""

    @pytest.mark.skip(reason="Requires actual Gemini API key")
    @pytest.mark.asyncio
    async def test_real_transcreation(self):
        """Placeholder for a transcreation test against the real Gemini API (no body yet)."""
|