video-accessibility/backend/tests/unit/test_gemini.py
Vadym Samoilenko 5fd370c093 test: fix all unit tests — 168 passing, 0 failures
- conftest.py: set required env vars before app import to prevent Settings() crash
- gcs.py: lazy bucket init checks _bucket instead of _client; add @bucket.setter
- vtt.py: fix float precision in _format_timestamp; include empty-text cues in parser
- security.py: guard verify_password against empty hash (passlib UnknownHashError)
- tts.py: _parse_timestamp raises ValueError("Invalid timestamp format: …")
- emailer.py: HTML-escape job_title in _render_completion_template (XSS fix)
- test_emailer.py: rewrite for Mailgun-based service (replaced SendGrid)
- test_gcs.py: fix UploadFile constructor, MIME type, remove executor.submit mock
- test_gemini.py: patch module-level client instead of non-existent genai.upload_file;
  translate_vtt tests use numbered-list mock responses matching new implementation
- test_tts.py: fix aiohttp async CM mock pattern; fix error message match
- test_models.py: update JobCreate to use source_is_english instead of language
- test_security.py: set jwt_access_ttl_min in token test
- test_cross_tenant_isolation.py: add patch to imports

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-30 14:02:04 +01:00

459 lines
18 KiB
Python

import json
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from app.services.gemini import GeminiService
class TestGeminiService:
    """Unit tests for ``GeminiService``: extraction, self-heal, transcreation, prompts.

    Tests that exercise a Gemini call patch the module-level
    ``app.services.gemini.client`` so the canned response is returned instead.
    """

    @staticmethod
    def _text_response(text):
        """Return a mock Gemini response whose ``.text`` attribute is ``text``."""
        response = MagicMock()
        response.text = text
        return response

    @pytest.fixture
    def gemini_service(self):
        """Provide a fresh ``GeminiService`` instance."""
        return GeminiService()

    @pytest.fixture
    def mock_uploaded_file(self):
        """Mock of the object returned by ``client.files.upload``."""
        uploaded = MagicMock()
        uploaded.name = "files/test123"
        uploaded.uri = "gs://test-bucket/files/test123"
        uploaded.mime_type = "video/mp4"
        return uploaded

    @pytest.fixture
    def valid_gemini_response(self):
        """Sample valid Gemini extraction payload.

        The VTT documents are spelled out with explicit newlines so the blank
        line after the ``WEBVTT`` header and between cues cannot be lost.
        """
        return {
            "language": "en",
            "confidence": 0.92,
            "summary": "A short video about accessibility features in web development.",
            "transcript_plaintext": "Hello everyone, today we'll learn about accessibility features.",
            "captions_vtt": (
                "WEBVTT\n"
                "\n"
                "00:00:01.000 --> 00:00:03.000\n"
                "Hello everyone, today we'll\n"
                "\n"
                "00:00:03.000 --> 00:00:05.000\n"
                "learn about accessibility features.\n"
            ),
            "audio_description_vtt": (
                "WEBVTT\n"
                "\n"
                "00:00:00.500 --> 00:00:01.000\n"
                "[Upbeat intro music plays]\n"
                "\n"
                "00:00:05.500 --> 00:00:07.000\n"
                "[Speaker gestures toward screen]\n"
            ),
        }

    @pytest.mark.asyncio
    async def test_extract_accessibility_success(
        self, gemini_service, mock_uploaded_file, valid_gemini_response
    ):
        """A well-formed JSON response is returned as the parsed payload."""
        mock_response = self._text_response(json.dumps(valid_gemini_response))

        with patch("app.services.gemini.client") as mock_client:
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_wait_for_file_active", return_value=True
            ), patch.object(gemini_service, "_load_prompt", return_value="Test prompt"):
                result = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert result == valid_gemini_response
        assert result["confidence"] == 0.92
        assert result["language"] == "en"
        assert "WEBVTT" in result["captions_vtt"]
        assert "WEBVTT" in result["audio_description_vtt"]

    @pytest.mark.asyncio
    async def test_extract_accessibility_with_markdown_formatting(
        self, gemini_service, mock_uploaded_file, valid_gemini_response
    ):
        """A response wrapped in a ```json fence is expected to parse to the same payload."""
        fenced = f"```json\n{json.dumps(valid_gemini_response)}\n```"
        mock_response = self._text_response(fenced)

        with patch("app.services.gemini.client") as mock_client:
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_wait_for_file_active", return_value=True
            ), patch.object(gemini_service, "_load_prompt", return_value="Test prompt"):
                result = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert result == valid_gemini_response

    @pytest.mark.asyncio
    async def test_extract_accessibility_invalid_json(self, gemini_service, mock_uploaded_file):
        """Unparseable response text is expected to be routed through ``_self_heal_response``."""
        mock_response = self._text_response("invalid json content")
        healed = {"language": "en", "confidence": 0.8}

        with patch("app.services.gemini.client") as mock_client:
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_wait_for_file_active", return_value=True
            ), patch.object(
                gemini_service, "_load_prompt", return_value="Test prompt"
            ), patch.object(gemini_service, "_self_heal_response") as mock_self_heal:
                mock_self_heal.return_value = healed
                result = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert result == {"language": "en", "confidence": 0.8}
        mock_self_heal.assert_called_once()

    @pytest.mark.asyncio
    async def test_extract_accessibility_missing_fields(self, gemini_service, mock_uploaded_file):
        """A payload lacking required keys is expected to raise ``ValueError``."""
        incomplete_response = {
            "language": "en",
            "confidence": 0.92,
            # summary, transcript and both VTT fields are deliberately absent
        }
        mock_response = self._text_response(json.dumps(incomplete_response))

        with patch("app.services.gemini.client") as mock_client:
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_wait_for_file_active", return_value=True
            ), patch.object(gemini_service, "_load_prompt", return_value="Test prompt"):
                with pytest.raises(ValueError, match="Missing required field"):
                    await gemini_service.extract_accessibility("/tmp/test.mp4")

    @pytest.mark.asyncio
    async def test_extract_accessibility_invalid_vtt_format(self, gemini_service, mock_uploaded_file):
        """Captions without the ``WEBVTT`` header are expected to raise ``ValueError``."""
        invalid_response = {
            "language": "en",
            "confidence": 0.92,
            "summary": "Test summary",
            "transcript_plaintext": "Test transcript",
            "captions_vtt": "Invalid VTT content",  # Missing WEBVTT header
            "audio_description_vtt": "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\nValid AD",
        }
        mock_response = self._text_response(json.dumps(invalid_response))

        with patch("app.services.gemini.client") as mock_client:
            mock_client.files.upload.return_value = mock_uploaded_file
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_wait_for_file_active", return_value=True
            ), patch.object(gemini_service, "_load_prompt", return_value="Test prompt"):
                with pytest.raises(ValueError, match="Invalid captions VTT format"):
                    await gemini_service.extract_accessibility("/tmp/test.mp4")

    @pytest.mark.asyncio
    async def test_self_heal_response_success(self, gemini_service, valid_gemini_response):
        """Self-healing returns the parsed payload when the retry yields valid JSON."""
        mock_response = self._text_response(json.dumps(valid_gemini_response))

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            result = await gemini_service._self_heal_response("/tmp/test.mp4", "invalid json")

        assert result == valid_gemini_response

    @pytest.mark.asyncio
    async def test_self_heal_response_reask(self, gemini_service):
        """Self-healing is expected to raise when the retry is still not JSON."""
        # "REASK" is not valid JSON, so this drives the failure path.
        mock_response = self._text_response("REASK")

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            with pytest.raises(ValueError, match="Failed to get valid JSON"):
                await gemini_service._self_heal_response("/tmp/test.mp4", "invalid json")

    @pytest.mark.asyncio
    async def test_transcreate_content_success(self, gemini_service):
        """A complete transcreation payload is returned unchanged."""
        transcreate_response = {
            "captions_vtt": (
                "WEBVTT\n"
                "\n"
                "00:00:01.000 --> 00:00:03.000\n"
                "Hola a todos, hoy vamos a\n"
                "\n"
                "00:00:03.000 --> 00:00:05.000\n"
                "aprender sobre características de accesibilidad.\n"
            ),
            "audio_description_vtt": (
                "WEBVTT\n"
                "\n"
                "00:00:00.500 --> 00:00:01.000\n"
                "[Música de introducción alegre]\n"
                "\n"
                "00:00:05.500 --> 00:00:07.000\n"
                "[El presentador gesticula hacia la pantalla]\n"
            ),
        }
        mock_response = self._text_response(json.dumps(transcreate_response))

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_load_prompt", return_value="Test prompt {TARGET_LANGUAGE}"
            ):
                result = await gemini_service.transcreate_content(
                    "English captions VTT",
                    "English AD VTT",
                    "es",
                    "Brand guidelines",
                )

        assert result == transcreate_response
        assert "WEBVTT" in result["captions_vtt"]
        assert "WEBVTT" in result["audio_description_vtt"]

    @pytest.mark.asyncio
    async def test_transcreate_content_missing_fields(self, gemini_service):
        """A payload without ``audio_description_vtt`` is expected to raise ``ValueError``."""
        incomplete_response = {
            "captions_vtt": "Some content",
            # audio_description_vtt is deliberately absent
        }
        mock_response = self._text_response(json.dumps(incomplete_response))

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_load_prompt", return_value="Test prompt {TARGET_LANGUAGE}"
            ):
                with pytest.raises(ValueError, match="Missing required VTT fields"):
                    await gemini_service.transcreate_content(
                        "English captions VTT",
                        "English AD VTT",
                        "es",
                    )

    @pytest.mark.asyncio
    async def test_transcreate_content_invalid_json(self, gemini_service):
        """Non-JSON transcreation output is expected to raise ``ValueError``."""
        mock_response = self._text_response("invalid json")

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_load_prompt", return_value="Test prompt {TARGET_LANGUAGE}"
            ):
                with pytest.raises(ValueError, match="Invalid JSON response from transcreation"):
                    await gemini_service.transcreate_content(
                        "English captions VTT",
                        "English AD VTT",
                        "es",
                    )

    def test_load_prompt_success(self, gemini_service):
        """``_load_prompt`` returns whatever ``Path.read_text`` yields."""
        prompt_content = "Test prompt content with {TARGET_LANGUAGE} placeholder"

        with patch("pathlib.Path.read_text", return_value=prompt_content):
            result = gemini_service._load_prompt("test_prompt.md")

        assert result == prompt_content

    def test_load_prompt_file_not_found(self, gemini_service):
        """A ``FileNotFoundError`` from ``Path.read_text`` propagates to the caller."""
        with patch("pathlib.Path.read_text", side_effect=FileNotFoundError):
            with pytest.raises(FileNotFoundError):
                gemini_service._load_prompt("nonexistent.md")

    @pytest.mark.asyncio
    async def test_transcreate_with_markdown_response(self, gemini_service):
        """A ```json-fenced transcreation response is expected to parse to the payload."""
        transcreate_response = {
            "captions_vtt": "Test VTT",
            "audio_description_vtt": "Test AD VTT",
        }
        fenced = f"```json\n{json.dumps(transcreate_response)}\n```"
        mock_response = self._text_response(fenced)

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            with patch.object(
                gemini_service, "_load_prompt", return_value="Test prompt {TARGET_LANGUAGE}"
            ):
                result = await gemini_service.transcreate_content(
                    "English captions VTT",
                    "English AD VTT",
                    "es",
                )

        assert result == transcreate_response
class TestGeminiTranslateVtt:
    """Tests for ``GeminiService.translate_vtt()``.

    The mocked Gemini replies are numbered lists, one entry per cue. The VTT
    inputs are written with explicit newlines so the blank-line separators
    required by the WebVTT format are always present.
    """

    @staticmethod
    def _text_response(text):
        """Return a mock Gemini response whose ``.text`` attribute is ``text``."""
        response = MagicMock()
        response.text = text
        return response

    @pytest.fixture
    def gemini_service(self):
        """Provide a fresh ``GeminiService`` instance."""
        return GeminiService()

    @pytest.fixture
    def sample_vtt(self):
        """Three-cue English VTT document used as translation input."""
        return (
            "WEBVTT\n"
            "\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello everyone\n"
            "\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "Welcome to our tutorial\n"
            "\n"
            "00:00:07.000 --> 00:00:09.000\n"
            "Let's get started\n"
        )

    @pytest.mark.asyncio
    async def test_translate_vtt_success(self, gemini_service, sample_vtt):
        """Translated text appears in the result alongside the original timings."""
        # translate_vtt sends cue texts as a numbered list; mock must return a numbered list
        mock_response = self._text_response(
            "1. Hola a todos\n2. Bienvenidos a nuestro tutorial\n3. Empecemos"
        )

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            result = await gemini_service.translate_vtt(sample_vtt, "es")

        assert "WEBVTT" in result
        assert "00:00:01.000 --> 00:00:03.000" in result
        assert "00:00:04.000 --> 00:00:06.000" in result
        assert "00:00:07.000 --> 00:00:09.000" in result
        assert "Hola a todos" in result
        assert "Bienvenidos a nuestro tutorial" in result
        assert "Empecemos" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_preserves_timing(self, gemini_service):
        """Millisecond-precise timestamps survive translation unchanged."""
        original_vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:01.234 --> 00:00:03.567\n"
            "Original text\n"
            "\n"
            "00:00:05.890 --> 00:00:08.123\n"
            "Another line\n"
        )
        mock_response = self._text_response("1. Texto original\n2. Otra línea")

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            result = await gemini_service.translate_vtt(original_vtt, "es")

        assert "00:00:01.234 --> 00:00:03.567" in result
        assert "00:00:05.890 --> 00:00:08.123" in result
        assert "Texto original" in result
        assert "Otra línea" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_maintains_webvtt_header(self, gemini_service, sample_vtt):
        """The result always starts with the ``WEBVTT`` header."""
        mock_response = self._text_response(
            "1. Hola a todos\n2. Bienvenidos a nuestro tutorial\n3. Empecemos"
        )

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            result = await gemini_service.translate_vtt(sample_vtt, "es")

        assert result.startswith("WEBVTT")

    @pytest.mark.asyncio
    async def test_translate_vtt_handles_multiline_cues(self, gemini_service):
        """A cue spanning two text lines is translated as a single entry."""
        multiline_vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "First line\n"
            "Second line\n"
            "\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "Another cue\n"
        )
        # Multi-line cues are joined with a space before sending to Gemini
        mock_response = self._text_response("1. Primera línea Segunda línea\n2. Otra señal")

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            result = await gemini_service.translate_vtt(multiline_vtt, "es")

        assert "Primera línea" in result
        assert "Segunda línea" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_with_source_language(self, gemini_service, sample_vtt):
        """The request sent to Gemini mentions the non-English source language."""
        mock_response = self._text_response(
            "1. Hello everyone\n2. Welcome to our tutorial\n3. Let's get started"
        )

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            await gemini_service.translate_vtt(sample_vtt, "en", source_language="fr")
            call_args = mock_client.models.generate_content.call_args

        assert call_args is not None
        # Prompt should reference the source language
        # NOTE(review): the "fr" substring check also matches words such as
        # "from"; consider asserting on the exact prompt wording once confirmed.
        prompt_content = str(call_args)
        assert "fr" in prompt_content or "French" in prompt_content

    @pytest.mark.asyncio
    async def test_translate_vtt_error_handling(self, gemini_service, sample_vtt):
        """An exception raised by the client propagates out of ``translate_vtt``."""
        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.side_effect = Exception("API Error")
            with pytest.raises(Exception, match="API Error"):
                await gemini_service.translate_vtt(sample_vtt, "es")

    @pytest.mark.asyncio
    async def test_translate_vtt_with_speaker_labels(self, gemini_service):
        """Speaker labels returned by the model are kept in the rebuilt VTT."""
        vtt_with_speakers = (
            "WEBVTT\n"
            "\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "[Speaker 1]: Hello everyone\n"
            "\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "[Speaker 2]: Welcome to the show\n"
        )
        mock_response = self._text_response(
            "1. [Speaker 1]: Hola a todos\n2. [Speaker 2]: Bienvenidos al programa"
        )

        with patch("app.services.gemini.client") as mock_client:
            mock_client.models.generate_content.return_value = mock_response
            result = await gemini_service.translate_vtt(vtt_with_speakers, "es")

        assert "[Speaker 1]" in result
        assert "[Speaker 2]" in result
@pytest.mark.integration
class TestGeminiServiceIntegration:
    """Placeholder integration tests; both are skipped until real credentials exist."""

    @pytest.mark.skip(reason="Requires actual Gemini API key and video file")
    @pytest.mark.asyncio
    async def test_real_gemini_extraction(self):
        """Placeholder for an end-to-end extraction run against the live API."""

    @pytest.mark.skip(reason="Requires actual Gemini API key")
    @pytest.mark.asyncio
    async def test_real_transcreation(self):
        """Placeholder for an end-to-end transcreation run against the live API."""