video-accessibility/backend/tests/unit/test_gemini.py
michael d2d8e32819 feat: add video-native translation mode for multi-language content
Add a new "Video Native Mode" translation option that re-processes the
video through Gemini for each target language, generating captions and
audio descriptions directly from visual context. This produces more
natural and culturally appropriate content compared to traditional VTT
text translation.

Changes:
- Add translation_mode field to RequestedOutputs (video_native | traditional)
- Create gemini_ingestion_targeted.md prompt for target language generation
- Add extract_accessibility_targeted() method to Gemini service
- Modify translate_and_synthesize task to handle both translation modes
- Add Translation Mode UI selector in NewJob screen (video_native is default)
- Remove transcreation UI (replaced by video_native mode)
- Remove Google Translate service (replaced by Gemini translation)
- Add LanguageSelector component with searchable dropdown

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-31 13:50:05 -06:00

519 lines
No EOL
19 KiB
Python

import json
from unittest.mock import AsyncMock, MagicMock, patch, mock_open
import pytest
from app.services.gemini import GeminiService
class TestGeminiService:
    """Unit tests for ``GeminiService``: extraction, self-healing, transcreation, prompt loading."""

    @pytest.fixture
    def gemini_service(self):
        """Return a ``GeminiService`` built while ``genai`` is patched, with ``model`` mocked."""
        with patch('app.services.gemini.genai'):
            instance = GeminiService()
            # Each test scripts its reply through ``model.generate_content``.
            instance.model = MagicMock()
            return instance

    @pytest.fixture
    def valid_gemini_response(self):
        """Return a payload containing every field the extraction tests compare against."""
        captions = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello everyone, today we'll\n"
            "00:00:03.000 --> 00:00:05.000\n"
            "learn about accessibility features.\n"
        )
        audio_description = (
            "WEBVTT\n"
            "00:00:00.500 --> 00:00:01.000\n"
            "[Upbeat intro music plays]\n"
            "00:00:05.500 --> 00:00:07.000\n"
            "[Speaker gestures toward screen]\n"
        )
        return {
            "language": "en",
            "confidence": 0.92,
            "summary": "A short video about accessibility features in web development.",
            "transcript_plaintext": "Hello everyone, today we'll learn about accessibility features.",
            "captions_vtt": captions,
            "audio_description_vtt": audio_description,
        }

    @pytest.mark.asyncio
    async def test_extract_accessibility_success(self, gemini_service, valid_gemini_response):
        """A JSON reply with all fields comes back as the equivalent dict."""
        gemini_service.model.generate_content.return_value = MagicMock(
            text=json.dumps(valid_gemini_response)
        )
        upload_patch = patch('app.services.gemini.genai.upload_file', return_value=MagicMock())
        prompt_patch = patch.object(gemini_service, '_load_prompt', return_value="Test prompt")

        with upload_patch, prompt_patch:
            extracted = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert extracted == valid_gemini_response
        assert extracted["confidence"] == 0.92
        assert extracted["language"] == "en"
        assert "WEBVTT" in extracted["captions_vtt"]
        assert "WEBVTT" in extracted["audio_description_vtt"]

    @pytest.mark.asyncio
    async def test_extract_accessibility_with_markdown_formatting(self, gemini_service, valid_gemini_response):
        """A reply wrapped in a markdown ``json`` code fence is still parsed."""
        fenced_reply = f"```json\n{json.dumps(valid_gemini_response)}\n```"
        gemini_service.model.generate_content.return_value = MagicMock(text=fenced_reply)
        upload_patch = patch('app.services.gemini.genai.upload_file')
        prompt_patch = patch.object(gemini_service, '_load_prompt', return_value="Test prompt")

        with upload_patch, prompt_patch:
            extracted = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert extracted == valid_gemini_response

    @pytest.mark.asyncio
    async def test_extract_accessibility_invalid_json(self, gemini_service):
        """An unparseable reply is handed to ``_self_heal_response`` and its result returned."""
        gemini_service.model.generate_content.return_value = MagicMock(text="invalid json content")
        healed_payload = {"language": "en", "confidence": 0.8}
        upload_patch = patch('app.services.gemini.genai.upload_file')
        prompt_patch = patch.object(gemini_service, '_load_prompt', return_value="Test prompt")
        heal_patch = patch.object(gemini_service, '_self_heal_response', return_value=healed_payload)

        with upload_patch, prompt_patch, heal_patch as heal_mock:
            extracted = await gemini_service.extract_accessibility("/tmp/test.mp4")

        assert extracted == {"language": "en", "confidence": 0.8}
        heal_mock.assert_called_once_with("/tmp/test.mp4", "invalid json content")

    @pytest.mark.asyncio
    async def test_extract_accessibility_missing_fields(self, gemini_service):
        """A reply lacking required keys raises ``ValueError``."""
        # Only two keys are supplied; everything else is deliberately absent.
        partial_payload = {"language": "en", "confidence": 0.92}
        gemini_service.model.generate_content.return_value = MagicMock(
            text=json.dumps(partial_payload)
        )
        upload_patch = patch('app.services.gemini.genai.upload_file')
        prompt_patch = patch.object(gemini_service, '_load_prompt', return_value="Test prompt")

        with upload_patch, prompt_patch:
            with pytest.raises(ValueError, match="Missing required field"):
                await gemini_service.extract_accessibility("/tmp/test.mp4")

    @pytest.mark.asyncio
    async def test_extract_accessibility_invalid_vtt_format(self, gemini_service):
        """Captions that do not carry the WEBVTT header raise ``ValueError``."""
        bad_captions_payload = {
            "language": "en",
            "confidence": 0.92,
            "summary": "Test summary",
            "transcript_plaintext": "Test transcript",
            # No WEBVTT header on the captions track.
            "captions_vtt": "Invalid VTT content",
            "audio_description_vtt": "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\nValid AD",
        }
        gemini_service.model.generate_content.return_value = MagicMock(
            text=json.dumps(bad_captions_payload)
        )
        upload_patch = patch('app.services.gemini.genai.upload_file')
        prompt_patch = patch.object(gemini_service, '_load_prompt', return_value="Test prompt")

        with upload_patch, prompt_patch:
            with pytest.raises(ValueError, match="Invalid captions VTT format"):
                await gemini_service.extract_accessibility("/tmp/test.mp4")

    @pytest.mark.asyncio
    async def test_self_heal_response_success(self, gemini_service, valid_gemini_response):
        """``_self_heal_response`` returns the parsed payload when the model replies with valid JSON."""
        gemini_service.model.generate_content.return_value = MagicMock(
            text=json.dumps(valid_gemini_response)
        )

        with patch('app.services.gemini.genai.upload_file'):
            healed = await gemini_service._self_heal_response("/tmp/test.mp4", "invalid json")

        assert healed == valid_gemini_response

    @pytest.mark.asyncio
    async def test_self_heal_response_reask(self, gemini_service):
        """A literal ``REASK`` reply makes ``_self_heal_response`` raise ``ValueError``."""
        gemini_service.model.generate_content.return_value = MagicMock(text="REASK")

        with patch('app.services.gemini.genai.upload_file'):
            with pytest.raises(ValueError, match="Gemini unable to self-heal response"):
                await gemini_service._self_heal_response("/tmp/test.mp4", "invalid json")

    @pytest.mark.asyncio
    async def test_transcreate_content_success(self, gemini_service):
        """``transcreate_content`` returns both VTT tracks from a valid JSON reply."""
        spanish_payload = {
            "captions_vtt": (
                "WEBVTT\n"
                "00:00:01.000 --> 00:00:03.000\n"
                "Hola a todos, hoy vamos a\n"
                "00:00:03.000 --> 00:00:05.000\n"
                "aprender sobre características de accesibilidad.\n"
            ),
            "audio_description_vtt": (
                "WEBVTT\n"
                "00:00:00.500 --> 00:00:01.000\n"
                "[Música de introducción alegre]\n"
                "00:00:05.500 --> 00:00:07.000\n"
                "[El presentador gesticula hacia la pantalla]\n"
            ),
        }
        gemini_service.model.generate_content.return_value = MagicMock(
            text=json.dumps(spanish_payload)
        )

        with patch.object(gemini_service, '_load_prompt', return_value="Test prompt {TARGET_LANGUAGE}"):
            transcreated = await gemini_service.transcreate_content(
                "English captions VTT",
                "English AD VTT",
                "es",
                "Brand guidelines",
            )

        assert transcreated == spanish_payload
        assert "WEBVTT" in transcreated["captions_vtt"]
        assert "WEBVTT" in transcreated["audio_description_vtt"]

    @pytest.mark.asyncio
    async def test_transcreate_content_missing_fields(self, gemini_service):
        """A reply without ``audio_description_vtt`` raises ``ValueError``."""
        captions_only = {"captions_vtt": "Some content"}
        gemini_service.model.generate_content.return_value = MagicMock(
            text=json.dumps(captions_only)
        )

        with patch.object(gemini_service, '_load_prompt', return_value="Test prompt {TARGET_LANGUAGE}"):
            with pytest.raises(ValueError, match="Missing required VTT fields"):
                await gemini_service.transcreate_content(
                    "English captions VTT",
                    "English AD VTT",
                    "es",
                )

    @pytest.mark.asyncio
    async def test_transcreate_content_invalid_json(self, gemini_service):
        """A non-JSON reply raises ``ValueError`` from ``transcreate_content``."""
        gemini_service.model.generate_content.return_value = MagicMock(text="invalid json")

        with patch.object(gemini_service, '_load_prompt', return_value="Test prompt {TARGET_LANGUAGE}"):
            with pytest.raises(ValueError, match="Invalid JSON response from transcreation"):
                await gemini_service.transcreate_content(
                    "English captions VTT",
                    "English AD VTT",
                    "es",
                )

    def test_load_prompt_success(self, gemini_service):
        """``_load_prompt`` returns whatever ``Path.read_text`` yields."""
        expected_text = "Test prompt content with {TARGET_LANGUAGE} placeholder"

        with patch('pathlib.Path.read_text', return_value=expected_text):
            loaded = gemini_service._load_prompt("test_prompt.md")

        assert loaded == expected_text

    def test_load_prompt_file_not_found(self, gemini_service):
        """A ``FileNotFoundError`` from ``Path.read_text`` propagates out of ``_load_prompt``."""
        with patch('pathlib.Path.read_text', side_effect=FileNotFoundError):
            with pytest.raises(FileNotFoundError):
                gemini_service._load_prompt("nonexistent.md")

    @pytest.mark.asyncio
    async def test_transcreate_with_markdown_response(self, gemini_service):
        """A transcreation reply wrapped in a markdown ``json`` fence is still parsed."""
        expected_payload = {
            "captions_vtt": "Test VTT",
            "audio_description_vtt": "Test AD VTT",
        }
        fenced_reply = f"```json\n{json.dumps(expected_payload)}\n```"
        gemini_service.model.generate_content.return_value = MagicMock(text=fenced_reply)

        with patch.object(gemini_service, '_load_prompt', return_value="Test prompt {TARGET_LANGUAGE}"):
            transcreated = await gemini_service.transcreate_content(
                "English captions VTT",
                "English AD VTT",
                "es",
            )

        assert transcreated == expected_payload
class TestGeminiTranslateVtt:
    """Tests for ``GeminiService.translate_vtt()``.

    Every test patches ``app.services.gemini.client`` so no real API call is
    made; the scripted reply is delivered via ``client.models.generate_content``.
    """

    @pytest.fixture
    def gemini_service(self):
        """Return a ``GeminiService`` built while ``genai`` is patched, with ``model`` mocked."""
        with patch('app.services.gemini.genai'):
            service = GeminiService()
            service.model = MagicMock()
            return service

    @pytest.fixture
    def sample_vtt(self):
        """Return a three-cue English VTT document used as translation input."""
        return (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello everyone\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "Welcome to our tutorial\n"
            "00:00:07.000 --> 00:00:09.000\n"
            "Let's get started\n"
        )

    @pytest.fixture
    def translated_vtt_es(self):
        """Return the Spanish counterpart of ``sample_vtt`` with identical timestamps."""
        return (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hola a todos\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "Bienvenidos a nuestro tutorial\n"
            "00:00:07.000 --> 00:00:09.000\n"
            "Empecemos\n"
        )

    @staticmethod
    async def _translate_with_reply(service, reply_text, vtt, target_language, **kwargs):
        """Call ``translate_vtt`` with ``client`` patched to answer *reply_text*.

        Args:
            service: The ``GeminiService`` under test.
            reply_text: Value exposed as ``.text`` on the mocked response.
            vtt: VTT document passed as the first positional argument.
            target_language: Language code passed as the second positional argument.
            **kwargs: Forwarded unchanged to ``translate_vtt``.

        Returns:
            A ``(result, mock_client)`` tuple so callers can inspect both the
            translation and the recorded ``generate_content`` call.
        """
        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.return_value = MagicMock(text=reply_text)
            result = await service.translate_vtt(vtt, target_language, **kwargs)
        return result, mock_client

    @pytest.mark.asyncio
    async def test_translate_vtt_success(self, gemini_service, sample_vtt, translated_vtt_es):
        """Test successful VTT translation using Gemini"""
        result, _ = await self._translate_with_reply(
            gemini_service, translated_vtt_es, sample_vtt, "es"
        )

        # Structure: header and all three timing lines survive.
        assert "WEBVTT" in result
        assert "00:00:01.000 --> 00:00:03.000" in result
        assert "00:00:04.000 --> 00:00:06.000" in result
        assert "00:00:07.000 --> 00:00:09.000" in result
        # Content: the translated cue text is present.
        assert "Hola a todos" in result
        assert "Bienvenidos a nuestro tutorial" in result
        assert "Empecemos" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_preserves_timing(self, gemini_service):
        """Test that translation preserves exact timestamps"""
        original_vtt = (
            "WEBVTT\n"
            "00:00:01.234 --> 00:00:03.567\n"
            "Original text\n"
            "00:00:05.890 --> 00:00:08.123\n"
            "Another line\n"
        )
        translated_vtt = (
            "WEBVTT\n"
            "00:00:01.234 --> 00:00:03.567\n"
            "Texto original\n"
            "00:00:05.890 --> 00:00:08.123\n"
            "Otra línea\n"
        )

        result, _ = await self._translate_with_reply(
            gemini_service, translated_vtt, original_vtt, "es"
        )

        # Millisecond-precision timestamps must come through untouched.
        assert "00:00:01.234 --> 00:00:03.567" in result
        assert "00:00:05.890 --> 00:00:08.123" in result
        assert "Texto original" in result
        assert "Otra línea" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_maintains_webvtt_header(self, gemini_service, sample_vtt):
        """Test that WEBVTT header is preserved or added if missing"""
        # The scripted reply deliberately omits the WEBVTT header line.
        response_without_header = (
            "00:00:01.000 --> 00:00:03.000\n"
            "Hola a todos\n"
        )

        result, _ = await self._translate_with_reply(
            gemini_service, response_without_header, sample_vtt, "es"
        )

        assert result.startswith("WEBVTT")

    @pytest.mark.asyncio
    async def test_translate_vtt_handles_markdown_formatting(self, gemini_service, sample_vtt, translated_vtt_es):
        """Test handling of markdown code blocks in response"""
        markdown_response = f"```vtt\n{translated_vtt_es}\n```"

        result, _ = await self._translate_with_reply(
            gemini_service, markdown_response, sample_vtt, "es"
        )

        # The code fence must be stripped while the payload is kept.
        assert "```" not in result
        assert "WEBVTT" in result
        assert "Hola a todos" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_handles_multiline_cues(self, gemini_service):
        """Test translation of VTT with multi-line cues"""
        multiline_vtt = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "First line\n"
            "Second line\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "Another cue\n"
        )
        translated_multiline = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Primera línea\n"
            "Segunda línea\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "Otra señal\n"
        )

        result, _ = await self._translate_with_reply(
            gemini_service, translated_multiline, multiline_vtt, "es"
        )

        assert "Primera línea" in result
        assert "Segunda línea" in result

    @pytest.mark.asyncio
    async def test_translate_vtt_with_source_language(self, gemini_service, sample_vtt):
        """Test translation with non-English source language"""
        # The reply content is irrelevant here; only the outgoing call is inspected.
        _, mock_client = await self._translate_with_reply(
            gemini_service, sample_vtt, sample_vtt, "en", source_language="fr"
        )

        call_args = mock_client.models.generate_content.call_args
        assert call_args is not None, "generate_content was never called"

        # These two checks were previously OR-ed together with
        # ``call_args is not None``, so the source-language check could never
        # fail once any call had been made. They are now asserted separately.
        # NOTE(review): the bare substring "fr" also matches words such as
        # "from"; tighten this once the prompt wording is pinned down.
        prompt_content = str(call_args)
        assert "fr" in prompt_content or "French" in prompt_content

    @pytest.mark.asyncio
    async def test_translate_vtt_error_handling(self, gemini_service, sample_vtt):
        """Test proper error propagation on API failure"""
        with patch('app.services.gemini.client') as mock_client:
            mock_client.models.generate_content.side_effect = Exception("API Error")
            with pytest.raises(Exception, match="API Error"):
                await gemini_service.translate_vtt(sample_vtt, "es")

    @pytest.mark.asyncio
    async def test_translate_vtt_with_speaker_labels(self, gemini_service):
        """Test that speaker labels are preserved during translation"""
        vtt_with_speakers = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "[Speaker 1]: Hello everyone\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "[Speaker 2]: Welcome to the show\n"
        )
        translated_with_speakers = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "[Speaker 1]: Hola a todos\n"
            "00:00:04.000 --> 00:00:06.000\n"
            "[Speaker 2]: Bienvenidos al programa\n"
        )

        result, _ = await self._translate_with_reply(
            gemini_service, translated_with_speakers, vtt_with_speakers, "es"
        )

        assert "[Speaker 1]" in result
        assert "[Speaker 2]" in result
@pytest.mark.integration
class TestGeminiServiceIntegration:
    """Skipped placeholder integration tests; they need a real Gemini API key to run."""

    @pytest.mark.skip(reason="Requires actual Gemini API key and video file")
    @pytest.mark.asyncio
    async def test_real_gemini_extraction(self):
        """Placeholder for an end-to-end extraction run against the real service."""
        # Enable only when real credentials and a sample video are available.
        live_service = GeminiService()
        # Intended flow once enabled: call extract_accessibility() on a real
        # video path and check that the result contains "captions_vtt" and
        # "audio_description_vtt".

    @pytest.mark.skip(reason="Requires actual Gemini API key")
    @pytest.mark.asyncio
    async def test_real_transcreation(self):
        """Placeholder for an end-to-end transcreation run against the real service."""
        # Enable only when real credentials are available.
        live_service = GeminiService()
        # The actual transcreation call and its assertions are still to be written.