# video-accessibility/backend/tests/unit/test_tts.py
# 2025-08-24 16:28:33 -05:00
#
# 266 lines
# No EOL
# 11 KiB
# Python

from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from app.services.tts import TTSService
class TestTTSService:
    """Unit tests for ``TTSService``.

    Every external collaborator touched by these tests (the settings object,
    the Google TTS client class, ``AudioSegment``, ``io.BytesIO`` and
    ``aiohttp.ClientSession`` as looked up through ``app.services.tts``) is
    replaced with a mock, so no network or audio processing takes place.
    """

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _mock_aiohttp_session(response):
        """Return a stand-in for an ``aiohttp.ClientSession`` instance.

        The returned object can be used both directly and as
        ``async with ClientSession() as session``; in either case
        ``async with session.post(...) as resp`` yields *response*.
        """
        session = AsyncMock()
        # Entering the session must hand back this very object, otherwise
        # the code under test would receive an unconfigured child mock.
        session.__aenter__.return_value = session
        # ``post`` has to be a plain callable: calling an AsyncMock returns
        # a coroutine, which cannot be used as an async context manager.
        session.post = MagicMock()
        session.post.return_value.__aenter__.return_value = response
        return session

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def tts_service_google(self):
        """Create a TTS service whose Google client is a ``MagicMock``.

        NOTE: the fixture returns from inside the ``with`` blocks, so the
        settings and client-class patches are active only while the service
        is constructed; they are undone before the test body runs.
        """
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.google_tts_credentials = "/path/to/credentials.json"
            mock_settings.elevenlabs_api_key = ""
            mock_settings.google_tts_voices = {
                "en-US": "en-US-Neural2-D",
                "es-ES": "es-ES-Neural2-A",
            }
            with patch('app.services.tts.texttospeech.TextToSpeechClient'):
                service = TTSService()
                service.google_client = MagicMock()
                return service

    @pytest.fixture
    def tts_service_elevenlabs(self):
        """Create a TTS service configured for ElevenLabs only.

        NOTE: the fixture returns from inside the ``with`` block, so the
        settings patch is active only while the service is constructed.
        """
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.google_tts_credentials = ""
            mock_settings.elevenlabs_api_key = "test_api_key"
            mock_settings.elevenlabs_voices = {
                "en-US": "21m00Tcm4TlvDq8ikWAM",
                "es-ES": "VR6AewLTigWG4xSOukaG",
            }
            service = TTSService()
            service.google_client = None
            return service

    @pytest.fixture
    def sample_ad_vtt(self):
        """Sample audio description VTT with three cues.

        Written with explicit newlines so the blank lines that separate the
        header and the cues (required by the WebVTT format) cannot be lost
        to whitespace reformatting.
        """
        return (
            "WEBVTT\n"
            "\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "[Upbeat music plays]\n"
            "\n"
            "00:00:05.000 --> 00:00:07.000\n"
            "[Person walks into frame]\n"
            "\n"
            "00:00:10.000 --> 00:00:12.000\n"
            "[Graph appears on screen]\n"
        )

    # ------------------------------------------------------------------
    # Full-track synthesis
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_synthesize_with_google_success(self, tts_service_google, sample_ad_vtt):
        """Test successful TTS synthesis with Google"""
        # Mock Google TTS response
        mock_response = MagicMock()
        mock_response.audio_content = b"fake_audio_data"
        tts_service_google.google_client.synthesize_speech.return_value = mock_response

        # Mock AudioSegment operations
        with patch('app.services.tts.AudioSegment') as mock_audio_segment:
            mock_segment = MagicMock()
            mock_audio_segment.from_file.return_value = mock_segment
            mock_audio_segment.silent.return_value = mock_segment
            mock_audio_segment.empty.return_value = mock_segment

            # Mock the sum operation for combining segments
            mock_combined = MagicMock()
            mock_combined.export.return_value = None

            with patch('builtins.sum', return_value=mock_combined):
                # Mock BytesIO for output
                with patch('app.services.tts.io.BytesIO') as mock_bytesio:
                    mock_buffer = MagicMock()
                    mock_buffer.getvalue.return_value = b"final_audio_data"
                    mock_bytesio.return_value = mock_buffer

                    result = await tts_service_google._synthesize_with_google(
                        sample_ad_vtt,
                        "en-US",
                    )

        assert result == b"final_audio_data"

    @pytest.mark.asyncio
    async def test_synthesize_with_elevenlabs_success(self, tts_service_elevenlabs, sample_ad_vtt):
        """Test successful TTS synthesis with ElevenLabs"""
        # Mock the per-cue ElevenLabs call
        mock_audio_data = b"elevenlabs_audio_data"

        with patch.object(tts_service_elevenlabs, '_synthesize_text_elevenlabs') as mock_elevenlabs:
            mock_elevenlabs.return_value = mock_audio_data

            # Mock AudioSegment operations
            with patch('app.services.tts.AudioSegment') as mock_audio_segment:
                mock_segment = MagicMock()
                mock_audio_segment.from_file.return_value = mock_segment
                mock_audio_segment.silent.return_value = mock_segment
                mock_audio_segment.empty.return_value = mock_segment

                mock_combined = MagicMock()
                mock_combined.export.return_value = None

                with patch('builtins.sum', return_value=mock_combined):
                    with patch('app.services.tts.io.BytesIO') as mock_bytesio:
                        mock_buffer = MagicMock()
                        mock_buffer.getvalue.return_value = b"final_elevenlabs_audio"
                        mock_bytesio.return_value = mock_buffer

                        result = await tts_service_elevenlabs._synthesize_with_elevenlabs(
                            sample_ad_vtt,
                            "en-US",
                        )

        assert result == b"final_elevenlabs_audio"

    @pytest.mark.asyncio
    async def test_synthesize_no_service_configured(self):
        """Test error when no TTS service is configured"""
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.google_tts_credentials = ""
            mock_settings.elevenlabs_api_key = ""

            service = TTSService()
            service.google_client = None
            service.elevenlabs_available = False

            with pytest.raises(ValueError, match="No TTS service configured"):
                await service.synthesize_audio_description("WEBVTT\n", "en-US")

    @pytest.mark.asyncio
    async def test_synthesize_fallback_to_elevenlabs(self, tts_service_elevenlabs):
        """Test fallback from Google to ElevenLabs on failure"""
        # Set up service with both providers available
        tts_service_elevenlabs.google_client = MagicMock()
        tts_service_elevenlabs.elevenlabs_available = True

        with patch.object(tts_service_elevenlabs, '_synthesize_with_google') as mock_google:
            mock_google.side_effect = Exception("Google TTS failed")

            with patch.object(tts_service_elevenlabs, '_synthesize_with_elevenlabs') as mock_elevenlabs:
                mock_elevenlabs.return_value = b"elevenlabs_fallback_audio"

                result = await tts_service_elevenlabs.synthesize_audio_description(
                    "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\n[Test]",
                    "en-US",
                )

                assert result == b"elevenlabs_fallback_audio"
                mock_google.assert_called_once()
                mock_elevenlabs.assert_called_once()

    # ------------------------------------------------------------------
    # VTT / timestamp parsing
    # ------------------------------------------------------------------

    def test_parse_ad_cues(self, tts_service_google, sample_ad_vtt):
        """Test parsing audio description cues"""
        cues = tts_service_google._parse_ad_cues(sample_ad_vtt)

        assert len(cues) == 3

        assert cues[0]["start_time"] == 1.0
        assert cues[0]["end_time"] == 3.0
        assert cues[0]["text"] == "[Upbeat music plays]"

        assert cues[1]["start_time"] == 5.0
        assert cues[1]["end_time"] == 7.0
        assert cues[1]["text"] == "[Person walks into frame]"

    def test_parse_ad_cues_empty_vtt(self, tts_service_google):
        """Test parsing empty VTT content"""
        empty_vtt = "WEBVTT\n"
        cues = tts_service_google._parse_ad_cues(empty_vtt)
        assert len(cues) == 0

    def test_parse_timestamp(self, tts_service_google):
        """Test timestamp parsing"""
        parse = tts_service_google._parse_timestamp

        # Results are sums of float terms, so compare with a tolerance
        # instead of relying on bit-exact float equality.

        # Test HH:MM:SS.mmm format
        assert parse("01:23:45.678") == pytest.approx(5025.678)
        # Test MM:SS.mmm format
        assert parse("23:45.678") == pytest.approx(1425.678)
        # Test without milliseconds
        assert parse("01:23:45") == pytest.approx(5025.0)

    def test_parse_timestamp_invalid_format(self, tts_service_google):
        """Test error handling for invalid timestamp"""
        with pytest.raises(ValueError, match="Invalid timestamp format"):
            tts_service_google._parse_timestamp("invalid:timestamp")

    # ------------------------------------------------------------------
    # ElevenLabs HTTP call
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_synthesize_text_elevenlabs_success(self, tts_service_elevenlabs):
        """Test ElevenLabs text synthesis"""
        mock_audio_data = b"elevenlabs_audio_response"

        # Mock the HTTP response returned by session.post(...)
        mock_response = AsyncMock()
        mock_response.status = 200
        mock_response.read.return_value = mock_audio_data

        mock_session = self._mock_aiohttp_session(mock_response)

        with patch('app.services.tts.aiohttp.ClientSession', return_value=mock_session):
            result = await tts_service_elevenlabs._synthesize_text_elevenlabs(
                "Test text",
                "21m00Tcm4TlvDq8ikWAM",
            )

        assert result == mock_audio_data

    @pytest.mark.asyncio
    async def test_synthesize_text_elevenlabs_error(self, tts_service_elevenlabs):
        """Test ElevenLabs API error handling"""
        # Mock error response
        mock_response = AsyncMock()
        mock_response.status = 400
        mock_response.text.return_value = "Bad request error"

        mock_session = self._mock_aiohttp_session(mock_response)

        with patch('app.services.tts.aiohttp.ClientSession', return_value=mock_session):
            with pytest.raises(ValueError, match="ElevenLabs TTS failed: 400"):
                await tts_service_elevenlabs._synthesize_text_elevenlabs(
                    "Test text",
                    "voice_id",
                )

    # ------------------------------------------------------------------
    # ElevenLabs voice selection
    # ------------------------------------------------------------------

    def test_get_elevenlabs_voice_default(self, tts_service_elevenlabs):
        """Test getting default ElevenLabs voice for language"""
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.elevenlabs_voices = {
                "en-US": "default_voice_id",
                "es-ES": "spanish_voice_id",
            }

            voice_id = tts_service_elevenlabs._get_elevenlabs_voice("en-US")
            assert voice_id == "default_voice_id"

    def test_get_elevenlabs_voice_custom(self, tts_service_elevenlabs):
        """Test getting custom ElevenLabs voice"""
        voice_id = tts_service_elevenlabs._get_elevenlabs_voice("en-US", "custom_voice")
        assert voice_id == "custom_voice"

    def test_get_elevenlabs_voice_fallback(self, tts_service_elevenlabs):
        """Test ElevenLabs voice fallback for unknown language"""
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.elevenlabs_voices = {}

            voice_id = tts_service_elevenlabs._get_elevenlabs_voice("unknown-LANG")
            assert voice_id == "21m00Tcm4TlvDq8ikWAM"  # Default fallback

    # ------------------------------------------------------------------
    # Initialization
    # ------------------------------------------------------------------

    def test_service_initialization_no_credentials(self):
        """Test service initialization with no credentials"""
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.google_tts_credentials = ""
            mock_settings.elevenlabs_api_key = ""

            service = TTSService()

            assert service.google_client is None
            assert service.elevenlabs_available is False