266 lines
No EOL
11 KiB
Python
266 lines
No EOL
11 KiB
Python
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from app.services.tts import TTSService
|
|
|
|
|
|
class TestTTSService:
    """Unit tests for ``TTSService`` (Google TTS and ElevenLabs code paths).

    The settings object, the provider clients, ``AudioSegment``, ``io.BytesIO``
    and ``aiohttp.ClientSession`` are replaced with mocks inside the
    ``app.services.tts`` namespace, so the tests need neither credentials nor
    network access.
    """

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _make_aiohttp_session(mock_response):
        """Return a mock aiohttp session whose ``post()`` yields *mock_response*.

        ``post`` is deliberately a plain ``MagicMock``: calling an
        ``AsyncMock`` returns a coroutine, and a coroutine cannot be used as
        the target of ``async with``.

        NOTE(review): assumes the service does
        ``async with session.post(...) as response`` -- confirm against
        ``app.services.tts``.
        """
        mock_session = AsyncMock()
        # Works whether the service enters the session with ``async with`` or
        # uses the object returned by ``ClientSession()`` directly.
        mock_session.__aenter__.return_value = mock_session
        mock_session.__aexit__.return_value = False

        post_context = MagicMock()
        post_context.__aenter__.return_value = mock_response
        # A falsy ``__aexit__`` result lets exceptions raised inside the
        # ``async with`` body propagate to the test.
        post_context.__aexit__.return_value = False
        mock_session.post = MagicMock(return_value=post_context)
        return mock_session

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def tts_service_google(self):
        """Yield a ``TTSService`` whose Google client is a ``MagicMock``.

        The fixture yields (rather than returns) from inside the ``patch``
        contexts so the mocked settings stay in effect for the whole test,
        not just while the service is being constructed.
        """
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.google_tts_credentials = "/path/to/credentials.json"
            mock_settings.elevenlabs_api_key = ""
            mock_settings.google_tts_voices = {
                "en-US": "en-US-Neural2-D",
                "es-ES": "es-ES-Neural2-A",
            }

            with patch('app.services.tts.texttospeech.TextToSpeechClient'):
                service = TTSService()
                service.google_client = MagicMock()
                yield service

    @pytest.fixture
    def tts_service_elevenlabs(self):
        """Yield a ``TTSService`` configured for ElevenLabs only.

        As with ``tts_service_google``, the settings patch remains active
        until the test that requested the fixture has finished.
        """
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.google_tts_credentials = ""
            mock_settings.elevenlabs_api_key = "test_api_key"
            mock_settings.elevenlabs_voices = {
                "en-US": "21m00Tcm4TlvDq8ikWAM",
                "es-ES": "VR6AewLTigWG4xSOukaG",
            }

            service = TTSService()
            service.google_client = None
            yield service

    @pytest.fixture
    def sample_ad_vtt(self):
        """Return a three-cue audio-description WebVTT document."""
        return (
            "WEBVTT\n"
            "\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "[Upbeat music plays]\n"
            "\n"
            "00:00:05.000 --> 00:00:07.000\n"
            "[Person walks into frame]\n"
            "\n"
            "00:00:10.000 --> 00:00:12.000\n"
            "[Graph appears on screen]\n"
        )

    # ------------------------------------------------------------------
    # Whole-document synthesis
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_synthesize_with_google_success(self, tts_service_google, sample_ad_vtt):
        """``_synthesize_with_google`` returns the bytes of the output buffer."""
        # Mock Google TTS response
        mock_response = MagicMock()
        mock_response.audio_content = b"fake_audio_data"
        tts_service_google.google_client.synthesize_speech.return_value = mock_response

        # Mock AudioSegment operations
        with patch('app.services.tts.AudioSegment') as mock_audio_segment:
            mock_segment = MagicMock()
            mock_audio_segment.from_file.return_value = mock_segment
            mock_audio_segment.silent.return_value = mock_segment
            mock_audio_segment.empty.return_value = mock_segment

            # Mock the sum operation for combining segments.
            # NOTE(review): this replaces the builtin for all code running
            # inside the context, not only the service module.
            mock_combined = MagicMock()
            mock_combined.export.return_value = None

            with patch('builtins.sum', return_value=mock_combined):
                # Mock BytesIO for output
                with patch('app.services.tts.io.BytesIO') as mock_bytesio:
                    mock_buffer = MagicMock()
                    mock_buffer.getvalue.return_value = b"final_audio_data"
                    mock_bytesio.return_value = mock_buffer

                    result = await tts_service_google._synthesize_with_google(
                        sample_ad_vtt,
                        "en-US",
                    )

                    assert result == b"final_audio_data"

    @pytest.mark.asyncio
    async def test_synthesize_with_elevenlabs_success(self, tts_service_elevenlabs, sample_ad_vtt):
        """``_synthesize_with_elevenlabs`` returns the bytes of the output buffer."""
        # Mock ElevenLabs API response
        mock_audio_data = b"elevenlabs_audio_data"

        with patch.object(tts_service_elevenlabs, '_synthesize_text_elevenlabs') as mock_elevenlabs:
            mock_elevenlabs.return_value = mock_audio_data

            # Mock AudioSegment operations
            with patch('app.services.tts.AudioSegment') as mock_audio_segment:
                mock_segment = MagicMock()
                mock_audio_segment.from_file.return_value = mock_segment
                mock_audio_segment.silent.return_value = mock_segment
                mock_audio_segment.empty.return_value = mock_segment

                mock_combined = MagicMock()
                mock_combined.export.return_value = None

                with patch('builtins.sum', return_value=mock_combined):
                    with patch('app.services.tts.io.BytesIO') as mock_bytesio:
                        mock_buffer = MagicMock()
                        mock_buffer.getvalue.return_value = b"final_elevenlabs_audio"
                        mock_bytesio.return_value = mock_buffer

                        result = await tts_service_elevenlabs._synthesize_with_elevenlabs(
                            sample_ad_vtt,
                            "en-US",
                        )

                        assert result == b"final_elevenlabs_audio"

    @pytest.mark.asyncio
    async def test_synthesize_no_service_configured(self):
        """``synthesize_audio_description`` raises when no provider is available."""
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.google_tts_credentials = ""
            mock_settings.elevenlabs_api_key = ""

            service = TTSService()
            service.google_client = None
            service.elevenlabs_available = False

            with pytest.raises(ValueError, match="No TTS service configured"):
                await service.synthesize_audio_description("WEBVTT\n", "en-US")

    @pytest.mark.asyncio
    async def test_synthesize_fallback_to_elevenlabs(self, tts_service_elevenlabs):
        """A failing Google synthesis falls back to ElevenLabs."""
        # Set up service with both providers available
        tts_service_elevenlabs.google_client = MagicMock()
        tts_service_elevenlabs.elevenlabs_available = True

        with patch.object(tts_service_elevenlabs, '_synthesize_with_google') as mock_google:
            mock_google.side_effect = Exception("Google TTS failed")

            with patch.object(tts_service_elevenlabs, '_synthesize_with_elevenlabs') as mock_elevenlabs:
                mock_elevenlabs.return_value = b"elevenlabs_fallback_audio"

                result = await tts_service_elevenlabs.synthesize_audio_description(
                    "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\n[Test]",
                    "en-US",
                )

                assert result == b"elevenlabs_fallback_audio"
                mock_google.assert_called_once()
                mock_elevenlabs.assert_called_once()

    # ------------------------------------------------------------------
    # VTT parsing
    # ------------------------------------------------------------------

    def test_parse_ad_cues(self, tts_service_google, sample_ad_vtt):
        """``_parse_ad_cues`` extracts start, end and text for every cue."""
        cues = tts_service_google._parse_ad_cues(sample_ad_vtt)

        expected = [
            (1.0, 3.0, "[Upbeat music plays]"),
            (5.0, 7.0, "[Person walks into frame]"),
            (10.0, 12.0, "[Graph appears on screen]"),
        ]

        assert len(cues) == len(expected)
        for cue, (start, end, text) in zip(cues, expected):
            assert cue["start_time"] == pytest.approx(start)
            assert cue["end_time"] == pytest.approx(end)
            assert cue["text"] == text

    def test_parse_ad_cues_empty_vtt(self, tts_service_google):
        """A header-only VTT document yields no cues."""
        empty_vtt = "WEBVTT\n"

        cues = tts_service_google._parse_ad_cues(empty_vtt)

        assert len(cues) == 0

    def test_parse_timestamp(self, tts_service_google):
        """``_parse_timestamp`` converts VTT timestamps to seconds.

        Fractional results are compared with ``pytest.approx`` so the test
        does not depend on how the implementation sums the float parts.
        """
        parse = tts_service_google._parse_timestamp

        # Test HH:MM:SS.mmm format
        assert parse("01:23:45.678") == pytest.approx(5025.678)

        # Test MM:SS.mmm format
        assert parse("23:45.678") == pytest.approx(1425.678)

        # Test without milliseconds
        assert parse("01:23:45") == pytest.approx(5025.0)

    def test_parse_timestamp_invalid_format(self, tts_service_google):
        """A malformed timestamp raises ``ValueError``."""
        with pytest.raises(ValueError, match="Invalid timestamp format"):
            tts_service_google._parse_timestamp("invalid:timestamp")

    # ------------------------------------------------------------------
    # ElevenLabs HTTP layer
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_synthesize_text_elevenlabs_success(self, tts_service_elevenlabs):
        """A 200 response body is returned unchanged."""
        mock_audio_data = b"elevenlabs_audio_response"

        mock_response = AsyncMock()
        mock_response.status = 200
        mock_response.read.return_value = mock_audio_data

        mock_session = self._make_aiohttp_session(mock_response)

        with patch('app.services.tts.aiohttp.ClientSession', return_value=mock_session):
            result = await tts_service_elevenlabs._synthesize_text_elevenlabs(
                "Test text",
                "21m00Tcm4TlvDq8ikWAM",
            )

            assert result == mock_audio_data

    @pytest.mark.asyncio
    async def test_synthesize_text_elevenlabs_error(self, tts_service_elevenlabs):
        """A non-200 response is surfaced as ``ValueError``."""
        mock_response = AsyncMock()
        mock_response.status = 400
        mock_response.text.return_value = "Bad request error"

        mock_session = self._make_aiohttp_session(mock_response)

        with patch('app.services.tts.aiohttp.ClientSession', return_value=mock_session):
            with pytest.raises(ValueError, match="ElevenLabs TTS failed: 400"):
                await tts_service_elevenlabs._synthesize_text_elevenlabs(
                    "Test text",
                    "voice_id",
                )

    # ------------------------------------------------------------------
    # Voice selection
    # ------------------------------------------------------------------

    def test_get_elevenlabs_voice_default(self, tts_service_elevenlabs):
        """The voice configured for the language is used when none is given."""
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.elevenlabs_voices = {
                "en-US": "default_voice_id",
                "es-ES": "spanish_voice_id",
            }

            voice_id = tts_service_elevenlabs._get_elevenlabs_voice("en-US")
            assert voice_id == "default_voice_id"

    def test_get_elevenlabs_voice_custom(self, tts_service_elevenlabs):
        """An explicitly requested voice takes precedence."""
        voice_id = tts_service_elevenlabs._get_elevenlabs_voice("en-US", "custom_voice")
        assert voice_id == "custom_voice"

    def test_get_elevenlabs_voice_fallback(self, tts_service_elevenlabs):
        """An unknown language falls back to the built-in default voice."""
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.elevenlabs_voices = {}

            voice_id = tts_service_elevenlabs._get_elevenlabs_voice("unknown-LANG")
            assert voice_id == "21m00Tcm4TlvDq8ikWAM"  # Default fallback

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_service_initialization_no_credentials(self):
        """With no credentials configured, neither provider is enabled."""
        with patch('app.services.tts.settings') as mock_settings:
            mock_settings.google_tts_credentials = ""
            mock_settings.elevenlabs_api_key = ""

            service = TTSService()

            assert service.google_client is None
            assert service.elevenlabs_available is False