semblance/backend/tests/test_llm_service.py
Vadym Samoilenko 3e9ccafad2 Add LLM usage tracking infrastructure (Phases A-C)
- Model renames: gpt-5.2 → gpt-5.4-2026-03-05, gemini-3-pro-preview → gemini-3.1-pro-preview; retire gpt-4.1 via alias fallback
- New: llm_usage_context.py (ContextVar-based attribution), model_pricing.py (tiered pricing + 60s cache), usage_event.py (append-only telemetry), quota.py (user/FG quota enforcement with 80% warning)
- Wire _record_usage into all 3 LLM methods; set_llm_context at every service entry point
- Fix admin_required decorator (was sync, never awaited User.find_by_id); add active_required and with_user_context decorators
- Inject user_id into ContextVar from JWT on every authenticated request
- Add DB indexes for usage_events, model_pricing, users collections
- Seed script for model pricing (gpt-5.4 single-tier, gemini-3.1 two-tier 200k threshold)
- Fix parse_json_response NameError (logger undefined at module level)
- 70 passing tests: conftest.py with sys.modules stubs, test_usage_infrastructure.py (52 tests), rewrite stale test_llm_service.py (18 tests)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-24 18:08:27 +01:00

128 lines
5.1 KiB
Python
Executable file

"""
Tests for LLMService — covers parse_json_response (sync), model alias resolution
(_resolve_model / SUPPORTED_MODELS) and _extract_usage_metadata.
generate_content / generate_multimodal_content are async and call real provider APIs,
so they are covered via integration tests; only pure logic is unit-tested here.
"""
import sys
import pytest
from unittest.mock import MagicMock, AsyncMock, patch
from app.services.llm_service import LLMService, LLMServiceError
class TestParseJsonResponse:
    """Unit tests for ``LLMService.parse_json_response``.

    Each test feeds a literal string to the parser and checks either the
    decoded value or that ``LLMServiceError`` is raised.
    """

    def test_clean_json(self):
        """A bare JSON object string is expected to decode to the equal dict."""
        raw = '{"key": "value", "number": 42}'
        parsed = LLMService.parse_json_response(raw)
        assert parsed == {"key": "value", "number": 42}

    def test_json_in_markdown_fenced_block(self):
        """An object wrapped in a ```json fence is expected to decode normally."""
        fenced = '```json\n{"key": "value", "number": 42}\n```'
        parsed = LLMService.parse_json_response(fenced)
        assert parsed == {"key": "value", "number": 42}

    def test_json_in_generic_fenced_block(self):
        """An object wrapped in an untagged ``` fence is expected to decode normally."""
        fenced = '```\n{"key": "value", "number": 42}\n```'
        parsed = LLMService.parse_json_response(fenced)
        assert parsed == {"key": "value", "number": 42}

    def test_invalid_json_raises(self):
        """Non-JSON text must raise LLMServiceError carrying the parse-failure message."""
        # `match` searches str(exception), i.e. the same substring check as
        # asserting against str(exc_info.value) after the block.
        with pytest.raises(LLMServiceError, match="Failed to parse JSON response"):
            LLMService.parse_json_response("This is not JSON")

    def test_empty_string_raises(self):
        """An empty string must raise LLMServiceError."""
        with pytest.raises(LLMServiceError):
            LLMService.parse_json_response("")

    def test_json_array(self):
        """A top-level JSON array is expected to decode to a list of dicts."""
        raw = '[{"a": 1}, {"b": 2}]'
        parsed = LLMService.parse_json_response(raw)
        assert parsed == [{"a": 1}, {"b": 2}]

    def test_nested_json(self):
        """Nested objects and arrays are expected to survive decoding intact."""
        raw = '{"outer": {"inner": [1, 2, 3]}}'
        parsed = LLMService.parse_json_response(raw)
        assert parsed == {"outer": {"inner": [1, 2, 3]}}
class TestResolveModelAndProvider:
    """Tests for ``LLMService._resolve_model`` and the ``SUPPORTED_MODELS`` table.

    Resolution tests call ``_resolve_model`` directly; provider tests read the
    provider name stored against a model id in ``SUPPORTED_MODELS``.
    """

    def test_none_resolves_to_default(self):
        """Passing None is expected to yield the default model id."""
        resolved = LLMService._resolve_model(None)
        assert resolved == "gemini-3.1-pro-preview"

    def test_all_aliases_resolve(self):
        """Every alias listed here is expected to map to its target model id."""
        alias_targets = {
            "gpt-5": "gpt-5.4-2026-03-05",
            "gpt-5.2": "gpt-5.4-2026-03-05",
            "gemini-3-pro-preview": "gemini-3.1-pro-preview",
            "gpt-4.1": "gemini-3.1-pro-preview",
        }
        # Insertion order is preserved, so aliases are checked in the listed order.
        for alias, target in alias_targets.items():
            assert LLMService._resolve_model(alias) == target

    def test_known_models_unchanged(self):
        """Model ids checked here are expected to resolve to themselves."""
        for model_id in ("gemini-3.1-pro-preview", "gpt-5.4-2026-03-05"):
            assert LLMService._resolve_model(model_id) == model_id

    def test_provider_for_gemini_model(self):
        """The gemini model id is expected to be registered under provider 'gemini'."""
        from app.services.llm_service import SUPPORTED_MODELS

        provider = SUPPORTED_MODELS.get("gemini-3.1-pro-preview")
        assert provider == "gemini"

    def test_provider_for_openai_model(self):
        """The gpt-5.4 model id is expected to be registered under provider 'openai'."""
        from app.services.llm_service import SUPPORTED_MODELS

        provider = SUPPORTED_MODELS.get("gpt-5.4-2026-03-05")
        assert provider == "openai"

    def test_unknown_model_not_in_supported(self):
        """The retired gpt-4.1 id must not appear in SUPPORTED_MODELS."""
        from app.services.llm_service import SUPPORTED_MODELS

        retired_model = "gpt-4.1"  # retired
        assert retired_model not in SUPPORTED_MODELS
class TestExtractUsageMetadata:
    """Tests for ``LLMService._extract_usage_metadata``.

    Every test builds a ``MagicMock`` response object, so no provider API is
    called. The expected result in each case is a dict with the keys
    ``prompt``, ``completion``, ``cached`` and ``reasoning``.
    """

    def test_gemini_extracts_all_fields(self):
        """Gemini-style ``usage_metadata`` counts are expected in the result dict."""
        response = MagicMock()
        um = MagicMock()
        um.prompt_token_count = 500
        um.candidates_token_count = 100
        um.cached_content_token_count = 20
        response.usage_metadata = um

        result = LLMService._extract_usage_metadata(response, "gemini")

        assert result == {"prompt": 500, "completion": 100, "cached": 20, "reasoning": 0}

    def test_openai_responses_api(self):
        """``input_tokens``/``output_tokens`` style usage, including detail sub-objects."""
        response = MagicMock()
        usage = MagicMock()
        usage.input_tokens = 1000
        usage.output_tokens = 200
        usage.input_tokens_details = MagicMock(cached_tokens=50)
        usage.output_tokens_details = MagicMock(reasoning_tokens=80)
        response.usage = usage

        result = LLMService._extract_usage_metadata(response, "openai")

        assert result == {"prompt": 1000, "completion": 200, "cached": 50, "reasoning": 80}

    def test_openai_chat_completions(self):
        """``prompt_tokens``/``completion_tokens`` style usage with no reasoning details."""
        response = MagicMock()
        # spec restricts the mock to these attributes, so any other attribute
        # access on `usage` raises AttributeError instead of returning a mock.
        usage = MagicMock(spec=['prompt_tokens', 'completion_tokens', 'prompt_tokens_details'])
        usage.prompt_tokens = 400
        usage.completion_tokens = 100
        usage.prompt_tokens_details = MagicMock(cached_tokens=10)
        response.usage = usage

        result = LLMService._extract_usage_metadata(response, "openai")

        assert result == {"prompt": 400, "completion": 100, "cached": 10, "reasoning": 0}

    def test_missing_usage_returns_zeros(self):
        """A response whose ``usage`` is None is expected to produce all-zero counts."""
        response = MagicMock()
        response.usage = None

        assert LLMService._extract_usage_metadata(response, "openai") == {
            "prompt": 0, "completion": 0, "cached": 0, "reasoning": 0
        }

    def test_none_values_coerced_to_zero(self):
        """Fields returning None should become 0, not None."""
        response = MagicMock()
        um = MagicMock()
        um.prompt_token_count = None
        um.candidates_token_count = None
        um.cached_content_token_count = None
        response.usage_metadata = um

        result = LLMService._extract_usage_metadata(response, "gemini")

        # Compare against the full dict rather than all(... for v in values()):
        # all() over an empty mapping is vacuously true, so a result of {} (or
        # one missing keys) would otherwise pass unnoticed.
        assert result == {"prompt": 0, "completion": 0, "cached": 0, "reasoning": 0}