279 lines
9.4 KiB
Python
279 lines
9.4 KiB
Python
"""
Tests for OpenAI Responses API Service

Tests OpenAI service integration, response parsing, and RAG validation
"""
|
|
|
|
import pytest
|
|
from unittest.mock import Mock, AsyncMock, patch
|
|
from app.services.openai_service import OpenAIService
|
|
|
|
|
|
@pytest.fixture
def openai_service():
    """Provide an OpenAIService built with a dummy API key for unit tests."""
    service = OpenAIService(api_key="sk-test-key")
    return service
|
|
|
|
|
|
def test_openai_service_initialization(openai_service):
    """Verify the fixture-built service exposes the expected configuration."""
    # Attribute name -> value the service is expected to report.
    expected_attributes = {
        "api_key": "sk-test-key",
        "model": "gpt-5-nano-2025-08-07",
        "vector_store_id": "vs_QkOKiQCqzCHS4iFT5lP9qUxc",
    }
    for attribute, expected in expected_attributes.items():
        assert getattr(openai_service, attribute) == expected
|
|
|
|
|
|
def test_get_system_instructions(openai_service):
    """Check that the system instructions carry the RAG-enforcement wording."""
    instructions = openai_service.get_system_instructions()

    # Each phrase must appear verbatim in the instruction text.
    required_phrases = (
        "The APAC OpsBot",
        "STRICTLY RAG-ONLY",
        "file_search results",
        "don't have information",
    )
    for phrase in required_phrases:
        assert phrase in instructions
|
|
|
|
|
|
def test_check_citations_with_valid_citation(openai_service):
    """A message that attributes its claim to a source is reported as cited."""
    cited_message = "According to the Employee Handbook, leave policy states..."
    retrieved = [{"filename": "handbook.pdf"}]

    assert openai_service._check_citations(cited_message, retrieved) is True
|
|
|
|
|
|
def test_check_citations_with_no_info_response(openai_service):
    """An explicit "I don't have information" reply counts as acceptable."""
    no_info_reply = "I don't have information about this topic in my knowledge base."

    # No search hits accompany the reply; it is still expected to pass.
    outcome = openai_service._check_citations(no_info_reply, [])
    assert outcome is True
|
|
|
|
|
|
def test_check_citations_without_citation(openai_service):
    """A bare factual claim with no citation wording is flagged as uncited."""
    uncited_claim = "The leave policy allows 20 days of vacation per year."
    retrieved = [{"filename": "handbook.pdf"}]

    # Search results exist, but the message never attributes its claim,
    # which this test treats as a potential hallucination.
    outcome = openai_service._check_citations(uncited_claim, retrieved)
    assert outcome is False
|
|
|
|
|
|
def test_check_citations_empty_message(openai_service):
    """A missing message (None) with no search results is reported as uncited."""
    assert openai_service._check_citations(None, []) is False
|
|
|
|
|
|
def test_format_search_results(openai_service):
    """Check the dict produced for a single raw file-search hit."""
    # The content is repeated so it is well over 200 characters, which lets
    # the snippet-length assertion below actually exercise truncation.
    raw_hit = Mock(
        file_id="file-123",
        filename="employee_handbook.pdf",
        content="This is a long content snippet that should be truncated..." * 10,
        score=0.95,
    )

    formatted = openai_service._format_search_results([raw_hit])

    assert len(formatted) == 1
    entry = formatted[0]
    assert entry["file_id"] == "file-123"
    assert entry["filename"] == "employee_handbook.pdf"
    assert len(entry["content_snippet"]) <= 200  # Should be truncated
    assert entry["score"] == 0.95
|
|
|
|
|
|
def test_validate_rag_usage_with_citations(openai_service, caplog):
    """A cited response passes validation with no warning and no review flag."""
    cited_response = dict(
        response_id="resp-123",
        content="According to the handbook, the policy states...",
        has_citations=True,
        file_search_results=[{"filename": "handbook.pdf"}],
    )

    openai_service._validate_rag_usage(cited_response)

    # Neither a log warning nor a review marker is expected.
    assert "POTENTIAL HALLUCINATION" not in caplog.text
    assert "needs_review" not in cited_response
|
|
|
|
|
|
def test_validate_rag_usage_without_citations(openai_service, caplog):
    """An uncited response is logged, given a disclaimer and flagged for review."""
    uncited_response = dict(
        response_id="resp-456",
        content="The policy allows 20 days vacation.",
        has_citations=False,
        file_search_results=[{"filename": "handbook.pdf"}],
    )

    # Validation is expected to mutate the dict in place.
    openai_service._validate_rag_usage(uncited_response)

    assert "POTENTIAL HALLUCINATION" in caplog.text
    assert "⚠️ Note:" in uncited_response["content"]
    assert uncited_response["needs_review"] is True
|
|
|
|
|
|
def test_validate_rag_usage_no_info_response(openai_service, caplog):
    """A "no information" reply with no search hits raises no warning."""
    no_info_response = dict(
        response_id="resp-789",
        content="I don't have information about this in my knowledge base.",
        has_citations=True,
        file_search_results=[],
    )

    openai_service._validate_rag_usage(no_info_response)

    # A valid no-info answer must not be treated as a hallucination.
    assert "POTENTIAL HALLUCINATION" not in caplog.text
    assert "needs_review" not in no_info_response
|
|
|
|
|
|
@pytest.mark.asyncio
@patch('app.services.openai_service.AsyncOpenAI')
async def test_generate_response_success(mock_async_openai, openai_service):
    """Happy path: a completed API response is parsed into the result dict.

    The fake response carries one assistant message and one file_search_call
    output item. The assertions check the id, text, token usage, citation flag
    and search-result count in the value returned by ``generate_response``.
    """
    # Assistant message holding a single output_text block.
    answer_text = Mock(
        type="output_text",
        text="According to the handbook, leave policy allows 20 days.",
    )
    assistant_message = Mock(
        type="message",
        role="assistant",
        content=[answer_text],
    )

    # File-search tool call carrying one retrieved document.
    retrieved_doc = Mock(
        file_id="file-123",
        filename="handbook.pdf",
        content="Leave policy details...",
        score=0.9,
    )
    search_call = Mock(type="file_search_call", results=[retrieved_doc])

    fake_response = Mock(
        id="resp-test-123",
        status="completed",
        usage=Mock(input_tokens=100, output_tokens=50, total_tokens=150),
        output=[assistant_message, search_call],
    )

    # The service's client is swapped out directly, so the call below never
    # reaches a real API.
    fake_client = AsyncMock()
    fake_client.responses.create = AsyncMock(return_value=fake_response)
    openai_service.async_client = fake_client

    result = await openai_service.generate_response("What is the leave policy?")

    assert result["response_id"] == "resp-test-123"
    assert "According to the handbook" in result["content"]
    assert result["usage"]["total_tokens"] == 150
    assert result["has_citations"] is True
    assert len(result["file_search_results"]) == 1
|
|
|
|
|
|
@pytest.mark.asyncio
@patch('app.services.openai_service.AsyncOpenAI')
async def test_generate_response_with_previous_response_id(mock_async_openai, openai_service):
    """Multi-turn: ``previous_response_id`` is forwarded to the API call."""
    # Follow-up answer consisting of a single assistant text block.
    followup_text = Mock(
        type="output_text",
        text="As mentioned before, the policy allows...",
    )
    assistant_message = Mock(
        type="message",
        role="assistant",
        content=[followup_text],
    )
    fake_response = Mock(
        id="resp-followup-456",
        status="completed",
        usage=Mock(input_tokens=80, output_tokens=40, total_tokens=120),
        output=[assistant_message],
    )

    # The service's client is swapped out directly, so the call below never
    # reaches a real API.
    fake_client = AsyncMock()
    fake_client.responses.create = AsyncMock(return_value=fake_response)
    openai_service.async_client = fake_client

    result = await openai_service.generate_response(
        "Can you clarify that?",
        previous_response_id="resp-previous-123",
    )

    # The id must arrive as a keyword argument of responses.create.
    sent_kwargs = fake_client.responses.create.call_args.kwargs
    assert sent_kwargs["previous_response_id"] == "resp-previous-123"
    assert result["response_id"] == "resp-followup-456"
|
|
|
|
|
|
@pytest.mark.asyncio
@patch('app.services.openai_service.AsyncOpenAI')
async def test_generate_response_api_error(mock_async_openai, openai_service):
    """Check that a failing API call surfaces as an exception to the caller.

    The client's ``responses.create`` is made to raise, and the test expects
    ``generate_response`` to raise an exception whose text contains the
    original failure message.
    """
    failing_client = AsyncMock()
    failing_client.responses.create = AsyncMock(
        side_effect=Exception("API connection failed")
    )
    openai_service.async_client = failing_client

    # A bare ``pytest.raises(Exception)`` accepts any error at all; ``match``
    # ties the expectation to the injected failure message in one step
    # instead of a separate assert on the captured exception.
    with pytest.raises(Exception, match="API connection failed"):
        await openai_service.generate_response("Test message")
|
|
|
|
|
|
@pytest.mark.skip(reason="Requires actual OpenAI API key")
@pytest.mark.asyncio
async def test_generate_response_integration():
    """
    Live round-trip against the real OpenAI API.

    Note: the skip marker on this test is unconditional, so it never runs as
    written. To run it:
    1. Set OPENAI_API_KEY environment variable
    2. Remove (or override) the skip marker
    3. Run: pytest -k test_generate_response_integration -v
    """
    live_service = OpenAIService()
    question = "What are the office hours for Oliver APAC?"

    result = await live_service.generate_response(
        question,
        previous_response_id=None,
    )

    # Only structural checks: the live answer text is not predictable.
    assert result["response_id"] is not None
    assert len(result["content"]) > 0
    assert result["usage"]["total_tokens"] > 0
|
|
|
|
|
|
@pytest.mark.skip(reason="Requires actual OpenAI API key")
@pytest.mark.asyncio
async def test_test_connection_integration():
    """
    Live check that the service's connection test reports success.

    Note: the skip marker on this test is unconditional, and running it
    requires a valid API key.
    """
    connected = await OpenAIService().test_connection()
    assert connected is True
|