# File viewer metadata (not part of the module): 192 lines, 6.6 KiB, Python, no trailing newline.
import asyncio
import logging
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional

from bson import ObjectId
from motor.motor_asyncio import AsyncIOMotorDatabase

# Cache import removed - caching disabled for data freshness
from ..config.settings import settings
from .llama_processor import llama_processor


class ChatContextService:
    """Answer document questions with awareness of the recent conversation.

    The service reads a user's recent messages for one index from the
    ``chat_messages`` collection, renders them as a transcript, combines that
    transcript with retrieved document chunks into a prompt, and sends the
    prompt to an OpenAI model through llama-index.
    """

    _logger = logging.getLogger(__name__)

    def __init__(
        self,
        max_context_messages: int = 10,
        context_window_hours: int = 24,
        model: str = "gpt-4o",
        temperature: float = 0.1,
    ):
        """Configure history limits and the LLM used for completions.

        Args:
            max_context_messages: Maximum number of previous messages to
                include when the caller gives no explicit limit.
            context_window_hours: Only messages created within this many
                hours are considered.
            model: OpenAI model name passed to the llama-index client.
            temperature: Sampling temperature passed to the client.
        """
        self.max_context_messages = max_context_messages
        self.context_window_hours = context_window_hours
        self.model = model
        self.temperature = temperature

    async def get_conversation_context(
        self,
        user_id: str,
        index_id: str,
        db: "AsyncIOMotorDatabase",
        limit: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """Get recent conversation context for the user and index.

        Args:
            user_id: User id as a string; converted to ``ObjectId`` for the
                query.
            index_id: Index identifier matched verbatim against ``index_id``.
            db: Database handle exposing a ``chat_messages`` collection.
            limit: Maximum number of messages to return. ``None`` (or any
                other falsy value) falls back to ``max_context_messages``.

        Returns:
            Dicts with ``query``, ``response`` and ``created_at`` keys, oldest
            first. Returns an empty list if anything fails (invalid
            ``user_id``, database error, or a document missing a key); the
            error is logged rather than raised so callers can still answer
            without history.
        """
        try:
            message_limit = limit or self.max_context_messages

            # Naive UTC timestamp.
            # NOTE(review): assumes created_at is stored as UTC - confirm against the writer.
            cutoff_time = datetime.utcnow() - timedelta(hours=self.context_window_hours)

            # Sort newest-first so the limit keeps the most recent messages.
            cursor = (
                db.chat_messages.find({
                    "user_id": ObjectId(user_id),
                    "index_id": index_id,
                    "created_at": {"$gte": cutoff_time},
                    "deleted_by_user": {"$ne": True},
                })
                .sort("created_at", -1)
                .limit(message_limit)
            )

            messages = [
                {
                    "query": msg["query"],
                    "response": msg["response"],
                    "created_at": msg["created_at"],
                }
                async for msg in cursor
            ]

            # Return in chronological order (oldest first)
            messages.reverse()
            return messages

        except Exception:
            # Deliberate best-effort: missing history must not break answering.
            self._logger.exception("Error getting conversation context")
            return []

    def format_context_for_ai(self, context_messages: List[Dict[str, Any]]) -> str:
        """Format conversation context for AI prompt.

        Args:
            context_messages: Dicts with ``query`` and ``response`` keys, in
                the order they should appear.

        Returns:
            Alternating ``User: ...`` / ``Assistant: ...`` lines joined by
            newlines, or an empty string when there are no messages.
        """
        if not context_messages:
            return ""

        return "\n".join(
            f"User: {msg['query']}\nAssistant: {msg['response']}"
            for msg in context_messages
        )

    async def generate_contextual_response(
        self,
        query: str,
        index_id: str,
        user_id: str,
        db: "AsyncIOMotorDatabase",
        context_chunks: List[str],
    ) -> Dict[str, Any]:
        """Generate response with conversation context.

        First tries a prompt containing both the document chunks and the
        recent conversation. If that attempt raises, retries once with a
        simpler prompt containing only the document chunks. If the retry
        also raises, returns a fixed apology message.

        Args:
            query: The user's current question.
            index_id: Index whose conversation history is loaded.
            user_id: User whose conversation history is loaded.
            db: Database handle used to load the history.
            context_chunks: Retrieved document passages; ``None`` is treated
                as no passages.

        Returns:
            Dict with ``response`` (answer text), ``context_used`` (the
            transcript included in the prompt, or ``None`` on the fallback
            paths) and ``context_messages_count``.
        """
        document_context = "\n\n".join(context_chunks or [])

        try:
            context_messages = await self.get_conversation_context(
                user_id, index_id, db
            )
            conversation_context = self.format_context_for_ai(context_messages)

            # Create enhanced prompt with conversation context
            prompt = self._create_contextual_prompt(
                query, document_context, conversation_context
            )

            return {
                "response": await self._complete(prompt),
                "context_used": conversation_context,
                "context_messages_count": len(context_messages),
            }

        except Exception:
            self._logger.exception("Error generating contextual response")

        # Fallback to basic response without context
        try:
            simple_prompt = self._create_simple_prompt(query, document_context)
            return {
                "response": await self._complete(simple_prompt),
                "context_used": None,
                "context_messages_count": 0,
            }
        except Exception:
            self._logger.exception("Fallback response generation failed")
            return {
                "response": "I'm sorry, I encountered an error while processing your question. Please try again.",
                "context_used": None,
                "context_messages_count": 0,
            }

    async def _complete(self, prompt: str) -> str:
        """Send ``prompt`` to the configured OpenAI model and return its text.

        The synchronous ``complete`` call is kept (the original code notes
        that ``acomplete`` has issues), but it runs in the event loop's
        default executor so other coroutines are not blocked while waiting
        for the model.
        """
        # Imported lazily, as in the original, so the module loads without llama-index.
        from llama_index.llms.openai import OpenAI

        llm = OpenAI(
            model=self.model,
            api_key=settings.openai_api_key,
            temperature=self.temperature,
        )
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, llm.complete, prompt)
        return response.text

    def _create_simple_prompt(self, query: str, document_context: str) -> str:
        """Create the fallback prompt that uses document context only."""
        return (
            "Based on the following context, answer the user's question. "
            "If the answer is not in the context, say "
            "\"I don't have enough information to answer that question.\"\n"
            "\n"
            "Return results as pure markdown - no code block.\n"
            "\n"
            "Context:\n"
            f"{document_context}\n"
            "\n"
            f"Question: {query}\n"
            "\n"
            "Answer:"
        )

    def _create_contextual_prompt(
        self,
        query: str,
        document_context: str,
        conversation_context: str,
    ) -> str:
        """Create a contextual prompt for the AI.

        Args:
            query: The user's current question.
            document_context: Document passages already joined into one string.
            conversation_context: Transcript of earlier turns; when empty the
                "Previous conversation" section is omitted.

        Returns:
            The prompt: a system-style preamble, the optional conversation
            section, then the document context, question and instructions,
            ending with ``Answer:``.
        """
        prompt_parts = [
            "You are an AI assistant helping users understand their documents. "
            "Answer questions based on the provided document context and consider "
            "the conversation history for continuity."
        ]

        if conversation_context:
            prompt_parts.append(
                f"\nPrevious conversation:\n{conversation_context}\n"
            )

        prompt_parts.append(
            "\n"
            "Document context:\n"
            f"{document_context}\n"
            "\n"
            f"Current question: {query}\n"
            "\n"
            "Instructions:\n"
            "1. Answer based primarily on the document context provided\n"
            "2. Consider the conversation history for continuity and context\n"
            "3. If the answer is not in the documents, clearly state this\n"
            "4. Be concise but comprehensive\n"
            "5. Reference specific information from the documents when possible\n"
            "6. If referring to previous parts of the conversation, be explicit about it\n"
            "7. Return results as pure markdown - no code block\n"
            "\n"
            "Answer:"
        )

        return "\n".join(prompt_parts)

    # cache_context_key method removed - caching disabled for data freshness


# Global chat context service instance: one shared, default-configured
# ChatContextService created at import time.
chat_context_service = ChatContextService()