# contract-query/backend/app/services/chat_context_service.py

import asyncio
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from motor.motor_asyncio import AsyncIOMotorDatabase
from bson import ObjectId

# Cache import removed - caching disabled for data freshness
from ..config.settings import settings
from .llama_processor import llama_processor


class ChatContextService:
    """Answer document questions with awareness of the recent conversation.

    The service reads a user's recent messages for an index from the
    ``chat_messages`` collection, folds them into a prompt together with
    retrieved document chunks, and asks an OpenAI model (through LlamaIndex)
    for an answer.
    """

    def __init__(
        self,
        max_context_messages: int = 10,
        context_window_hours: int = 24,
        model: str = "gpt-4o",
        temperature: float = 0.1,
    ):
        """Configure the service.

        Args:
            max_context_messages: Default cap on how many previous messages
                are included as conversation context.
            context_window_hours: Only messages created within this many
                hours are considered.
            model: Model name handed to the LlamaIndex ``OpenAI`` client.
            temperature: Sampling temperature handed to the client.
        """
        self.max_context_messages = max_context_messages
        self.context_window_hours = context_window_hours
        self.model = model
        self.temperature = temperature

    async def get_conversation_context(
        self,
        user_id: str,
        index_id: str,
        db: AsyncIOMotorDatabase,
        limit: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """Get recent conversation context for the user and index.

        Args:
            user_id: User identifier; converted to ``ObjectId`` for the query.
            index_id: Index identifier, matched as-is.
            db: Database handle exposing the ``chat_messages`` collection.
            limit: Maximum number of messages to return. ``None`` (or ``0``)
                falls back to ``self.max_context_messages``.

        Returns:
            Dicts with ``query``, ``response`` and ``created_at`` keys, in
            chronological order (oldest first). Messages flagged
            ``deleted_by_user`` are excluded. Returns an empty list if
            anything goes wrong (best-effort: a context failure must not
            prevent answering).
        """
        try:
            message_limit = limit or self.max_context_messages

            # Naive UTC timestamp.
            # NOTE(review): assumes ``created_at`` is stored as UTC - confirm.
            cutoff_time = datetime.utcnow() - timedelta(hours=self.context_window_hours)

            # Newest first so that the limit keeps the most recent messages.
            cursor = db.chat_messages.find({
                "user_id": ObjectId(user_id),
                "index_id": index_id,
                "created_at": {"$gte": cutoff_time},
                "deleted_by_user": {"$ne": True}
            }).sort("created_at", -1).limit(message_limit)

            messages = [
                {
                    "query": msg["query"],
                    "response": msg["response"],
                    "created_at": msg["created_at"]
                }
                async for msg in cursor
            ]

            # Flip to chronological order (oldest first) for prompt building.
            messages.reverse()
            return messages

        except Exception as e:
            print(f"Error getting conversation context: {e}")
            return []

    def format_context_for_ai(self, context_messages: List[Dict[str, Any]]) -> str:
        """Format conversation context for the AI prompt.

        Each message becomes a ``User: ...`` line followed by an
        ``Assistant: ...`` line; an empty input yields an empty string.
        """
        if not context_messages:
            return ""

        lines = []
        for msg in context_messages:
            lines.append(f"User: {msg['query']}")
            lines.append(f"Assistant: {msg['response']}")

        return "\n".join(lines)

    def _build_llm(self):
        """Create the LlamaIndex OpenAI client from the service configuration."""
        # Imported lazily, as the original code did, so importing this module
        # does not require llama_index to be importable.
        from llama_index.llms.openai import OpenAI
        return OpenAI(
            model=self.model,
            api_key=settings.openai_api_key,
            temperature=self.temperature
        )

    async def _complete(self, prompt: str) -> str:
        """Run a completion for ``prompt`` and return the response text."""
        llm = self._build_llm()
        # The synchronous ``complete`` is kept (``acomplete`` has issues), but
        # it is run in the default executor so the blocking network call does
        # not stall the event loop for every other request.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, llm.complete, prompt)
        return response.text

    async def generate_contextual_response(
        self,
        query: str,
        index_id: str,
        user_id: str,
        db: AsyncIOMotorDatabase,
        context_chunks: List[str]
    ) -> Dict[str, Any]:
        """Generate a response that takes the conversation history into account.

        Args:
            query: The user's current question.
            index_id: Index the conversation belongs to.
            user_id: User whose history is loaded.
            db: Database handle used to load the history.
            context_chunks: Retrieved document text chunks.

        Returns:
            A dict with ``response`` (answer text), ``context_used`` (the
            formatted history, or ``None`` on the fallback paths) and
            ``context_messages_count``. This method does not raise: if the
            contextual attempt fails it retries with a history-free prompt,
            and if that fails too it returns an apology message.
        """
        try:
            context_messages = await self.get_conversation_context(
                user_id, index_id, db
            )
            conversation_context = self.format_context_for_ai(context_messages)
            document_context = "\n\n".join(context_chunks)

            prompt = self._create_contextual_prompt(
                query, document_context, conversation_context
            )
            response_text = await self._complete(prompt)

            return {
                "response": response_text,
                "context_used": conversation_context,
                "context_messages_count": len(context_messages)
            }

        except Exception as e:
            print(f"Error generating contextual response: {e}")
            # Fallback to basic response without conversation context
            try:
                context_text = "\n\n".join(context_chunks)
                simple_prompt = self._create_fallback_prompt(query, context_text)
                response_text = await self._complete(simple_prompt)

                return {
                    "response": response_text,
                    "context_used": None,
                    "context_messages_count": 0
                }
            except Exception as fallback_error:
                print(f"Fallback response generation failed: {fallback_error}")
                return {
                    "response": "I'm sorry, I encountered an error while processing your question. Please try again.",
                    "context_used": None,
                    "context_messages_count": 0
                }

    def _create_fallback_prompt(self, query: str, context_text: str) -> str:
        """Create the history-free prompt used when the contextual path fails."""
        return f"""Based on the following context, answer the user's question. If the answer is not in the context, say "I don't have enough information to answer that question."

Return results as pure markdown - no code block.

Context:
{context_text}

Question: {query}

Answer:"""

    def _create_contextual_prompt(
        self,
        query: str,
        document_context: str,
        conversation_context: str
    ) -> str:
        """Create a contextual prompt for the AI.

        The prompt consists of a fixed system-style preamble, an optional
        "Previous conversation" section (omitted when ``conversation_context``
        is empty), and the document context, question and answering
        instructions, joined by newlines.
        """
        prompt_parts = [
            "You are an AI assistant helping users understand their documents. "
            "Answer questions based on the provided document context and consider "
            "the conversation history for continuity."
        ]

        if conversation_context:
            prompt_parts.append(f"""
Previous conversation:
{conversation_context}
""")

        prompt_parts.append(f"""
Document context:
{document_context}

Current question: {query}

Instructions:
1. Answer based primarily on the document context provided
2. Consider the conversation history for continuity and context
3. If the answer is not in the documents, clearly state this
4. Be concise but comprehensive
5. Reference specific information from the documents when possible
6. If referring to previous parts of the conversation, be explicit about it
7. Return results as pure markdown - no code block

Answer:""")

        return "\n".join(prompt_parts)

    # cache_context_key method removed - caching disabled for data freshness


# Global chat context service instance, created once at import time with the
# default configuration; modules importing this name share the same object.
chat_context_service = ChatContextService()