# semblance-dev/backend/app/services/focus_group_response_service.py

"""
Focus Group Response Service
This service handles generating realistic responses from personas during focus group discussions.
"""

from typing import Dict, Any, Optional, List, Union
import json
import random
import os
from .llm_service import LLMService, LLMServiceError
from app.utils.prompt_loader import load_prompt, PromptLoaderError

class FocusGroupResponseError(Exception):
    """Signal that a persona response could not be generated.

    The response generators in this module raise this in place of the
    lower-level prompt-loading and LLM-service errors they encounter.
    """


async def generate_persona_response(
    persona: Dict[str, Any],
    current_topic: str,
    previous_messages: List[Dict[str, Any]],
    temperature: float = 0.7,
    focus_group_id: Optional[str] = None,
    llm_model: Optional[str] = None,
    reasoning_effort: Optional[str] = None,
    verbosity: Optional[str] = None
) -> str:
    """
    Generate a response from a persona in a focus group discussion.

    When ``focus_group_id`` is given and ConversationContextService reports
    visual context for it, the response is produced with
    ``LLMService.generate_contextual_response`` using the multimodal context.
    Otherwise, or when the visual-context lookup itself fails, the function
    falls back to ``LLMService.generate_content`` with plain-text history.

    Args:
        persona: The persona data (personality, traits, etc.)
        current_topic: The current question or topic being discussed
        previous_messages: List of previous messages in the discussion
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        focus_group_id: Optional focus group ID for visual context integration
        llm_model: Optional LLM model to use for generation
        reasoning_effort: GPT-5 only - Controls thinking time (minimal/low/medium/high)
        verbosity: GPT-5 only - Controls response length (low/medium/high)

    Returns:
        The persona's response with leading/trailing whitespace stripped

    Raises:
        FocusGroupResponseError: If prompt loading or LLM generation fails, or
            any other unexpected error occurs. The original exception is
            attached as ``__cause__``.
    """
    try:
        from app.services.llm_usage_context import set_llm_context
        set_llm_context(
            feature="persona_response",
            focus_group_id=focus_group_id or None,
            persona_id=str(persona.get("_id", "")) or None,
        )
        print(f"🎭 Generating persona response for {persona.get('name', 'Unknown')}")
        print(f"  - focus_group_id: {focus_group_id}")
        print(f"  - current_topic: {current_topic[:50]}...")

        # Import LLMService at the top to avoid scoping issues
        from app.services.llm_service import LLMService

        # Check for visual context if focus_group_id is provided
        has_visual_context = False
        multimodal_context = None

        if focus_group_id:
            try:
                from app.services.conversation_context_service import ConversationContextService
                has_visual_context = await ConversationContextService.has_visual_context(focus_group_id)

                if has_visual_context:
                    print("🎨 Visual context detected, building multimodal context...")
                    multimodal_context = await ConversationContextService.build_multimodal_context(
                        focus_group_id, previous_messages
                    )
                    print(f"🎨 Built context with {multimodal_context['total_visual_assets']} visual assets")
                else:
                    print("📝 No visual context, using standard generation")
            except Exception as e:
                # Visual context is an enhancement: any failure here deliberately
                # degrades to the text-only path instead of failing the response.
                print(f"⚠️  Error checking visual context, falling back to standard generation: {e}")
                has_visual_context = False

        # Determine the appropriate response length based on persona and context
        length_preference = _determine_response_length_preference(
            persona, previous_messages, current_topic
        )

        # Get length-specific instructions
        length_instructions = _get_length_specific_instructions(length_preference)

        # Extract relevant persona details for the prompt
        persona_details = _format_persona_details(persona)

        # If we have visual context, use contextual generation
        if has_visual_context and multimodal_context:
            print("🎨 Using contextual generation with visual context")

            # Load and format the contextual response prompt
            try:
                prompt = load_prompt('focus-group-response', {
                    'persona_details': persona_details,
                    'current_topic': current_topic,
                    'previous_messages': multimodal_context['text_context'],  # Use text fallback
                    'length_instructions': length_instructions,
                    'is_creative_review': True,  # Flag to indicate visual context available
                    'creative_instructions': """

VISUAL CONTEXT AVAILABLE:
You are participating in a focus group discussion where visual materials have been shown. The images in your conversation context are part of the ongoing discussion. Please provide your authentic reaction and feedback based on your personality, background, and preferences, taking into account both the conversation history and any visual materials you can see.

Consider:
- Your first impression of any visuals shown
- How the visual materials relate to the discussion topic
- Any specific elements that catch your attention
- How the visuals might appeal to people like you
- Any suggestions or concerns you might have
- The ongoing conversation context

Be genuine and specific in your feedback, drawing on your personal experiences and preferences.
"""
                })
            except PromptLoaderError as e:
                raise FocusGroupResponseError(f"Error loading contextual response prompt: {str(e)}") from e

            # Generate response using contextual conversation method
            response = await LLMService.generate_contextual_response(
                prompt=prompt,
                conversation_context=multimodal_context['conversation_context'],
                temperature=temperature,
                model_name=llm_model,
                reasoning_effort=reasoning_effort,
                verbosity=verbosity
            )

            print("✅ Generated contextual response with visual context")

        else:
            print("📝 Using standard generation (no visual context)")

            # Format the previous messages for context (standard approach)
            formatted_messages = _format_previous_messages(previous_messages)

            # Load and format the standard response prompt
            try:
                prompt = load_prompt('focus-group-response', {
                    'persona_details': persona_details,
                    'current_topic': current_topic,
                    'previous_messages': formatted_messages,
                    'length_instructions': length_instructions
                })
            except PromptLoaderError as e:
                raise FocusGroupResponseError(f"Error loading response prompt: {str(e)}") from e

            # Generate the standard response
            response = await LLMService.generate_content(
                prompt=prompt,
                temperature=temperature,
                model_name=llm_model,
                reasoning_effort=reasoning_effort,
                verbosity=verbosity
            )

            print("✅ Generated standard response")

        return response.strip()

    except FocusGroupResponseError:
        # Already a domain error (e.g. prompt loading failed above). Re-raise it
        # unchanged so the generic handler below does not relabel it as an
        # "Unexpected error".
        raise
    except LLMServiceError as e:
        raise FocusGroupResponseError(f"Error generating persona response: {str(e)}") from e
    except Exception as e:
        raise FocusGroupResponseError(f"Unexpected error in persona response generation: {str(e)}") from e

def _format_persona_details(persona: Dict[str, Any]) -> str:
    """Format persona details for the prompt."""
    details = []

    # Basic demographics
    details.append(f"Name: {persona.get('name', 'Unknown')}")
    details.append(f"Age: {persona.get('age', 'Unknown')}")
    details.append(f"Gender: {persona.get('gender', 'Unknown')}")
    details.append(f"Occupation: {persona.get('occupation', 'Unknown')}")
    details.append(f"Education: {persona.get('education', 'Unknown')}")
    details.append(f"Location: {persona.get('location', 'Unknown')}")

    # Personality characteristics
    details.append(f"Personality: {persona.get('personality', 'Not specified')}")

    # OCEAN traits if available
    ocean = persona.get('oceanTraits', {})
    if ocean:
        traits = []
        if 'openness' in ocean:
            traits.append(f"Openness: {ocean['openness']}/100")
        if 'conscientiousness' in ocean:
            traits.append(f"Conscientiousness: {ocean['conscientiousness']}/100")
        if 'extraversion' in ocean:
            traits.append(f"Extraversion: {ocean['extraversion']}/100")
        if 'agreeableness' in ocean:
            traits.append(f"Agreeableness: {ocean['agreeableness']}/100")
        if 'neuroticism' in ocean:
            traits.append(f"Neuroticism: {ocean['neuroticism']}/100")

        if traits:
            details.append("OCEAN Traits:")
            details.extend([f"- {trait}" for trait in traits])

    # Goals, frustrations, motivations
    if 'goals' in persona and persona['goals']:
        details.append("Goals:")
        details.extend([f"- {goal}" for goal in persona['goals']])

    if 'frustrations' in persona and persona['frustrations']:
        details.append("Frustrations:")
        details.extend([f"- {frustration}" for frustration in persona['frustrations']])

    if 'motivations' in persona and persona['motivations']:
        details.append("Motivations:")
        details.extend([f"- {motivation}" for motivation in persona['motivations']])

    # Think, feel, do
    tfd = persona.get('thinkFeelDo', {})
    if tfd:
        if 'thinks' in tfd and tfd['thinks']:
            details.append("Thinks:")
            details.extend([f"- {thought}" for thought in tfd['thinks']])

        if 'feels' in tfd and tfd['feels']:
            details.append("Feels:")
            details.extend([f"- {feeling}" for feeling in tfd['feels']])

        if 'does' in tfd and tfd['does']:
            details.append("Does:")
            details.extend([f"- {action}" for action in tfd['does']])

    # Join all details with line breaks
    return "\n".join(details)

def _format_previous_messages(messages: List[Dict[str, Any]]) -> str:
    """Format previous messages for context."""
    if not messages:
        return "No previous messages."

    # Limit to the most recent messages for context
    recent_messages = messages[-50:]  # Last 50 messages

    formatted = []
    for msg in recent_messages:
        sender = msg.get('senderId', 'Unknown')
        text = msg.get('text', '')
        msg_type = msg.get('type', 'response')

        # Format differently based on message type
        if msg_type == 'question':
            formatted.append(f"MODERATOR ({sender}): {text}")
        elif msg_type == 'system':
            formatted.append(f"SYSTEM: {text}")
        else:
            formatted.append(f"{sender}: {text}")

    return "\n".join(formatted)


def _determine_response_length_preference(
    persona: Dict[str, Any],
    previous_messages: List[Dict[str, Any]],
    current_topic: str
) -> str:
    """
    Determine the preferred response length based on persona traits and context.

    Args:
        persona: The persona data
        previous_messages: List of previous messages in the discussion
        current_topic: The current question or topic being discussed

    Returns:
        Response length preference: 'short', 'medium', or 'long'
    """
    # Base probabilities for response lengths
    short_prob = 0.10   # 10% chance of short responses
    medium_prob = 0.50  # 50% chance of medium responses
    long_prob = 0.40    # 40% chance of long responses

    # Adjust based on persona extraversion (if available)
    ocean_traits = persona.get('oceanTraits', {})
    if 'extraversion' in ocean_traits:
        extraversion = ocean_traits['extraversion'] / 100.0  # Normalize to 0-1

        # High extraversion = more likely to give longer responses
        # Low extraversion = more likely to give shorter responses
        if extraversion > 0.7:  # High extraversion (>70%)
            short_prob *= 0.6    # Short probability × 0.6
            medium_prob *= 0.9   # Medium probability × 0.9
            long_prob *= 1.8     # Long probability × 1.8
        elif extraversion < 0.3:  # Low extraversion (<30%)
            short_prob *= 1.3    # Short probability × 1.3
            medium_prob *= 1.1   # Medium probability × 1.1
            long_prob *= 0.7     # Long probability × 0.7

    # Adjust based on communication preferences
    comm_prefs = persona.get('communicationPreferences', '').lower()
    if 'brief' in comm_prefs or 'concise' in comm_prefs or 'direct' in comm_prefs:
        short_prob *= 1.2    # Short ×1.2
        medium_prob *= 1.1   # Medium ×1.1
        long_prob *= 0.8     # Long ×0.8
    elif 'detailed' in comm_prefs or 'verbose' in comm_prefs or 'elaborate' in comm_prefs:
        short_prob *= 0.7    # Short ×0.7
        medium_prob *= 0.9   # Medium ×0.9
        long_prob *= 1.5     # Long ×1.5

    # Analyze recent message context
    if previous_messages:
        recent_messages = previous_messages[-5:]  # Last 5 messages
        recent_lengths = []

        for msg in recent_messages:
            text = msg.get('text', '')
            word_count = len(text.split())
            recent_lengths.append(word_count)

        if recent_lengths:
            avg_recent_length = sum(recent_lengths) / len(recent_lengths)

            # Very short recent messages (<18 words avg)
            if avg_recent_length < 18:
                short_prob *= 1.3    # Short ×1.3
                long_prob *= 0.7     # Long ×0.7
            # Long recent messages (>60 words avg)
            elif avg_recent_length > 60:
                short_prob *= 1.2    # Short ×1.2
                medium_prob *= 1.1   # Medium ×1.1
                long_prob *= 0.8     # Long ×0.8

    # Consider topic complexity (>15 words or multiple questions)
    topic_words = current_topic.split()
    if len(topic_words) > 15 or current_topic.count('?') > 1:
        # Complex topics may warrant longer responses
        short_prob *= 0.8    # Short ×0.8
        long_prob *= 1.3     # Long ×1.3

    # Normalize probabilities
    total_prob = short_prob + medium_prob + long_prob
    short_prob /= total_prob
    medium_prob /= total_prob
    long_prob /= total_prob

    # Select length based on weighted random choice
    rand = random.random()
    if rand < short_prob:
        return 'short'
    elif rand < short_prob + medium_prob:
        return 'medium'
    else:
        return 'long'


def _get_length_specific_instructions(length_preference: str) -> str:
    """
    Get length-specific instructions for the LLM prompt.

    Args:
        length_preference: The preferred response length ('short', 'medium', 'long')

    Returns:
        Instructions specific to the response length
    """
    if length_preference == 'short':
        return """
RESPONSE LENGTH: Provide a SHORT response (1-18 words or brief phrase).
Examples of appropriate short responses:
- "Absolutely!"
- "I disagree."
- "That's interesting."
- "Not really."
- "Exactly my point."
- "Makes sense to me."
- "I'm not sure about that."
- "I love that design approach."
- "The colors feel too bright for me."

Keep it natural and conversational, but brief. Sometimes a simple reaction or acknowledgment is all that's needed.
"""
    elif length_preference == 'medium':
        return """
RESPONSE LENGTH: Provide a MEDIUM response (1-5 sentences).
This should be conversational but not overly detailed. Share your perspective clearly and concisely.
Example length: "I think that's a great point about mobile payments. I've had similar experiences with apps that make checkout too complicated."
"""
    else:  # long
        return """
RESPONSE LENGTH: Provide a LONGER response (2-4 sentences or 1-2 short paragraphs).
Feel free to elaborate on your thoughts, share personal examples, or explore different aspects of the topic.
This is your chance to provide more detailed insights and personal anecdotes.
"""


async def generate_creative_review_response(
    persona: Dict[str, Any],
    current_topic: str,
    creative_asset_path: str,
    previous_messages: List[Dict[str, Any]],
    focus_group_id: str,
    temperature: float = 0.7
) -> str:
    """
    Generate a response from a persona for a creative review activity with image context.

    A relative ``creative_asset_path`` is resolved under
    ``<backend>/uploads/focus-group-{focus_group_id}/``; an absolute path is
    used as given. The image is passed to
    ``LLMService.generate_multimodal_content`` together with the formatted
    prompt.

    Args:
        persona: The persona data (personality, traits, etc.)
        current_topic: The current question or topic being discussed
        creative_asset_path: Path to the creative asset image file
        previous_messages: List of previous messages in the discussion
        focus_group_id: The focus group ID for asset path resolution
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)

    Returns:
        The persona's response to the creative asset, stripped of
        leading/trailing whitespace

    Raises:
        FocusGroupResponseError: If the asset file does not exist, the prompt
            cannot be loaded, LLM generation fails, or any other unexpected
            error occurs. Wrapped exceptions are attached as ``__cause__``.
    """
    try:
        from app.services.llm_usage_context import set_llm_context
        set_llm_context(
            feature="persona_response",
            focus_group_id=focus_group_id or None,
            persona_id=str(persona.get("_id", "")) or None,
        )
        print("🎨 CREATIVE REVIEW RESPONSE DEBUG:")
        print(f"  - persona: {persona.get('name', 'Unknown')}")
        print(f"  - current_topic: {current_topic}")
        print(f"  - creative_asset_path: {creative_asset_path}")
        print(f"  - focus_group_id: {focus_group_id}")
        print(f"  - temperature: {temperature}")

        # Determine the appropriate response length
        length_preference = _determine_response_length_preference(
            persona, previous_messages, current_topic
        )
        print(f"  - length_preference: {length_preference}")

        # Get length-specific instructions
        length_instructions = _get_length_specific_instructions(length_preference)

        # Extract relevant persona details for the prompt
        persona_details = _format_persona_details(persona)

        # Format the previous messages for context
        formatted_messages = _format_previous_messages(previous_messages)

        # Construct the full path to the creative asset.
        # NOTE(review): the path is joined unchecked when relative and used
        # verbatim when absolute; confirm callers only pass trusted filenames.
        if not os.path.isabs(creative_asset_path):
            # Files are stored in focus group subdirectories: uploads/focus-group-{id}/filename
            base_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))  # Go up to backend/
            full_asset_path = os.path.join(base_dir, 'uploads', f'focus-group-{focus_group_id}', creative_asset_path)
        else:
            full_asset_path = creative_asset_path

        asset_exists = os.path.exists(full_asset_path)
        print(f"  - full_asset_path: {full_asset_path}")
        print(f"  - asset_exists: {asset_exists}")

        # Verify the asset file exists
        if not asset_exists:
            print(f"❌ Creative asset not found at: {full_asset_path}")
            # List available files in the asset's directory for debugging. This
            # is best-effort: a listing failure must not mask the not-found error.
            uploads_dir = os.path.dirname(full_asset_path)
            if os.path.isdir(uploads_dir):
                try:
                    available_files = os.listdir(uploads_dir)
                    print(f"  - Available files in uploads: {available_files}")
                except OSError as list_error:
                    print(f"  - Could not list uploads directory: {list_error}")
            raise FocusGroupResponseError(f"Creative asset not found: {full_asset_path}")

        # Load and format the creative response prompt
        try:
            prompt = load_prompt('focus-group-response', {
                'persona_details': persona_details,
                'current_topic': current_topic,
                'previous_messages': formatted_messages,
                'length_instructions': length_instructions,
                'is_creative_review': True,
                'creative_instructions': """

CREATIVE ASSET CONTEXT:
You are now viewing a creative asset (image) that is being shown to you as part of this focus group discussion.
Please provide your authentic reaction and feedback based on your personality, background, and preferences.

Consider:
- Your first impression of the visual
- How it relates to the discussion topic
- Any specific elements that catch your attention
- How it might appeal to people like you
- Any suggestions or concerns you might have

Be genuine and specific in your feedback, drawing on your personal experiences and preferences.
"""
            })
        except PromptLoaderError as e:
            raise FocusGroupResponseError(f"Error loading creative response prompt: {str(e)}") from e

        # Generate the response using multimodal capabilities
        print("🎨 Calling LLMService.generate_multimodal_content...")
        print(f"  - prompt length: {len(prompt)} characters")
        print(f"  - image_paths: {[full_asset_path]}")
        print(f"  - temperature: {temperature}")

        response = await LLMService.generate_multimodal_content(
            prompt=prompt,
            image_paths=[full_asset_path],
            temperature=temperature
        )

        print("✅ Creative review response generated successfully")
        print(f"  - response length: {len(response)} characters")
        print(f"  - response preview: {response[:100]}...")

        return response.strip()

    except FocusGroupResponseError:
        # Already a domain error (missing asset, prompt loading). Re-raise it
        # unchanged so the generic handler below does not relabel it as an
        # "Unexpected error".
        raise
    except LLMServiceError as e:
        raise FocusGroupResponseError(f"Error generating creative review response: {str(e)}") from e
    except Exception as e:
        raise FocusGroupResponseError(f"Unexpected error in creative review response generation: {str(e)}") from e


def get_upload_folder_path(focus_group_id: str) -> str:
    """
    Build the path of the upload directory that belongs to a focus group.

    Args:
        focus_group_id: The focus group ID

    Returns:
        ``<root>/uploads/focus-group-{focus_group_id}``, where ``<root>`` is
        three directory levels above this file (the backend/ directory in the
        project layout)
    """
    root_dir = __file__
    for _ in range(3):  # strip the file name, then two parent directories
        root_dir = os.path.dirname(root_dir)

    folder_name = f'focus-group-{focus_group_id}'
    return os.path.join(root_dir, 'uploads', folder_name)


def is_creative_review_activity(activity_or_question: Dict[str, Any]) -> bool:
    """
    Tell whether an activity/question object is of the creative review kind.

    Args:
        activity_or_question: The activity or question object

    Returns:
        True when its 'type' field equals 'creative_review', False otherwise
        (including when the field is missing)
    """
    activity_type = activity_or_question.get('type')
    return activity_type == 'creative_review'


def extract_asset_filename_from_content(content: str) -> Optional[str]:
    """
    Extract asset filename from creative review activity content.

    Patterns are tried in priority order, each against the whole content
    (case-insensitively): quoted names after "titled"/"asset"/"image", any
    quoted filename, the unquoted ``fg-{id}-{32 hex}.{ext}`` form,
    ``asset:``/``image:``/``file:`` prefixes, and finally any bare filename.
    Only ``.jpg``, ``.jpeg`` and ``.png`` names are recognised.

    Args:
        content: The activity content string

    Returns:
        The asset filename if found, None otherwise. Empty or non-string
        content yields None.
    """
    import re

    # Missing or non-text content cannot contain a filename; re.search would
    # otherwise raise TypeError on it.
    if not content or not isinstance(content, str):
        return None

    # Unquoted patterns end with (?![a-zA-Z0-9]) so that a longer token such
    # as "notes.pngx" is not truncated into a bogus "notes.png" match. Quoted
    # patterns are already bounded by the closing quote.
    patterns = [
        # Match quoted filenames (most specific pattern first)
        r"titled\s+['\"]([^'\"]+\.(jpg|jpeg|png))['\"]",  # "titled 'filename.jpg'"
        r"asset\s+['\"]([^'\"]+\.(jpg|jpeg|png))['\"]",  # "asset 'filename.jpg'"
        r"image\s+['\"]([^'\"]+\.(jpg|jpeg|png))['\"]",  # "image 'filename.jpg'"
        r"['\"]([a-zA-Z0-9_\-]+\.(jpg|jpeg|png))['\"]",  # Any quoted filename
        # Match focus group asset pattern without quotes
        r'(fg-[a-f0-9]+-[a-f0-9]{32}\.(jpg|jpeg|png))(?![a-zA-Z0-9])',  # fg-{id}-{uuid}.{ext}
        # Other patterns
        r'asset:\s*([^\s]+\.(jpg|jpeg|png))(?![a-zA-Z0-9])',
        r'image:\s*([^\s]+\.(jpg|jpeg|png))(?![a-zA-Z0-9])',
        r'file:\s*([^\s]+\.(jpg|jpeg|png))(?![a-zA-Z0-9])',
        r'([a-zA-Z0-9_-]+\.(jpg|jpeg|png))(?![a-zA-Z0-9])',
    ]

    for pattern in patterns:
        match = re.search(pattern, content, re.IGNORECASE)
        if match:
            # Return the first capture group (the filename)
            return match.group(1)

    return None