# semblance-dev/backend/app/services/ai_persona_service.py

"""
AI Persona Generation Service using Google's Gemini model.
This service handles the integration with the Gemini API to generate
synthetic persona data based on a predefined prompt.
"""

import os
import json
import uuid
from typing import Dict, Any, Optional, List
from pydantic import BaseModel, ValidationError
from datetime import datetime

from .llm_service import LLMService, LLMServiceError
from .customer_data_service import customer_data_service
from app.utils.prompt_loader import load_prompt, PromptLoaderError


class PersonaGenerationError(Exception):
    """Signals that a step of the persona generation process failed."""


def _sanitize_persona_data_for_json(persona_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sanitize persona data to make it JSON serializable.

    Builds a new dictionary in which every ``datetime`` is replaced by its
    ISO-8601 string. Nested dictionaries and lists are walked to any depth,
    so a datetime inside a list of lists is converted as well. The input is
    not modified.

    Args:
        persona_data: The persona data dictionary that may contain datetime objects

    Returns:
        A sanitized copy of the dictionary; values that are not datetimes,
        dictionaries or lists are kept as-is
    """
    def _sanitize_value(value: Any) -> Any:
        # datetime is the only non-serializable type handled here.
        if isinstance(value, datetime):
            return value.isoformat()
        if isinstance(value, dict):
            return _sanitize_persona_data_for_json(value)
        if isinstance(value, list):
            # Recurse per item so lists nested inside lists are covered too.
            return [_sanitize_value(item) for item in value]
        return value

    return {key: _sanitize_value(value) for key, value in persona_data.items()}


def _sanitize_json_response(response: str) -> str:
    """
    Sanitize JSON response from LLM to handle high-temperature artifacts.

    The text is cleaned in a single string-aware pass, so repairs that are
    only valid outside of string values never alter string contents:

    * invalid control characters are removed everywhere;
    * curly single quotes become ``'`` and the ellipsis character becomes ``...``;
    * curly double quotes used as string delimiters become ``"``, while a
      double quote of the "other" kind inside a string is emitted as ``\\"``;
    * raw newlines, carriage returns and tabs inside a string are escaped
      (indentation that follows a raw newline inside a string is dropped);
    * a comma that is followed only by whitespace and then ``}`` or ``]`` is
      removed when it sits outside of a string.

    Args:
        response: Raw JSON response string from LLM

    Returns:
        Sanitized JSON string, stripped of surrounding whitespace
    """
    import re

    curly_double_quotes = "\u201c\u201d"
    control_escapes = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}

    # Step 1: Remove invalid control characters (but preserve valid whitespace)
    text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', response)

    # Step 2: Normalize typographic characters that are safe to swap anywhere.
    # Written as escapes so the literals cannot be mangled by editors/encodings.
    text = text.replace("\u2018", "'").replace("\u2019", "'").replace("\u2026", "...")

    pieces = []
    in_string = False
    closing_quotes = ""        # characters that terminate the current string
    escaped = False            # previous character was an unconsumed backslash
    skip_indent = False        # dropping indentation after a raw newline in a string
    trailing_comma_at = None   # index in `pieces` of a comma that may be trailing

    for char in text:
        if in_string:
            if skip_indent:
                if char in " \t":
                    continue
                skip_indent = False

            if escaped:
                # Whatever follows a backslash belongs to the escape sequence,
                # so an escaped backslash cannot hide the closing quote.
                escaped = False
                pieces.append(char)
            elif char == "\\":
                escaped = True
                pieces.append(char)
            elif char in closing_quotes:
                in_string = False
                pieces.append('"')
            elif char == '"' or char in curly_double_quotes:
                # A quote of the other kind is content, not a delimiter.
                pieces.append('\\"')
            elif char in control_escapes:
                pieces.append(control_escapes[char])
                skip_indent = char == "\n"
            else:
                pieces.append(char)
            continue

        if char == '"' or char in curly_double_quotes:
            in_string = True
            closing_quotes = '"' if char == '"' else curly_double_quotes
            trailing_comma_at = None
            pieces.append('"')
        elif char == ",":
            trailing_comma_at = len(pieces)
            pieces.append(char)
        elif char in "}]":
            # Step 3: drop a comma separated from the closer only by whitespace.
            if trailing_comma_at is not None:
                pieces[trailing_comma_at] = ""
                trailing_comma_at = None
            pieces.append(char)
        else:
            if not char.isspace():
                trailing_comma_at = None
            pieces.append(char)

    return "".join(pieces).strip()


async def generate_basic_personas(
    audience_brief: str,
    research_objective: Optional[str] = None,
    count: int = 5,
    temperature: float = 1.0,
    customer_data_session_id: Optional[str] = None,
    llm_model: Optional[str] = None,
    max_retries: int = 2
) -> List[Dict[str, Any]]:
    """
    Generate basic profiles for multiple personas based on a research brief.

    Sets the LLM usage context feature to "persona_generate" and delegates to
    _generate_basic_personas_attempt, running it up to ``max_retries + 1``
    times. Only PersonaGenerationError triggers a retry; any other exception
    is wrapped in PersonaGenerationError and raised immediately.

    Args:
        audience_brief: The audience brief to guide persona generation
        research_objective: Optional research objective to focus persona goals and scenarios
        count: Number of basic personas to generate
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        customer_data_session_id: Optional session ID for customer data context
        llm_model: Optional LLM model to use for generation
        max_retries: Maximum number of retry attempts for failed generations

    Returns:
        A list of dictionaries containing basic persona data

    Raises:
        PersonaGenerationError: If every attempt fails (the last attempt's error is
            re-raised), if an unexpected error occurs, or if ``max_retries`` is
            negative so that no attempt is made at all
    """
    from app.services.llm_usage_context import set_llm_context
    set_llm_context(feature="persona_generate")

    for attempt in range(max_retries + 1):
        try:
            if attempt > 0:
                print(f"🔄 Backend: Retry attempt {attempt}/{max_retries} for basic persona generation")

            return await _generate_basic_personas_attempt(
                audience_brief=audience_brief,
                research_objective=research_objective,
                count=count,
                temperature=temperature,
                customer_data_session_id=customer_data_session_id,
                llm_model=llm_model,
                attempt=attempt + 1
            )

        except PersonaGenerationError as e:
            if attempt >= max_retries:
                print(f"❌ Backend: All {max_retries + 1} attempts failed")
                raise
            print(f"⚠️ Backend: Attempt {attempt + 1} failed: {str(e)}")
            print(f"🔄 Backend: Will retry ({max_retries - attempt} attempts remaining)")
        except Exception as e:
            # Unexpected failures are not retried; keep the cause for debugging.
            raise PersonaGenerationError(f"Error generating basic personas: {str(e)}") from e

    # Every loop iteration either returns or raises, so this line is only
    # reached when max_retries is negative and the loop body never runs.
    raise PersonaGenerationError("Failed to generate basic personas after all retries")


async def _generate_basic_personas_attempt(
    audience_brief: str,
    research_objective: Optional[str] = None,
    count: int = 5,
    temperature: float = 1.0,
    customer_data_session_id: Optional[str] = None,
    llm_model: Optional[str] = None,
    attempt: int = 1
) -> List[Dict[str, Any]]:
    """
    Internal function to attempt generating basic personas. Separated for retry logic.

    Builds the prompt (including any customer data context), calls the LLM,
    extracts and sanitizes the JSON array from the reply, then validates each
    persona and fills in missing fields where possible.

    Args:
        audience_brief: The audience brief to guide persona generation
        research_objective: Optional research objective passed to the prompt
        count: Number of personas requested; receiving fewer only logs a warning
        temperature: Sampling temperature forwarded to the LLM call
        customer_data_session_id: Optional session ID for customer data context
        llm_model: Optional LLM model name forwarded to the LLM call
        attempt: 1-based attempt number, used in log and error messages

    Returns:
        The validated (and possibly completed) persona dictionaries

    Raises:
        PersonaGenerationError: If a prompt cannot be loaded, the LLM call fails,
            the reply is not a non-empty JSON array of objects, a persona still
            lacks required fields after completion, or an age is not a single
            non-negative integer
    """
    required_fields = ["name", "age", "gender", "occupation", "education", "location", "techSavviness", "personality", "interests"]

    try:
        # Load customer data context if session ID provided
        if customer_data_session_id:
            customer_data_content = customer_data_service.get_parsed_markdown_content(customer_data_session_id)
            if customer_data_content:
                customer_data_context = f"The following customer data was uploaded and should be used to inform persona creation:\n\n{customer_data_content}"
            else:
                customer_data_context = "No customer data available for this session."
        else:
            customer_data_context = "No customer data provided."

        # Load and format the prompt with the audience brief and count
        try:
            final_prompt = load_prompt('persona-basic-generation', {
                'audience_brief': audience_brief,
                'research_objective': research_objective or '',
                'count': count,
                'customer_data_context': customer_data_context
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading prompt: {str(e)}") from e

        try:
            system_prompt = load_prompt('persona-system')
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading system prompt: {str(e)}") from e

        # Log the LLM API call with attempt number
        attempt_text = f" (attempt {attempt})" if attempt > 1 else ""
        print(f"🤖 Backend: Making LLM API call to {llm_model or 'gemini-3-pro-preview'} for basic persona generation{attempt_text}")

        try:
            raw_response = await LLMService.generate_content(
                prompt=final_prompt,
                temperature=temperature,
                system_prompt=system_prompt,
                model_name=llm_model
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service on attempt {attempt}: {str(e)}") from e

        clean_response = raw_response.strip()

        # Remove a wrapping markdown code fence. Slicing is used on purpose:
        # str.strip("```json") would strip a *set of characters*, not a prefix.
        if clean_response.startswith("```"):
            clean_response = clean_response[3:]
            if clean_response[:4].lower() == "json":
                clean_response = clean_response[4:]
            clean_response = clean_response.strip()
            if clean_response.endswith("```"):
                clean_response = clean_response[:-3].rstrip()

        # Keep only the outermost [...] span so chatter before *or after* the
        # array does not break parsing.
        start_idx = clean_response.find("[")
        end_idx = clean_response.rfind("]")
        if start_idx != -1 and end_idx > start_idx:
            clean_response = clean_response[start_idx:end_idx + 1]

        # Sanitize JSON for high-temperature responses
        clean_response = _sanitize_json_response(clean_response)

        print(f"Attempting to parse JSON array{attempt_text}: {clean_response[:100]}...")
        try:
            personas_array = json.loads(clean_response)
        except json.JSONDecodeError as e:
            # Show the text around the failure position to ease debugging
            error_pos = getattr(e, 'pos', 0)
            error_context = clean_response[max(0, error_pos-50):error_pos+50] if error_pos > 0 else clean_response[:100]

            print(f"❌ Backend: JSON Parse Error at position {error_pos}{attempt_text}: {str(e)}")
            print(f"❌ Backend: Error context{attempt_text}: ...{error_context}...")

            raise PersonaGenerationError(
                f"Failed to parse JSON response on attempt {attempt}: {str(e)}. "
                f"Context: ...{error_context[:100]}..."
            ) from e

        # Verify it's an array
        if not isinstance(personas_array, list):
            raise PersonaGenerationError(f"Expected an array of personas but got {type(personas_array)}")

        # Check if we got at least one persona
        if not personas_array:
            raise PersonaGenerationError(f"No personas were generated on attempt {attempt}")

        # If we got fewer personas than requested, log a warning but continue
        if len(personas_array) < count:
            print(f"⚠️ Backend: Warning on attempt {attempt}: Requested {count} personas but only got {len(personas_array)}")

        completed_personas = []

        for i, persona in enumerate(personas_array):
            # Field checks below rely on dict semantics; on a string they would
            # silently turn into substring tests.
            if not isinstance(persona, dict):
                raise PersonaGenerationError(
                    f"Persona {i+1} is not a JSON object (got {type(persona).__name__}) on attempt {attempt}"
                )

            missing_fields = [field for field in required_fields if field not in persona]

            # Attempt field completion for missing fields
            if missing_fields:
                print(f"⚠️ Backend: Persona {i+1} on attempt {attempt} is missing fields: {missing_fields}")
                print(f"🔧 Backend: Attempting to complete missing fields for persona {i+1}")

                persona = _complete_missing_persona_fields(persona, missing_fields, attempt)

                # Re-check for still missing fields after completion attempt
                still_missing = [field for field in required_fields if field not in persona]
                if still_missing:
                    print(f"❌ Backend: Persona {i+1} validation failed on attempt {attempt} - Still missing fields after completion: {still_missing}")
                    print(f"❌ Backend: Persona {i+1} actual fields: {list(persona.keys())}")
                    print(f"❌ Backend: Persona {i+1} data: {json.dumps(persona, indent=2)[:500]}...")
                    if attempt == 1:  # Only log full response on first attempt to avoid spam
                        print(f"❌ Backend: Full LLM response for debugging: {clean_response[:1000]}...")
                    raise PersonaGenerationError(
                        f"Persona {i+1} ({persona.get('name', 'Unknown')}) is still missing required fields after completion attempt: {', '.join(still_missing)} on attempt {attempt}. "
                        f"Expected fields: {required_fields}. "
                        f"Actual fields: {list(persona.keys())}. "
                        f"This suggests the LLM did not follow the prompt instructions correctly."
                    )
                print(f"✅ Backend: Successfully completed missing fields for persona {i+1}")

            # Validate that age is a single number, not a range
            age_value = persona.get("age", "")
            if isinstance(age_value, str) and "-" in age_value:
                raise PersonaGenerationError(
                    f"Persona {i+1} has an invalid age range '{age_value}' on attempt {attempt}. Age must be a single specific number (e.g., '35', not '35-42')"
                )

            # Validate that age is numeric
            age_str = str(age_value).strip()
            if not age_str.isdigit():
                raise PersonaGenerationError(
                    f"Persona {i+1} has an invalid age '{age_value}' on attempt {attempt}. Age must be a numeric value (e.g., '35')"
                )

            completed_personas.append(persona)

        print(f"✅ Backend: Successfully validated {len(completed_personas)} basic personas on attempt {attempt}")
        return completed_personas

    except PersonaGenerationError:
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error generating basic personas on attempt {attempt}: {str(e)}") from e


def _complete_missing_persona_fields(persona: Dict[str, Any], missing_fields: List[str], attempt: int) -> Dict[str, Any]:
    """
    Attempt to complete missing persona fields with reasonable defaults based on existing data.

    Works on a shallow copy, so the ``persona`` argument is left untouched.
    Where possible a value is derived from fields that are present (gender for
    the name; occupation for age, tech savviness and education; occupation or
    interests for personality); otherwise a fixed fallback is used. Present
    fields that are ``None`` or not strings (e.g. a list of interests) are
    converted to text before keyword matching instead of raising.

    Args:
        persona: The persona dict with some missing fields
        missing_fields: List of field names that are missing; each must be one
            of the basic-profile fields that has a fallback value defined below
        attempt: The current attempt number, used in generated placeholder names

    Returns:
        Updated persona dict with completed fields where possible
    """
    import re

    def _lower_text(key: str) -> str:
        """Return the persona value for ``key`` as lower-case text ('' when absent/None)."""
        value = persona.get(key)
        if value is None:
            return ""
        if isinstance(value, (list, tuple, set)):
            return " ".join(str(item) for item in value).lower()
        return str(value).lower()

    completed_persona = persona.copy()

    # Define fallback values based on available data or reasonable defaults
    fallback_values = {
        "name": f"Generated Person {attempt}",
        "age": "30",
        "gender": "Non-binary",
        "occupation": "Professional",
        "education": "Bachelor's Degree",
        "location": "Urban Area",
        "techSavviness": 50,
        "personality": "Well-rounded individual with diverse interests",
        "interests": "Technology, reading, socializing"
    }

    gender = _lower_text("gender")
    occupation = _lower_text("occupation")
    interests = _lower_text("interests")
    # Whole words, so that the short keyword "it" does not match inside
    # unrelated occupations such as "writer" or "auditor".
    occupation_words = set(re.findall(r"[a-z0-9]+", occupation))

    # Smart completion based on existing persona data
    for field in missing_fields:
        if field == "name" and "gender" in persona:
            # "male" is a substring of "female", hence the extra "fe" check
            if "male" in gender and "fe" not in gender:
                completed_persona[field] = f"John Person {attempt}"
            elif "female" in gender:
                completed_persona[field] = f"Jane Person {attempt}"
            else:
                completed_persona[field] = fallback_values[field]

        elif field == "age" and "occupation" in persona:
            # Estimate age based on occupation
            if "student" in occupation:
                completed_persona[field] = "22"
            elif "senior" in occupation or "manager" in occupation or "director" in occupation:
                completed_persona[field] = "45"
            elif "entry" in occupation or "junior" in occupation:
                completed_persona[field] = "25"
            else:
                completed_persona[field] = fallback_values[field]

        elif field == "techSavviness" and "occupation" in persona:
            # Estimate tech savviness based on occupation
            tech_keywords = ["engineer", "developer", "programmer", "tech", "software", "data", "analyst"]
            if "it" in occupation_words or any(tech_word in occupation for tech_word in tech_keywords):
                completed_persona[field] = 85
            elif any(word in occupation for word in ["teacher", "manager", "marketing", "business"]):
                completed_persona[field] = 65
            else:
                completed_persona[field] = fallback_values[field]

        elif field == "education" and "occupation" in persona:
            # Estimate education based on occupation
            if any(word in occupation for word in ["doctor", "engineer", "lawyer", "professor", "researcher"]):
                completed_persona[field] = "Master's Degree"
            elif any(word in occupation for word in ["technician", "assistant", "clerk"]):
                completed_persona[field] = "High School"
            else:
                completed_persona[field] = fallback_values[field]

        elif field == "personality" and any(key in persona for key in ["occupation", "interests"]):
            # Generate personality based on occupation or interests
            if "creative" in occupation or "art" in occupation or "design" in occupation:
                completed_persona[field] = "Creative and artistic individual with strong aesthetic sensibilities"
            elif "engineer" in occupation or "technical" in occupation:
                completed_persona[field] = "Analytical and detail-oriented professional who values precision"
            elif "teaching" in occupation or "education" in occupation:
                completed_persona[field] = "Patient and communicative individual who enjoys helping others learn"
            elif "sports" in interests or "fitness" in interests:
                completed_persona[field] = "Active and health-conscious person with competitive spirit"
            else:
                completed_persona[field] = fallback_values[field]

        else:
            # Use fallback value
            completed_persona[field] = fallback_values[field]

        print(f"🔧 Backend: Completed missing field '{field}' for persona with value: {completed_persona[field]}")

    return completed_persona


async def generate_persona(
    prompt_customization: Optional[str] = None,
    basic_persona: Optional[Dict[str, Any]] = None,
    temperature: float = 1.0,
    customer_data_session_id: Optional[str] = None,
    llm_model: Optional[str] = None,
    audience_brief: Optional[str] = None,
    research_objective: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate a synthetic persona using the specified LLM model.

    Assembles the detailed-generation prompt (customer data context, optional
    customization text, optional basic profile), requests a structured
    response from the LLM and validates the result. An ``id`` of the form
    ``generated-<uuid4>`` is added when the response has none.

    Args:
        prompt_customization: Optional string to customize the generation
        basic_persona: Optional dictionary containing basic persona data to start with
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        customer_data_session_id: Optional session ID for customer data context
        llm_model: Optional LLM model to use for generation
        audience_brief: Optional audience brief for research context
        research_objective: Optional research objective for research context

    Returns:
        A dictionary containing the generated persona data

    Raises:
        PersonaGenerationError: If a prompt cannot be loaded, the LLM call fails,
            the response is not an object, required fields are missing, or the
            age is not a single non-negative integer
    """
    try:
        from app.services.llm_usage_context import set_llm_context
        set_llm_context(feature="persona_generate")

        # If audience_brief or research_objective provided but no prompt_customization,
        # generate customization so the LLM knows the research context
        if not prompt_customization and (audience_brief or research_objective):
            prompt_customization = customize_persona_prompt(
                audience_brief=audience_brief,
                research_objective=research_objective
            )

        # Load customer data context if session ID provided
        if customer_data_session_id:
            customer_data_content = customer_data_service.get_parsed_markdown_content(customer_data_session_id)
            if customer_data_content:
                customer_data_context = f"The following customer data was uploaded and should be used to inform persona creation:\n\n{customer_data_content}"
            else:
                customer_data_context = "No customer data available for this session."
        else:
            customer_data_context = "No customer data provided."

        # Load the base prompt
        try:
            final_prompt = load_prompt('persona-detailed-generation', {
                'customer_data_context': customer_data_context
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading prompt: {str(e)}") from e

        # Add customization if provided
        if prompt_customization:
            final_prompt = f"{final_prompt}\n\nAdditional customization: {prompt_customization}"

        # Add basic persona data if provided
        if basic_persona:
            basic_data_str = "\nUse this basic profile as a starting point:\n"
            basic_data_str += json.dumps(basic_persona, indent=2)
            basic_data_str += "\n\nMaintain the demographic information above while expanding the persona with goals, frustrations, motivations, etc."

            final_prompt = f"{final_prompt}\n{basic_data_str}"

        try:
            system_prompt = load_prompt('persona-system')
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading system prompt: {str(e)}") from e

        # Log the LLM API call
        persona_name = basic_persona.get('name', 'Unknown') if basic_persona else 'New Persona'
        print(f"🤖 Backend: Making LLM API call to {llm_model or 'gemini-3-pro-preview'} for detailed persona generation of '{persona_name}'")

        try:
            persona_data = await LLMService.generate_structured_response(
                prompt=final_prompt,
                temperature=temperature,
                system_prompt=system_prompt,
                model_name=llm_model
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service: {str(e)}") from e

        # The validation below uses dict semantics; fail with a clear message
        # rather than an AttributeError if the LLM returned something else.
        if not isinstance(persona_data, dict):
            raise PersonaGenerationError(f"Expected a persona object but got {type(persona_data)}")

        # Validate the required fields
        required_fields = ["name", "age", "gender", "occupation", "location", "techSavviness", "personality"]
        missing_fields = [field for field in required_fields if field not in persona_data]

        if missing_fields:
            raise PersonaGenerationError(f"Generated persona is missing required fields: {', '.join(missing_fields)}")

        # Validate that age is a single number, not a range
        age_value = persona_data.get("age", "")
        if isinstance(age_value, str) and "-" in age_value:
            raise PersonaGenerationError(
                f"Generated persona has an invalid age range '{age_value}'. Age must be a single specific number (e.g., '35', not '35-42')"
            )

        # Validate that age is numeric
        age_str = str(age_value).strip()
        if not age_str.isdigit():
            raise PersonaGenerationError(
                f"Generated persona has an invalid age '{age_value}'. Age must be a numeric value (e.g., '35')"
            )

        # Generate ID if missing
        if "id" not in persona_data:
            persona_data["id"] = f"generated-{uuid.uuid4()}"

        return persona_data

    except PersonaGenerationError:
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error generating persona: {str(e)}") from e


async def generate_persona_summary(
    persona_data: Dict[str, Any],
    temperature: float = 1.0,
    llm_model: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate a concise summary of a persona for display on persona cards.

    The persona is serialized into the summary prompt, the LLM reply is
    reduced to its JSON object (code fences and surrounding text removed) and
    parsed. If strict parsing fails, the text is passed through
    _sanitize_json_response and parsed once more before giving up.

    Args:
        persona_data: The complete persona data dictionary
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        llm_model: Optional LLM model to use for generation

    Returns:
        A dictionary containing aiSynthesizedBio, qualitativeAttributes, and topPersonalityTraits

    Raises:
        PersonaGenerationError: If there's an issue with the AI generation or JSON parsing,
            or the parsed summary lacks a required field or has a wrongly typed one
    """
    try:
        # Sanitize persona data for JSON serialization
        sanitized_persona_data = _sanitize_persona_data_for_json(persona_data)

        # Load and format the prompt with the persona data
        try:
            final_prompt = load_prompt('persona-summary-generation', {
                'persona_data': json.dumps(sanitized_persona_data, indent=2)
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading summary prompt: {str(e)}") from e

        try:
            system_prompt = load_prompt('persona-system')
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading system prompt: {str(e)}") from e

        # Log the LLM API call
        persona_name = persona_data.get('name', 'Unknown')
        print(f"🤖 Backend: Making LLM API call to {llm_model or 'gemini-3-pro-preview'} for summary generation of '{persona_name}'")

        try:
            raw_response = await LLMService.generate_content(
                prompt=final_prompt,
                temperature=temperature,
                system_prompt=system_prompt,
                model_name=llm_model
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service: {str(e)}") from e

        clean_response = raw_response.strip()

        # Remove a wrapping markdown code fence. Slicing is used on purpose:
        # str.strip("```json") would strip a *set of characters*, not a prefix.
        if clean_response.startswith("```"):
            clean_response = clean_response[3:]
            if clean_response[:4].lower() == "json":
                clean_response = clean_response[4:]
            clean_response = clean_response.strip()
            if clean_response.endswith("```"):
                clean_response = clean_response[:-3].rstrip()

        # Keep only the outermost {...} span so text before *or after* the
        # object does not break parsing.
        start_idx = clean_response.find("{")
        end_idx = clean_response.rfind("}")
        if start_idx != -1 and end_idx > start_idx:
            clean_response = clean_response[start_idx:end_idx + 1]

        print(f"Attempting to parse summary JSON: {clean_response[:100]}...")
        try:
            summary_data = json.loads(clean_response)
        except json.JSONDecodeError:
            # Strict parsing first so well-formed replies are never rewritten;
            # only malformed ones go through the sanitizer.
            clean_response = _sanitize_json_response(clean_response)
            try:
                summary_data = json.loads(clean_response)
            except json.JSONDecodeError as e:
                raise PersonaGenerationError(
                    f"Failed to parse summary JSON response: {str(e)}. Raw response: {clean_response[:200]}..."
                ) from e

        # Verify it's a dictionary with required fields
        if not isinstance(summary_data, dict):
            raise PersonaGenerationError(f"Expected a summary object but got {type(summary_data)}")

        required_fields = ["aiSynthesizedBio", "qualitativeAttributes", "topPersonalityTraits"]
        missing_fields = [field for field in required_fields if field not in summary_data]

        if missing_fields:
            raise PersonaGenerationError(f"Summary is missing required fields: {', '.join(missing_fields)}")

        # Validate field types
        if not isinstance(summary_data["aiSynthesizedBio"], str):
            raise PersonaGenerationError("aiSynthesizedBio must be a string")
        if not isinstance(summary_data["qualitativeAttributes"], list):
            raise PersonaGenerationError("qualitativeAttributes must be an array")
        if not isinstance(summary_data["topPersonalityTraits"], list):
            raise PersonaGenerationError("topPersonalityTraits must be an array")

        return summary_data

    except PersonaGenerationError:
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error generating persona summary: {str(e)}") from e


async def generate_persona_download_summary(
    persona_data: Dict[str, Any],
    temperature: float = 1.0,
    llm_model: Optional[str] = None
) -> str:
    """
    Generate a comprehensive markdown summary of a persona for download/client review.

    The persona is serialized into the download-summary prompt and the LLM
    reply is returned as markdown. When the whole reply is wrapped in a code
    fence, the opening fence (with an optional ``markdown``/``md`` tag) and
    the closing fence are removed; the document text itself is not altered.

    Args:
        persona_data: The complete persona data dictionary
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        llm_model: Optional LLM model to use for generation

    Returns:
        A string containing the markdown-formatted persona summary

    Raises:
        PersonaGenerationError: If there's an issue with the AI generation
    """
    try:
        # Sanitize persona data for JSON serialization
        sanitized_persona_data = _sanitize_persona_data_for_json(persona_data)

        # Load and format the prompt with the persona data
        try:
            final_prompt = load_prompt('persona-download-summary', {
                'persona_data': json.dumps(sanitized_persona_data, indent=2)
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading download summary prompt: {str(e)}") from e

        try:
            system_prompt = load_prompt('persona-system')
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading system prompt: {str(e)}") from e

        # Log the LLM API call
        persona_name = persona_data.get('name', 'Unknown')
        print(f"🤖 Backend: Making LLM API call to {llm_model or 'gemini-3-pro-preview'} for download summary of '{persona_name}'")

        # Generate the markdown content directly
        try:
            markdown_response = await LLMService.generate_content(
                prompt=final_prompt,
                temperature=temperature,
                system_prompt=system_prompt,
                model_name=llm_model
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service: {str(e)}") from e

        clean_response = markdown_response.strip()

        # Remove a wrapping code fence. Slicing is used on purpose:
        # str.strip("```markdown") strips a *set of characters* from both ends
        # and would eat trailing letters of the document (e.g. "...brand").
        if clean_response.startswith("```"):
            fenced_body = clean_response[3:]
            first_line, newline, remainder = fenced_body.partition("\n")
            if newline and first_line.strip().lower() in ("markdown", "md"):
                fenced_body = remainder
            fenced_body = fenced_body.strip()
            if fenced_body.endswith("```"):
                fenced_body = fenced_body[:-3]
            clean_response = fenced_body.strip()

        return clean_response

    except PersonaGenerationError:
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error generating persona download summary: {str(e)}") from e


def customize_persona_prompt(
    age_range: Optional[str] = None,
    gender: Optional[str] = None,
    occupation_type: Optional[str] = None,
    education_level: Optional[str] = None,
    location_type: Optional[str] = None,
    personality_traits: Optional[str] = None,
    interests: Optional[str] = None,
    audience_brief: Optional[str] = None,
    research_objective: Optional[str] = None
) -> Optional[str]:
    """
    Create a customized prompt for more specific persona generation.

    Two shapes of prompt are produced:

    * If an audience brief and/or research objective is given, the prompt is
      built around that context. Any individual trait parameters that were
      also supplied are appended as additional requirements.
    * Otherwise the prompt is a single line listing the supplied traits.

    Args:
        age_range: Age range for the persona
        gender: Gender of the persona
        occupation_type: Type of occupation
        education_level: Level of education
        location_type: Geographic location
        personality_traits: Personality characteristics
        interests: Personal interests and hobbies
        audience_brief: Full audience brief providing context for persona generation
        research_objective: Research objective to focus persona goals, frustrations, and scenarios

    Returns:
        A string with customization instructions or None if no customizations provided
    """
    # Collect the individual trait requirements up front. They must exist
    # before the context-driven branch runs, otherwise that branch can never
    # append them and they are silently dropped.
    labelled_traits = (
        ("Age range", age_range),
        ("Gender", gender),
        ("Occupation type", occupation_type),
        ("Education level", education_level),
        ("Location", location_type),
        ("Personality traits", personality_traits),
        ("Interests", interests),
    )
    customizations = [
        f"{label}: {value}" for label, value in labelled_traits if value
    ]

    # No brief/objective: fall back to a traits-only prompt (or nothing at all).
    if not (audience_brief or research_objective):
        if not customizations:
            return None
        return "Create a persona with these characteristics: " + "; ".join(customizations)

    # The brief/objective carries the most context, so it leads the prompt.
    prompt = ""
    if audience_brief:
        prompt += f"""
Audience Brief:
{audience_brief}
"""
    if research_objective:
        prompt += f"""
Research Objective:
{research_objective}
"""

    prompt += "\nBased on the above context, create a persona that would be relevant to this research."

    if research_objective:
        prompt += f"""

CRITICAL RESEARCH ALIGNMENT: This persona MUST be designed around the research objective: '{research_objective}'.

LIFE SCENARIOS REQUIREMENTS:
- At least 3 out of 5 scenarios MUST show this persona directly encountering, using, deciding about, or being impacted by aspects of: {research_objective}
- Each research-aligned scenario must be a specific, realistic situation showing their authentic relationship with this topic
- Show varied contexts: work situations, personal decisions, social interactions, consumer experiences - all demonstrating how '{research_objective}' appears in their real life
- Scenarios should reveal the persona's thoughts, feelings, and behaviors when dealing with this research topic
- Include both positive and challenging experiences related to the research focus
- Make scenarios concrete and specific to this research objective, not generic situations"""

    # Trait parameters narrow the context-driven persona further.
    if customizations:
        prompt += f"\nAdditionally, ensure the persona meets these specific requirements: {'; '.join(customizations)}"

    return prompt


async def enhance_audience_brief(
    audience_brief: str,
    research_objective: str,
    temperature: float = 1.0
) -> Dict[str, Any]:
    """
    Enhance audience brief and research objective with AI-generated improvements.

    The 'audience-brief-enhancement' prompt is filled with both inputs and sent
    to ``LLMService.generate_content``. The reply is expected to contain a JSON
    object; a surrounding markdown code fence and any text before/after the
    object are tolerated and removed before parsing.

    Args:
        audience_brief: The audience brief to enhance
        research_objective: The research objective to enhance
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)

    Returns:
        A dictionary with:
        - 'enhanced_audience_brief': The enhanced audience brief text
        - 'enhanced_research_objective': The enhanced research objective text
        - 'assumptions': List of assumptions/additions made (coerced to strings)

    Raises:
        PersonaGenerationError: If the prompt cannot be loaded, the LLM call
            fails, the reply is not a JSON object of the expected shape, or
            either enhanced text is blank.
    """
    try:
        # Load and format the prompt with both fields
        try:
            final_prompt = load_prompt('audience-brief-enhancement', {
                'audience_brief': audience_brief,
                'research_objective': research_objective
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading enhancement prompt: {str(e)}") from e

        # Generate enhanced content using the LLM service. Only this call can
        # raise LLMServiceError, so the handler is scoped to it alone.
        try:
            raw_response = await LLMService.generate_content(
                prompt=final_prompt,
                temperature=temperature
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service: {str(e)}") from e

        clean_response = raw_response.strip()

        # Remove a surrounding markdown code fence if present. Slicing is used
        # rather than str.strip("```json"): strip() treats its argument as a
        # set of characters and would also eat matching letters at the ends.
        if clean_response.startswith("```"):
            clean_response = clean_response[3:]
            if clean_response.startswith("json"):
                clean_response = clean_response[len("json"):]
            if clean_response.endswith("```"):
                clean_response = clean_response[:-3]
            clean_response = clean_response.strip()

        # Isolate the JSON object from any leading or trailing commentary.
        # For a clean "{...}" reply this slice is the whole string.
        start_idx = clean_response.find("{")
        end_idx = clean_response.rfind("}")
        if start_idx != -1 and end_idx > start_idx:
            clean_response = clean_response[start_idx:end_idx + 1]

        # Parse the JSON response
        try:
            enhancement_result = json.loads(clean_response)
        except json.JSONDecodeError as e:
            raise PersonaGenerationError(
                f"Failed to parse JSON response: {str(e)}. Raw response: {clean_response[:200]}..."
            ) from e

        # Verify it's an object
        if not isinstance(enhancement_result, dict):
            raise PersonaGenerationError(f"Expected a JSON object but got {type(enhancement_result)}")

        # Verify required keys exist
        required_keys = ['enhanced_audience_brief', 'enhanced_research_objective', 'assumptions']
        for key in required_keys:
            if key not in enhancement_result:
                raise PersonaGenerationError(f"Response missing required key: '{key}'")

        # Verify enhanced texts are strings
        if not isinstance(enhancement_result['enhanced_audience_brief'], str):
            raise PersonaGenerationError("enhanced_audience_brief must be a string")
        if not isinstance(enhancement_result['enhanced_research_objective'], str):
            raise PersonaGenerationError("enhanced_research_objective must be a string")

        # Verify assumptions is a list
        if not isinstance(enhancement_result['assumptions'], list):
            raise PersonaGenerationError("assumptions must be an array")

        # Convert any non-string assumptions to strings
        enhancement_result['assumptions'] = [
            assumption if isinstance(assumption, str) else str(assumption)
            for assumption in enhancement_result['assumptions']
        ]

        # Validate we got meaningful content
        if not enhancement_result['enhanced_audience_brief'].strip():
            raise PersonaGenerationError("Enhanced audience brief is empty")
        if not enhancement_result['enhanced_research_objective'].strip():
            raise PersonaGenerationError("Enhanced research objective is empty")

        return enhancement_result

    except PersonaGenerationError:
        # Already in the service's error type; pass through untouched.
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error enhancing audience brief: {str(e)}") from e