async def generate_basic_personas(
    audience_brief: str,
    research_objective: Optional[str] = None,
    count: int = 5,
    temperature: float = 1.0,
    customer_data_session_id: Optional[str] = None,
    llm_model: Optional[str] = None,
    max_retries: int = 2
) -> List[Dict[str, Any]]:
    """
    Generate basic profiles for multiple personas, retrying failed generations.

    Each attempt is delegated to ``_generate_basic_personas_attempt``. Only
    ``PersonaGenerationError`` triggers a retry; any other exception is wrapped
    in a ``PersonaGenerationError`` and raised immediately.

    Args:
        audience_brief: Description of the target audience
        research_objective: Optional research objective passed to the generation attempt
        count: Number of personas requested
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        customer_data_session_id: Optional session ID for customer data context
        llm_model: Optional LLM model to use for generation
        max_retries: Maximum number of retry attempts after the first try.
            Negative values are treated as 0, so at least one attempt is always made.

    Returns:
        A list of dictionaries containing basic persona data

    Raises:
        PersonaGenerationError: If every attempt fails, or an unexpected error occurs
    """
    # A negative retry budget would otherwise skip the loop entirely and report
    # "failed after all retries" without ever calling the model.
    max_retries = max(0, max_retries)

    for attempt in range(max_retries + 1):
        if attempt > 0:
            print(f"🔄 Backend: Retry attempt {attempt}/{max_retries} for basic persona generation")

        try:
            return await _generate_basic_personas_attempt(
                audience_brief=audience_brief,
                research_objective=research_objective,
                count=count,
                temperature=temperature,
                customer_data_session_id=customer_data_session_id,
                llm_model=llm_model,
                attempt=attempt + 1
            )
        except PersonaGenerationError as e:
            if attempt >= max_retries:
                print(f"❌ Backend: All {max_retries + 1} attempts failed")
                # Bare raise keeps the original traceback intact.
                raise
            print(f"⚠️ Backend: Attempt {attempt + 1} failed: {str(e)}")
            print(f"🔄 Backend: Will retry ({max_retries - attempt} attempts remaining)")
        except Exception as e:
            # PersonaGenerationError is handled above, so anything arriving here is
            # unexpected: wrap it once and keep the cause for debugging.
            raise PersonaGenerationError(f"Error generating basic personas: {str(e)}") from e

    # Unreachable: the loop body always returns or raises. Kept so the function
    # can never fall through and implicitly return None.
    raise PersonaGenerationError("Failed to generate basic personas after all retries")
clean_response = raw_response - + # Remove markdown code blocks if present if clean_response.startswith("```json"): clean_response = clean_response.strip("```json").strip("```").strip() elif clean_response.startswith("```"): clean_response = clean_response.strip("```").strip() - + # Try to find the JSON array in the response if there's extra text if not clean_response.startswith("["): # Look for the opening bracket @@ -195,79 +245,198 @@ async def generate_basic_personas( end_idx = clean_response.rfind("]") if end_idx != -1 and end_idx > start_idx: clean_response = clean_response[start_idx:end_idx+1] - + # Sanitize JSON for high-temperature responses clean_response = _sanitize_json_response(clean_response) - + # Parse the JSON manually try: - print(f"Attempting to parse JSON array: {clean_response[:100]}...") + print(f"Attempting to parse JSON array{attempt_text}: {clean_response[:100]}...") personas_array = json.loads(clean_response) - + # Verify it's an array if not isinstance(personas_array, list): raise PersonaGenerationError(f"Expected an array of personas but got {type(personas_array)}") - + except json.JSONDecodeError as e: # Enhanced error logging for high-temperature JSON issues error_pos = getattr(e, 'pos', 0) error_context = clean_response[max(0, error_pos-50):error_pos+50] if error_pos > 0 else clean_response[:100] - - print(f"JSON Parse Error at position {error_pos}: {str(e)}") - print(f"Error context: ...{error_context}...") - print(f"Temperature might be too high (>{temperature or 'unknown'}) causing malformed JSON") - + + print(f"❌ Backend: JSON Parse Error at position {error_pos}{attempt_text}: {str(e)}") + print(f"❌ Backend: Error context{attempt_text}: ...{error_context}...") + raise PersonaGenerationError( - f"Failed to parse JSON response: {str(e)}. " - f"This often happens with high temperature values (>{temperature or 'unknown'}). " - f"Try lowering the temperature to 1.0 or below for more reliable JSON formatting. 
" + f"Failed to parse JSON response on attempt {attempt}: {str(e)}. " f"Context: ...{error_context[:100]}..." ) - + except LLMServiceError as e: - raise PersonaGenerationError(f"Error from LLM service: {str(e)}") - + raise PersonaGenerationError(f"Error from LLM service on attempt {attempt}: {str(e)}") + # Validate we got an array with the right count if not isinstance(personas_array, list): - raise PersonaGenerationError(f"Expected an array of personas but got {type(personas_array)}") - + raise PersonaGenerationError(f"Expected an array of personas but got {type(personas_array)} on attempt {attempt}") + # Check if we got at least one persona if len(personas_array) == 0: - raise PersonaGenerationError("No personas were generated") - + raise PersonaGenerationError(f"No personas were generated on attempt {attempt}") + # If we got fewer personas than requested, log a warning but continue if len(personas_array) < count: - print(f"Warning: Requested {count} personas but only got {len(personas_array)}") - - # Basic validation of each persona - required_fields = ["name", "age", "gender", "occupation", "personality"] + print(f"⚠️ Backend: Warning on attempt {attempt}: Requested {count} personas but only got {len(personas_array)}") + + # Enhanced validation and completion of each persona + required_fields = ["name", "age", "gender", "occupation", "education", "location", "techSavviness", "personality", "interests"] + completed_personas = [] + for i, persona in enumerate(personas_array): missing_fields = [field for field in required_fields if field not in persona] + + # Attempt field completion for missing fields if missing_fields: - raise PersonaGenerationError( - f"Persona {i+1} is missing required fields: {', '.join(missing_fields)}" - ) - + print(f"⚠️ Backend: Persona {i+1} on attempt {attempt} is missing fields: {missing_fields}") + print(f"🔧 Backend: Attempting to complete missing fields for persona {i+1}") + + # Try to complete missing fields based on existing data + 
persona = _complete_missing_persona_fields(persona, missing_fields, attempt) + + # Re-check for still missing fields after completion attempt + still_missing = [field for field in required_fields if field not in persona] + if still_missing: + print(f"❌ Backend: Persona {i+1} validation failed on attempt {attempt} - Still missing fields after completion: {still_missing}") + print(f"❌ Backend: Persona {i+1} actual fields: {list(persona.keys())}") + print(f"❌ Backend: Persona {i+1} data: {json.dumps(persona, indent=2)[:500]}...") + if attempt == 1: # Only log full response on first attempt to avoid spam + print(f"❌ Backend: Full LLM response for debugging: {clean_response[:1000]}...") + raise PersonaGenerationError( + f"Persona {i+1} ({persona.get('name', 'Unknown')}) is still missing required fields after completion attempt: {', '.join(still_missing)} on attempt {attempt}. " + f"Expected fields: {required_fields}. " + f"Actual fields: {list(persona.keys())}. " + f"This suggests the LLM did not follow the prompt instructions correctly." + ) + else: + print(f"✅ Backend: Successfully completed missing fields for persona {i+1}") + # Validate that age is a single number, not a range age_value = persona.get("age", "") if isinstance(age_value, str) and "-" in age_value: raise PersonaGenerationError( - f"Persona {i+1} has an invalid age range '{age_value}'. Age must be a single specific number (e.g., '35', not '35-42')" + f"Persona {i+1} has an invalid age range '{age_value}' on attempt {attempt}. Age must be a single specific number (e.g., '35', not '35-42')" ) - + # Validate that age is numeric age_str = str(age_value).strip() if not age_str.isdigit(): raise PersonaGenerationError( - f"Persona {i+1} has an invalid age '{age_value}'. Age must be a numeric value (e.g., '35')" + f"Persona {i+1} has an invalid age '{age_value}' on attempt {attempt}. 
Age must be a numeric value (e.g., '35')" ) - - return personas_array - + + completed_personas.append(persona) + + print(f"✅ Backend: Successfully validated {len(completed_personas)} basic personas on attempt {attempt}") + return completed_personas + except Exception as e: if isinstance(e, PersonaGenerationError): raise - raise PersonaGenerationError(f"Error generating basic personas: {str(e)}") + raise PersonaGenerationError(f"Error generating basic personas on attempt {attempt}: {str(e)}") + + +def _complete_missing_persona_fields(persona: Dict[str, Any], missing_fields: List[str], attempt: int) -> Dict[str, Any]: + """ + Attempt to complete missing persona fields with reasonable defaults based on existing data. + + Args: + persona: The persona dict with some missing fields + missing_fields: List of field names that are missing + attempt: The current attempt number for logging + + Returns: + Updated persona dict with completed fields where possible + """ + completed_persona = persona.copy() + + # Define fallback values based on available data or reasonable defaults + fallback_values = { + "name": f"Generated Person {attempt}", + "age": "30", + "gender": "Non-binary", + "occupation": "Professional", + "education": "Bachelor's Degree", + "location": "Urban Area", + "techSavviness": 50, + "personality": "Well-rounded individual with diverse interests", + "interests": "Technology, reading, socializing" + } + + # Smart completion based on existing persona data + for field in missing_fields: + if field == "name" and "gender" in persona: + # Generate a more appropriate name based on gender + gender = persona.get("gender", "").lower() + if "male" in gender and "fe" not in gender: + completed_persona[field] = f"John Person {attempt}" + elif "female" in gender: + completed_persona[field] = f"Jane Person {attempt}" + else: + completed_persona[field] = fallback_values[field] + + elif field == "age" and "occupation" in persona: + # Estimate age based on occupation + 
occupation = persona.get("occupation", "").lower() + if "student" in occupation: + completed_persona[field] = "22" + elif "senior" in occupation or "manager" in occupation or "director" in occupation: + completed_persona[field] = "45" + elif "entry" in occupation or "junior" in occupation: + completed_persona[field] = "25" + else: + completed_persona[field] = fallback_values[field] + + elif field == "techSavviness" and "occupation" in persona: + # Estimate tech savviness based on occupation + occupation = persona.get("occupation", "").lower() + if any(tech_word in occupation for tech_word in ["engineer", "developer", "programmer", "tech", "software", "it", "data", "analyst"]): + completed_persona[field] = 85 + elif any(word in occupation for word in ["teacher", "manager", "marketing", "business"]): + completed_persona[field] = 65 + else: + completed_persona[field] = fallback_values[field] + + elif field == "education" and "occupation" in persona: + # Estimate education based on occupation + occupation = persona.get("occupation", "").lower() + if any(word in occupation for word in ["doctor", "engineer", "lawyer", "professor", "researcher"]): + completed_persona[field] = "Master's Degree" + elif any(word in occupation for word in ["technician", "assistant", "clerk"]): + completed_persona[field] = "High School" + else: + completed_persona[field] = fallback_values[field] + + elif field == "personality" and any(key in persona for key in ["occupation", "interests"]): + # Generate personality based on occupation or interests + occupation = persona.get("occupation", "").lower() + interests = persona.get("interests", "").lower() + + if "creative" in occupation or "art" in occupation or "design" in occupation: + completed_persona[field] = "Creative and artistic individual with strong aesthetic sensibilities" + elif "engineer" in occupation or "technical" in occupation: + completed_persona[field] = "Analytical and detail-oriented professional who values precision" + elif 
"teaching" in occupation or "education" in occupation: + completed_persona[field] = "Patient and communicative individual who enjoys helping others learn" + elif "sports" in interests or "fitness" in interests: + completed_persona[field] = "Active and health-conscious person with competitive spirit" + else: + completed_persona[field] = fallback_values[field] + + else: + # Use fallback value + completed_persona[field] = fallback_values[field] + + print(f"🔧 Backend: Completed missing field '{field}' for persona with value: {completed_persona[field]}") + + return completed_persona async def generate_persona( diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py index a9499470..8acba2cb 100644 --- a/backend/app/services/llm_service.py +++ b/backend/app/services/llm_service.py @@ -23,7 +23,7 @@ gemini_client = genai.Client(api_key=GEMINI_API_KEY) # Set up OpenAI API key OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'REDACTED_OPENAI_KEY') -openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY) +openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0) # The default model we're using DEFAULT_MODEL = "gemini-2.5-pro" diff --git a/backend/prompts/persona-basic-generation.md b/backend/prompts/persona-basic-generation.md index c2da0fba..7a2552eb 100644 --- a/backend/prompts/persona-basic-generation.md +++ b/backend/prompts/persona-basic-generation.md @@ -11,6 +11,18 @@ Research Objective: Customer Data Context: {customer_data_context} +**CRITICAL REQUIRED FIELDS - EVERY PERSONA MUST INCLUDE ALL OF THESE:** +The following fields are absolutely mandatory for each persona. 
Missing any of these will cause the generation to fail: +- "name" (string): Full name of the persona +- "age" (string): Specific age as a single number (e.g., "35") +- "gender" (string): Gender identity +- "occupation" (string): Current job/profession +- "education" (string): Education level +- "location" (string): Geographic location +- "techSavviness" (number): Tech skill level from 0-100 +- "personality" (string): Personality description +- "interests" (string): Personal interests and hobbies + For each persona, provide these basic demographic and personality details: - Make sure personas are diverse and represent different segments of the population relevant to the audience brief - If a research objective is provided, ensure personas would have different perspectives and experiences related to that specific research topic @@ -54,7 +66,15 @@ EXAMPLE_JSON_END CRITICAL AGE REQUIREMENT: The "age" field MUST contain a single, specific number (e.g., "35", "42") representing the persona's exact age. DO NOT use age ranges (e.g., "35-42", "30-35"). These are individual personas and each person has one specific age, not a range. -IMPORTANT: +**VALIDATION REQUIREMENTS - READ AND FOLLOW:** +Before submitting your response, you MUST verify that: +1. Every persona contains ALL 9 required fields listed above +2. No persona is missing any required field +3. All field values are properly formatted (strings in quotes, numbers without quotes) +4. The JSON is valid and properly escaped +5. You have generated exactly {count} personas + +IMPORTANT: - Return EXACTLY {count} personas in a JSON array format - Do not include any comments (like "// Second persona") in the JSON - Do not include any text before or after the JSON array @@ -64,4 +84,5 @@ IMPORTANT: - All string values must be valid JSON strings with proper escaping (use \" for quotes, \\n for newlines, etc.) - Ensure diversity among the personas (different ages, genders, backgrounds, etc.) 
- Make each persona relevant to both the audience brief AND research objective provided -- If no research objective is provided, focus solely on the audience brief \ No newline at end of file +- If no research objective is provided, focus solely on the audience brief +- DOUBLE-CHECK: Every persona must have name, age, gender, occupation, education, location, techSavviness, personality, and interests fields \ No newline at end of file diff --git a/dist/index.html b/dist/index.html index 85660fbd..8013a1d3 100644 --- a/dist/index.html +++ b/dist/index.html @@ -7,7 +7,7 @@ - + diff --git a/src/components/FocusGroupModerator.tsx b/src/components/FocusGroupModerator.tsx index 63fc96af..16c85b98 100644 --- a/src/components/FocusGroupModerator.tsx +++ b/src/components/FocusGroupModerator.tsx @@ -1089,8 +1089,8 @@ export default function FocusGroupModerator({ draftToEdit, onDraftSaved, preSele // Generate discussion guide based on form input (after database is updated) const guide = await generateDiscussionGuide(values, focusGroupId); - // Check if generation was cancelled (returns empty string) - if (!guide || guide.trim() === '') { + // Check if generation was cancelled (returns empty string or object) + if (!guide || (typeof guide === 'string' && guide.trim() === '')) { console.log('Discussion guide generation was cancelled'); return; // Exit early, don't process or show success toasts }