- Model renames: gpt-5.2 → gpt-5.4-2026-03-05, gemini-3-pro-preview → gemini-3.1-pro-preview; retire gpt-4.1 via alias fallback - New: llm_usage_context.py (ContextVar-based attribution), model_pricing.py (tiered pricing + 60s cache), usage_event.py (append-only telemetry), quota.py (user/FG quota enforcement with 80% warning) - Wire _record_usage into all 3 LLM methods; set_llm_context at every service entry point - Fix admin_required decorator (was sync, never awaited User.find_by_id); add active_required and with_user_context decorators - Inject user_id into ContextVar from JWT on every authenticated request - Add DB indexes for usage_events, model_pricing, users collections - Seed script for model pricing (gpt-5.4 single-tier, gemini-3.1 two-tier 200k threshold) - Fix parse_json_response NameError (logger undefined at module level) - 70 passing tests: conftest.py with sys.modules stubs, test_usage_infrastructure.py (52 tests), rewrite stale test_llm_service.py (18 tests) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
930 lines
No EOL
40 KiB
Python
Executable file
930 lines
No EOL
40 KiB
Python
Executable file
"""
|
|
AI Persona Generation Service using Google's Gemini model.
|
|
This service handles the integration with the Gemini API to generate
|
|
synthetic persona data based on a predefined prompt.
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import uuid
|
|
from typing import Dict, Any, Optional, List
|
|
from pydantic import BaseModel, ValidationError
|
|
from datetime import datetime
|
|
|
|
from .llm_service import LLMService, LLMServiceError
|
|
from .customer_data_service import customer_data_service
|
|
from app.utils.prompt_loader import load_prompt, PromptLoaderError
|
|
|
|
|
|
|
|
|
|
|
|
class PersonaGenerationError(Exception):
    """Raised when any step of persona generation fails.

    The functions in this module wrap prompt-loading errors, LLM service
    errors, JSON parsing errors and validation failures in this exception so
    callers only need to handle a single exception type.
    """
|
|
|
|
|
|
def _sanitize_persona_data_for_json(persona_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Sanitize persona data to make it JSON serializable.
|
|
|
|
Args:
|
|
persona_data: The persona data dictionary that may contain non-serializable objects
|
|
|
|
Returns:
|
|
A sanitized dictionary that can be JSON serialized
|
|
"""
|
|
sanitized = {}
|
|
|
|
for key, value in persona_data.items():
|
|
if isinstance(value, datetime):
|
|
# Convert datetime to ISO string
|
|
sanitized[key] = value.isoformat()
|
|
elif isinstance(value, dict):
|
|
# Recursively sanitize nested dictionaries
|
|
sanitized[key] = _sanitize_persona_data_for_json(value)
|
|
elif isinstance(value, list):
|
|
# Sanitize list items
|
|
sanitized[key] = [
|
|
_sanitize_persona_data_for_json(item) if isinstance(item, dict)
|
|
else item.isoformat() if isinstance(item, datetime)
|
|
else item
|
|
for item in value
|
|
]
|
|
else:
|
|
# Keep other values as-is
|
|
sanitized[key] = value
|
|
|
|
return sanitized
|
|
|
|
|
|
def _sanitize_json_response(response: str) -> str:
|
|
"""
|
|
Sanitize JSON response from LLM to handle high-temperature artifacts.
|
|
|
|
Args:
|
|
response: Raw JSON response string from LLM
|
|
|
|
Returns:
|
|
Sanitized JSON string safe for parsing
|
|
"""
|
|
import re
|
|
|
|
# Step 1: Remove invalid control characters (but preserve valid whitespace)
|
|
sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', response)
|
|
|
|
# Step 2: Replace smart quotes and similar characters
|
|
sanitized = sanitized.replace('"', '"').replace('"', '"')
|
|
sanitized = sanitized.replace(''', "'").replace(''', "'")
|
|
sanitized = sanitized.replace('…', '...')
|
|
|
|
# Step 3: Remove trailing commas
|
|
sanitized = re.sub(r',(\s*[}\]])', r'\1', sanitized)
|
|
|
|
# Step 4: Try to fix common newline issues in strings
|
|
# Replace unescaped newlines within string values
|
|
lines = sanitized.split('\n')
|
|
fixed_lines = []
|
|
in_string = False
|
|
string_char = None
|
|
|
|
for line in lines:
|
|
if not in_string:
|
|
fixed_lines.append(line)
|
|
else:
|
|
# We're continuing a string from previous line
|
|
fixed_lines[-1] += '\\n' + line.strip()
|
|
|
|
# Track if we're inside a string
|
|
i = 0
|
|
while i < len(line):
|
|
char = line[i]
|
|
if char in ['"', "'"] and (i == 0 or line[i-1] != '\\'):
|
|
if not in_string:
|
|
in_string = True
|
|
string_char = char
|
|
elif char == string_char:
|
|
in_string = False
|
|
string_char = None
|
|
i += 1
|
|
|
|
return '\n'.join(fixed_lines).strip()
|
|
|
|
|
|
async def generate_basic_personas(
    audience_brief: str,
    research_objective: Optional[str] = None,
    count: int = 5,
    temperature: float = 1.0,
    customer_data_session_id: Optional[str] = None,
    llm_model: Optional[str] = None,
    max_retries: int = 2
) -> List[Dict[str, Any]]:
    """
    Generate basic profiles for multiple personas based on a research brief.

    Runs ``_generate_basic_personas_attempt`` up to ``max_retries + 1`` times.
    Only ``PersonaGenerationError`` triggers a retry; any other exception is
    wrapped in ``PersonaGenerationError`` and raised immediately.

    Args:
        audience_brief: The audience brief to guide persona generation
        research_objective: Optional research objective to focus persona goals and scenarios
        count: Number of basic personas to generate
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        customer_data_session_id: Optional session ID for customer data context
        llm_model: Optional LLM model to use for generation
        max_retries: Maximum number of retry attempts for failed generations.
            Negative values are treated as 0 (a single attempt, no retries).

    Returns:
        A list of dictionaries containing basic persona data

    Raises:
        PersonaGenerationError: If there's an issue with the AI generation or JSON parsing
    """
    from app.services.llm_usage_context import set_llm_context
    # Label this call as the "persona_generate" feature in the LLM usage context.
    set_llm_context(feature="persona_generate")

    # A negative value would make the loop below run zero times and skip
    # generation entirely, so clamp it to "no retries".
    max_retries = max(0, max_retries)
    last_error = None

    for attempt in range(max_retries + 1):
        try:
            if attempt > 0:
                print(f"🔄 Backend: Retry attempt {attempt}/{max_retries} for basic persona generation")

            return await _generate_basic_personas_attempt(
                audience_brief=audience_brief,
                research_objective=research_objective,
                count=count,
                temperature=temperature,
                customer_data_session_id=customer_data_session_id,
                llm_model=llm_model,
                attempt=attempt + 1
            )

        except PersonaGenerationError as e:
            last_error = e
            if attempt < max_retries:
                print(f"⚠️ Backend: Attempt {attempt + 1} failed: {str(e)}")
                print(f"🔄 Backend: Will retry ({max_retries - attempt} attempts remaining)")
                continue

            print(f"❌ Backend: All {max_retries + 1} attempts failed")
            # Bare raise keeps the original traceback intact.
            raise
        except Exception as e:
            # Unexpected failures are not retried; surface them in the module's
            # own exception type while preserving the cause.
            raise PersonaGenerationError(f"Error generating basic personas: {str(e)}") from e

    # This should never be reached, but just in case
    raise last_error if last_error else PersonaGenerationError("Failed to generate basic personas after all retries")
|
|
|
|
|
|
async def _generate_basic_personas_attempt(
    audience_brief: str,
    research_objective: Optional[str] = None,
    count: int = 5,
    temperature: float = 1.0,
    customer_data_session_id: Optional[str] = None,
    llm_model: Optional[str] = None,
    attempt: int = 1
) -> List[Dict[str, Any]]:
    """
    Internal function to attempt generating basic personas. Separated for retry logic.

    Steps performed:
      1. build the customer-data context string for the prompt;
      2. load the 'persona-basic-generation' and 'persona-system' prompts;
      3. call ``LLMService.generate_content``;
      4. strip markdown fences / surrounding text and sanitize the JSON;
      5. parse the JSON array and validate every persona (required fields,
         with a best-effort completion of missing ones, and a numeric age).

    Args:
        audience_brief: The audience brief to guide persona generation
        research_objective: Optional research objective; an empty string is sent when omitted
        count: Number of personas requested (fewer is tolerated with a warning)
        temperature: Sampling temperature forwarded to the LLM service
        customer_data_session_id: Optional session ID for customer data context
        llm_model: Optional LLM model name forwarded to the LLM service
        attempt: 1-based attempt number, used only in log and error messages

    Returns:
        The list of validated (and, where needed, completed) persona dictionaries

    Raises:
        PersonaGenerationError: On prompt loading, LLM service, JSON parsing
            or validation failures, and for any other unexpected error
    """
    try:
        # Load customer data context if session ID provided
        if customer_data_session_id:
            customer_data_content = customer_data_service.get_parsed_markdown_content(customer_data_session_id)
            if customer_data_content:
                customer_data_context = f"The following customer data was uploaded and should be used to inform persona creation:\n\n{customer_data_content}"
            else:
                customer_data_context = "No customer data available for this session."
        else:
            customer_data_context = "No customer data provided."

        # Load and format the prompt with the audience brief and count
        try:
            final_prompt = load_prompt('persona-basic-generation', {
                'audience_brief': audience_brief,
                'research_objective': research_objective or '',
                'count': count,
                'customer_data_context': customer_data_context
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading prompt: {str(e)}") from e

        try:
            system_prompt = load_prompt('persona-system')
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading system prompt: {str(e)}") from e

        # Log the LLM API call with attempt number
        # NOTE(review): the fallback model name below is only used for this log
        # line; presumably LLMService picks the real default when model_name is
        # None - confirm the two agree.
        attempt_text = f" (attempt {attempt})" if attempt > 1 else ""
        print(f"🤖 Backend: Making LLM API call to {llm_model or 'gemini-3-pro-preview'} for basic persona generation{attempt_text}")

        try:
            raw_response = await LLMService.generate_content(
                prompt=final_prompt,
                temperature=temperature,
                system_prompt=system_prompt,
                model_name=llm_model
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service on attempt {attempt}: {str(e)}") from e

        if not isinstance(raw_response, str):
            raise PersonaGenerationError(
                f"LLM service returned a non-text response ({type(raw_response).__name__}) on attempt {attempt}"
            )

        # Enhanced JSON cleaning for high-temperature responses
        clean_response = raw_response.strip()

        # Remove markdown code blocks if present. Slicing is used instead of
        # str.strip("```json"), which strips a *set of characters* rather than
        # a prefix/suffix.
        if clean_response.startswith("```"):
            opening_fence = "```json" if clean_response.startswith("```json") else "```"
            clean_response = clean_response[len(opening_fence):]
            if clean_response.endswith("```"):
                clean_response = clean_response[:-3]
            clean_response = clean_response.strip()

        # Try to find the JSON array in the response if there's extra text
        if not clean_response.startswith("["):
            start_idx = clean_response.find("[")
            end_idx = clean_response.rfind("]")
            if start_idx != -1 and end_idx > start_idx:
                clean_response = clean_response[start_idx:end_idx+1]

        # Sanitize JSON for high-temperature responses
        clean_response = _sanitize_json_response(clean_response)

        # Parse the JSON manually
        try:
            print(f"Attempting to parse JSON array{attempt_text}: {clean_response[:100]}...")
            personas_array = json.loads(clean_response)
        except json.JSONDecodeError as e:
            # Enhanced error logging for high-temperature JSON issues
            error_pos = getattr(e, 'pos', 0)
            error_context = clean_response[max(0, error_pos-50):error_pos+50] if error_pos > 0 else clean_response[:100]

            print(f"❌ Backend: JSON Parse Error at position {error_pos}{attempt_text}: {str(e)}")
            print(f"❌ Backend: Error context{attempt_text}: ...{error_context}...")

            raise PersonaGenerationError(
                f"Failed to parse JSON response on attempt {attempt}: {str(e)}. "
                f"Context: ...{error_context[:100]}..."
            ) from e

        # Validate we got an array
        if not isinstance(personas_array, list):
            raise PersonaGenerationError(f"Expected an array of personas but got {type(personas_array)} on attempt {attempt}")

        # Check if we got at least one persona
        if not personas_array:
            raise PersonaGenerationError(f"No personas were generated on attempt {attempt}")

        # If we got fewer personas than requested, log a warning but continue
        if len(personas_array) < count:
            print(f"⚠️ Backend: Warning on attempt {attempt}: Requested {count} personas but only got {len(personas_array)}")

        # Enhanced validation and completion of each persona
        required_fields = ["name", "age", "gender", "occupation", "education", "location", "techSavviness", "personality", "interests"]
        completed_personas = []

        for i, persona in enumerate(personas_array):
            # Array items that are not JSON objects cannot be validated or
            # completed; fail with a clear message instead of a TypeError.
            if not isinstance(persona, dict):
                raise PersonaGenerationError(
                    f"Persona {i+1} is not a JSON object (got {type(persona).__name__}) on attempt {attempt}"
                )

            missing_fields = [field for field in required_fields if field not in persona]

            # Attempt field completion for missing fields
            if missing_fields:
                print(f"⚠️ Backend: Persona {i+1} on attempt {attempt} is missing fields: {missing_fields}")
                print(f"🔧 Backend: Attempting to complete missing fields for persona {i+1}")

                # Try to complete missing fields based on existing data
                persona = _complete_missing_persona_fields(persona, missing_fields, attempt)

                # Re-check for still missing fields after completion attempt
                still_missing = [field for field in required_fields if field not in persona]
                if still_missing:
                    print(f"❌ Backend: Persona {i+1} validation failed on attempt {attempt} - Still missing fields after completion: {still_missing}")
                    print(f"❌ Backend: Persona {i+1} actual fields: {list(persona.keys())}")
                    print(f"❌ Backend: Persona {i+1} data: {json.dumps(persona, indent=2)[:500]}...")
                    if attempt == 1:  # Only log full response on first attempt to avoid spam
                        print(f"❌ Backend: Full LLM response for debugging: {clean_response[:1000]}...")
                    raise PersonaGenerationError(
                        f"Persona {i+1} ({persona.get('name', 'Unknown')}) is still missing required fields after completion attempt: {', '.join(still_missing)} on attempt {attempt}. "
                        f"Expected fields: {required_fields}. "
                        f"Actual fields: {list(persona.keys())}. "
                        f"This suggests the LLM did not follow the prompt instructions correctly."
                    )

                print(f"✅ Backend: Successfully completed missing fields for persona {i+1}")

            # Validate that age is a single number, not a range
            age_value = persona.get("age", "")
            if isinstance(age_value, str) and "-" in age_value:
                raise PersonaGenerationError(
                    f"Persona {i+1} has an invalid age range '{age_value}' on attempt {attempt}. Age must be a single specific number (e.g., '35', not '35-42')"
                )

            # Validate that age is numeric
            if not str(age_value).strip().isdigit():
                raise PersonaGenerationError(
                    f"Persona {i+1} has an invalid age '{age_value}' on attempt {attempt}. Age must be a numeric value (e.g., '35')"
                )

            completed_personas.append(persona)

        print(f"✅ Backend: Successfully validated {len(completed_personas)} basic personas on attempt {attempt}")
        return completed_personas

    except PersonaGenerationError:
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error generating basic personas on attempt {attempt}: {str(e)}") from e
|
|
|
|
|
|
def _complete_missing_persona_fields(persona: Dict[str, Any], missing_fields: List[str], attempt: int) -> Dict[str, Any]:
|
|
"""
|
|
Attempt to complete missing persona fields with reasonable defaults based on existing data.
|
|
|
|
Args:
|
|
persona: The persona dict with some missing fields
|
|
missing_fields: List of field names that are missing
|
|
attempt: The current attempt number for logging
|
|
|
|
Returns:
|
|
Updated persona dict with completed fields where possible
|
|
"""
|
|
completed_persona = persona.copy()
|
|
|
|
# Define fallback values based on available data or reasonable defaults
|
|
fallback_values = {
|
|
"name": f"Generated Person {attempt}",
|
|
"age": "30",
|
|
"gender": "Non-binary",
|
|
"occupation": "Professional",
|
|
"education": "Bachelor's Degree",
|
|
"location": "Urban Area",
|
|
"techSavviness": 50,
|
|
"personality": "Well-rounded individual with diverse interests",
|
|
"interests": "Technology, reading, socializing"
|
|
}
|
|
|
|
# Smart completion based on existing persona data
|
|
for field in missing_fields:
|
|
if field == "name" and "gender" in persona:
|
|
# Generate a more appropriate name based on gender
|
|
gender = persona.get("gender", "").lower()
|
|
if "male" in gender and "fe" not in gender:
|
|
completed_persona[field] = f"John Person {attempt}"
|
|
elif "female" in gender:
|
|
completed_persona[field] = f"Jane Person {attempt}"
|
|
else:
|
|
completed_persona[field] = fallback_values[field]
|
|
|
|
elif field == "age" and "occupation" in persona:
|
|
# Estimate age based on occupation
|
|
occupation = persona.get("occupation", "").lower()
|
|
if "student" in occupation:
|
|
completed_persona[field] = "22"
|
|
elif "senior" in occupation or "manager" in occupation or "director" in occupation:
|
|
completed_persona[field] = "45"
|
|
elif "entry" in occupation or "junior" in occupation:
|
|
completed_persona[field] = "25"
|
|
else:
|
|
completed_persona[field] = fallback_values[field]
|
|
|
|
elif field == "techSavviness" and "occupation" in persona:
|
|
# Estimate tech savviness based on occupation
|
|
occupation = persona.get("occupation", "").lower()
|
|
if any(tech_word in occupation for tech_word in ["engineer", "developer", "programmer", "tech", "software", "it", "data", "analyst"]):
|
|
completed_persona[field] = 85
|
|
elif any(word in occupation for word in ["teacher", "manager", "marketing", "business"]):
|
|
completed_persona[field] = 65
|
|
else:
|
|
completed_persona[field] = fallback_values[field]
|
|
|
|
elif field == "education" and "occupation" in persona:
|
|
# Estimate education based on occupation
|
|
occupation = persona.get("occupation", "").lower()
|
|
if any(word in occupation for word in ["doctor", "engineer", "lawyer", "professor", "researcher"]):
|
|
completed_persona[field] = "Master's Degree"
|
|
elif any(word in occupation for word in ["technician", "assistant", "clerk"]):
|
|
completed_persona[field] = "High School"
|
|
else:
|
|
completed_persona[field] = fallback_values[field]
|
|
|
|
elif field == "personality" and any(key in persona for key in ["occupation", "interests"]):
|
|
# Generate personality based on occupation or interests
|
|
occupation = persona.get("occupation", "").lower()
|
|
interests = persona.get("interests", "").lower()
|
|
|
|
if "creative" in occupation or "art" in occupation or "design" in occupation:
|
|
completed_persona[field] = "Creative and artistic individual with strong aesthetic sensibilities"
|
|
elif "engineer" in occupation or "technical" in occupation:
|
|
completed_persona[field] = "Analytical and detail-oriented professional who values precision"
|
|
elif "teaching" in occupation or "education" in occupation:
|
|
completed_persona[field] = "Patient and communicative individual who enjoys helping others learn"
|
|
elif "sports" in interests or "fitness" in interests:
|
|
completed_persona[field] = "Active and health-conscious person with competitive spirit"
|
|
else:
|
|
completed_persona[field] = fallback_values[field]
|
|
|
|
else:
|
|
# Use fallback value
|
|
completed_persona[field] = fallback_values[field]
|
|
|
|
print(f"🔧 Backend: Completed missing field '{field}' for persona with value: {completed_persona[field]}")
|
|
|
|
return completed_persona
|
|
|
|
|
|
async def generate_persona(
    prompt_customization: Optional[str] = None,
    basic_persona: Optional[Dict[str, Any]] = None,
    temperature: float = 1.0,
    customer_data_session_id: Optional[str] = None,
    llm_model: Optional[str] = None,
    audience_brief: Optional[str] = None,
    research_objective: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate a synthetic persona using the specified LLM model.

    The prompt is assembled from the 'persona-detailed-generation' template,
    an optional customization section and an optional basic profile, then sent
    to ``LLMService.generate_structured_response``. The result is validated
    (required fields, single numeric age) and given an ``id`` if it has none.

    Args:
        prompt_customization: Optional string to customize the generation
        basic_persona: Optional dictionary containing basic persona data to start with
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        customer_data_session_id: Optional session ID for customer data context
        llm_model: Optional LLM model to use for generation
        audience_brief: Optional audience brief for research context; only used
            to build a customization when ``prompt_customization`` is empty
        research_objective: Optional research objective for research context;
            only used when ``prompt_customization`` is empty

    Returns:
        A dictionary containing the generated persona data

    Raises:
        PersonaGenerationError: If there's an issue with the AI generation or JSON parsing
    """
    try:
        from app.services.llm_usage_context import set_llm_context
        # Label this call as the "persona_generate" feature in the LLM usage context.
        set_llm_context(feature="persona_generate")

        # If audience_brief or research_objective provided but no prompt_customization,
        # generate customization so the LLM knows the research context
        if not prompt_customization and (audience_brief or research_objective):
            prompt_customization = customize_persona_prompt(
                audience_brief=audience_brief,
                research_objective=research_objective
            )

        # Load customer data context if session ID provided
        if customer_data_session_id:
            customer_data_content = customer_data_service.get_parsed_markdown_content(customer_data_session_id)
            if customer_data_content:
                customer_data_context = f"The following customer data was uploaded and should be used to inform persona creation:\n\n{customer_data_content}"
            else:
                customer_data_context = "No customer data available for this session."
        else:
            customer_data_context = "No customer data provided."

        # Load the base prompt
        try:
            final_prompt = load_prompt('persona-detailed-generation', {
                'customer_data_context': customer_data_context
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading prompt: {str(e)}") from e

        # Add customization if provided
        if prompt_customization:
            final_prompt = f"{final_prompt}\n\nAdditional customization: {prompt_customization}"

        # Add basic persona data if provided
        if basic_persona:
            basic_data_str = (
                "\nUse this basic profile as a starting point:\n"
                + json.dumps(basic_persona, indent=2)
                + "\n\nMaintain the demographic information above while expanding the persona with goals, frustrations, motivations, etc."
            )
            final_prompt = f"{final_prompt}\n{basic_data_str}"

        try:
            system_prompt = load_prompt('persona-system')
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading system prompt: {str(e)}") from e

        # Log the LLM API call
        # NOTE(review): the fallback model name is only used for this log line;
        # confirm it matches the default LLMService actually uses.
        persona_name = basic_persona.get('name', 'Unknown') if basic_persona else 'New Persona'
        print(f"🤖 Backend: Making LLM API call to {llm_model or 'gemini-3-pro-preview'} for detailed persona generation of '{persona_name}'")

        try:
            persona_data = await LLMService.generate_structured_response(
                prompt=final_prompt,
                temperature=temperature,
                system_prompt=system_prompt,
                model_name=llm_model
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service: {str(e)}") from e

        # The field checks below rely on dict semantics; reject anything else
        # explicitly rather than producing a misleading "missing fields" error.
        if not isinstance(persona_data, dict):
            raise PersonaGenerationError(
                f"Expected a persona object from the LLM service but got {type(persona_data).__name__}"
            )

        # Validate the required fields
        required_fields = ["name", "age", "gender", "occupation", "location", "techSavviness", "personality"]
        missing_fields = [field for field in required_fields if field not in persona_data]
        if missing_fields:
            raise PersonaGenerationError(f"Generated persona is missing required fields: {', '.join(missing_fields)}")

        # Validate that age is a single number, not a range
        age_value = persona_data.get("age", "")
        if isinstance(age_value, str) and "-" in age_value:
            raise PersonaGenerationError(
                f"Generated persona has an invalid age range '{age_value}'. Age must be a single specific number (e.g., '35', not '35-42')"
            )

        # Validate that age is numeric
        if not str(age_value).strip().isdigit():
            raise PersonaGenerationError(
                f"Generated persona has an invalid age '{age_value}'. Age must be a numeric value (e.g., '35')"
            )

        # Generate ID if missing
        if "id" not in persona_data:
            persona_data["id"] = f"generated-{uuid.uuid4()}"

        return persona_data

    except PersonaGenerationError:
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error generating persona: {str(e)}") from e
|
|
|
|
|
|
async def generate_persona_summary(
    persona_data: Dict[str, Any],
    temperature: float = 1.0,
    llm_model: Optional[str] = None
) -> Dict[str, Any]:
    """
    Generate a concise summary of a persona for display on persona cards.

    The persona is serialized into the 'persona-summary-generation' prompt,
    sent to ``LLMService.generate_content`` and the reply is parsed as a JSON
    object after removing markdown fences and any surrounding text.

    Args:
        persona_data: The complete persona data dictionary
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        llm_model: Optional LLM model to use for generation

    Returns:
        A dictionary containing aiSynthesizedBio, qualitativeAttributes, and topPersonalityTraits

    Raises:
        PersonaGenerationError: If there's an issue with the AI generation or JSON parsing
    """
    try:
        # Sanitize persona data for JSON serialization
        sanitized_persona_data = _sanitize_persona_data_for_json(persona_data)

        # Load and format the prompt with the persona data
        try:
            final_prompt = load_prompt('persona-summary-generation', {
                'persona_data': json.dumps(sanitized_persona_data, indent=2)
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading summary prompt: {str(e)}") from e

        try:
            system_prompt = load_prompt('persona-system')
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading system prompt: {str(e)}") from e

        # Log the LLM API call
        # NOTE(review): the fallback model name is only used for this log line;
        # confirm it matches the default LLMService actually uses.
        persona_name = persona_data.get('name', 'Unknown')
        print(f"🤖 Backend: Making LLM API call to {llm_model or 'gemini-3-pro-preview'} for summary generation of '{persona_name}'")

        try:
            raw_response = await LLMService.generate_content(
                prompt=final_prompt,
                temperature=temperature,
                system_prompt=system_prompt,
                model_name=llm_model
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service: {str(e)}") from e

        if not isinstance(raw_response, str):
            raise PersonaGenerationError(
                f"LLM service returned a non-text response ({type(raw_response).__name__})"
            )

        # Clean up the response for proper JSON parsing
        clean_response = raw_response.strip()

        # Remove markdown code blocks if present. Slicing is used instead of
        # str.strip("```json"), which strips a *set of characters* rather than
        # a prefix/suffix.
        if clean_response.startswith("```"):
            opening_fence = "```json" if clean_response.startswith("```json") else "```"
            clean_response = clean_response[len(opening_fence):]
            if clean_response.endswith("```"):
                clean_response = clean_response[:-3]
            clean_response = clean_response.strip()

        # Try to find the JSON object in the response if there's extra text
        if not clean_response.startswith("{"):
            start_idx = clean_response.find("{")
            end_idx = clean_response.rfind("}")
            if start_idx != -1 and end_idx > start_idx:
                clean_response = clean_response[start_idx:end_idx+1]

        # Parse the JSON manually
        try:
            print(f"Attempting to parse summary JSON: {clean_response[:100]}...")
            summary_data = json.loads(clean_response)
        except json.JSONDecodeError as e:
            raise PersonaGenerationError(f"Failed to parse summary JSON response: {str(e)}. Raw response: {clean_response[:200]}...") from e

        # Verify it's a dictionary with required fields
        if not isinstance(summary_data, dict):
            raise PersonaGenerationError(f"Expected a summary object but got {type(summary_data)}")

        required_fields = ["aiSynthesizedBio", "qualitativeAttributes", "topPersonalityTraits"]
        missing_fields = [field for field in required_fields if field not in summary_data]
        if missing_fields:
            raise PersonaGenerationError(f"Summary is missing required fields: {', '.join(missing_fields)}")

        # Validate field types
        if not isinstance(summary_data["aiSynthesizedBio"], str):
            raise PersonaGenerationError("aiSynthesizedBio must be a string")
        if not isinstance(summary_data["qualitativeAttributes"], list):
            raise PersonaGenerationError("qualitativeAttributes must be an array")
        if not isinstance(summary_data["topPersonalityTraits"], list):
            raise PersonaGenerationError("topPersonalityTraits must be an array")

        return summary_data

    except PersonaGenerationError:
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error generating persona summary: {str(e)}") from e
|
|
|
|
|
|
async def generate_persona_download_summary(
    persona_data: Dict[str, Any],
    temperature: float = 1.0,
    llm_model: Optional[str] = None
) -> str:
    """
    Generate a comprehensive markdown summary of a persona for download/client review.

    The persona is serialized into the 'persona-download-summary' prompt and
    sent to ``LLMService.generate_content``. A surrounding markdown code fence,
    if the model added one, is removed from the reply.

    Args:
        persona_data: The complete persona data dictionary
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)
        llm_model: Optional LLM model to use for generation

    Returns:
        A string containing the markdown-formatted persona summary

    Raises:
        PersonaGenerationError: If there's an issue with the AI generation
    """
    try:
        # Sanitize persona data for JSON serialization
        sanitized_persona_data = _sanitize_persona_data_for_json(persona_data)

        # Load and format the prompt with the persona data
        try:
            final_prompt = load_prompt('persona-download-summary', {
                'persona_data': json.dumps(sanitized_persona_data, indent=2)
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading download summary prompt: {str(e)}") from e

        try:
            system_prompt = load_prompt('persona-system')
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading system prompt: {str(e)}") from e

        # Log the LLM API call
        # NOTE(review): the fallback model name is only used for this log line;
        # confirm it matches the default LLMService actually uses.
        persona_name = persona_data.get('name', 'Unknown')
        print(f"🤖 Backend: Making LLM API call to {llm_model or 'gemini-3-pro-preview'} for download summary of '{persona_name}'")

        # Generate the markdown content directly
        try:
            markdown_response = await LLMService.generate_content(
                prompt=final_prompt,
                temperature=temperature,
                system_prompt=system_prompt,
                model_name=llm_model
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service: {str(e)}") from e

        if not isinstance(markdown_response, str):
            raise PersonaGenerationError(
                f"LLM service returned a non-text response ({type(markdown_response).__name__})"
            )

        clean_response = markdown_response.strip()

        # Remove a wrapping markdown code fence if present.
        # str.strip("```markdown") must not be used here: it removes any run of
        # the characters `, m, a, r, k, d, o, w, n from both ends, which can
        # eat the last word of the document (e.g. "...well known").
        if clean_response.startswith("```"):
            opening_fence = "```markdown" if clean_response.startswith("```markdown") else "```"
            clean_response = clean_response[len(opening_fence):]
            if clean_response.endswith("```"):
                clean_response = clean_response[:-3]
            clean_response = clean_response.strip()

        return clean_response

    except PersonaGenerationError:
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error generating persona download summary: {str(e)}") from e
|
|
|
|
|
|
def customize_persona_prompt(
    age_range: Optional[str] = None,
    gender: Optional[str] = None,
    occupation_type: Optional[str] = None,
    education_level: Optional[str] = None,
    location_type: Optional[str] = None,
    personality_traits: Optional[str] = None,
    interests: Optional[str] = None,
    audience_brief: Optional[str] = None,
    research_objective: Optional[str] = None
) -> Optional[str]:
    """
    Create a customized prompt for more specific persona generation.

    When an audience brief and/or research objective is given, the result is a
    context-driven prompt built from them, and any individual characteristics
    are appended as additional requirements. Without a brief or objective the
    result is a single line listing the individual characteristics.

    Args:
        age_range: Age range for the persona
        gender: Gender of the persona
        occupation_type: Type of occupation
        education_level: Level of education
        location_type: Geographic location
        personality_traits: Personality characteristics
        interests: Personal interests and hobbies
        audience_brief: Full audience brief providing context for persona generation
        research_objective: Research objective to focus persona goals, frustrations, and scenarios

    Returns:
        A string with customization instructions or None if no customizations provided
    """
    # Collect the individual characteristics first so that both return paths
    # can use them (previously they were only gathered after the brief-based
    # early return, which silently dropped them).
    labelled_values = [
        ("Age range", age_range),
        ("Gender", gender),
        ("Occupation type", occupation_type),
        ("Education level", education_level),
        ("Location", location_type),
        ("Personality traits", personality_traits),
        ("Interests", interests),
    ]
    customizations = [f"{label}: {value}" for label, value in labelled_values if value]

    # If an audience brief is provided, use it first as it provides the most context
    if audience_brief or research_objective:
        # NOTE(review): leading whitespace inside the original multi-line prompt
        # literals was not visible in the reviewed source; the text below keeps
        # the visible wording and line breaks.
        prompt = ""
        if audience_brief:
            prompt += f"\nAudience Brief:\n{audience_brief}\n"
        if research_objective:
            prompt += f"\nResearch Objective:\n{research_objective}\n"

        prompt += "\nBased on the above context, create a persona that would be relevant to this research."

        if research_objective:
            prompt += (
                "\n\n"
                f"CRITICAL RESEARCH ALIGNMENT: This persona MUST be designed around the research objective: '{research_objective}'.\n"
                "\n"
                "LIFE SCENARIOS REQUIREMENTS:\n"
                f"- At least 3 out of 5 scenarios MUST show this persona directly encountering, using, deciding about, or being impacted by aspects of: {research_objective}\n"
                "- Each research-aligned scenario must be a specific, realistic situation showing their authentic relationship with this topic\n"
                f"- Show varied contexts: work situations, personal decisions, social interactions, consumer experiences - all demonstrating how '{research_objective}' appears in their real life\n"
                "- Scenarios should reveal the persona's thoughts, feelings, and behaviors when dealing with this research topic\n"
                "- Include both positive and challenging experiences related to the research focus\n"
                "- Make scenarios concrete and specific to this research objective, not generic situations"
            )

        if customizations:
            prompt += f"\nAdditionally, ensure the persona meets these specific requirements: {'; '.join(customizations)}"

        return prompt

    # Otherwise, use the individual parameters
    if not customizations:
        return None

    return "Create a persona with these characteristics: " + "; ".join(customizations)
|
|
|
|
|
|
async def enhance_audience_brief(
    audience_brief: str,
    research_objective: str,
    temperature: float = 1.0
) -> Dict[str, Any]:
    """
    Enhance audience brief and research objective with AI-generated improvements.

    The 'audience-brief-enhancement' prompt is loaded with both inputs and sent
    to LLMService.generate_content. The reply is then cleaned (Markdown code
    fence and any text around the outermost {...} removed), parsed as JSON and
    validated before being returned.

    Args:
        audience_brief: The audience brief to enhance
        research_objective: The research objective to enhance
        temperature: Controls randomness in generation (0.0 = deterministic, 1.0 = creative)

    Returns:
        A dictionary with:
        - 'enhanced_audience_brief': The enhanced audience brief text
        - 'enhanced_research_objective': The enhanced research objective text
        - 'assumptions': List of assumptions/additions made (each coerced to str)

    Raises:
        PersonaGenerationError: If the prompt cannot be loaded, the LLM call
            fails, the reply is not a valid JSON object with the required
            keys/types, or either enhanced text is blank. Any other unexpected
            exception is wrapped in PersonaGenerationError as well.
    """
    try:
        # Load and format the prompt with both fields
        try:
            final_prompt = load_prompt('audience-brief-enhancement', {
                'audience_brief': audience_brief,
                'research_objective': research_objective
            })
        except PromptLoaderError as e:
            raise PersonaGenerationError(f"Error loading enhancement prompt: {str(e)}") from e

        # Generate enhanced content using the LLM service
        try:
            raw_response = await LLMService.generate_content(
                prompt=final_prompt,
                temperature=temperature
            )
        except LLMServiceError as e:
            raise PersonaGenerationError(f"Error from LLM service: {str(e)}") from e

        # Clean up the response for proper JSON parsing
        clean_response = _extract_json_object_text(
            _strip_markdown_code_fence(raw_response)
        )

        # Parse the JSON response
        try:
            enhancement_result = json.loads(clean_response)
        except json.JSONDecodeError as e:
            raise PersonaGenerationError(
                f"Failed to parse JSON response: {str(e)}. Raw response: {clean_response[:200]}..."
            ) from e

        _validate_enhancement_result(enhancement_result)

        # Convert any non-string assumptions to strings
        enhancement_result['assumptions'] = [
            item if isinstance(item, str) else str(item)
            for item in enhancement_result['assumptions']
        ]

        # Validate we got meaningful content
        if not enhancement_result['enhanced_audience_brief'].strip():
            raise PersonaGenerationError("Enhanced audience brief is empty")
        if not enhancement_result['enhanced_research_objective'].strip():
            raise PersonaGenerationError("Enhanced research objective is empty")

        return enhancement_result

    except PersonaGenerationError:
        # Already in the domain exception type; propagate unchanged.
        raise
    except Exception as e:
        raise PersonaGenerationError(f"Error enhancing audience brief: {str(e)}") from e


def _strip_markdown_code_fence(text: str) -> str:
    """Return *text* without one surrounding Markdown code fence (``` or ```json)."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned

    # Remove the fence and the optional "json" tag as exact prefixes.
    # str.strip("```json") treats its argument as a set of characters
    # (`, j, s, o, n) and would also eat matching characters at the far end.
    cleaned = cleaned[3:]
    if cleaned.startswith("json"):
        cleaned = cleaned[4:]
    cleaned = cleaned.strip()
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def _extract_json_object_text(text: str) -> str:
    """Return the span from the first '{' to the last '}' in *text*, or *text* unchanged."""
    # Applied unconditionally so that prose after the object is dropped too,
    # not only prose before it.
    start_idx = text.find("{")
    end_idx = text.rfind("}")
    if start_idx != -1 and end_idx > start_idx:
        return text[start_idx:end_idx + 1]
    return text


def _validate_enhancement_result(enhancement_result: Any) -> None:
    """Raise PersonaGenerationError unless the parsed reply has the expected shape."""
    # Verify it's an object
    if not isinstance(enhancement_result, dict):
        raise PersonaGenerationError(f"Expected a JSON object but got {type(enhancement_result)}")

    # Verify required keys exist
    required_keys = ['enhanced_audience_brief', 'enhanced_research_objective', 'assumptions']
    for key in required_keys:
        if key not in enhancement_result:
            raise PersonaGenerationError(f"Response missing required key: '{key}'")

    # Verify enhanced texts are strings
    if not isinstance(enhancement_result['enhanced_audience_brief'], str):
        raise PersonaGenerationError("enhanced_audience_brief must be a string")
    if not isinstance(enhancement_result['enhanced_research_objective'], str):
        raise PersonaGenerationError("enhanced_research_objective must be a string")

    # Verify assumptions is a list
    if not isinstance(enhancement_result['assumptions'], list):
        raise PersonaGenerationError("assumptions must be an array")