751 lines
No EOL
34 KiB
Python
Executable file
751 lines
No EOL
34 KiB
Python
Executable file
"""
|
|
Bulk Persona Export Service
|
|
|
|
Handles bulk export of persona profiles to various formats (markdown, JSON, CSV)
|
|
with real-time progress tracking via WebSocket events.
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import logging
|
|
import zipfile
|
|
import tempfile
|
|
import uuid
|
|
import asyncio
|
|
from typing import Dict, List, Any, Optional, Tuple
|
|
from datetime import datetime
|
|
# Removed PersonaExportService dependency - using direct conversion
|
|
from app.models.persona import Persona
|
|
from app.websocket_manager_async import get_async_websocket_manager
|
|
from app.services.task_manager import CancellableTask
|
|
|
|
# Configure logging.
# NOTE(review): logging.basicConfig() runs when this module is imported and
# configures the *root* logger (it does nothing if the root logger already
# has handlers). Applications normally do this once at start-up rather than
# inside a service module -- confirm this import-time side effect is intended.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every method of the export service below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class BulkPersonaExportService:
    """Service for bulk exporting persona profiles with progress tracking.

    Personas are rendered to markdown, JSON or CSV (one file per persona),
    bundled into a ZIP archive inside a per-export temporary directory, and
    progress/status events are pushed to the requesting user through the
    async WebSocket manager.
    """

    # (persona field, table label) pairs for the "Demographics" table.
    _DEMOGRAPHIC_FIELDS = (
        ('age', 'Age'), ('gender', 'Gender'), ('occupation', 'Occupation'),
        ('education', 'Education'), ('location', 'Location'), ('ethnicity', 'Ethnicity'),
        ('householdIncome', 'Household Income'), ('householdComposition', 'Household Composition'),
        ('socialGrade', 'Social Grade'),
    )

    # (persona field, table label) pairs for the behavioural table.
    _BEHAVIORAL_FIELDS = (
        ('techSavviness', 'Tech Savviness'), ('personality', 'Personality'),
        ('brandLoyalty', 'Brand Loyalty'), ('priceConsciousness', 'Price Consciousness'),
        ('environmentalConcern', 'Environmental Concern'), ('interests', 'Interests'),
        ('shoppingHabits', 'Shopping Habits'), ('mediaConsumption', 'Media Consumption'),
        ('deviceUsage', 'Device Usage'), ('brandPreferences', 'Brand Preferences'),
        ('hasPurchasingPower', 'Has Purchasing Power'), ('hasChildren', 'Has Children'),
    )

    # (section title, persona field) pairs rendered as bullet lists.
    _BULLET_SECTIONS = (
        ("Goals", "goals"), ("Motivations", "motivations"),
        ("Frustrations", "frustrations"), ("Fears", "fears"),
        ("Scenarios", "scenarios"),
    )

    # (section title, persona field) pairs rendered as one comma-separated line.
    _INLINE_LIST_SECTIONS = (
        ("Top Personality Traits", "topPersonalityTraits"),
        ("Key Qualitative Attributes", "qualitativeAttributes"),
    )

    # (persona field, table label) pairs for the lifestyle table.
    _LIFESTYLE_FIELDS = (
        ('coreValues', 'Core Values'), ('lifestyleChoices', 'Lifestyle Choices'),
        ('socialActivities', 'Social Activities'), ('categoryKnowledge', 'Category Knowledge'),
        ('paymentMethods', 'Payment Methods'), ('purchaseBehaviour', 'Purchase Behavior'),
        ('decisionInfluences', 'Decision Influences'), ('painPoints', 'Pain Points'),
        ('journeyContext', 'Journey Context'),
    )

    # (persona field, table label) pairs for the "Generation Context" table.
    _CONTEXT_FIELDS = (
        ('audience_brief', 'Audience Brief'),
        ('research_objective', 'Research Objective'),
    )

    # (persona field, table label) pairs for the "Metadata" table.
    _METADATA_FIELDS = (
        ('created_at', 'Created'),
        ('updated_at', 'Last Updated'),
        ('created_by', 'Created By'),
    )

    # Display names for the Big Five trait keys.
    _OCEAN_LABELS = {
        'openness': 'Openness to Experience',
        'conscientiousness': 'Conscientiousness',
        'extraversion': 'Extraversion',
        'agreeableness': 'Agreeableness',
        'neuroticism': 'Neuroticism',
    }

    # Fields that have a dedicated section (or are deliberately hidden, e.g.
    # '_id'); anything else ends up in the "Additional Data" table.
    _PROCESSED_FIELDS = frozenset({
        'name', 'aiSynthesizedBio', 'age', 'gender', 'occupation', 'education', 'location',
        'ethnicity', 'householdIncome', 'householdComposition', 'socialGrade', 'techSavviness',
        'personality', 'brandLoyalty', 'priceConsciousness', 'environmentalConcern', 'interests',
        'shoppingHabits', 'mediaConsumption', 'deviceUsage', 'brandPreferences', 'hasPurchasingPower',
        'hasChildren', 'goals', 'motivations', 'frustrations', 'fears', 'scenarios', 'thinkFeelDo',
        'oceanTraits', 'topPersonalityTraits', 'qualitativeAttributes', 'coreValues', 'lifestyleChoices',
        'socialActivities', 'categoryKnowledge', 'paymentMethods', 'purchaseBehaviour', 'decisionInfluences',
        'painPoints', 'journeyContext', 'audience_brief', 'research_objective', '_id', 'created_at',
        'created_by', 'updated_at', 'folder_ids',
    })

    def __init__(self):
        """Initialize the bulk persona export service."""
        self.websocket_manager = get_async_websocket_manager()

    # ------------------------------------------------------------------
    # File-system helpers
    # ------------------------------------------------------------------

    def _create_temp_directory(self) -> str:
        """Create and return a unique directory for one export run.

        The directory is ``<two levels above this file>/temp/export_<uuid4>``;
        missing parent directories are created as needed.
        """
        base_dir = os.path.join(os.path.dirname(__file__), "..", "..", "temp")
        export_dir = os.path.join(base_dir, f"export_{uuid.uuid4()}")
        os.makedirs(export_dir, exist_ok=True)
        return export_dir

    @staticmethod
    def _cleanup_export_dir(export_dir: Optional[str]) -> None:
        """Remove an export directory (best effort); a no-op for ``None``/missing paths."""
        if export_dir and os.path.exists(export_dir):
            import shutil
            shutil.rmtree(export_dir, ignore_errors=True)

    def _sanitize_filename(self, filename: str) -> str:
        """Return ``filename`` made safe for use as a file name.

        Characters that are invalid on common file systems and control
        characters are replaced by ``_``, the result is limited to 200
        characters and stripped of surrounding whitespace. ``None``, empty or
        whitespace-only input yields ``"persona"``.
        """
        if filename is None:
            return "persona"

        invalid_chars = '<>:"/\\|?*'
        cleaned = "".join("_" if ch in invalid_chars else ch for ch in str(filename))

        # Limit length first, then drop surrounding whitespace.
        cleaned = cleaned[:200].strip()

        # Remaining (interior) control characters such as newlines or NUL
        # would break archive entries on extraction.
        cleaned = "".join("_" if ord(ch) < 32 else ch for ch in cleaned)

        return cleaned or "persona"

    @staticmethod
    def _unique_filename(base: str, extension: str, used_names: set) -> str:
        """Return ``base + extension``, suffixed with ``_2``, ``_3``... if already used.

        Names are compared case-insensitively because the archive may be
        extracted on a case-insensitive file system. The chosen name is
        recorded in ``used_names``.
        """
        candidate = f"{base}{extension}"
        counter = 2
        while candidate.lower() in used_names:
            candidate = f"{base}_{counter}{extension}"
            counter += 1
        used_names.add(candidate.lower())
        return candidate

    # ------------------------------------------------------------------
    # WebSocket helpers
    # ------------------------------------------------------------------

    async def _emit_progress(self, user_id: str, task_id: str, progress: int,
                             current_item: str, completed_count: int, total_count: int,
                             current_persona_name: Optional[str] = None):
        """Emit a ``bulk_export_progress`` event to ``user_id``.

        Failures are logged and swallowed: a progress notification must never
        abort the export itself.
        """
        try:
            if self.websocket_manager:
                await self.websocket_manager.emit_to_user(
                    user_id,
                    'bulk_export_progress',
                    {
                        'task_id': task_id,
                        'task_type': 'bulk_persona_export',
                        'progress': progress,
                        'current_item': current_item,
                        'completed_count': completed_count,
                        'total_count': total_count,
                        'current_persona_name': current_persona_name
                    }
                )
                logger.debug(f"Emitted progress: {progress}% - {current_item}")
            else:
                logger.warning("WebSocket manager not available for progress updates")
        except Exception as e:
            logger.error(f"Failed to emit progress update: {e}")

    async def _emit_event(self, user_id: str, event: str, payload: Dict[str, Any]) -> None:
        """Emit a task status event (best effort, errors are logged only).

        Keeping this best-effort means a broken socket cannot turn a finished
        export into a reported failure or mask the original error.
        """
        try:
            if self.websocket_manager:
                await self.websocket_manager.emit_to_user(user_id, event, payload)
        except Exception as e:
            logger.error(f"Failed to emit {event} event: {e}")

    # ------------------------------------------------------------------
    # Markdown rendering
    # ------------------------------------------------------------------

    def _create_markdown_table(self, data: List[tuple], headers: Optional[List[str]] = None) -> List[str]:
        """Create a markdown table from data tuples.

        Args:
            data: Rows of cell values. ``None`` becomes an empty cell and
                lists/tuples are joined with ``", "``.
            headers: Column titles; defaults to ``["Field", "Value"]``.

        Returns:
            The table as a list of lines (header, separator, rows), or an
            empty list when ``data`` is empty.
        """
        if not data:
            return []

        if not headers:
            headers = ["Field", "Value"]

        def format_cell(value: Any) -> str:
            if value is None:
                return ""
            if isinstance(value, (list, tuple)):
                # Avoid leaking Python repr ("['a', 'b']") into the document.
                text = ", ".join(str(item) for item in value if item is not None)
            else:
                text = str(value)
            # Pipes would end the cell; newlines would end the row.
            text = text.replace("|", "\\|")
            return text.replace("\n", " ").replace("\r", " ")

        table_lines = [
            "| " + " | ".join(headers) + " |",
            "|" + "|".join("-" * (len(h) + 2) for h in headers) + "|",
        ]

        column_count = len(headers)
        for row in data:
            cells = [format_cell(value) for value in row]
            # Pad short rows so every row has one cell per header.
            cells.extend([""] * (column_count - len(cells)))
            table_lines.append("| " + " | ".join(cells) + " |")

        return table_lines

    def _append_table_section(self, parts: List[str], title: str, rows: List[tuple],
                              headers: List[str], trailing_blank: bool = True) -> None:
        """Append ``## title`` plus a table to ``parts``; do nothing when ``rows`` is empty."""
        if not rows:
            return
        parts.append(f"## {title}")
        parts.extend(self._create_markdown_table(rows, headers))
        if trailing_blank:
            parts.append("")

    @staticmethod
    def _truthy_field_rows(persona_data: Dict[str, Any], fields) -> List[tuple]:
        """Return ``(label, value)`` rows for every listed field with a truthy value."""
        return [(label, persona_data[field]) for field, label in fields if persona_data.get(field)]

    def _format_ocean_row(self, trait: Any, score: Any) -> tuple:
        """Return a ``(trait name, percentage, level)`` row for one OCEAN score.

        Numeric scores up to 1.0 are treated as fractions, larger ones as
        percentages. Levels: below 30% "Low", below 70% "Moderate", else
        "High". Non-numeric scores are shown verbatim with level "N/A".
        """
        trait_name = self._OCEAN_LABELS.get(trait, str(trait).title())
        if not isinstance(score, (int, float)):
            return (trait_name, f"{score}%", "N/A")

        value = float(score)
        if value <= 1.0:
            percent, normalized = round(value * 100), value
        else:
            percent, normalized = round(value), value / 100

        if normalized < 0.3:
            level = "Low"
        elif normalized < 0.7:
            level = "Moderate"
        else:
            level = "High"
        return (trait_name, f"{percent}%", level)

    def _create_comprehensive_markdown(self, persona_data: Dict[str, Any]) -> str:
        """Create comprehensive markdown from persona data with all fields included.

        Sections are emitted in a fixed order and only when they have content.
        Unexpected value types (non-string list items, non-dict
        ``thinkFeelDo``/``oceanTraits``) are coerced or skipped instead of
        aborting the whole profile. If rendering still fails, a short error
        document containing the raw data as JSON is returned.
        """
        try:
            name = persona_data.get('name', 'Unknown Persona')
            parts = [f"# {name} - Complete Persona Profile\n"]

            # AI-generated summary goes first as an overview.
            bio = persona_data.get('aiSynthesizedBio')
            if bio:
                parts.extend(["## Overview", str(bio), ""])

            self._append_table_section(
                parts, "Demographics",
                self._truthy_field_rows(persona_data, self._DEMOGRAPHIC_FIELDS),
                ["Attribute", "Value"],
            )

            # Behavioural fields keep falsy values (e.g. False -> "No"); only None is skipped.
            behavioral_rows = []
            for field, label in self._BEHAVIORAL_FIELDS:
                value = persona_data.get(field)
                if value is None:
                    continue
                if isinstance(value, bool):
                    value = "Yes" if value else "No"
                behavioral_rows.append((label, value))
            self._append_table_section(
                parts, "Behavioral Profile & Preferences", behavioral_rows, ["Attribute", "Value"]
            )

            # Goals, motivations, frustrations, fears, scenarios as bullet lists.
            for title, field in self._BULLET_SECTIONS:
                items = persona_data.get(field)
                if not isinstance(items, list):
                    continue
                bullets = [f"- {item}" for item in items if item and str(item).strip()]
                if bullets:
                    parts.append(f"## {title}")
                    parts.extend(bullets)
                    parts.append("")

            # Think / Feel / Do as a three-column table, one row per list index.
            tfd = persona_data.get('thinkFeelDo')
            if isinstance(tfd, dict):
                columns = [
                    tfd.get(key) if isinstance(tfd.get(key), list) else []
                    for key in ('thinks', 'feels', 'does')
                ]
                tfd_rows = []
                for i in range(max(len(column) for column in columns)):
                    row = tuple(column[i] if i < len(column) else "" for column in columns)
                    if any(row):  # skip rows where every cell is empty
                        tfd_rows.append(row)
                self._append_table_section(
                    parts, "Psychological Profile - Think, Feel, Do", tfd_rows,
                    ["Thinks", "Feels", "Does"],
                )

            # OCEAN (Big Five) personality traits.
            ocean = persona_data.get('oceanTraits')
            if isinstance(ocean, dict):
                ocean_rows = [
                    self._format_ocean_row(trait, score)
                    for trait, score in ocean.items()
                    if score is not None
                ]
                self._append_table_section(
                    parts, "OCEAN Personality Traits (Big Five)", ocean_rows,
                    ["Trait", "Score", "Level"],
                )

            # Short lists rendered on a single comma-separated line.
            for title, field in self._INLINE_LIST_SECTIONS:
                values = persona_data.get(field)
                if not isinstance(values, list):
                    continue
                joined = ", ".join(str(value) for value in values if value)
                if joined:
                    parts.extend([f"## {title}", joined, ""])

            self._append_table_section(
                parts, "Lifestyle & Consumer Behavior",
                self._truthy_field_rows(persona_data, self._LIFESTYLE_FIELDS),
                ["Attribute", "Value"],
            )

            self._append_table_section(
                parts, "Generation Context",
                self._truthy_field_rows(persona_data, self._CONTEXT_FIELDS),
                ["Type", "Description"],
            )

            # Catch-all for fields without a dedicated section.
            additional_rows = []
            for key, value in persona_data.items():
                if key in self._PROCESSED_FIELDS or value is None:
                    continue
                if isinstance(value, list):
                    text = ", ".join(str(v) for v in value if v)
                elif isinstance(value, dict):
                    text = ", ".join(f"{k}: {v}" for k, v in value.items() if v is not None)
                else:
                    text = str(value) if str(value).strip() else ""
                if text:
                    additional_rows.append((str(key).replace('_', ' ').title(), text))
            self._append_table_section(
                parts, "Additional Data", additional_rows, ["Attribute", "Value"]
            )

            metadata_rows = self._truthy_field_rows(persona_data, self._METADATA_FIELDS)
            folder_ids = persona_data.get('folder_ids')
            if folder_ids:
                metadata_rows.append(("Folder Assignments", f"{len(folder_ids)} folder(s)"))
            # Last section: no trailing blank line.
            self._append_table_section(
                parts, "Metadata", metadata_rows, ["Field", "Value"], trailing_blank=False
            )

            return "\n".join(parts)

        except Exception as e:
            logger.exception(f"Failed to create comprehensive markdown: {e}")
            return f"# {persona_data.get('name', 'Unknown Persona')}\n\nError generating profile.\n\n```json\n{json.dumps(persona_data, indent=2, default=str)}\n```"

    # ------------------------------------------------------------------
    # JSON / CSV rendering
    # ------------------------------------------------------------------

    @staticmethod
    def _render_json(persona_data: Dict[str, Any]) -> str:
        """Serialize one persona as pretty-printed JSON (non-ASCII kept as-is)."""
        return json.dumps(persona_data, indent=2, ensure_ascii=False, default=str)

    @staticmethod
    def _csv_cell(value: Any) -> Any:
        """Convert one value to something the csv writer renders sensibly.

        ``None`` and empty lists become ``""``; lists whose first element is a
        string are joined with ``"; "``; other lists and dicts are JSON
        encoded; scalars are returned unchanged.
        """
        if value is None:
            return ""
        if isinstance(value, list):
            if not value:
                return ""
            if isinstance(value[0], str):
                return "; ".join(str(item) for item in value if item)
            return json.dumps(value, default=str)
        if isinstance(value, dict):
            return json.dumps(value, default=str)
        return value

    @staticmethod
    def _flatten_for_csv(persona_data: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten a persona into a single CSV row.

        Top-level dicts are expanded one level, e.g.
        ``{"oceanTraits": {"openness": 0.7}}`` -> ``{"oceanTraits_openness": 0.7}``;
        every resulting value goes through ``_csv_cell``.
        """
        flat: Dict[str, Any] = {}
        for key, value in persona_data.items():
            if isinstance(value, dict):
                for subkey, subvalue in value.items():
                    flat[f"{key}_{subkey}"] = BulkPersonaExportService._csv_cell(subvalue)
            else:
                flat[key] = BulkPersonaExportService._csv_cell(value)
        return flat

    def _render_csv(self, persona_data: Dict[str, Any]) -> str:
        """Render one persona as a two-line CSV (header + values); ``""`` if it has no fields."""
        import csv
        import io

        flat_persona = self._flatten_for_csv(persona_data)
        if not flat_persona:
            return ""

        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=list(flat_persona))
        writer.writeheader()
        writer.writerow(flat_persona)
        return output.getvalue()

    # ------------------------------------------------------------------
    # Export entry point
    # ------------------------------------------------------------------

    async def export_personas_bulk(
        self,
        persona_ids: List[str],
        user_id: str,
        export_format: str = 'markdown'
    ) -> Tuple[bool, str, Optional[str]]:
        """
        Export multiple personas to specified format with progress tracking.

        Args:
            persona_ids: List of persona IDs to export
            user_id: ID of user requesting export
            export_format: Format for export ('markdown', 'json', 'csv')

        Returns:
            Tuple of (success, file_path_or_error_message, task_id)

        The temporary export directory is removed on every failure path
        (nothing to export, exporter failure, cancellation, unexpected error).
        Cancellation is reported through the return value and a
        ``task_cancelled`` event rather than re-raised.
        """
        task_id = str(uuid.uuid4())
        export_dir = None
        exporters = {
            'markdown': self._export_as_markdown_zip,
            'json': self._export_as_json_zip,
            'csv': self._export_as_csv_zip,
        }

        try:
            async with CancellableTask("bulk_persona_export", user_id, {"export_format": export_format}) as registered_task_id:
                task_id = registered_task_id or task_id

                # Reject unknown formats before doing any work or touching disk.
                exporter = exporters.get(export_format)
                if exporter is None:
                    return False, f"Unsupported export format: {export_format}", task_id

                total_requested = len(persona_ids)
                logger.info(f"Starting bulk export for {total_requested} personas (user: {user_id}, format: {export_format})")

                export_dir = self._create_temp_directory()

                await self._emit_progress(
                    user_id, task_id, 0,
                    "Initializing export...", 0, total_requested
                )
                await self._emit_progress(
                    user_id, task_id, 5,
                    "Fetching persona data...", 0, total_requested
                )

                # NOTE(review): the lookup is by ID only; this method does not
                # check that the personas belong to user_id -- presumably the
                # caller enforces access control, verify.
                personas = []
                for persona_id in persona_ids:
                    persona = await Persona.find_by_id(persona_id)
                    if persona:
                        personas.append(persona)
                    else:
                        logger.warning(f"Persona not found: {persona_id}")

                if not personas:
                    await self._emit_progress(
                        user_id, task_id, 100,
                        "No valid personas found", 0, total_requested
                    )
                    self._cleanup_export_dir(export_dir)
                    return False, "No valid personas found for export", task_id

                success, result, task_id = await exporter(personas, user_id, task_id, export_dir)
                if not success:
                    # The exporter only returns the error; release the disk
                    # space and tell the client the task is over.
                    self._cleanup_export_dir(export_dir)
                    await self._emit_event(
                        user_id, 'task_failed',
                        {'task_id': task_id, 'message': result}
                    )
                return success, result, task_id

        except asyncio.CancelledError:
            logger.info(f"Bulk export cancelled by user: {user_id}")
            self._cleanup_export_dir(export_dir)
            await self._emit_event(
                user_id, 'task_cancelled',
                {'task_id': task_id, 'message': 'Export cancelled successfully'}
            )
            return False, "Export cancelled by user", task_id

        except Exception as e:
            logger.error(f"Bulk export error: {e}")
            self._cleanup_export_dir(export_dir)
            await self._emit_event(
                user_id, 'task_failed',
                {'task_id': task_id, 'message': f'Export failed: {str(e)}'}
            )
            return False, f"Export failed: {str(e)}", task_id

    # ------------------------------------------------------------------
    # ZIP exporters
    # ------------------------------------------------------------------

    async def _export_as_zip(
        self,
        personas: List[Dict[str, Any]],
        user_id: str,
        task_id: str,
        export_dir: str,
        *,
        zip_prefix: str,
        extension: str,
        render,
        item_label: str,
        error_label: str
    ) -> Tuple[bool, str, str]:
        """Write one rendered file per persona into a ZIP archive.

        Args:
            personas: Persona records (dict-like) to export.
            user_id: Recipient of progress/completion events.
            task_id: Task identifier included in every event.
            export_dir: Directory in which the archive is created.
            zip_prefix: Archive name prefix (a UTC timestamp is appended).
            extension: File extension of the archive members, e.g. ``".md"``.
            render: Callable taking the serializable persona dict and
                returning the file content as ``str``.
            item_label: Noun used in the completion message.
            error_label: Format name used in log and error messages.

        Returns:
            ``(True, zip_path, task_id)`` on success, otherwise
            ``(False, error_message, task_id)``. Cancellation is not caught
            here and propagates to the caller.
        """
        try:
            from datetime import timezone
            # Imported lazily, as the original code did -- presumably to avoid
            # a circular import with the routes module (TODO confirm).
            from app.routes.personas import make_serializable

            timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
            zip_path = os.path.join(export_dir, f"{zip_prefix}_{timestamp}.zip")
            total_personas = len(personas)
            used_names: set = set()

            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for i, persona in enumerate(personas):
                    # Explicit cancellation point: a pending Task.cancel() is
                    # delivered here as CancelledError. (Task.cancelled() is
                    # never true from inside the running task.)
                    await asyncio.sleep(0)

                    # Also covers a 'name' key that is present but None/empty.
                    persona_name = persona.get('name') or f'Persona_{i+1}'

                    progress = int(10 + (i / total_personas) * 80)  # 10-90%
                    await self._emit_progress(
                        user_id, task_id, progress,
                        f"Processing persona {i+1} of {total_personas}",
                        i, total_personas, persona_name
                    )

                    content = render(make_serializable(persona))

                    # Same-named personas must not overwrite each other.
                    filename = self._unique_filename(
                        self._sanitize_filename(persona_name), extension, used_names
                    )
                    zipf.writestr(filename, content.encode('utf-8'))
                    logger.info(f"Added {filename} to ZIP ({len(content)} chars)")

                await self._emit_progress(
                    user_id, task_id, 95,
                    "Finalizing ZIP file...", total_personas, total_personas
                )

            if not os.path.exists(zip_path):
                return False, "Failed to create ZIP file", task_id

            file_size = os.path.getsize(zip_path)
            logger.info(f"Created {error_label} ZIP file: {zip_path} ({file_size} bytes)")

            await self._emit_progress(
                user_id, task_id, 100,
                f"Export completed! {total_personas} personas exported.",
                total_personas, total_personas
            )
            await self._emit_event(
                user_id,
                'task_completed',
                {
                    'task_id': task_id,
                    'message': f'Successfully exported {total_personas} {item_label}',
                    'file_path': zip_path,
                    'file_size': file_size
                }
            )

            return True, zip_path, task_id

        except Exception as e:
            logger.error(f"{error_label} ZIP export error: {e}")
            return False, f"{error_label} export failed: {str(e)}", task_id

    async def _export_as_markdown_zip(
        self,
        personas: List[Dict[str, Any]],
        user_id: str,
        task_id: str,
        export_dir: str
    ) -> Tuple[bool, str, str]:
        """Export personas as markdown files in a ZIP archive."""
        return await self._export_as_zip(
            personas, user_id, task_id, export_dir,
            zip_prefix="persona_profiles",
            extension=".md",
            render=self._create_comprehensive_markdown,
            item_label="persona profiles",
            error_label="Markdown",
        )

    async def _export_as_json_zip(
        self,
        personas: List[Dict[str, Any]],
        user_id: str,
        task_id: str,
        export_dir: str
    ) -> Tuple[bool, str, str]:
        """Export personas as JSON files in a ZIP archive."""
        return await self._export_as_zip(
            personas, user_id, task_id, export_dir,
            zip_prefix="persona_data",
            extension=".json",
            render=self._render_json,
            item_label="persona JSON files",
            error_label="JSON",
        )

    async def _export_as_csv_zip(
        self,
        personas: List[Dict[str, Any]],
        user_id: str,
        task_id: str,
        export_dir: str
    ) -> Tuple[bool, str, str]:
        """Export personas as individual CSV files in a ZIP archive."""
        return await self._export_as_zip(
            personas, user_id, task_id, export_dir,
            zip_prefix="persona_csvs",
            extension=".csv",
            render=self._render_csv,
            item_label="persona CSV files",
            error_label="CSV",
        )