semblance-dev/backend/app/utils/discussion_guide_schema.py
2025-12-19 19:26:16 +00:00

461 lines
No EOL
18 KiB
Python
Executable file

"""
Discussion Guide JSON Schema Validation
Provides schema validation and utilities for structured discussion guides.
"""
from typing import Dict, List, Any, Optional, Union
from dataclasses import dataclass
import json
@dataclass
class DiscussionGuideActivity:
    """One activity entry inside a discussion guide section or subsection.

    Attributes:
        id: Identifier of the activity.
        type: Kind of activity, e.g. ``moderator_statement``,
            ``open_question``, ``probe_question`` or ``activity``.
        content: Text of the activity.
        time_limit: Optional time limit for the activity
            (unit is not defined here -- presumably minutes; confirm
            against the producer of the JSON).
        metadata: Optional free-form dictionary of extra data.
    """

    id: str
    type: str
    content: str
    time_limit: Optional[int] = None
    metadata: Optional[Dict[str, Any]] = None
@dataclass
class DiscussionGuideQuestion:
    """One question entry inside a discussion guide section or subsection.

    Attributes:
        id: Identifier of the question.
        type: Kind of question, e.g. ``open_question``, ``probe_question``
            or ``follow_up``.
        content: Text of the question.
        time_limit: Optional time limit for the question
            (unit is not defined here -- presumably minutes; confirm
            against the producer of the JSON).
        probes: Optional list of probe strings attached to the question.
        metadata: Optional free-form dictionary of extra data.
    """

    id: str
    type: str
    content: str
    time_limit: Optional[int] = None
    probes: Optional[List[str]] = None
    metadata: Optional[Dict[str, Any]] = None
@dataclass
class DiscussionGuideSubsection:
    """A subsection nested inside a main discussion guide section.

    Attributes:
        id: Identifier of the subsection.
        title: Title of the subsection.
        duration: Duration allotted to the subsection.
        questions: Questions belonging to the subsection (required field).
        activities: Optional activities belonging to the subsection.
        metadata: Optional free-form dictionary of extra data.
    """

    id: str
    title: str
    duration: int
    questions: List[DiscussionGuideQuestion]
    activities: Optional[List[DiscussionGuideActivity]] = None
    metadata: Optional[Dict[str, Any]] = None
@dataclass
class DiscussionGuideSection:
    """A top-level section of a discussion guide.

    Attributes:
        id: Identifier of the section.
        title: Title of the section.
        duration: Duration allotted to the section.
        type: Kind of section, e.g. ``introduction``, ``warmup``,
            ``main_content`` or ``conclusion``.
        content: Optional free text attached to the section.
        questions: Optional questions asked directly in the section.
        activities: Optional activities run directly in the section.
        subsections: Optional nested subsections.
        metadata: Optional free-form dictionary of extra data.
    """

    id: str
    title: str
    duration: int
    type: str
    content: Optional[str] = None
    questions: Optional[List[DiscussionGuideQuestion]] = None
    activities: Optional[List[DiscussionGuideActivity]] = None
    subsections: Optional[List[DiscussionGuideSubsection]] = None
    metadata: Optional[Dict[str, Any]] = None
@dataclass
class StructuredDiscussionGuide:
    """A complete discussion guide: a title, a total duration and sections.

    Attributes:
        title: Title of the guide.
        total_duration: Duration of the whole guide.
        sections: Ordered top-level sections of the guide.
        metadata: Optional free-form dictionary of extra data.
    """

    title: str
    total_duration: int
    sections: List[DiscussionGuideSection]
    metadata: Optional[Dict[str, Any]] = None
class DiscussionGuideValidator:
    """Validates and processes discussion guide JSON structures.

    Every method is a ``@staticmethod``; the class only groups the helpers.
    Validation never raises on malformed input: problems are collected and
    returned as a list of human-readable error strings.
    """

    # Top-level keys every guide must contain.
    _REQUIRED_GUIDE_FIELDS = ('title', 'total_duration', 'sections')
    # Values accepted for a section's ``type`` field.
    _VALID_SECTION_TYPES = (
        'introduction', 'warmup', 'main_content', 'conclusion', 'activity', 'break',
    )
    # Largest accepted gap (in minutes) between ``total_duration`` and the
    # sum of the section durations.
    _DURATION_TOLERANCE = 5

    @staticmethod
    def create_visual_asset_metadata(filename: str, display_reference: str) -> Dict[str, Any]:
        """
        Create visual asset metadata for questions/activities.

        Args:
            filename: The system filename (e.g., 'fg-123-abc.jpg')
            display_reference: User-friendly reference (e.g., 'Asset 1' or custom name)

        Returns:
            Visual asset metadata dictionary of the form
            ``{"visual_asset": {"filename": ..., "display_reference": ...}}``.
        """
        return {
            "visual_asset": {
                "filename": filename,
                "display_reference": display_reference,
            }
        }

    @staticmethod
    def generate_display_reference(assets: List[Dict[str, Any]], asset_index: int) -> str:
        """
        Generate a display reference for an asset based on user assignment or default numbering.

        Args:
            assets: List of asset metadata objects
            asset_index: Index of the current asset

        Returns:
            The asset's non-empty ``user_assigned_name`` if it has one,
            otherwise ``"Asset <asset_index + 1>"``.

        Raises:
            IndexError: If ``asset_index`` is outside ``assets``.
        """
        asset = assets[asset_index]
        user_assigned_name = asset.get("user_assigned_name")
        if user_assigned_name:
            return user_assigned_name
        return f"Asset {asset_index + 1}"

    @staticmethod
    def validate_json_structure(guide_json: Dict[str, Any]) -> tuple[bool, List[str]]:
        """
        Validate a discussion guide JSON structure.

        Checks the required top-level fields, validates every section, and
        verifies that ``total_duration`` matches the sum of the section
        durations within a 5 minute tolerance.

        Args:
            guide_json: The JSON structure to validate

        Returns:
            Tuple of (is_valid, list_of_errors)
        """
        # A decoded JSON document may be a list, number, string, ... --
        # reject those up front instead of failing on membership tests.
        if not isinstance(guide_json, dict):
            return False, ["Discussion guide must be a JSON object"]

        errors: List[str] = [
            f"Missing required field: {field}"
            for field in DiscussionGuideValidator._REQUIRED_GUIDE_FIELDS
            if field not in guide_json
        ]

        if 'sections' in guide_json:
            sections = guide_json['sections']
            if not isinstance(sections, list):
                errors.append("'sections' must be a list")
            else:
                if not sections:
                    errors.append("'sections' cannot be empty")
                for i, section in enumerate(sections):
                    errors.extend(DiscussionGuideValidator._validate_section(section, i))
                # Only meaningful when sections is a real list; summing over
                # a string or dict would fail on ``.get``.
                if 'total_duration' in guide_json:
                    errors.extend(
                        DiscussionGuideValidator._validate_total_duration(
                            guide_json['total_duration'], sections
                        )
                    )

        return not errors, errors

    @staticmethod
    def _validate_total_duration(total_duration: Any, sections: List[Any]) -> List[str]:
        """Check that ``total_duration`` matches the summed section durations.

        Non-dict sections are skipped here; they are reported separately by
        ``_validate_section``.
        """
        try:
            section_total = sum(
                section.get('duration', 0)
                for section in sections
                if isinstance(section, dict)
            )
            mismatch = abs(section_total - total_duration)
            too_far_apart = mismatch > DiscussionGuideValidator._DURATION_TOLERANCE
        except (TypeError, ValueError):
            # Non-numeric durations (strings, None, ...) cannot be summed.
            return ["Invalid duration values in sections"]
        if too_far_apart:
            return [
                f"Total duration ({total_duration}) doesn't match sum of sections ({section_total})"
            ]
        return []

    @staticmethod
    def _item_prefix(kind: str, section_index: int, item_index: int,
                     subsection_index: Optional[int] = None) -> str:
        """Build the 1-based location prefix used in error messages."""
        parts = [f"Section {section_index + 1}"]
        if subsection_index is not None:
            parts.append(f"Subsection {subsection_index + 1}")
        parts.append(f"{kind} {item_index + 1}")
        return ", ".join(parts)

    @staticmethod
    def _validate_child_list(parent: Dict[str, Any], key: str, prefix: str, validate_child,
                             section_index: int,
                             subsection_index: Optional[int] = None) -> List[str]:
        """Validate the optional list stored under ``parent[key]``.

        A missing or falsy value is accepted. A truthy non-list value yields
        a single error. Otherwise ``validate_child`` is called for each item
        with the section index, the item position and, for items inside a
        subsection, the subsection index.
        """
        children = parent.get(key)
        if not children:
            return []
        if not isinstance(children, list):
            return [f"{prefix}: '{key}' must be a list"]

        errors: List[str] = []
        for position, child in enumerate(children):
            if subsection_index is None:
                errors.extend(validate_child(child, section_index, position))
            else:
                errors.extend(validate_child(child, section_index, position, subsection_index))
        return errors

    @staticmethod
    def _validate_section(section: Dict[str, Any], index: int) -> List[str]:
        """Validate a single section.

        Args:
            section: The section object to validate.
            index: Zero-based position of the section in the guide.

        Returns:
            List of error messages (empty when the section is valid).
        """
        section_prefix = f"Section {index + 1}"
        if not isinstance(section, dict):
            return [f"{section_prefix}: Section must be an object"]

        errors: List[str] = [
            f"{section_prefix}: Missing required field '{field}'"
            for field in ('id', 'title', 'duration', 'type')
            if field not in section
        ]

        if 'type' in section and section['type'] not in DiscussionGuideValidator._VALID_SECTION_TYPES:
            errors.append(f"{section_prefix}: Invalid section type '{section['type']}'")

        if 'duration' in section:
            try:
                if int(section['duration']) <= 0:
                    errors.append(f"{section_prefix}: Duration must be positive")
            except (TypeError, ValueError, OverflowError):
                # OverflowError: int(float('inf')); json.loads accepts Infinity.
                errors.append(f"{section_prefix}: Duration must be a number")

        child_validators = (
            ('questions', DiscussionGuideValidator._validate_question),
            ('activities', DiscussionGuideValidator._validate_activity),
            ('subsections', DiscussionGuideValidator._validate_subsection),
        )
        for key, validate_child in child_validators:
            errors.extend(
                DiscussionGuideValidator._validate_child_list(
                    section, key, section_prefix, validate_child, index
                )
            )
        return errors

    @staticmethod
    def _validate_question(question: Dict[str, Any], section_index: int, question_index: int,
                           subsection_index: Optional[int] = None) -> List[str]:
        """Validate a single question.

        Args:
            question: The question object to validate.
            section_index: Zero-based index of the enclosing section.
            question_index: Zero-based index of the question in its list.
            subsection_index: Zero-based index of the enclosing subsection,
                or None for a question attached directly to the section.

        Returns:
            List of error messages (empty when the question is valid).
        """
        question_prefix = DiscussionGuideValidator._item_prefix(
            "Question", section_index, question_index, subsection_index
        )
        if not isinstance(question, dict):
            return [f"{question_prefix}: Question must be an object"]

        errors: List[str] = [
            f"{question_prefix}: Missing required field '{field}'"
            for field in ('id', 'type', 'content')
            if field not in question
        ]
        # Any string is accepted as a question type.
        if 'type' in question and not isinstance(question['type'], str):
            errors.append(f"{question_prefix}: Question type must be a string")
        if 'content' in question and not isinstance(question['content'], str):
            errors.append(f"{question_prefix}: 'content' must be a string")
        return errors

    @staticmethod
    def _validate_activity(activity: Dict[str, Any], section_index: int, activity_index: int,
                           subsection_index: Optional[int] = None) -> List[str]:
        """Validate a single activity.

        Args:
            activity: The activity object to validate.
            section_index: Zero-based index of the enclosing section.
            activity_index: Zero-based index of the activity in its list.
            subsection_index: Zero-based index of the enclosing subsection,
                or None for an activity attached directly to the section.

        Returns:
            List of error messages (empty when the activity is valid).
        """
        activity_prefix = DiscussionGuideValidator._item_prefix(
            "Activity", section_index, activity_index, subsection_index
        )
        if not isinstance(activity, dict):
            return [f"{activity_prefix}: Activity must be an object"]

        errors: List[str] = [
            f"{activity_prefix}: Missing required field '{field}'"
            for field in ('id', 'type', 'content')
            if field not in activity
        ]
        # Any string is accepted as an activity type.
        if 'type' in activity and not isinstance(activity['type'], str):
            errors.append(f"{activity_prefix}: Activity type must be a string")
        return errors

    @staticmethod
    def _validate_subsection(subsection: Dict[str, Any], section_index: int,
                             subsection_index: int) -> List[str]:
        """Validate a single subsection, including its questions and activities.

        Args:
            subsection: The subsection object to validate.
            section_index: Zero-based index of the enclosing section.
            subsection_index: Zero-based index of the subsection.

        Returns:
            List of error messages (empty when the subsection is valid).
        """
        subsection_prefix = f"Section {section_index + 1}, Subsection {subsection_index + 1}"
        if not isinstance(subsection, dict):
            return [f"{subsection_prefix}: Subsection must be an object"]

        errors: List[str] = [
            f"{subsection_prefix}: Missing required field '{field}'"
            for field in ('id', 'title', 'duration', 'questions')
            if field not in subsection
        ]
        # Activities are validated too because _parse_subsection reads their
        # 'id', 'type' and 'content' keys unconditionally.
        child_validators = (
            ('questions', DiscussionGuideValidator._validate_question),
            ('activities', DiscussionGuideValidator._validate_activity),
        )
        for key, validate_child in child_validators:
            errors.extend(
                DiscussionGuideValidator._validate_child_list(
                    subsection, key, subsection_prefix, validate_child,
                    section_index, subsection_index,
                )
            )
        return errors

    @staticmethod
    def create_fallback_structure(title: str, duration: int, content: str) -> Dict[str, Any]:
        """
        Create a fallback discussion guide structure if JSON generation fails.

        The guide has four sections (introduction, warm-up, main discussion,
        conclusion) sized at roughly 10% / 15% / 60% / 15% of ``duration``
        with minimums of 5 / 5 / 20 / 5 minutes. When those minimums would
        push the section total more than the validator's tolerance above
        ``duration`` (i.e. for short sessions), the minimums drop to one
        minute and the main discussion takes whatever time remains, so the
        result still passes ``validate_json_structure`` for any
        non-negative ``duration``.

        Args:
            title: The focus group title
            duration: Total duration in minutes
            content: Raw content to structure. Currently unused; kept for
                interface compatibility.

        Returns:
            A basic valid discussion guide structure
        """
        intro_duration = max(5, int(duration * 0.1))
        warmup_duration = max(5, int(duration * 0.15))
        main_duration = max(20, int(duration * 0.6))
        conclusion_duration = max(5, int(duration * 0.15))

        allocated = intro_duration + warmup_duration + main_duration + conclusion_duration
        if allocated - duration > DiscussionGuideValidator._DURATION_TOLERANCE:
            # Session too short for the usual minimums: shrink them and let
            # the main discussion absorb the remainder.
            intro_duration = max(1, int(duration * 0.1))
            warmup_duration = max(1, int(duration * 0.15))
            conclusion_duration = max(1, int(duration * 0.15))
            main_duration = max(
                1, duration - intro_duration - warmup_duration - conclusion_duration
            )

        return {
            "title": title,
            "total_duration": duration,
            "sections": [
                {
                    "id": "introduction",
                    "title": "Introduction",
                    "duration": intro_duration,
                    "type": "introduction",
                    "activities": [
                        {
                            "id": "welcome",
                            "type": "moderator_statement",
                            "content": f"Welcome everyone to our focus group on {title}. Let's begin by introducing ourselves and the purpose of today's discussion."
                        }
                    ]
                },
                {
                    "id": "warmup",
                    "title": "Warm-up Questions",
                    "duration": warmup_duration,
                    "type": "warmup",
                    "questions": [
                        {
                            "id": "intro_question",
                            "type": "open_question",
                            "content": "Let's start with brief introductions. Please share your name and one thing you're excited about today."
                        }
                    ]
                },
                {
                    "id": "main_discussion",
                    "title": "Main Discussion",
                    "duration": main_duration,
                    "type": "main_content",
                    "questions": [
                        {
                            "id": "main_question",
                            "type": "open_question",
                            "content": "What are your initial thoughts on the topic we're discussing today?"
                        }
                    ]
                },
                {
                    "id": "conclusion",
                    "title": "Conclusion",
                    "duration": conclusion_duration,
                    "type": "conclusion",
                    "questions": [
                        {
                            "id": "final_thoughts",
                            "type": "open_question",
                            "content": "Before we wrap up, are there any final thoughts or comments you'd like to share?"
                        }
                    ]
                }
            ]
        }

    @staticmethod
    def parse_from_json(json_string: str) -> tuple[Optional["StructuredDiscussionGuide"], List[str]]:
        """
        Parse a JSON string into a StructuredDiscussionGuide object.

        Args:
            json_string: The JSON string to parse

        Returns:
            Tuple of (parsed_guide, list_of_errors). On any failure the
            guide is None and the list explains why; on success the list
            is empty.
        """
        try:
            guide_json = json.loads(json_string)
        except (ValueError, TypeError) as e:
            # ValueError covers json.JSONDecodeError; TypeError is raised
            # when the input is not str/bytes/bytearray (e.g. None).
            return None, [f"Invalid JSON: {str(e)}"]

        is_valid, errors = DiscussionGuideValidator.validate_json_structure(guide_json)
        if not is_valid:
            return None, errors

        try:
            sections = [
                DiscussionGuideValidator._parse_section(section_data)
                for section_data in guide_json['sections']
            ]
            guide = StructuredDiscussionGuide(
                title=guide_json['title'],
                total_duration=guide_json['total_duration'],
                sections=sections,
                metadata=guide_json.get('metadata')
            )
        except Exception as e:
            # Deliberately broad: this method reports every failure through
            # its error list instead of raising to the caller.
            return None, [f"Error parsing structure: {str(e)}"]
        return guide, []

    @staticmethod
    def _parse_question(q_data: Dict[str, Any]) -> "DiscussionGuideQuestion":
        """Build a DiscussionGuideQuestion from its JSON object."""
        return DiscussionGuideQuestion(
            id=q_data['id'],
            type=q_data['type'],
            content=q_data['content'],
            time_limit=q_data.get('time_limit'),
            probes=q_data.get('probes'),
            metadata=q_data.get('metadata')
        )

    @staticmethod
    def _parse_activity(a_data: Dict[str, Any]) -> "DiscussionGuideActivity":
        """Build a DiscussionGuideActivity from its JSON object."""
        return DiscussionGuideActivity(
            id=a_data['id'],
            type=a_data['type'],
            content=a_data['content'],
            time_limit=a_data.get('time_limit'),
            metadata=a_data.get('metadata')
        )

    @staticmethod
    def _parse_subsection(s_data: Dict[str, Any]) -> "DiscussionGuideSubsection":
        """Build a DiscussionGuideSubsection from its JSON object."""
        # ``or []`` also covers an explicit null, which validation accepts.
        questions = [
            DiscussionGuideValidator._parse_question(q_data)
            for q_data in s_data.get('questions') or []
        ]
        activities = [
            DiscussionGuideValidator._parse_activity(a_data)
            for a_data in s_data.get('activities') or []
        ]
        return DiscussionGuideSubsection(
            id=s_data['id'],
            title=s_data['title'],
            duration=s_data['duration'],
            questions=questions,
            activities=activities or None,
            metadata=s_data.get('metadata')
        )

    @staticmethod
    def _parse_section(section_data: Dict[str, Any]) -> "DiscussionGuideSection":
        """Parse a section from JSON data.

        Empty or missing ``questions``, ``activities`` and ``subsections``
        are stored as None on the resulting section.

        Raises:
            KeyError: If a required key is missing from the section or one
                of its children (not expected after successful validation).
        """
        questions = [
            DiscussionGuideValidator._parse_question(q_data)
            for q_data in section_data.get('questions') or []
        ]
        activities = [
            DiscussionGuideValidator._parse_activity(a_data)
            for a_data in section_data.get('activities') or []
        ]
        subsections = [
            DiscussionGuideValidator._parse_subsection(s_data)
            for s_data in section_data.get('subsections') or []
        ]
        return DiscussionGuideSection(
            id=section_data['id'],
            title=section_data['title'],
            duration=section_data['duration'],
            type=section_data['type'],
            content=section_data.get('content'),
            questions=questions or None,
            activities=activities or None,
            subsections=subsections or None,
            metadata=section_data.get('metadata')
        )