1581 lines
70 KiB
Python
1581 lines
70 KiB
Python
from google import genai
|
|
import mimetypes
|
|
import time
|
|
import os
|
|
import logging
|
|
import requests
|
|
import json
|
|
import datetime
|
|
import base64
|
|
from typing import Dict, Any, Optional, List, Tuple
|
|
from dotenv import load_dotenv
|
|
from video_splitter import VideoSplitter
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
import threading
|
|
|
|
# Load environment variables from .env file
|
|
load_dotenv()
|
|
|
|
logger = logging.getLogger('video_query')
|
|
|
|
class VideoProcessor:
|
|
"""
|
|
Class to handle video uploads and processing with Gemini API.
|
|
"""
|
|
# Default prompts for different modes
|
|
PROMPTS = {
|
|
"meeting_summary": "Generate a detailed summary of the meeting in the attached video recording, including discussion points and action items with owners",
|
|
"process_documentation": "Generate detailed process documentation suitable for reference or training based on the process illustrated in the attached video recording. Write the documentation so that a new user will be able to follow step by step and accomplish the task illustrated in the video",
|
|
"documentation_with_charts": "Analyze this video to create comprehensive process documentation with workflow diagrams for a knowledge base article. Follow these requirements exactly:\n\n1. CONTENT REQUIREMENTS:\n - Provide a detailed step-by-step explanation of the process shown\n - Be extremely verbose and thorough - include all relevant details, context, and nuances\n - Structure as a complete knowledge base article with clear sections\n - Include overview, detailed steps, tips, and troubleshooting where applicable\n\n2. MERMAID DIAGRAM REQUIREMENTS:\n - Create workflow diagrams using valid Mermaid syntax where helpful\n - CRITICAL: Use only simple alphanumeric text in node descriptions and labels\n - CRITICAL: No special characters like quotes brackets colons semicolons or symbols in node text\n - CRITICAL: Use underscores instead of spaces in node IDs and labels\n - CRITICAL: Keep all text simple to avoid syntax errors\n - Example format: Start_Process --> Complete_Task --> End_Process\n - Use flowchart format: graph TD or graph LR\n\n3. OUTPUT STRUCTURE:\n - Title and overview section\n - Prerequisites section if applicable\n - Detailed step-by-step process\n - Mermaid workflow diagram(s) showing the process flow\n - Tips and best practices\n - Troubleshooting common issues\n\nEnsure all Mermaid diagrams use simple text without special characters to prevent parsing errors.",
|
|
"custom": "" # Custom prompt will be provided by the user
|
|
}
|
|
|
|
# Maximum video duration in minutes (Gemini limitation)
|
|
MAX_VIDEO_DURATION = 55
|
|
|
|
# Threshold for chunked upload (10MB)
|
|
CHUNKED_UPLOAD_THRESHOLD = 10 * 1024 * 1024
|
|
|
|
# Webhook URL for tracking usage
|
|
WEBHOOK_URL = "https://hook.us1.make.celonis.com/8ri1h8b2he4wudp2jku69mgcxumzxf3v"
|
|
|
|
# Parallel processing configuration
|
|
# Default max workers for parallel chunk processing
|
|
# Free tier: 5 RPM (use 3-4 workers to be safe)
|
|
# Paid tier: 150 RPM (can use more workers)
|
|
DEFAULT_MAX_WORKERS = 4 # Conservative default for free tier
|
|
|
|
# Model configuration
|
|
DEFAULT_PROCESSING_MODEL = "gemini-2.5-pro" # Model for individual video processing
|
|
DEFAULT_SYNTHESIS_MODEL = "gemini-2.5-pro" # Model for batch synthesis (updated for consistency)
|
|
|
|
def __init__(self, api_key: Optional[str] = None, max_parallel_chunks: int = None):
|
|
"""
|
|
Initialize with API key from environment variable or direct setting
|
|
|
|
Args:
|
|
api_key: Google API key for Gemini
|
|
max_parallel_chunks: Maximum number of chunks to process in parallel
|
|
(default: 4, recommended 3-4 for free tier, up to 10+ for paid tier)
|
|
"""
|
|
self.api_key = api_key or os.getenv("GOOGLE_API_KEY")
|
|
if not self.api_key:
|
|
logger.error("API key not provided")
|
|
raise ValueError("API key not provided - set GOOGLE_API_KEY environment variable or pass when initializing")
|
|
|
|
# Initialize the Gemini client
|
|
logger.info("Initializing Gemini API client")
|
|
self.client = genai.Client(api_key=self.api_key)
|
|
logger.info("Gemini API client initialized successfully")
|
|
|
|
# Set parallel processing configuration
|
|
self.max_parallel_chunks = max_parallel_chunks or self.DEFAULT_MAX_WORKERS
|
|
logger.info(f"Parallel processing enabled with max {self.max_parallel_chunks} concurrent chunks")
|
|
|
|
# Initialize video splitter
|
|
self.video_splitter = VideoSplitter()
|
|
|
|
# Thread lock for rate limiting
|
|
self._rate_limit_lock = threading.Lock()
|
|
|
|
# Load configuration from environment variables
|
|
self.processing_model = os.getenv("VIDEO_PROCESSOR_MODEL", self.DEFAULT_PROCESSING_MODEL)
|
|
self.synthesis_model = os.getenv("VIDEO_SYNTHESIS_MODEL", self.DEFAULT_SYNTHESIS_MODEL)
|
|
self.log_prompts = os.getenv("BATCH_PROCESSING_LOG_PROMPTS", "false").lower() == "true"
|
|
self.log_summaries = os.getenv("BATCH_PROCESSING_LOG_SUMMARIES", "false").lower() == "true"
|
|
|
|
logger.info(f"Configuration: processing_model={self.processing_model}, synthesis_model={self.synthesis_model}")
|
|
logger.info(f"Logging: prompts={self.log_prompts}, summaries={self.log_summaries}")
|
|
|
|
def send_usage_webhook(self, user_email: str, prompt: str) -> None:
|
|
"""
|
|
Send usage data to webhook for tracking purposes
|
|
|
|
Args:
|
|
user_email: Email of the user who processed the video
|
|
prompt: The prompt used for processing
|
|
"""
|
|
try:
|
|
current_datetime = datetime.datetime.now().isoformat()
|
|
|
|
webhook_data = {
|
|
"tool": "VIDEOQUERY",
|
|
"date": current_datetime,
|
|
"user": user_email,
|
|
"model": "GEMINI",
|
|
"settings": "no settings",
|
|
"subTool": "no subTool",
|
|
"prompt": prompt,
|
|
"negativePrompt": "no NEGATIVE_PROMPT",
|
|
"image": "no image"
|
|
}
|
|
|
|
logger.info(f"Sending usage data to webhook for user: {user_email}")
|
|
|
|
response = requests.post(
|
|
self.WEBHOOK_URL,
|
|
headers={"Content-Type": "application/json"},
|
|
data=json.dumps(webhook_data),
|
|
timeout=10 # 10 second timeout
|
|
)
|
|
|
|
if response.status_code == 200:
|
|
logger.info("Successfully sent usage data to webhook")
|
|
else:
|
|
logger.warning(f"Webhook request failed with status code: {response.status_code}")
|
|
logger.warning(f"Response: {response.text}")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error sending usage data to webhook: {str(e)}")
|
|
# Don't raise the exception - webhook failure shouldn't block the main flow
|
|
|
|
def process_video(self, video_path: str, prompt: str, user_email: str = "anonymous") -> Dict[str, Any]:
|
|
"""
|
|
Process a video with the given prompt using Gemini API
|
|
|
|
Args:
|
|
video_path: Path to the video file
|
|
prompt: Text prompt to use for video analysis
|
|
user_email: Email of the user processing the video (for usage tracking)
|
|
|
|
Returns:
|
|
Dictionary with processing result or error
|
|
"""
|
|
start_time = time.time()
|
|
|
|
result = {
|
|
"success": False,
|
|
"message": "",
|
|
"content": "",
|
|
"processing_time_seconds": 0
|
|
}
|
|
|
|
logger.info(f"Processing video: {video_path}")
|
|
logger.info(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")
|
|
|
|
if not os.path.exists(video_path):
|
|
error_msg = f"Video file not found at '{video_path}'"
|
|
logger.error(error_msg)
|
|
result["message"] = error_msg
|
|
return result
|
|
|
|
try:
|
|
# Get file size
|
|
file_size = os.path.getsize(video_path)
|
|
file_size_mb = file_size / (1024 * 1024)
|
|
logger.info(f"File size: {file_size_mb:.2f} MB")
|
|
|
|
# Get video duration to detect too-short videos
|
|
try:
|
|
video_duration = self.video_splitter.get_video_duration(video_path)
|
|
if video_duration:
|
|
logger.info(f"Video duration: {video_duration:.2f} seconds ({video_duration/60:.2f} minutes)")
|
|
|
|
# Check if video is too short (< 1 second can cause INVALID_ARGUMENT)
|
|
if video_duration < 1.0:
|
|
error_msg = f"Video too short: {video_duration:.2f}s. Gemini requires videos >= 1 second"
|
|
logger.error(error_msg)
|
|
result["message"] = error_msg
|
|
return result
|
|
else:
|
|
logger.warning("Could not determine video duration")
|
|
except Exception as dur_err:
|
|
logger.warning(f"Error checking video duration: {str(dur_err)}")
|
|
|
|
# Determine MIME type for the video
|
|
mime_type, _ = mimetypes.guess_type(video_path)
|
|
if not mime_type:
|
|
logger.info(f"Could not determine MIME type, using default: video/mp4")
|
|
mime_type = "video/mp4" # Fallback
|
|
else:
|
|
logger.info(f"MIME type: {mime_type}")
|
|
|
|
# Validate MIME type is video
|
|
if not mime_type.startswith('video/'):
|
|
error_msg = f"Invalid file type: {mime_type}. Only video files are supported (mp4, avi, mov, mkv, webm, etc.)"
|
|
logger.error(error_msg)
|
|
result["message"] = error_msg
|
|
return result
|
|
|
|
# Validate video file is readable and not corrupted
|
|
# This provides a secondary check in case validation at upload didn't happen
|
|
try:
|
|
import subprocess
|
|
probe_result = subprocess.run(
|
|
['ffprobe', '-v', 'error', '-show_entries', 'format=duration,format_name',
|
|
'-of', 'default=noprint_wrappers=1', video_path],
|
|
capture_output=True, text=True, timeout=10
|
|
)
|
|
if probe_result.returncode != 0:
|
|
error_detail = probe_result.stderr.strip()
|
|
|
|
# Provide user-friendly error messages for common issues
|
|
if "moov atom not found" in error_detail.lower():
|
|
error_msg = "Video file is incomplete or corrupted (missing header). The file may not have uploaded completely. Please try uploading again."
|
|
elif "invalid data found" in error_detail.lower():
|
|
error_msg = "Video file contains invalid data and cannot be processed. Please check the file or try re-encoding it."
|
|
else:
|
|
error_msg = f"Video file appears to be corrupted or unreadable. Technical details: {error_detail}"
|
|
|
|
logger.error(f"Video validation failed: {error_msg}")
|
|
result["message"] = error_msg
|
|
return result
|
|
logger.info(f"Video file validation successful. Format info: {probe_result.stdout[:100]}")
|
|
except subprocess.TimeoutExpired:
|
|
logger.warning("Video validation timed out after 10s - proceeding anyway")
|
|
except FileNotFoundError:
|
|
logger.warning("ffprobe not found - skipping video file validation")
|
|
except Exception as val_err:
|
|
logger.warning(f"Could not validate video file: {str(val_err)} - proceeding anyway")
|
|
|
|
# Use different approach based on file size
|
|
# Small files (< 10MB): use inline base64 data (faster, no upload wait)
|
|
# Large files (>= 10MB): use file upload API (handles larger files)
|
|
# Note: Base64 adds ~37% overhead, so 10MB file = ~13.7MB base64
|
|
SIZE_THRESHOLD_MB = 10
|
|
uploaded_file = None
|
|
|
|
if file_size_mb < SIZE_THRESHOLD_MB:
|
|
# Small file: Use base64 encoding for inline data
|
|
logger.info(f"File < {SIZE_THRESHOLD_MB}MB, using inline base64 data")
|
|
with open(video_path, "rb") as video_file_obj:
|
|
video_data = video_file_obj.read()
|
|
video_base64 = base64.b64encode(video_data).decode('utf-8')
|
|
|
|
logger.info(f"Base64 encoding complete. Size: {len(video_base64)} characters")
|
|
|
|
# Create the content parts using inline data
|
|
prompt_parts = [
|
|
{"text": prompt},
|
|
{"inline_data": {
|
|
"mime_type": mime_type,
|
|
"data": video_base64
|
|
}}
|
|
]
|
|
else:
|
|
# Large file: Use file upload API
|
|
logger.info(f"File >= {SIZE_THRESHOLD_MB}MB, using file upload API")
|
|
upload_start = time.time()
|
|
|
|
uploaded_file = self.client.files.upload(
|
|
file=video_path
|
|
)
|
|
logger.info(f"Upload complete in {time.time() - upload_start:.1f}s. File URI: {uploaded_file.uri}")
|
|
logger.info(f"Initial file state: {uploaded_file.state}")
|
|
|
|
# Wait for file to be processed with timeout
|
|
max_wait_time = 300 # 5 minutes timeout
|
|
wait_start = time.time()
|
|
check_count = 0
|
|
|
|
while uploaded_file.state == "PROCESSING":
|
|
check_count += 1
|
|
elapsed = time.time() - wait_start
|
|
|
|
if elapsed > max_wait_time:
|
|
error_msg = f"File processing timeout after {max_wait_time}s. File may be too large or corrupted."
|
|
logger.error(error_msg)
|
|
# Try to delete the file
|
|
try:
|
|
self.client.files.delete(name=uploaded_file.name)
|
|
except:
|
|
pass
|
|
result["message"] = error_msg
|
|
return result
|
|
|
|
logger.info(f"File is still processing (check #{check_count}, {elapsed:.0f}s elapsed), waiting...")
|
|
time.sleep(3)
|
|
|
|
try:
|
|
uploaded_file = self.client.files.get(name=uploaded_file.name)
|
|
logger.info(f"Updated file state: {uploaded_file.state}")
|
|
except Exception as status_err:
|
|
logger.error(f"Error checking file status: {str(status_err)}")
|
|
# Wait a bit longer and try again
|
|
time.sleep(5)
|
|
try:
|
|
uploaded_file = self.client.files.get(name=uploaded_file.name)
|
|
except Exception as retry_err:
|
|
error_msg = f"Failed to check file upload status: {str(retry_err)}"
|
|
logger.error(error_msg)
|
|
result["message"] = error_msg
|
|
return result
|
|
|
|
if uploaded_file.state != "ACTIVE":
|
|
error_msg = f"File upload failed. State: {uploaded_file.state}"
|
|
logger.error(error_msg)
|
|
logger.error("This may indicate the video is corrupted, unsupported format, or contains invalid data")
|
|
result["message"] = error_msg
|
|
return result
|
|
|
|
logger.info("File is ACTIVE and ready for processing")
|
|
|
|
# Create content parts using file reference
|
|
prompt_parts = [
|
|
{"text": prompt},
|
|
{"file_data": {
|
|
"file_uri": uploaded_file.uri,
|
|
"mime_type": mime_type
|
|
}}
|
|
]
|
|
|
|
# Rate limiting: Wait to avoid hitting API limits
|
|
# Free tier: 5 RPM, so minimum 12 seconds between requests
|
|
with self._rate_limit_lock:
|
|
time.sleep(2) # 2 second delay between API calls
|
|
|
|
# Use the client to generate content with the new SDK API
|
|
logger.info("Sending prompt to Gemini for processing...")
|
|
api_start = time.time()
|
|
|
|
# Add retry logic for network failures
|
|
max_retries = 3
|
|
retry_delay = 5 # seconds
|
|
last_exception = None
|
|
|
|
for attempt in range(max_retries):
|
|
try:
|
|
response = self.client.models.generate_content(
|
|
model=self.processing_model,
|
|
contents=prompt_parts
|
|
)
|
|
# If successful, break out of retry loop
|
|
break
|
|
except Exception as e:
|
|
last_exception = e
|
|
error_str = str(e).lower()
|
|
|
|
# Log detailed error information for INVALID_ARGUMENT
|
|
if 'invalid_argument' in error_str or '400' in error_str:
|
|
logger.error("=" * 80)
|
|
logger.error("INVALID_ARGUMENT ERROR DETAILS:")
|
|
logger.error(f" Video path: {video_path}")
|
|
logger.error(f" File size: {file_size_mb:.2f} MB")
|
|
logger.error(f" MIME type: {mime_type}")
|
|
if 'video_duration' in locals():
|
|
logger.error(f" Duration: {video_duration:.2f}s ({video_duration/60:.2f} min)")
|
|
logger.error(f" Prompt length: {len(prompt)} characters")
|
|
logger.error(f" Upload method: {'File Upload API' if uploaded_file else 'Inline Base64'}")
|
|
if uploaded_file:
|
|
logger.error(f" File state: {uploaded_file.state}")
|
|
logger.error(f" File URI: {uploaded_file.uri}")
|
|
logger.error(f" Error message: {str(e)}")
|
|
logger.error("=" * 80)
|
|
|
|
# Check if it's a retryable network error
|
|
if any(err in error_str for err in ['name resolution', 'connection', 'timeout', 'network']):
|
|
if attempt < max_retries - 1:
|
|
logger.warning(f"Network error on attempt {attempt + 1}/{max_retries}: {str(e)}")
|
|
logger.info(f"Retrying in {retry_delay} seconds...")
|
|
time.sleep(retry_delay)
|
|
continue
|
|
else:
|
|
logger.error(f"All {max_retries} attempts failed with network errors")
|
|
raise
|
|
else:
|
|
# Non-retryable error, raise immediately
|
|
logger.error(f"Non-retryable error: {str(e)}")
|
|
raise
|
|
|
|
api_time = time.time() - api_start
|
|
logger.info(f"Received response from Gemini (API call took {api_time:.1f}s)")
|
|
|
|
# Extract the response content
|
|
content = ""
|
|
if response.parts:
|
|
logger.info(f"Response has {len(response.parts)} parts")
|
|
for i, part in enumerate(response.parts):
|
|
if hasattr(part, 'text'):
|
|
part_text = part.text
|
|
content_preview = part_text[:100] + '...' if len(part_text) > 100 else part_text
|
|
logger.info(f"Part {i} (text): {content_preview}")
|
|
content += part_text
|
|
else:
|
|
logger.info(f"Part {i} (no text): {type(part)}")
|
|
else:
|
|
logger.warning("No parts in response")
|
|
if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
|
|
logger.warning(f"Prompt feedback: {response.prompt_feedback}")
|
|
|
|
# Set success result
|
|
result["success"] = True
|
|
result["content"] = content
|
|
result["processing_time_seconds"] = round(time.time() - start_time, 2)
|
|
logger.info(f"Processed result with {len(content)} characters")
|
|
logger.info(f"Total processing time: {result['processing_time_seconds']}s")
|
|
|
|
# Send usage data to webhook for tracking
|
|
self.send_usage_webhook(user_email, prompt)
|
|
|
|
# Clean up uploaded file if it was used
|
|
if uploaded_file:
|
|
try:
|
|
logger.info(f"Deleting uploaded file: {uploaded_file.name}")
|
|
self.client.files.delete(name=uploaded_file.name)
|
|
logger.info("File deleted successfully from Gemini storage")
|
|
except Exception as del_err:
|
|
logger.warning(f"Could not delete file from Gemini storage: {str(del_err)}")
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
import traceback
|
|
error_details = traceback.format_exc()
|
|
logger.error(f"Error processing video: {str(e)}")
|
|
logger.error(error_details)
|
|
result["message"] = f"Error processing video: {str(e)}"
|
|
result["error_details"] = error_details
|
|
return result
|
|
|
|
def combine_chunk_responses(self, responses: List[str], prompt: str,
|
|
num_chunks: int) -> str:
|
|
"""
|
|
Intelligently combine responses from multiple video chunks.
|
|
|
|
Args:
|
|
responses: List of response texts from each chunk
|
|
prompt: Original prompt used for processing
|
|
num_chunks: Total number of chunks processed
|
|
|
|
Returns:
|
|
Combined response text
|
|
"""
|
|
logger.info(f"Combining {len(responses)} chunk responses")
|
|
|
|
# Detect the prompt type to determine combination strategy
|
|
prompt_lower = prompt.lower()
|
|
is_meeting_summary = "meeting" in prompt_lower or "summary" in prompt_lower
|
|
is_documentation = "documentation" in prompt_lower or "process" in prompt_lower
|
|
is_with_charts = "mermaid" in prompt_lower or "diagram" in prompt_lower or "chart" in prompt_lower
|
|
|
|
if is_with_charts:
|
|
return self._combine_with_charts(responses, num_chunks)
|
|
elif is_meeting_summary:
|
|
return self._combine_meeting_summary(responses, num_chunks)
|
|
elif is_documentation:
|
|
return self._combine_documentation(responses, num_chunks)
|
|
else:
|
|
return self._combine_generic(responses, num_chunks)
|
|
|
|
def _combine_generic(self, responses: List[str], num_chunks: int) -> str:
|
|
"""Generic combination: simple sequential joining with section headers."""
|
|
logger.info("Using generic combination strategy")
|
|
combined = []
|
|
|
|
combined.append(f"# Complete Video Analysis\n")
|
|
combined.append(f"*This video was processed in {num_chunks} parts due to its length.*\n")
|
|
|
|
for i, response in enumerate(responses, 1):
|
|
combined.append(f"\n## Part {i} of {num_chunks}\n")
|
|
combined.append(response.strip())
|
|
|
|
return "\n".join(combined)
|
|
|
|
def _combine_meeting_summary(self, responses: List[str], num_chunks: int) -> str:
|
|
"""Combination strategy optimized for meeting summaries."""
|
|
logger.info("Using meeting summary combination strategy")
|
|
|
|
# First, try to synthesize the segments into a unified summary
|
|
try:
|
|
logger.info("Attempting to synthesize segments into unified meeting summary")
|
|
synthesized = self._synthesize_meeting_segments(responses, num_chunks)
|
|
if synthesized:
|
|
return synthesized
|
|
else:
|
|
logger.warning("Synthesis failed, falling back to segment concatenation")
|
|
except Exception as e:
|
|
logger.warning(f"Error during synthesis: {e}, falling back to segment concatenation")
|
|
|
|
# Fallback: simple concatenation with formatting
|
|
combined = []
|
|
|
|
combined.append(f"# Complete Meeting Recording Summary\n")
|
|
combined.append(f"*This recording was analyzed in {num_chunks} segments.*\n")
|
|
combined.append(f"\n---\n")
|
|
|
|
# Combine all discussion points with clear time markers
|
|
for i, response in enumerate(responses, 1):
|
|
time_range = self._format_time_range(i, num_chunks)
|
|
combined.append(f"\n## Segment {i}: {time_range}\n")
|
|
combined.append(response.strip())
|
|
combined.append(f"\n---\n")
|
|
|
|
# Add consolidated note
|
|
combined.append(f"\n### Notes")
|
|
combined.append(f"- Review all segments above for discussion points and action items")
|
|
combined.append(f"- Total recording duration: ~{num_chunks * 50} minutes")
|
|
combined.append(f"- Recording was split into {num_chunks} segments for analysis")
|
|
|
|
return "\n".join(combined)
|
|
|
|
def _synthesize_meeting_segments(self, responses: List[str], num_chunks: int) -> Optional[str]:
|
|
"""
|
|
Use AI to synthesize multiple segment summaries into one unified meeting summary.
|
|
|
|
Args:
|
|
responses: List of segment summaries
|
|
num_chunks: Number of segments
|
|
|
|
Returns:
|
|
Unified meeting summary or None if synthesis fails
|
|
"""
|
|
try:
|
|
# Prepare the segments for synthesis
|
|
segments_text = ""
|
|
for i, response in enumerate(responses, 1):
|
|
time_range = self._format_time_range(i, num_chunks)
|
|
segments_text += f"\n\n### Segment {i} ({time_range}):\n{response.strip()}\n"
|
|
|
|
# Create synthesis prompt
|
|
synthesis_prompt = f"""You are analyzing a meeting recording that was split into {num_chunks} segments due to its length. Below are the summaries from each segment. Your task is to create ONE unified, comprehensive meeting summary that integrates all the information.
|
|
|
|
SEGMENT SUMMARIES:
|
|
{segments_text}
|
|
|
|
Please provide a SINGLE, UNIFIED meeting summary that:
|
|
1. Combines all discussion points into one cohesive narrative (not separated by segments)
|
|
2. Consolidates all action items into one master list (removing duplicates if any)
|
|
3. Identifies main themes and outcomes across the entire meeting
|
|
4. Maintains chronological flow where relevant
|
|
5. Uses clear sections: Meeting Summary, Discussion Points, Action Items (with owners)
|
|
|
|
Format the output as a professional meeting summary document. Do not reference the segments in your output - write as if this was analyzed as one continuous meeting."""
|
|
|
|
logger.info("Sending synthesis request to Gemini")
|
|
|
|
# Add retry logic for network failures
|
|
max_retries = 3
|
|
retry_delay = 5
|
|
|
|
for attempt in range(max_retries):
|
|
try:
|
|
synthesis_response = self.client.models.generate_content(
|
|
model=self.synthesis_model,
|
|
contents=synthesis_prompt
|
|
)
|
|
break
|
|
except Exception as e:
|
|
error_str = str(e).lower()
|
|
if any(err in error_str for err in ['name resolution', 'connection', 'timeout', 'network']):
|
|
if attempt < max_retries - 1:
|
|
logger.warning(f"Network error during synthesis (attempt {attempt + 1}/{max_retries}): {str(e)}")
|
|
logger.info(f"Retrying in {retry_delay} seconds...")
|
|
time.sleep(retry_delay)
|
|
continue
|
|
else:
|
|
logger.error(f"Synthesis failed after {max_retries} attempts")
|
|
raise
|
|
else:
|
|
raise
|
|
|
|
if synthesis_response.parts:
|
|
synthesized_content = ""
|
|
for part in synthesis_response.parts:
|
|
if hasattr(part, 'text'):
|
|
synthesized_content += part.text
|
|
|
|
if synthesized_content:
|
|
logger.info("Successfully synthesized unified meeting summary")
|
|
# Add header noting this was synthesized
|
|
final_output = "# Meeting Summary\n\n"
|
|
final_output += f"*Synthesized from {num_chunks}-segment analysis*\n\n"
|
|
final_output += "---\n\n"
|
|
final_output += synthesized_content
|
|
return final_output
|
|
|
|
logger.warning("No content in synthesis response")
|
|
return None
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error during synthesis: {str(e)}")
|
|
return None
|
|
|
|
def _combine_documentation(self, responses: List[str], num_chunks: int) -> str:
|
|
"""Combination strategy optimized for process documentation."""
|
|
logger.info("Using documentation combination strategy")
|
|
combined = []
|
|
|
|
combined.append(f"# Complete Process Documentation\n")
|
|
combined.append(f"*This process was documented from a {num_chunks}-part video recording.*\n")
|
|
|
|
combined.append(f"\n## Overview\n")
|
|
combined.append(f"This documentation covers the complete process shown in the video. "
|
|
f"The content has been organized sequentially across all segments.\n")
|
|
|
|
for i, response in enumerate(responses, 1):
|
|
combined.append(f"\n## Section {i}: {self._format_time_range(i, num_chunks)}\n")
|
|
combined.append(response.strip())
|
|
|
|
combined.append(f"\n\n---\n*End of documentation*")
|
|
|
|
return "\n".join(combined)
|
|
|
|
def _combine_with_charts(self, responses: List[str], num_chunks: int) -> str:
|
|
"""Combination strategy for documentation with Mermaid diagrams."""
|
|
logger.info("Using documentation with charts combination strategy")
|
|
combined = []
|
|
|
|
combined.append(f"# Complete Process Documentation with Workflow Diagrams\n")
|
|
combined.append(f"*This analysis spans {num_chunks} video segments.*\n")
|
|
|
|
# First, add all text content
|
|
combined.append(f"\n## Overview and Detailed Steps\n")
|
|
|
|
for i, response in enumerate(responses, 1):
|
|
combined.append(f"\n### Part {i}: {self._format_time_range(i, num_chunks)}\n")
|
|
|
|
# Separate mermaid diagrams from text content
|
|
parts = response.split("```mermaid")
|
|
text_part = parts[0].strip()
|
|
combined.append(text_part)
|
|
|
|
# Add mermaid diagrams in a dedicated section
|
|
if len(parts) > 1:
|
|
for j, diagram_part in enumerate(parts[1:], 1):
|
|
if "```" in diagram_part:
|
|
diagram_code = diagram_part.split("```")[0]
|
|
combined.append(f"\n**Workflow Diagram {i}.{j}:**\n")
|
|
combined.append(f"```mermaid{diagram_code}```\n")
|
|
|
|
# Add any remaining text after the diagram
|
|
remaining_text = "```".join(diagram_part.split("```")[1:]).strip()
|
|
if remaining_text:
|
|
combined.append(remaining_text)
|
|
|
|
combined.append(f"\n\n---\n*Complete documentation generated from {num_chunks}-part video analysis*")
|
|
|
|
return "\n".join(combined)
|
|
|
|
def _format_time_range(self, part_num: int, total_parts: int,
|
|
chunk_duration: int = 50) -> str:
|
|
"""Format time range for a video part."""
|
|
start_min = (part_num - 1) * chunk_duration
|
|
end_min = part_num * chunk_duration if part_num < total_parts else "End"
|
|
|
|
if isinstance(end_min, int):
|
|
return f"{start_min}-{end_min} minutes"
|
|
else:
|
|
return f"{start_min}+ minutes"
|
|
|
|
def _process_single_chunk(self, chunk_info: Tuple[int, str, str, int, str]) -> Tuple[int, Dict[str, Any]]:
|
|
"""
|
|
Process a single video chunk. Used for parallel processing.
|
|
|
|
Args:
|
|
chunk_info: Tuple of (chunk_index, chunk_path, chunk_prompt, total_chunks, user_email)
|
|
|
|
Returns:
|
|
Tuple of (chunk_index, result_dict)
|
|
"""
|
|
chunk_index, chunk_path, chunk_prompt, total_chunks, user_email = chunk_info
|
|
|
|
logger.info(f"[Parallel] Processing chunk {chunk_index + 1}/{total_chunks}: {chunk_path}")
|
|
|
|
try:
|
|
chunk_result = self.process_video(chunk_path, chunk_prompt, user_email)
|
|
logger.info(f"[Parallel] Completed chunk {chunk_index + 1}/{total_chunks}")
|
|
return (chunk_index, chunk_result)
|
|
except Exception as e:
|
|
logger.error(f"[Parallel] Error processing chunk {chunk_index + 1}/{total_chunks}: {str(e)}")
|
|
return (chunk_index, {
|
|
"success": False,
|
|
"message": f"Error processing chunk {chunk_index + 1}: {str(e)}",
|
|
"content": ""
|
|
})
|
|
|
|
def _process_chunks_parallel(self, chunk_paths: List[str], prompt: str,
|
|
user_email: str) -> List[Dict[str, Any]]:
|
|
"""
|
|
Process multiple video chunks in parallel using ThreadPoolExecutor.
|
|
|
|
Args:
|
|
chunk_paths: List of paths to video chunk files
|
|
prompt: Original prompt for video analysis
|
|
user_email: User email for tracking
|
|
|
|
Returns:
|
|
List of result dictionaries in order of chunks
|
|
"""
|
|
num_chunks = len(chunk_paths)
|
|
logger.info(f"Starting parallel processing of {num_chunks} chunks with {self.max_parallel_chunks} workers")
|
|
|
|
# Prepare chunk information for parallel processing
|
|
chunk_infos = []
|
|
for i, chunk_path in enumerate(chunk_paths):
|
|
chunk_prompt = self._create_chunk_prompt(prompt, i + 1, num_chunks)
|
|
chunk_infos.append((i, chunk_path, chunk_prompt, num_chunks, user_email))
|
|
|
|
# Process chunks in parallel
|
|
results = [None] * num_chunks # Pre-allocate results list to maintain order
|
|
|
|
with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor:
|
|
# Submit all chunks for processing
|
|
future_to_chunk = {
|
|
executor.submit(self._process_single_chunk, chunk_info): chunk_info[0]
|
|
for chunk_info in chunk_infos
|
|
}
|
|
|
|
# Collect results as they complete
|
|
completed = 0
|
|
for future in as_completed(future_to_chunk):
|
|
chunk_index = future_to_chunk[future]
|
|
try:
|
|
chunk_index, result = future.result()
|
|
results[chunk_index] = result
|
|
completed += 1
|
|
logger.info(f"[Parallel] Progress: {completed}/{num_chunks} chunks completed")
|
|
except Exception as e:
|
|
logger.error(f"[Parallel] Unexpected error for chunk {chunk_index + 1}: {str(e)}")
|
|
results[chunk_index] = {
|
|
"success": False,
|
|
"message": f"Unexpected error: {str(e)}",
|
|
"content": ""
|
|
}
|
|
|
|
logger.info(f"[Parallel] All {num_chunks} chunks processed")
|
|
return results
|
|
|
|
def process_long_video(self, video_path: str, prompt: str,
|
|
user_email: str = "anonymous", use_parallel: bool = True) -> Dict[str, Any]:
|
|
"""
|
|
Process a long video by splitting it into chunks and combining the results.
|
|
Supports both parallel and sequential processing.
|
|
|
|
Args:
|
|
video_path: Path to the video file
|
|
prompt: Text prompt to use for video analysis
|
|
user_email: Email of the user processing the video (for usage tracking)
|
|
use_parallel: If True, process chunks in parallel; if False, process sequentially
|
|
|
|
Returns:
|
|
Dictionary with processing result or error
|
|
"""
|
|
start_time = time.time()
|
|
|
|
result = {
|
|
"success": False,
|
|
"message": "",
|
|
"content": "",
|
|
"chunks_processed": 0,
|
|
"processing_mode": "parallel" if use_parallel else "sequential",
|
|
"processing_time_seconds": 0
|
|
}
|
|
|
|
chunk_paths = []
|
|
|
|
try:
|
|
# Check if video needs splitting
|
|
num_chunks, duration_minutes = self.video_splitter.get_chunk_info(video_path)
|
|
|
|
if num_chunks <= 1:
|
|
logger.info("Video does not need splitting, processing normally")
|
|
return self.process_video(video_path, prompt, user_email)
|
|
|
|
logger.info(f"Long video detected: {duration_minutes:.2f} minutes, will be split into {num_chunks} chunks")
|
|
logger.info(f"Processing mode: {'PARALLEL' if use_parallel else 'SEQUENTIAL'}")
|
|
|
|
# Split the video
|
|
logger.info("Starting video splitting...")
|
|
chunk_paths = self.video_splitter.split_video(video_path)
|
|
logger.info(f"Video split into {len(chunk_paths)} chunks successfully")
|
|
|
|
# Process chunks (parallel or sequential)
|
|
if use_parallel:
|
|
# Parallel processing using ThreadPoolExecutor
|
|
chunk_results = self._process_chunks_parallel(chunk_paths, prompt, user_email)
|
|
else:
|
|
# Sequential processing (original logic)
|
|
chunk_results = []
|
|
for i, chunk_path in enumerate(chunk_paths, 1):
|
|
logger.info(f"[Sequential] Processing chunk {i}/{len(chunk_paths)}: {chunk_path}")
|
|
|
|
# Modify prompt to indicate this is part of a multi-part video
|
|
chunk_prompt = self._create_chunk_prompt(prompt, i, len(chunk_paths))
|
|
|
|
# Process this chunk
|
|
chunk_result = self.process_video(chunk_path, chunk_prompt, user_email)
|
|
chunk_results.append(chunk_result)
|
|
|
|
logger.info(f"[Sequential] Completed chunk {i}/{len(chunk_paths)}")
|
|
|
|
# Check for failures in any chunk
|
|
chunk_responses = []
|
|
for i, chunk_result in enumerate(chunk_results, 1):
|
|
if not chunk_result["success"]:
|
|
error_msg = f"Failed to process chunk {i}/{len(chunk_paths)}: {chunk_result.get('message', 'Unknown error')}"
|
|
logger.error(error_msg)
|
|
result["message"] = error_msg
|
|
result["chunks_processed"] = i - 1
|
|
return result
|
|
|
|
chunk_responses.append(chunk_result["content"])
|
|
|
|
# Combine all responses
|
|
logger.info("Combining responses from all chunks...")
|
|
combined_content = self.combine_chunk_responses(
|
|
chunk_responses,
|
|
prompt,
|
|
len(chunk_paths)
|
|
)
|
|
|
|
result["success"] = True
|
|
result["content"] = combined_content
|
|
result["chunks_processed"] = len(chunk_paths)
|
|
result["processing_time_seconds"] = round(time.time() - start_time, 2)
|
|
result["message"] = f"Successfully processed video in {len(chunk_paths)} chunks"
|
|
|
|
logger.info(f"Long video processing completed successfully: {len(chunk_paths)} chunks")
|
|
logger.info(f"Total processing time: {result['processing_time_seconds']}s")
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
import traceback
|
|
error_details = traceback.format_exc()
|
|
logger.error(f"Error processing long video: {str(e)}")
|
|
logger.error(error_details)
|
|
result["message"] = f"Error processing long video: {str(e)}"
|
|
result["error_details"] = error_details
|
|
return result
|
|
|
|
finally:
|
|
# Always clean up chunk files
|
|
if chunk_paths:
|
|
logger.info("Cleaning up temporary chunk files...")
|
|
self.video_splitter.cleanup_chunks(chunk_paths)
|
|
|
|
def _create_chunk_prompt(self, original_prompt: str, chunk_num: int,
|
|
total_chunks: int) -> str:
|
|
"""
|
|
Create a prompt for a video chunk that provides context about its position.
|
|
|
|
Args:
|
|
original_prompt: The original user prompt
|
|
chunk_num: Current chunk number (1-indexed)
|
|
total_chunks: Total number of chunks
|
|
|
|
Returns:
|
|
Modified prompt for the chunk
|
|
"""
|
|
# For meeting summaries, modify the prompt to focus on just summarizing what's in this segment
|
|
prompt_lower = original_prompt.lower()
|
|
is_meeting = "meeting" in prompt_lower
|
|
|
|
if is_meeting:
|
|
# For meetings, ask for a partial summary of this segment only
|
|
if chunk_num == 1:
|
|
context = f"[SEGMENT {chunk_num} of {total_chunks} - First 50 minutes] "
|
|
context += "Provide a summary of the discussion points and any action items covered in THIS segment only. "
|
|
context += "Do not try to provide a complete meeting summary - just summarize what happens in this part. "
|
|
elif chunk_num == total_chunks:
|
|
context = f"[SEGMENT {chunk_num} of {total_chunks} - Final segment] "
|
|
context += "Provide a summary of the discussion points and any action items covered in THIS final segment only. "
|
|
context += "This continues from previous segments, but only summarize what happens in this part. "
|
|
else:
|
|
context = f"[SEGMENT {chunk_num} of {total_chunks} - Middle segment] "
|
|
context += "Provide a summary of the discussion points and any action items covered in THIS segment only. "
|
|
context += "This is a middle portion of a longer recording - only summarize what happens in this part. "
|
|
|
|
return context + original_prompt
|
|
else:
|
|
# For other types, use the original approach
|
|
context = f"[PART {chunk_num} of {total_chunks}] "
|
|
|
|
if chunk_num == 1:
|
|
context += "This is the first segment of a longer video. "
|
|
elif chunk_num == total_chunks:
|
|
context += "This is the final segment continuing from previous parts. "
|
|
else:
|
|
context += "This is a middle segment continuing from previous parts. "
|
|
|
|
return context + original_prompt
|
|
|
|
def process_video_auto(self, video_path: str, prompt: str,
|
|
user_email: str = "anonymous") -> Dict[str, Any]:
|
|
"""
|
|
Automatically process a video, handling both short and long videos.
|
|
This method detects if the video needs splitting and processes accordingly.
|
|
|
|
Args:
|
|
video_path: Path to the video file
|
|
prompt: Text prompt to use for video analysis
|
|
user_email: Email of the user processing the video (for usage tracking)
|
|
|
|
Returns:
|
|
Dictionary with processing result or error
|
|
"""
|
|
logger.info(f"Auto-processing video: {video_path}")
|
|
|
|
# Check if video needs splitting
|
|
if self.video_splitter.needs_splitting(video_path):
|
|
logger.info("Video requires splitting, using long video processing")
|
|
return self.process_long_video(video_path, prompt, user_email)
|
|
else:
|
|
logger.info("Video is within single-chunk limit, using standard processing")
|
|
return self.process_video(video_path, prompt, user_email)
|
|
|
|
def process_video_batch(self, video_paths: List[str], filenames: List[str],
|
|
prompt: str, user_email: str, batch_id: str) -> Dict[str, Any]:
|
|
"""
|
|
Process multiple videos as a continuous batch.
|
|
Videos are treated as sequential segments of one long video.
|
|
|
|
Args:
|
|
video_paths: List of paths to video files (in order)
|
|
filenames: List of original filenames
|
|
prompt: Text prompt to use for analysis
|
|
user_email: Email of the user
|
|
batch_id: Unique identifier for this batch
|
|
|
|
Returns:
|
|
Dictionary with unified result
|
|
"""
|
|
logger.info(f"Batch {batch_id}: Processing {len(video_paths)} videos")
|
|
|
|
try:
|
|
# Get durations and calculate total
|
|
video_infos = []
|
|
total_duration_seconds = 0
|
|
|
|
for video_path, filename in zip(video_paths, filenames):
|
|
try:
|
|
duration = self.video_splitter.get_video_duration(video_path)
|
|
video_infos.append({
|
|
'path': video_path,
|
|
'filename': filename,
|
|
'duration': duration
|
|
})
|
|
total_duration_seconds += duration
|
|
logger.info(f"Batch {batch_id}: {filename} - {duration/60:.1f} minutes")
|
|
except Exception as e:
|
|
logger.error(f"Batch {batch_id}: Failed to get duration for {filename}: {e}")
|
|
raise
|
|
|
|
total_duration_minutes = total_duration_seconds / 60
|
|
chunk_duration_minutes = self.video_splitter.chunk_duration_seconds / 60
|
|
|
|
logger.info(f"Batch {batch_id}: Total duration: {total_duration_minutes:.1f} minutes")
|
|
|
|
# Decide processing strategy
|
|
needs_chunking = (
|
|
total_duration_minutes > chunk_duration_minutes or
|
|
any(v['duration'] > self.video_splitter.chunk_duration_seconds
|
|
for v in video_infos)
|
|
)
|
|
|
|
if needs_chunking:
|
|
logger.info(f"Batch {batch_id}: Using chunked processing")
|
|
return self._process_batch_with_chunking(
|
|
video_infos, prompt, user_email, batch_id
|
|
)
|
|
else:
|
|
logger.info(f"Batch {batch_id}: Processing directly (no chunking needed)")
|
|
return self._process_batch_direct(
|
|
video_infos, prompt, user_email, batch_id
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Batch {batch_id}: Processing error: {str(e)}")
|
|
raise
|
|
|
|
def _process_batch_direct(self, video_infos: List[Dict], prompt: str,
|
|
user_email: str, batch_id: str) -> Dict[str, Any]:
|
|
"""
|
|
Process batch directly without chunking (total duration < 54 minutes).
|
|
Uses two-stage synthesis even for short batches.
|
|
"""
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Direct processing of {len(video_infos)} videos")
|
|
stage1_start = time.time()
|
|
|
|
# Process each video separately first (stage 1: summaries)
|
|
summaries = []
|
|
for i, video_info in enumerate(video_infos, 1):
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Processing video {i}/{len(video_infos)}: {video_info['filename']}")
|
|
|
|
summary_prompt = self._create_chunk_summary_prompt(
|
|
original_prompt=prompt,
|
|
chunk_number=i,
|
|
total_chunks=len(video_infos),
|
|
video_name=video_info['filename']
|
|
)
|
|
|
|
# Log prompt if configured
|
|
if self.log_prompts:
|
|
logger.debug(f"Batch {batch_id}: [Stage 1] Prompt for video {i}:\n{summary_prompt[:300]}...")
|
|
|
|
try:
|
|
video_start = time.time()
|
|
result = self.process_video(video_info['path'], summary_prompt, user_email)
|
|
video_time = time.time() - video_start
|
|
summary = result.get('content', '')
|
|
summaries.append(summary)
|
|
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Video {i} complete: {len(summary)} chars in {video_time:.2f}s")
|
|
|
|
# Log summary preview if configured
|
|
if self.log_summaries:
|
|
logger.debug(f"Batch {batch_id}: [Stage 1] Video {i} summary preview:\n{summary[:300]}...")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Batch {batch_id}: [Stage 1] Failed to process {video_info['filename']}: {e}")
|
|
summaries.append(f"[Error processing {video_info['filename']}: {str(e)}]")
|
|
|
|
stage1_time = time.time() - stage1_start
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Complete - {len(summaries)} summaries in {stage1_time:.2f}s")
|
|
|
|
# Log traceability
|
|
logger.info(f"Batch {batch_id}: [Traceability] Video-to-summary mapping:")
|
|
for i, video_info in enumerate(video_infos, 1):
|
|
logger.info(f"Batch {batch_id}: - Video {i}: {video_info['filename']} → Summary {i}")
|
|
|
|
# Stage 2: Synthesize all summaries
|
|
stage2_start = time.time()
|
|
logger.info(f"Batch {batch_id}: [Stage 2] Synthesizing {len(summaries)} summaries")
|
|
chunk_metadata = [{'video_name': v['filename'], 'video_idx': i}
|
|
for i, v in enumerate(video_infos)]
|
|
|
|
final_content = self._synthesize_final_result(
|
|
summaries=summaries,
|
|
chunk_metadata=chunk_metadata,
|
|
original_prompt=prompt,
|
|
user_email=user_email
|
|
)
|
|
|
|
stage2_time = time.time() - stage2_start
|
|
total_time = stage1_time + stage2_time
|
|
|
|
# Log performance metrics
|
|
logger.info(f"Batch {batch_id}: [Metrics] Stage 1: {stage1_time:.2f}s, Stage 2: {stage2_time:.2f}s, Total: {total_time:.2f}s")
|
|
logger.info(f"Batch {batch_id}: [Metrics] Avg time per video: {stage1_time/len(video_infos):.2f}s")
|
|
|
|
return {
|
|
'content': final_content,
|
|
'total_chunks': len(video_infos),
|
|
'synthesis_stages': 2
|
|
}
|
|
def _process_batch_with_chunking(self, video_infos: List[Dict], prompt: str,
|
|
user_email: str, batch_id: str) -> Dict[str, Any]:
|
|
"""
|
|
Split batch videos into 54-min chunks and process with two-stage synthesis.
|
|
Treats all videos as one continuous sequence.
|
|
"""
|
|
logger.info(f"Batch {batch_id}: Starting chunked processing")
|
|
|
|
all_chunks = []
|
|
chunk_metadata = []
|
|
|
|
# Step 1: Split all videos into chunks
|
|
for video_idx, video_info in enumerate(video_infos):
|
|
video_path = video_info['path']
|
|
filename = video_info['filename']
|
|
duration = video_info['duration']
|
|
|
|
if duration > self.video_splitter.chunk_duration_seconds:
|
|
# Split long video
|
|
logger.info(f"Batch {batch_id}: Splitting {filename} ({duration/60:.1f} min)")
|
|
try:
|
|
chunks = self.video_splitter.split_video(video_path)
|
|
for chunk_idx, chunk_path in enumerate(chunks):
|
|
all_chunks.append(chunk_path)
|
|
chunk_metadata.append({
|
|
'video_idx': video_idx,
|
|
'video_name': filename,
|
|
'chunk_idx': chunk_idx,
|
|
'is_split': True
|
|
})
|
|
except Exception as e:
|
|
logger.error(f"Batch {batch_id}: Failed to split {filename}: {e}")
|
|
raise
|
|
else:
|
|
# Use whole video as one chunk
|
|
all_chunks.append(video_path)
|
|
chunk_metadata.append({
|
|
'video_idx': video_idx,
|
|
'video_name': filename,
|
|
'chunk_idx': 0,
|
|
'is_split': False
|
|
})
|
|
|
|
logger.info(f"Batch {batch_id}: Split into {len(all_chunks)} total chunks")
|
|
|
|
# Step 2: Process chunks with two-stage synthesis
|
|
try:
|
|
result = self._process_chunks_two_stage(
|
|
all_chunks,
|
|
chunk_metadata,
|
|
prompt,
|
|
user_email,
|
|
batch_id
|
|
)
|
|
finally:
|
|
# Step 3: Cleanup split chunks (not original videos)
|
|
for chunk_path, metadata in zip(all_chunks, chunk_metadata):
|
|
if metadata['is_split']:
|
|
try:
|
|
if os.path.exists(chunk_path):
|
|
os.remove(chunk_path)
|
|
logger.info(f"Batch {batch_id}: Cleaned up chunk: {os.path.basename(chunk_path)}")
|
|
except Exception as e:
|
|
logger.warning(f"Batch {batch_id}: Cleanup failed for {chunk_path}: {e}")
|
|
|
|
return result
|
|
|
|
def _process_chunks_two_stage(self, chunk_paths: List[str], chunk_metadata: List[Dict],
|
|
prompt: str, user_email: str, batch_id: str) -> Dict[str, Any]:
|
|
"""
|
|
Two-stage synthesis:
|
|
Stage 1: Each chunk → concise summary
|
|
Stage 2: All summaries → final unified result
|
|
"""
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Generating summaries for {len(chunk_paths)} chunks")
|
|
stage1_start = time.time()
|
|
|
|
chunk_summaries = []
|
|
|
|
if self.max_parallel_chunks > 1:
|
|
# Parallel processing
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Using parallel processing with {self.max_parallel_chunks} workers")
|
|
with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor:
|
|
futures = []
|
|
|
|
for i, (chunk_path, metadata) in enumerate(zip(chunk_paths, chunk_metadata)):
|
|
summary_prompt = self._create_chunk_summary_prompt(
|
|
original_prompt=prompt,
|
|
chunk_number=i + 1,
|
|
total_chunks=len(chunk_paths),
|
|
video_name=metadata['video_name']
|
|
)
|
|
|
|
# Log prompt if configured
|
|
if self.log_prompts:
|
|
logger.debug(f"Batch {batch_id}: [Stage 1] Prompt for chunk {i+1} ({metadata['video_name']}):\n{summary_prompt[:300]}...")
|
|
|
|
future = executor.submit(
|
|
self._process_single_chunk,
|
|
(i, chunk_path, summary_prompt, len(chunk_paths), user_email)
|
|
)
|
|
futures.append(future)
|
|
|
|
# Collect results
|
|
completed_count = 0
|
|
for future in as_completed(futures):
|
|
try:
|
|
chunk_idx, result = future.result()
|
|
|
|
# Extract content from result dict
|
|
if result.get('success'):
|
|
summary = result.get('content', '')
|
|
else:
|
|
summary = f"[Error: {result.get('message', 'Unknown error')}]"
|
|
|
|
chunk_summaries.append((chunk_idx, summary))
|
|
completed_count += 1
|
|
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Chunk {chunk_idx + 1}/{len(chunk_paths)} complete ({completed_count}/{len(chunk_paths)} total)")
|
|
|
|
# Log summary preview if configured
|
|
if self.log_summaries and isinstance(summary, str) and not summary.startswith('[Error'):
|
|
logger.debug(f"Batch {batch_id}: [Stage 1] Chunk {chunk_idx + 1} summary preview:\n{summary[:300]}...")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Batch {batch_id}: [Stage 1] Failed to process chunk: {e}")
|
|
chunk_summaries.append((len(chunk_summaries), f"[Error: {str(e)}]"))
|
|
|
|
else:
|
|
# Sequential processing
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Using sequential processing")
|
|
for i, (chunk_path, metadata) in enumerate(zip(chunk_paths, chunk_metadata)):
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Processing chunk {i+1}/{len(chunk_paths)} from {metadata['video_name']}")
|
|
|
|
summary_prompt = self._create_chunk_summary_prompt(
|
|
original_prompt=prompt,
|
|
chunk_number=i + 1,
|
|
total_chunks=len(chunk_paths),
|
|
video_name=metadata['video_name']
|
|
)
|
|
|
|
# Log prompt if configured
|
|
if self.log_prompts:
|
|
logger.debug(f"Batch {batch_id}: [Stage 1] Prompt for chunk {i+1}:\n{summary_prompt[:300]}...")
|
|
|
|
try:
|
|
chunk_start = time.time()
|
|
result = self.process_video(chunk_path, summary_prompt, user_email)
|
|
chunk_time = time.time() - chunk_start
|
|
summary = result.get('content', '')
|
|
chunk_summaries.append((i, summary))
|
|
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Chunk {i + 1} complete: {len(summary)} chars in {chunk_time:.2f}s")
|
|
|
|
# Log summary preview if configured
|
|
if self.log_summaries:
|
|
logger.debug(f"Batch {batch_id}: [Stage 1] Chunk {i+1} summary preview:\n{summary[:300]}...")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Batch {batch_id}: [Stage 1] Failed to process chunk {i + 1}: {e}")
|
|
chunk_summaries.append((i, f"[Error: {str(e)}]"))
|
|
|
|
# Sort by chunk index
|
|
chunk_summaries.sort(key=lambda x: x[0])
|
|
summaries = [s[1] for s in chunk_summaries]
|
|
|
|
stage1_time = time.time() - stage1_start
|
|
logger.info(f"Batch {batch_id}: [Stage 1] Complete - {len(summaries)} summaries generated in {stage1_time:.2f}s")
|
|
|
|
# Log traceability
|
|
logger.info(f"Batch {batch_id}: [Traceability] Chunk-to-summary mapping:")
|
|
for i, metadata in enumerate(chunk_metadata):
|
|
chunk_info = f"chunk {metadata.get('chunk_idx', 0)+1}" if metadata.get('is_split') else "whole video"
|
|
logger.info(f"Batch {batch_id}: - Chunk {i+1}: {metadata['video_name']} ({chunk_info}) → Summary {i+1}")
|
|
|
|
# Stage 2: Synthesize all summaries
|
|
stage2_start = time.time()
|
|
logger.info(f"Batch {batch_id}: [Stage 2] Synthesizing {len(summaries)} summaries into final result")
|
|
|
|
final_content = self._synthesize_final_result(
|
|
summaries=summaries,
|
|
chunk_metadata=chunk_metadata,
|
|
original_prompt=prompt,
|
|
user_email=user_email
|
|
)
|
|
|
|
stage2_time = time.time() - stage2_start
|
|
total_time = stage1_time + stage2_time
|
|
|
|
# Log performance metrics
|
|
logger.info(f"Batch {batch_id}: [Metrics] Stage 1: {stage1_time:.2f}s, Stage 2: {stage2_time:.2f}s, Total: {total_time:.2f}s")
|
|
logger.info(f"Batch {batch_id}: [Metrics] Avg time per chunk: {stage1_time/len(chunk_paths):.2f}s")
|
|
logger.info(f"Batch {batch_id}: [Metrics] Total API calls: {len(chunk_paths) + 1}") # +1 for synthesis
|
|
|
|
return {
|
|
'content': final_content,
|
|
'total_chunks': len(chunk_paths),
|
|
'synthesis_stages': 2
|
|
}
|
|
|
|
def _create_chunk_summary_prompt(self, original_prompt: str, chunk_number: int,
|
|
total_chunks: int, video_name: str) -> str:
|
|
"""
|
|
Create a prompt that asks for a summary suitable for later synthesis.
|
|
"""
|
|
summary_prompt = f"""You are analyzing segment {chunk_number} of {total_chunks} from video "{video_name}".
|
|
|
|
Original user request:
|
|
{original_prompt}
|
|
|
|
Your task: Provide a CONCISE SUMMARY of this segment that captures:
|
|
1. Key information relevant to the user's request
|
|
2. Important details, facts, or insights
|
|
3. Any diagrams, charts, or structured data (preserve Mermaid syntax if applicable)
|
|
4. Chronological context if relevant
|
|
|
|
Keep the summary focused and information-dense. This summary will be combined with {total_chunks - 1} other summaries to create a final unified response.
|
|
|
|
Do NOT mention "this is segment X" or "this chunk contains". Just provide the factual content.
|
|
"""
|
|
return summary_prompt
|
|
|
|
def _detect_prompt_type(self, prompt: str, summaries: List[str]) -> str:
|
|
"""
|
|
Detect the type of prompt to apply specialized synthesis strategy.
|
|
|
|
Args:
|
|
prompt: Original user prompt
|
|
summaries: List of summaries (to check content)
|
|
|
|
Returns:
|
|
Prompt type: "meeting_summary", "documentation", "documentation_with_charts", or "generic"
|
|
"""
|
|
prompt_lower = prompt.lower()
|
|
|
|
# Check for meeting-related keywords
|
|
if any(keyword in prompt_lower for keyword in ["meeting", "discussion", "action item", "agenda"]):
|
|
return "meeting_summary"
|
|
|
|
# Check for documentation keywords
|
|
if any(keyword in prompt_lower for keyword in ["documentation", "process", "training", "knowledge base", "step by step"]):
|
|
# Check if it also includes charts/diagrams
|
|
if any(keyword in prompt_lower for keyword in ["diagram", "chart", "mermaid", "workflow"]):
|
|
return "documentation_with_charts"
|
|
return "documentation"
|
|
|
|
# Default to generic
|
|
return "generic"
|
|
|
|
def _synthesize_final_result(self, summaries: List[str], chunk_metadata: List[Dict],
|
|
original_prompt: str, user_email: str) -> str:
|
|
"""
|
|
Synthesize all chunk summaries into single cohesive result using Gemini.
|
|
Uses prompt type detection to apply specialized synthesis strategies.
|
|
"""
|
|
# Extract video names for context
|
|
video_names = list(set(m['video_name'] for m in chunk_metadata))
|
|
num_videos = len(video_names)
|
|
|
|
# Prepare summaries text
|
|
summaries_text = ""
|
|
total_summary_chars = 0
|
|
for i, summary in enumerate(summaries, 1):
|
|
video_name = chunk_metadata[i-1]['video_name']
|
|
summaries_text += f"\n\n--- Summary {i} (from {video_name}) ---\n{summary.strip()}\n"
|
|
total_summary_chars += len(summary)
|
|
|
|
logger.info(f"[Stage 2] Combined summaries: {len(summaries)} summaries, {total_summary_chars} total chars")
|
|
|
|
# Detect prompt type for specialized synthesis
|
|
prompt_type = self._detect_prompt_type(original_prompt, summaries)
|
|
logger.info(f"[Stage 2] Detected prompt type: {prompt_type}")
|
|
|
|
# Check for Mermaid diagrams
|
|
has_diagrams = any('```mermaid' in s for s in summaries)
|
|
|
|
# Create synthesis prompt based on type
|
|
if prompt_type == "meeting_summary":
|
|
synthesis_prompt = self._create_synthesis_prompt_meeting(
|
|
summaries_text, original_prompt, num_videos, video_names
|
|
)
|
|
elif prompt_type == "documentation":
|
|
synthesis_prompt = self._create_synthesis_prompt_documentation(
|
|
summaries_text, original_prompt, num_videos, video_names
|
|
)
|
|
elif has_diagrams:
|
|
synthesis_prompt = self._create_synthesis_prompt_with_diagrams(
|
|
summaries_text, original_prompt, num_videos, video_names
|
|
)
|
|
else:
|
|
synthesis_prompt = self._create_synthesis_prompt_generic(
|
|
summaries_text, original_prompt, num_videos, video_names
|
|
)
|
|
|
|
# Log synthesis prompt if configured
|
|
if self.log_prompts:
|
|
logger.debug(f"[Stage 2] Synthesis prompt preview:\n{synthesis_prompt[:500]}...")
|
|
|
|
# Send to Gemini for final synthesis
|
|
logger.info(f"[Stage 2] Sending synthesis request to Gemini API (model: {self.synthesis_model})")
|
|
|
|
with self._rate_limit_lock:
|
|
time.sleep(2)
|
|
|
|
synthesis_start = time.time()
|
|
try:
|
|
response = self.client.models.generate_content(
|
|
model=self.synthesis_model,
|
|
contents=[{"text": synthesis_prompt}]
|
|
)
|
|
|
|
synthesis_time = time.time() - synthesis_start
|
|
|
|
synthesized_content = ""
|
|
if response.parts:
|
|
for part in response.parts:
|
|
if hasattr(part, 'text'):
|
|
synthesized_content += part.text
|
|
|
|
if not synthesized_content:
|
|
logger.warning("[Stage 2] Synthesis returned empty, falling back to concatenation")
|
|
return self._fallback_concatenation(summaries, chunk_metadata)
|
|
|
|
logger.info(f"[Stage 2] Synthesis complete: {len(synthesized_content)} chars in {synthesis_time:.2f}s")
|
|
|
|
# Log synthesis result preview if configured
|
|
if self.log_summaries:
|
|
logger.debug(f"[Stage 2] Synthesized result preview:\n{synthesized_content[:500]}...")
|
|
|
|
return synthesized_content
|
|
|
|
except Exception as e:
|
|
logger.error(f"[Stage 2] Synthesis failed: {str(e)}, using fallback")
|
|
return self._fallback_concatenation(summaries, chunk_metadata)
|
|
|
|
def _create_synthesis_prompt_generic(self, summaries_text: str, original_prompt: str,
|
|
num_videos: int, video_names: List[str]) -> str:
|
|
"""
|
|
Generic synthesis prompt for all content types.
|
|
"""
|
|
if num_videos > 1:
|
|
video_context = f"{num_videos} videos: {', '.join(video_names)}"
|
|
else:
|
|
video_context = f"one video: {video_names[0]}"
|
|
|
|
prompt = f"""You are creating a FINAL UNIFIED RESPONSE by synthesizing multiple segment summaries.
|
|
|
|
Context:
|
|
- Source: {video_context}
|
|
- The video(s) were split into segments for processing
|
|
- Below are summaries from each segment
|
|
|
|
Original user request:
|
|
"{original_prompt}"
|
|
|
|
Segment summaries:
|
|
{summaries_text}
|
|
|
|
Your task: Create ONE cohesive, unified response that:
|
|
|
|
1. FULFILLS the original user request completely
|
|
2. INTEGRATES information from all segments naturally
|
|
3. DOES NOT mention segments, chunks, or parts
|
|
4. MAINTAINS any requested format (lists, tables, structure)
|
|
5. CONSOLIDATES duplicate information
|
|
6. PRESERVES chronological flow if relevant
|
|
7. APPEARS as if analyzing the complete video in one pass
|
|
|
|
Quality requirements:
|
|
- No phrases like "In segment 1", "The first part", "Chunk 2 discusses"
|
|
- Natural transitions between topics
|
|
- Unified narrative or structure
|
|
- Professional, coherent final product
|
|
|
|
Begin your unified response:
|
|
"""
|
|
return prompt
|
|
|
|
def _create_synthesis_prompt_meeting(self, summaries_text: str, original_prompt: str,
|
|
num_videos: int, video_names: List[str]) -> str:
|
|
"""
|
|
Specialized synthesis prompt for meeting summaries.
|
|
"""
|
|
if num_videos > 1:
|
|
video_context = f"{num_videos} videos: {', '.join(video_names)}"
|
|
else:
|
|
video_context = f"one video: {video_names[0]}"
|
|
|
|
prompt = f"""You are creating a FINAL UNIFIED MEETING SUMMARY by synthesizing multiple segment summaries.
|
|
|
|
Context:
|
|
- Source: {video_context}
|
|
- The video(s) were split into segments for processing
|
|
- Below are summaries from each segment
|
|
|
|
Original user request:
|
|
"{original_prompt}"
|
|
|
|
Segment summaries:
|
|
{summaries_text}
|
|
|
|
Your task: Create ONE cohesive meeting summary that:
|
|
|
|
1. MEETING OVERVIEW: Provide a high-level summary of the meeting
|
|
2. DISCUSSION POINTS: Consolidate all discussion topics into logical sections
|
|
- Group related discussions together
|
|
- Maintain chronological flow where relevant
|
|
- Capture key decisions made
|
|
3. ACTION ITEMS: Create a MASTER LIST of all action items
|
|
- Format: "Action item - Owner (if mentioned) - Due date (if mentioned)"
|
|
- Consolidate duplicates
|
|
- Remove redundant items
|
|
4. KEY OUTCOMES: Summarize main conclusions and next steps
|
|
|
|
Quality requirements:
|
|
- Professional meeting summary format
|
|
- No phrases like "In segment 1", "The first part", "Chunk 2 discusses"
|
|
- Natural transitions between topics
|
|
- One unified document that reads as if from single analysis
|
|
- Clear, actionable items with owners where possible
|
|
|
|
Begin your unified meeting summary:
|
|
"""
|
|
return prompt
|
|
|
|
def _create_synthesis_prompt_documentation(self, summaries_text: str, original_prompt: str,
|
|
num_videos: int, video_names: List[str]) -> str:
|
|
"""
|
|
Specialized synthesis prompt for process documentation.
|
|
"""
|
|
if num_videos > 1:
|
|
video_context = f"{num_videos} videos: {', '.join(video_names)}"
|
|
else:
|
|
video_context = f"one video: {video_names[0]}"
|
|
|
|
prompt = f"""You are creating FINAL UNIFIED PROCESS DOCUMENTATION by synthesizing multiple segment summaries.
|
|
|
|
Context:
|
|
- Source: {video_context}
|
|
- The video(s) were split into segments for processing
|
|
- Below are summaries from each segment
|
|
|
|
Original user request:
|
|
"{original_prompt}"
|
|
|
|
Segment summaries:
|
|
{summaries_text}
|
|
|
|
Your task: Create ONE comprehensive process documentation that:
|
|
|
|
1. OVERVIEW: Provide a high-level description of the process
|
|
2. PREREQUISITES: List any requirements or setup needed (if mentioned)
|
|
3. STEP-BY-STEP INSTRUCTIONS: Combine all steps into one sequential guide
|
|
- Number steps sequentially (Step 1, Step 2, etc.)
|
|
- Include sub-steps where appropriate
|
|
- Be clear and detailed for someone new to the process
|
|
4. TIPS & BEST PRACTICES: Consolidate helpful tips
|
|
5. TROUBLESHOOTING: Include common issues and solutions (if mentioned)
|
|
|
|
Quality requirements:
|
|
- Clear, sequential flow from start to finish
|
|
- No phrases like "In segment 1", "The first part", "Chunk 2 shows"
|
|
- Professional documentation format
|
|
- Easy to follow for training or reference
|
|
- One unified guide that reads naturally
|
|
|
|
Begin your unified process documentation:
|
|
"""
|
|
return prompt
|
|
|
|
def _create_synthesis_prompt_with_diagrams(self, summaries_text: str, original_prompt: str,
|
|
num_videos: int, video_names: List[str]) -> str:
|
|
"""
|
|
Synthesis prompt specifically for content with Mermaid diagrams.
|
|
"""
|
|
prompt = f"""You are creating a FINAL UNIFIED RESPONSE by synthesizing multiple segment summaries that contain Mermaid diagrams.
|
|
|
|
Original user request:
|
|
"{original_prompt}"
|
|
|
|
Segment summaries (containing diagrams):
|
|
{summaries_text}
|
|
|
|
Your task: Create ONE unified response with a SINGLE MERGED DIAGRAM.
|
|
|
|
Requirements:
|
|
1. MERGE all Mermaid diagrams into ONE comprehensive diagram
|
|
2. Combine nodes, relationships, and flows intelligently
|
|
3. Remove duplicate nodes/edges
|
|
4. Maintain logical structure and connections
|
|
5. Synthesize accompanying text naturally
|
|
6. DO NOT mention segments or parts
|
|
|
|
Diagram merging strategy:
|
|
- If multiple flowcharts: combine into single flowchart with logical flow
|
|
- If multiple architecture diagrams: create unified architecture
|
|
- If sequential diagrams: show complete sequence
|
|
- Use clear labels and grouping where appropriate
|
|
|
|
Output format:
|
|
```mermaid
|
|
[Your merged diagram here]
|
|
```
|
|
|
|
[Unified explanatory text here]
|
|
|
|
Begin your unified response with merged diagram:
|
|
"""
|
|
return prompt
|
|
|
|
def _fallback_concatenation(self, summaries: List[str], chunk_metadata: List[Dict]) -> str:
|
|
"""
|
|
Fallback method when AI synthesis fails.
|
|
Simple concatenation with section markers.
|
|
"""
|
|
logger.info("Using fallback concatenation method")
|
|
|
|
result = "# Combined Analysis\n\n"
|
|
result += "*Note: This is a combined analysis from multiple video segments.*\n\n"
|
|
|
|
for i, summary in enumerate(summaries, 1):
|
|
video_name = chunk_metadata[i-1]['video_name']
|
|
result += f"## Segment {i} - {video_name}\n\n"
|
|
result += summary.strip() + "\n\n"
|
|
|
|
return result
|