# video-query/backend/video_processor.py

from google import genai
import mimetypes
import time
import os
import logging
import requests
import json
import datetime
import base64
from typing import Dict, Any, Optional, List, Tuple
from dotenv import load_dotenv
from video_splitter import VideoSplitter
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading

# Load environment variables from .env file so the os.getenv() lookups in
# VideoProcessor.__init__ (API key, model names, logging flags) can see them.
load_dotenv()

# Module-wide logger; all log output in this file goes through it.
logger = logging.getLogger('video_query')

class VideoProcessor:
    """
    Class to handle video uploads and processing with Gemini API.

    Holds the Gemini client, the default prompts for each processing mode and
    the helpers that merge per-chunk answers of long, split videos into a
    single document.
    """
    # Default prompts for different modes, keyed by mode name.
    # The "custom" entry is intentionally empty: the caller supplies the text.
    PROMPTS = {
        "meeting_summary": "Generate a detailed summary of the meeting in the attached video recording, including discussion points and action items with owners",
        "process_documentation": "Generate detailed process documentation suitable for reference or training based on the process illustrated in the attached video recording. Write the documentation so that a new user will be able to follow step by step and accomplish the task illustrated in the video",
        "documentation_with_charts": "Analyze this video to create comprehensive process documentation with workflow diagrams for a knowledge base article. Follow these requirements exactly:\n\n1. CONTENT REQUIREMENTS:\n   - Provide a detailed step-by-step explanation of the process shown\n   - Be extremely verbose and thorough - include all relevant details, context, and nuances\n   - Structure as a complete knowledge base article with clear sections\n   - Include overview, detailed steps, tips, and troubleshooting where applicable\n\n2. MERMAID DIAGRAM REQUIREMENTS:\n   - Create workflow diagrams using valid Mermaid syntax where helpful\n   - CRITICAL: Use only simple alphanumeric text in node descriptions and labels\n   - CRITICAL: No special characters like quotes brackets colons semicolons or symbols in node text\n   - CRITICAL: Use underscores instead of spaces in node IDs and labels\n   - CRITICAL: Keep all text simple to avoid syntax errors\n   - Example format: Start_Process --> Complete_Task --> End_Process\n   - Use flowchart format: graph TD or graph LR\n\n3. OUTPUT STRUCTURE:\n   - Title and overview section\n   - Prerequisites section if applicable\n   - Detailed step-by-step process\n   - Mermaid workflow diagram(s) showing the process flow\n   - Tips and best practices\n   - Troubleshooting common issues\n\nEnsure all Mermaid diagrams use simple text without special characters to prevent parsing errors.",
        "custom": ""  # Custom prompt will be provided by the user
    }

    # Maximum video duration in minutes (Gemini limitation)
    # NOTE(review): the combine helpers label segments in 50-minute steps
    # (see _format_time_range) - confirm the splitter uses a matching length.
    MAX_VIDEO_DURATION = 55

    # Threshold for chunked upload (10MB), expressed in bytes
    # NOTE(review): process_video uses its own local 10 MB threshold rather
    # than this constant - confirm where this value is consumed.
    CHUNKED_UPLOAD_THRESHOLD = 10 * 1024 * 1024

    # Webhook URL for tracking usage (POSTed to by send_usage_webhook)
    # NOTE(review): endpoint is hard-coded in source; consider moving it to
    # configuration if it should differ per environment or be kept private.
    WEBHOOK_URL = "https://hook.us1.make.celonis.com/8ri1h8b2he4wudp2jku69mgcxumzxf3v"

    # Parallel processing configuration
    # Default max workers for parallel chunk processing
    # Free tier: 5 RPM (use 3-4 workers to be safe)
    # Paid tier: 150 RPM (can use more workers)
    DEFAULT_MAX_WORKERS = 4  # Conservative default for free tier

    # Model configuration (each can be overridden through an environment
    # variable read in __init__)
    DEFAULT_PROCESSING_MODEL = "gemini-2.5-pro"  # Model for individual video processing
    DEFAULT_SYNTHESIS_MODEL = "gemini-2.5-pro"   # Model for batch synthesis (updated for consistency)

    def __init__(self, api_key: Optional[str] = None, max_parallel_chunks: Optional[int] = None):
        """
        Initialize with API key from environment variable or direct setting

        Besides creating the Gemini client this reads the following optional
        environment variables: VIDEO_PROCESSOR_MODEL, VIDEO_SYNTHESIS_MODEL,
        BATCH_PROCESSING_LOG_PROMPTS and BATCH_PROCESSING_LOG_SUMMARIES.

        Args:
            api_key: Google API key for Gemini; falls back to GOOGLE_API_KEY
            max_parallel_chunks: Maximum number of chunks to process in parallel
                                (default: 4, recommended 3-4 for free tier, up to 10+ for paid tier).
                                None, 0 and negative values select the default.

        Raises:
            ValueError: If no API key was passed and GOOGLE_API_KEY is not set.
        """
        self.api_key = api_key or os.getenv("GOOGLE_API_KEY")
        if not self.api_key:
            logger.error("API key not provided")
            raise ValueError("API key not provided - set GOOGLE_API_KEY environment variable or pass when initializing")

        # Initialize the Gemini client
        logger.info("Initializing Gemini API client")
        self.client = genai.Client(api_key=self.api_key)
        logger.info("Gemini API client initialized successfully")

        # Set parallel processing configuration.
        # A worker count below 1 can never process anything, so treat it like
        # "not specified" instead of storing an unusable value.
        if max_parallel_chunks is not None and max_parallel_chunks < 1:
            logger.warning(
                f"Invalid max_parallel_chunks={max_parallel_chunks}; "
                f"using default of {self.DEFAULT_MAX_WORKERS}"
            )
            max_parallel_chunks = None
        self.max_parallel_chunks = max_parallel_chunks or self.DEFAULT_MAX_WORKERS
        logger.info(f"Parallel processing enabled with max {self.max_parallel_chunks} concurrent chunks")

        # Initialize video splitter
        self.video_splitter = VideoSplitter()

        # Thread lock for rate limiting (held while pausing before each model call)
        self._rate_limit_lock = threading.Lock()

        # Load configuration from environment variables
        self.processing_model = os.getenv("VIDEO_PROCESSOR_MODEL", self.DEFAULT_PROCESSING_MODEL)
        self.synthesis_model = os.getenv("VIDEO_SYNTHESIS_MODEL", self.DEFAULT_SYNTHESIS_MODEL)
        # Boolean flags: only the (case-insensitive) string "true" enables them
        self.log_prompts = os.getenv("BATCH_PROCESSING_LOG_PROMPTS", "false").lower() == "true"
        self.log_summaries = os.getenv("BATCH_PROCESSING_LOG_SUMMARIES", "false").lower() == "true"

        logger.info(f"Configuration: processing_model={self.processing_model}, synthesis_model={self.synthesis_model}")
        logger.info(f"Logging: prompts={self.log_prompts}, summaries={self.log_summaries}")

    def send_usage_webhook(self, user_email: str, prompt: str) -> None:
        """
        Send usage data to webhook for tracking purposes

        Args:
            user_email: Email of the user who processed the video
            prompt: The prompt used for processing
        """
        try:
            current_datetime = datetime.datetime.now().isoformat()

            webhook_data = {
                "tool": "VIDEOQUERY",
                "date": current_datetime,
                "user": user_email,
                "model": "GEMINI",
                "settings": "no settings",
                "subTool": "no subTool",
                "prompt": prompt,
                "negativePrompt": "no NEGATIVE_PROMPT",
                "image": "no image"
            }

            logger.info(f"Sending usage data to webhook for user: {user_email}")

            response = requests.post(
                self.WEBHOOK_URL,
                headers={"Content-Type": "application/json"},
                data=json.dumps(webhook_data),
                timeout=10  # 10 second timeout
            )

            if response.status_code == 200:
                logger.info("Successfully sent usage data to webhook")
            else:
                logger.warning(f"Webhook request failed with status code: {response.status_code}")
                logger.warning(f"Response: {response.text}")

        except Exception as e:
            logger.error(f"Error sending usage data to webhook: {str(e)}")
            # Don't raise the exception - webhook failure shouldn't block the main flow

    def process_video(self, video_path: str, prompt: str, user_email: str = "anonymous") -> Dict[str, Any]:
        """
        Process a video with the given prompt using Gemini API

        Steps: local validation (existence, duration, MIME type, ffprobe
        readability), then the video is sent either inline as base64
        (files < 10 MB) or through the Gemini file upload API (files >= 10 MB),
        the model is called with retries on network errors, and the text parts
        of the response are concatenated. A file uploaded to Gemini is always
        deleted again before returning, whether processing succeeded or not.

        Args:
            video_path: Path to the video file
            prompt: Text prompt to use for video analysis
            user_email: Email of the user processing the video (for usage tracking)

        Returns:
            Dictionary with the keys "success", "message", "content" and
            "processing_time_seconds". When an unexpected exception is caught
            during processing, "message" describes it and an "error_details"
            key holding the traceback text is added. Processing failures are
            reported through this dictionary rather than raised.
        """
        # Local imports: neither module is imported at the top of this file.
        import subprocess
        import traceback

        start_time = time.time()

        result = {
            "success": False,
            "message": "",
            "content": "",
            "processing_time_seconds": 0
        }

        logger.info(f"Processing video: {video_path}")
        logger.info(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")

        if not os.path.exists(video_path):
            error_msg = f"Video file not found at '{video_path}'"
            logger.error(error_msg)
            result["message"] = error_msg
            return result

        # Both are initialised before the try block so that the error logging
        # and the finally clause can rely on them existing on every path.
        uploaded_file = None
        video_duration = None

        try:
            # Get file size
            file_size = os.path.getsize(video_path)
            file_size_mb = file_size / (1024 * 1024)
            logger.info(f"File size: {file_size_mb:.2f} MB")

            # Get video duration to detect too-short videos
            try:
                video_duration = self.video_splitter.get_video_duration(video_path)
                if video_duration:
                    logger.info(f"Video duration: {video_duration:.2f} seconds ({video_duration/60:.2f} minutes)")

                    # Check if video is too short (< 1 second can cause INVALID_ARGUMENT)
                    if video_duration < 1.0:
                        error_msg = f"Video too short: {video_duration:.2f}s. Gemini requires videos >= 1 second"
                        logger.error(error_msg)
                        result["message"] = error_msg
                        return result
                else:
                    logger.warning("Could not determine video duration")
            except Exception as dur_err:
                # Duration is only a pre-check; failing to read it is not fatal
                logger.warning(f"Error checking video duration: {str(dur_err)}")

            # Determine MIME type for the video
            mime_type, _ = mimetypes.guess_type(video_path)
            if not mime_type:
                logger.info("Could not determine MIME type, using default: video/mp4")
                mime_type = "video/mp4"  # Fallback
            else:
                logger.info(f"MIME type: {mime_type}")

            # Validate MIME type is video
            if not mime_type.startswith('video/'):
                error_msg = f"Invalid file type: {mime_type}. Only video files are supported (mp4, avi, mov, mkv, webm, etc.)"
                logger.error(error_msg)
                result["message"] = error_msg
                return result

            # Validate video file is readable and not corrupted
            # This provides a secondary check in case validation at upload didn't happen
            try:
                probe_result = subprocess.run(
                    ['ffprobe', '-v', 'error', '-show_entries', 'format=duration,format_name',
                     '-of', 'default=noprint_wrappers=1', video_path],
                    capture_output=True, text=True, timeout=10
                )
                if probe_result.returncode != 0:
                    error_detail = probe_result.stderr.strip()
                    error_detail_lower = error_detail.lower()

                    # Provide user-friendly error messages for common issues
                    if "moov atom not found" in error_detail_lower:
                        error_msg = "Video file is incomplete or corrupted (missing header). The file may not have uploaded completely. Please try uploading again."
                    elif "invalid data found" in error_detail_lower:
                        error_msg = "Video file contains invalid data and cannot be processed. Please check the file or try re-encoding it."
                    else:
                        error_msg = f"Video file appears to be corrupted or unreadable. Technical details: {error_detail}"

                    logger.error(f"Video validation failed: {error_msg}")
                    result["message"] = error_msg
                    return result
                logger.info(f"Video file validation successful. Format info: {probe_result.stdout[:100]}")
            except subprocess.TimeoutExpired:
                logger.warning("Video validation timed out after 10s - proceeding anyway")
            except FileNotFoundError:
                # Raised when the ffprobe executable itself is missing
                logger.warning("ffprobe not found - skipping video file validation")
            except Exception as val_err:
                logger.warning(f"Could not validate video file: {str(val_err)} - proceeding anyway")

            # Use different approach based on file size
            # Small files (< 10MB): use inline base64 data (faster, no upload wait)
            # Large files (>= 10MB): use file upload API (handles larger files)
            # Note: Base64 adds ~33% overhead, so 10MB file = ~13.3MB base64
            SIZE_THRESHOLD_MB = 10

            if file_size_mb < SIZE_THRESHOLD_MB:
                # Small file: Use base64 encoding for inline data
                logger.info(f"File < {SIZE_THRESHOLD_MB}MB, using inline base64 data")
                with open(video_path, "rb") as video_file_obj:
                    video_base64 = base64.b64encode(video_file_obj.read()).decode('utf-8')

                logger.info(f"Base64 encoding complete. Size: {len(video_base64)} characters")

                # Create the content parts using inline data
                prompt_parts = [
                    {"text": prompt},
                    {"inline_data": {
                        "mime_type": mime_type,
                        "data": video_base64
                    }}
                ]
            else:
                # Large file: Use file upload API
                logger.info(f"File >= {SIZE_THRESHOLD_MB}MB, using file upload API")
                upload_start = time.time()

                uploaded_file = self.client.files.upload(
                    file=video_path
                )
                logger.info(f"Upload complete in {time.time() - upload_start:.1f}s. File URI: {uploaded_file.uri}")
                logger.info(f"Initial file state: {uploaded_file.state}")

                # Wait for file to be processed with timeout
                max_wait_time = 300  # 5 minutes timeout
                wait_start = time.time()
                check_count = 0

                while uploaded_file.state == "PROCESSING":
                    check_count += 1
                    elapsed = time.time() - wait_start

                    if elapsed > max_wait_time:
                        error_msg = f"File processing timeout after {max_wait_time}s. File may be too large or corrupted."
                        logger.error(error_msg)
                        result["message"] = error_msg
                        # The finally clause below removes the remote file
                        return result

                    logger.info(f"File is still processing (check #{check_count}, {elapsed:.0f}s elapsed), waiting...")
                    time.sleep(3)

                    try:
                        uploaded_file = self.client.files.get(name=uploaded_file.name)
                        logger.info(f"Updated file state: {uploaded_file.state}")
                    except Exception as status_err:
                        logger.error(f"Error checking file status: {str(status_err)}")
                        # Wait a bit longer and try again
                        time.sleep(5)
                        try:
                            uploaded_file = self.client.files.get(name=uploaded_file.name)
                        except Exception as retry_err:
                            error_msg = f"Failed to check file upload status: {str(retry_err)}"
                            logger.error(error_msg)
                            result["message"] = error_msg
                            return result

                if uploaded_file.state != "ACTIVE":
                    error_msg = f"File upload failed. State: {uploaded_file.state}"
                    logger.error(error_msg)
                    logger.error("This may indicate the video is corrupted, unsupported format, or contains invalid data")
                    result["message"] = error_msg
                    return result

                logger.info("File is ACTIVE and ready for processing")

                # Create content parts using file reference
                prompt_parts = [
                    {"text": prompt},
                    {"file_data": {
                        "file_uri": uploaded_file.uri,
                        "mime_type": mime_type
                    }}
                ]

            # Rate limiting: pause briefly before calling the model. The pause
            # happens while holding the lock, so concurrent chunk workers start
            # their requests at least 2 seconds apart.
            # NOTE(review): this only staggers requests; it does not by itself
            # enforce the free-tier limit of 5 RPM (~12 seconds per request).
            with self._rate_limit_lock:
                time.sleep(2)  # 2 second delay between API calls

            # Use the client to generate content with the new SDK API
            logger.info("Sending prompt to Gemini for processing...")
            api_start = time.time()

            # Add retry logic for network failures
            max_retries = 3
            retry_delay = 5  # seconds

            for attempt in range(max_retries):
                try:
                    response = self.client.models.generate_content(
                        model=self.processing_model,
                        contents=prompt_parts
                    )
                    # If successful, break out of retry loop
                    break
                except Exception as e:
                    error_str = str(e).lower()

                    # Log detailed error information for INVALID_ARGUMENT
                    if 'invalid_argument' in error_str or '400' in error_str:
                        logger.error("=" * 80)
                        logger.error("INVALID_ARGUMENT ERROR DETAILS:")
                        logger.error(f"  Video path: {video_path}")
                        logger.error(f"  File size: {file_size_mb:.2f} MB")
                        logger.error(f"  MIME type: {mime_type}")
                        # Duration may be unknown (None); formatting it would
                        # raise and hide the real API error.
                        if video_duration is not None:
                            logger.error(f"  Duration: {video_duration:.2f}s ({video_duration/60:.2f} min)")
                        logger.error(f"  Prompt length: {len(prompt)} characters")
                        logger.error(f"  Upload method: {'File Upload API' if uploaded_file else 'Inline Base64'}")
                        if uploaded_file:
                            logger.error(f"  File state: {uploaded_file.state}")
                            logger.error(f"  File URI: {uploaded_file.uri}")
                        logger.error(f"  Error message: {str(e)}")
                        logger.error("=" * 80)

                    # Check if it's a retryable network error
                    if any(err in error_str for err in ['name resolution', 'connection', 'timeout', 'network']):
                        if attempt < max_retries - 1:
                            logger.warning(f"Network error on attempt {attempt + 1}/{max_retries}: {str(e)}")
                            logger.info(f"Retrying in {retry_delay} seconds...")
                            time.sleep(retry_delay)
                            continue
                        else:
                            logger.error(f"All {max_retries} attempts failed with network errors")
                            raise
                    else:
                        # Non-retryable error, raise immediately
                        logger.error(f"Non-retryable error: {str(e)}")
                        raise

            api_time = time.time() - api_start
            logger.info(f"Received response from Gemini (API call took {api_time:.1f}s)")

            # Extract the response content
            text_chunks = []
            if response.parts:
                logger.info(f"Response has {len(response.parts)} parts")
                for i, part in enumerate(response.parts):
                    # A part can expose a `text` attribute that is None (e.g.
                    # non-text parts), so test the value, not the attribute.
                    part_text = getattr(part, 'text', None)
                    if part_text:
                        content_preview = part_text[:100] + '...' if len(part_text) > 100 else part_text
                        logger.info(f"Part {i} (text): {content_preview}")
                        text_chunks.append(part_text)
                    else:
                        logger.info(f"Part {i} (no text): {type(part)}")
            else:
                logger.warning("No parts in response")
                if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
                    logger.warning(f"Prompt feedback: {response.prompt_feedback}")
            content = "".join(text_chunks)

            # Set success result
            result["success"] = True
            result["content"] = content
            result["processing_time_seconds"] = round(time.time() - start_time, 2)
            logger.info(f"Processed result with {len(content)} characters")
            logger.info(f"Total processing time: {result['processing_time_seconds']}s")

            # Send usage data to webhook for tracking
            self.send_usage_webhook(user_email, prompt)

            return result

        except Exception as e:
            error_details = traceback.format_exc()
            logger.error(f"Error processing video: {str(e)}")
            logger.error(error_details)
            result["message"] = f"Error processing video: {str(e)}"
            result["error_details"] = error_details
            return result

        finally:
            # Clean up the uploaded file on every exit path (success, early
            # return or exception) so failed runs do not leave files behind in
            # Gemini storage. Deletion itself stays best-effort.
            if uploaded_file is not None:
                try:
                    logger.info(f"Deleting uploaded file: {uploaded_file.name}")
                    self.client.files.delete(name=uploaded_file.name)
                    logger.info("File deleted successfully from Gemini storage")
                except Exception as del_err:
                    logger.warning(f"Could not delete file from Gemini storage: {str(del_err)}")

    def combine_chunk_responses(self, responses: List[str], prompt: str,
                                num_chunks: int) -> str:
        """
        Merge the per-chunk answers of a split video into one document.

        The merge strategy is picked from keywords found in the prompt
        (case-insensitive), checked in this order of precedence:
        charts/diagrams, meeting summary, process documentation, and finally a
        generic sequential merge.

        Args:
            responses: List of response texts from each chunk
            prompt: Original prompt used for processing
            num_chunks: Total number of chunks processed

        Returns:
            Combined response text
        """
        logger.info(f"Combining {len(responses)} chunk responses")

        lowered = prompt.lower()

        def mentions(*keywords: str) -> bool:
            """Return True when any keyword occurs in the lower-cased prompt."""
            return any(keyword in lowered for keyword in keywords)

        # Order matters: a prompt asking for diagrams usually also mentions
        # "process"/"documentation", so the chart strategy is tested first.
        if mentions("mermaid", "diagram", "chart"):
            strategy = self._combine_with_charts
        elif mentions("meeting", "summary"):
            strategy = self._combine_meeting_summary
        elif mentions("documentation", "process"):
            strategy = self._combine_documentation
        else:
            strategy = self._combine_generic

        return strategy(responses, num_chunks)

    def _combine_generic(self, responses: List[str], num_chunks: int) -> str:
        """
        Fallback merge: list every chunk answer in order under a "Part" heading.

        Args:
            responses: Response text of each chunk, in chunk order
            num_chunks: Total number of chunks, used in the headings

        Returns:
            Markdown document with a title, a note about the split and one
            section per chunk (each answer stripped of surrounding whitespace).
        """
        logger.info("Using generic combination strategy")

        pieces = [
            "# Complete Video Analysis\n",
            f"*This video was processed in {num_chunks} parts due to its length.*\n",
        ]
        for part_no, text in enumerate(responses, start=1):
            pieces += [f"\n## Part {part_no} of {num_chunks}\n", text.strip()]

        return "\n".join(pieces)

    def _combine_meeting_summary(self, responses: List[str], num_chunks: int) -> str:
        """
        Merge chunk answers of a meeting recording.

        First asks the model (via _synthesize_meeting_segments) to rewrite the
        segment summaries as one unified summary. If that yields nothing or
        raises, the segments are concatenated instead, each under a heading
        with its approximate time range, followed by a short notes section.

        Args:
            responses: Summary text of each segment, in order
            num_chunks: Total number of segments

        Returns:
            The synthesized summary, or the concatenated fallback document.
        """
        logger.info("Using meeting summary combination strategy")

        # Preferred path: a single AI-synthesized summary
        try:
            logger.info("Attempting to synthesize segments into unified meeting summary")
            unified = self._synthesize_meeting_segments(responses, num_chunks)
        except Exception as e:
            logger.warning(f"Error during synthesis: {e}, falling back to segment concatenation")
        else:
            if unified:
                return unified
            logger.warning("Synthesis failed, falling back to segment concatenation")

        # Fallback: simple concatenation with formatting
        lines = [
            "# Complete Meeting Recording Summary\n",
            f"*This recording was analyzed in {num_chunks} segments.*\n",
            "\n---\n",
        ]

        # One block per segment, labelled with its time range
        for seg_no, text in enumerate(responses, start=1):
            span = self._format_time_range(seg_no, num_chunks)
            lines.extend([
                f"\n## Segment {seg_no}: {span}\n",
                text.strip(),
                "\n---\n",
            ])

        # Closing notes; the duration estimate assumes 50 minutes per segment
        lines.extend([
            "\n### Notes",
            "- Review all segments above for discussion points and action items",
            f"- Total recording duration: ~{num_chunks * 50} minutes",
            f"- Recording was split into {num_chunks} segments for analysis",
        ])

        return "\n".join(lines)

    def _synthesize_meeting_segments(self, responses: List[str], num_chunks: int) -> Optional[str]:
        """
        Use AI to synthesize multiple segment summaries into one unified meeting summary.

        The segment summaries are embedded in a single text prompt sent to the
        synthesis model. Network-looking errors are retried up to three times
        with a 5 second pause. Every failure is logged and turned into a None
        return value, so this method does not raise.

        Args:
            responses: List of segment summaries
            num_chunks: Number of segments

        Returns:
            Unified meeting summary (prefixed with a short header noting it was
            synthesized) or None if synthesis fails or returns no text
        """
        try:
            # Prepare the segments for synthesis
            segments_text = "".join(
                f"\n\n### Segment {i} ({self._format_time_range(i, num_chunks)}):\n{response.strip()}\n"
                for i, response in enumerate(responses, 1)
            )

            # Create synthesis prompt
            synthesis_prompt = f"""You are analyzing a meeting recording that was split into {num_chunks} segments due to its length. Below are the summaries from each segment. Your task is to create ONE unified, comprehensive meeting summary that integrates all the information.

SEGMENT SUMMARIES:
{segments_text}

Please provide a SINGLE, UNIFIED meeting summary that:
1. Combines all discussion points into one cohesive narrative (not separated by segments)
2. Consolidates all action items into one master list (removing duplicates if any)
3. Identifies main themes and outcomes across the entire meeting
4. Maintains chronological flow where relevant
5. Uses clear sections: Meeting Summary, Discussion Points, Action Items (with owners)

Format the output as a professional meeting summary document. Do not reference the segments in your output - write as if this was analyzed as one continuous meeting."""

            logger.info("Sending synthesis request to Gemini")

            # Add retry logic for network failures
            max_retries = 3
            retry_delay = 5

            for attempt in range(max_retries):
                try:
                    synthesis_response = self.client.models.generate_content(
                        model=self.synthesis_model,
                        contents=synthesis_prompt
                    )
                    break
                except Exception as e:
                    error_str = str(e).lower()
                    if any(err in error_str for err in ['name resolution', 'connection', 'timeout', 'network']):
                        if attempt < max_retries - 1:
                            logger.warning(f"Network error during synthesis (attempt {attempt + 1}/{max_retries}): {str(e)}")
                            logger.info(f"Retrying in {retry_delay} seconds...")
                            time.sleep(retry_delay)
                            continue
                        else:
                            logger.error(f"Synthesis failed after {max_retries} attempts")
                            raise
                    else:
                        # Not a network problem: retrying will not help
                        raise

            # Keep only parts that actually carry text. A part may expose a
            # `text` attribute whose value is None; concatenating that would
            # raise and throw away an otherwise valid synthesis.
            synthesized_content = "".join(
                text
                for text in (getattr(part, 'text', None) for part in (synthesis_response.parts or []))
                if text
            )

            if synthesized_content:
                logger.info("Successfully synthesized unified meeting summary")
                # Add header noting this was synthesized
                return (
                    "# Meeting Summary\n\n"
                    f"*Synthesized from {num_chunks}-segment analysis*\n\n"
                    "---\n\n"
                    f"{synthesized_content}"
                )

            logger.warning("No content in synthesis response")
            return None

        except Exception as e:
            logger.error(f"Error during synthesis: {str(e)}")
            return None

    def _combine_documentation(self, responses: List[str], num_chunks: int) -> str:
        """
        Merge chunk answers into a single process-documentation document.

        Args:
            responses: Documentation text of each chunk, in order
            num_chunks: Total number of chunks

        Returns:
            Markdown with a title, a fixed overview paragraph, one "Section"
            per chunk (headed by its approximate time range) and a closing
            end-of-documentation marker.
        """
        logger.info("Using documentation combination strategy")

        intro = [
            "# Complete Process Documentation\n",
            f"*This process was documented from a {num_chunks}-part video recording.*\n",
            "\n## Overview\n",
            "This documentation covers the complete process shown in the video. "
            "The content has been organized sequentially across all segments.\n",
        ]

        sections = []
        for section_no, text in enumerate(responses, start=1):
            heading = f"\n## Section {section_no}: {self._format_time_range(section_no, num_chunks)}\n"
            sections.append(heading)
            sections.append(text.strip())

        closing = ["\n\n---\n*End of documentation*"]

        return "\n".join(intro + sections + closing)

    def _combine_with_charts(self, responses: List[str], num_chunks: int) -> str:
        """Combination strategy for documentation with Mermaid diagrams."""
        logger.info("Using documentation with charts combination strategy")
        combined = []

        combined.append(f"# Complete Process Documentation with Workflow Diagrams\n")
        combined.append(f"*This analysis spans {num_chunks} video segments.*\n")

        # First, add all text content
        combined.append(f"\n## Overview and Detailed Steps\n")

        for i, response in enumerate(responses, 1):
            combined.append(f"\n### Part {i}: {self._format_time_range(i, num_chunks)}\n")

            # Separate mermaid diagrams from text content
            parts = response.split("```mermaid")
            text_part = parts[0].strip()
            combined.append(text_part)

            # Add mermaid diagrams in a dedicated section
            if len(parts) > 1:
                for j, diagram_part in enumerate(parts[1:], 1):
                    if "```" in diagram_part:
                        diagram_code = diagram_part.split("```")[0]
                        combined.append(f"\n**Workflow Diagram {i}.{j}:**\n")
                        combined.append(f"```mermaid{diagram_code}```\n")

                        # Add any remaining text after the diagram
                        remaining_text = "```".join(diagram_part.split("```")[1:]).strip()
                        if remaining_text:
                            combined.append(remaining_text)

        combined.append(f"\n\n---\n*Complete documentation generated from {num_chunks}-part video analysis*")

        return "\n".join(combined)

    def _format_time_range(self, part_num: int, total_parts: int,
                          chunk_duration: int = 50) -> str:
        """
        Format the approximate time range covered by one video part.

        Args:
            part_num: 1-based index of the part
            total_parts: Total number of parts
            chunk_duration: Assumed length of every part in minutes

        Returns:
            "<start>-<end> minutes" for every part except the last one, and
            "<start>+ minutes" for the last part, whose real end is unknown.
        """
        start_min = (part_num - 1) * chunk_duration

        # Decide on the part position itself rather than on the type of a
        # computed end value, so non-integer durations are labelled correctly.
        if part_num < total_parts:
            return f"{start_min}-{part_num * chunk_duration} minutes"
        return f"{start_min}+ minutes"

    def _process_single_chunk(self, chunk_info: Tuple[int, str, str, int, str]) -> Tuple[int, Dict[str, Any]]:
        """
        Analyze one video chunk; this is the worker entry point for the thread pool.

        Exceptions raised by process_video are caught and turned into a failure
        result, so the caller always gets a (index, dict) pair back.

        Args:
            chunk_info: Tuple of (chunk_index, chunk_path, chunk_prompt, total_chunks, user_email);
                chunk_index is 0-based and is echoed back in the return value

        Returns:
            Tuple of (chunk_index, result_dict)
        """
        idx, path, prompt_text, total, email = chunk_info
        # "n/total" label shared by the log lines below (1-based for readability)
        position = f"{idx + 1}/{total}"

        logger.info(f"[Parallel] Processing chunk {position}: {path}")

        try:
            outcome = self.process_video(path, prompt_text, email)
            logger.info(f"[Parallel] Completed chunk {position}")
            return (idx, outcome)
        except Exception as e:
            logger.error(f"[Parallel] Error processing chunk {position}: {str(e)}")
            failure = {
                "success": False,
                "message": f"Error processing chunk {idx + 1}: {str(e)}",
                "content": ""
            }
            return (idx, failure)

    def _process_chunks_parallel(self, chunk_paths: List[str], prompt: str,
                                 user_email: str) -> List[Dict[str, Any]]:
        """
        Fan the chunks out over a thread pool and gather the results in chunk order.

        Args:
            chunk_paths: List of paths to video chunk files
            prompt: Original prompt for video analysis
            user_email: User email for tracking

        Returns:
            List of result dictionaries, one per chunk, positioned by chunk index
        """
        total = len(chunk_paths)
        logger.info(f"Starting parallel processing of {total} chunks with {self.max_parallel_chunks} workers")

        # One work item per chunk; the prompt carries the chunk's 1-based position
        work_items = [
            (idx, path, self._create_chunk_prompt(prompt, idx + 1, total), total, user_email)
            for idx, path in enumerate(chunk_paths)
        ]

        # Futures finish in arbitrary order, so each result is stored by index
        ordered_results = [None] * total
        finished = 0

        with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as pool:
            # Remember which chunk each future belongs to
            pending = {}
            for item in work_items:
                pending[pool.submit(self._process_single_chunk, item)] = item[0]

            for done in as_completed(pending):
                chunk_index = pending[done]
                try:
                    chunk_index, payload = done.result()
                    ordered_results[chunk_index] = payload
                    finished += 1
                    logger.info(f"[Parallel] Progress: {finished}/{total} chunks completed")
                except Exception as e:
                    logger.error(f"[Parallel] Unexpected error for chunk {chunk_index + 1}: {str(e)}")
                    ordered_results[chunk_index] = {
                        "success": False,
                        "message": f"Unexpected error: {str(e)}",
                        "content": ""
                    }

        logger.info(f"[Parallel] All {total} chunks processed")
        return ordered_results

    def process_long_video(self, video_path: str, prompt: str,
                          user_email: str = "anonymous", use_parallel: bool = True) -> Dict[str, Any]:
        """
        Process a long video by splitting it into chunks and combining the results.
        Supports both parallel and sequential processing.

        A video that fits in a single chunk is delegated to process_video and
        that method's result is returned unchanged. In sequential mode,
        processing stops at the first failed chunk, because one failure already
        fails the whole request. Temporary chunk files are removed on every
        exit path.

        Args:
            video_path: Path to the video file
            prompt: Text prompt to use for video analysis
            user_email: Email of the user processing the video (for usage tracking)
            use_parallel: If True, process chunks in parallel; if False, process sequentially

        Returns:
            Dictionary with processing result or error. For split videos it
            carries "success", "message", "content", "chunks_processed",
            "processing_mode" and "processing_time_seconds", plus
            "error_details" when an exception was caught.
        """
        start_time = time.time()

        def elapsed() -> float:
            # Wall-clock seconds since this call started, rounded for reporting
            return round(time.time() - start_time, 2)

        result = {
            "success": False,
            "message": "",
            "content": "",
            "chunks_processed": 0,
            "processing_mode": "parallel" if use_parallel else "sequential",
            "processing_time_seconds": 0
        }

        chunk_paths = []

        try:
            # Check if video needs splitting
            num_chunks, duration_minutes = self.video_splitter.get_chunk_info(video_path)

            if num_chunks <= 1:
                logger.info("Video does not need splitting, processing normally")
                return self.process_video(video_path, prompt, user_email)

            logger.info(f"Long video detected: {duration_minutes:.2f} minutes, will be split into {num_chunks} chunks")
            logger.info(f"Processing mode: {'PARALLEL' if use_parallel else 'SEQUENTIAL'}")

            # Split the video
            logger.info("Starting video splitting...")
            chunk_paths = self.video_splitter.split_video(video_path)
            total = len(chunk_paths)
            logger.info(f"Video split into {total} chunks successfully")

            # Process chunks (parallel or sequential)
            if use_parallel:
                chunk_results = self._process_chunks_parallel(chunk_paths, prompt, user_email)
            else:
                chunk_results = []
                for i, chunk_path in enumerate(chunk_paths, 1):
                    logger.info(f"[Sequential] Processing chunk {i}/{total}: {chunk_path}")

                    # Modify prompt to indicate this is part of a multi-part video
                    chunk_prompt = self._create_chunk_prompt(prompt, i, total)

                    chunk_result = self.process_video(chunk_path, chunk_prompt, user_email)
                    chunk_results.append(chunk_result)

                    if not chunk_result["success"]:
                        # The failure check below rejects the whole run, so
                        # processing further chunks would only waste API calls.
                        logger.warning(f"[Sequential] Chunk {i}/{total} failed, skipping remaining chunks")
                        break

                    logger.info(f"[Sequential] Completed chunk {i}/{total}")

            # Check for failures in any chunk
            chunk_responses = []
            for i, chunk_result in enumerate(chunk_results, 1):
                if not chunk_result["success"]:
                    error_msg = f"Failed to process chunk {i}/{total}: {chunk_result.get('message', 'Unknown error')}"
                    logger.error(error_msg)
                    result["message"] = error_msg
                    # Number of chunks that precede the first failed one
                    result["chunks_processed"] = i - 1
                    result["processing_time_seconds"] = elapsed()
                    return result

                chunk_responses.append(chunk_result["content"])

            # Combine all responses
            logger.info("Combining responses from all chunks...")
            combined_content = self.combine_chunk_responses(
                chunk_responses,
                prompt,
                total
            )

            result["success"] = True
            result["content"] = combined_content
            result["chunks_processed"] = total
            result["processing_time_seconds"] = elapsed()
            result["message"] = f"Successfully processed video in {total} chunks"

            logger.info(f"Long video processing completed successfully: {total} chunks")
            logger.info(f"Total processing time: {result['processing_time_seconds']}s")

            return result

        except Exception as e:
            import traceback
            error_details = traceback.format_exc()
            logger.error(f"Error processing long video: {str(e)}")
            logger.error(error_details)
            result["message"] = f"Error processing long video: {str(e)}"
            result["error_details"] = error_details
            result["processing_time_seconds"] = elapsed()
            return result

        finally:
            # Always clean up chunk files
            if chunk_paths:
                logger.info("Cleaning up temporary chunk files...")
                self.video_splitter.cleanup_chunks(chunk_paths)

    def _create_chunk_prompt(self, original_prompt: str, chunk_num: int,
                            total_chunks: int) -> str:
        """
        Create a prompt for a video chunk that provides context about its position.

        Args:
            original_prompt: The original user prompt
            chunk_num: Current chunk number (1-indexed)
            total_chunks: Total number of chunks

        Returns:
            Modified prompt for the chunk
        """
        # For meeting summaries, modify the prompt to focus on just summarizing what's in this segment
        prompt_lower = original_prompt.lower()
        is_meeting = "meeting" in prompt_lower

        if is_meeting:
            # For meetings, ask for a partial summary of this segment only
            if chunk_num == 1:
                context = f"[SEGMENT {chunk_num} of {total_chunks} - First 50 minutes] "
                context += "Provide a summary of the discussion points and any action items covered in THIS segment only. "
                context += "Do not try to provide a complete meeting summary - just summarize what happens in this part. "
            elif chunk_num == total_chunks:
                context = f"[SEGMENT {chunk_num} of {total_chunks} - Final segment] "
                context += "Provide a summary of the discussion points and any action items covered in THIS final segment only. "
                context += "This continues from previous segments, but only summarize what happens in this part. "
            else:
                context = f"[SEGMENT {chunk_num} of {total_chunks} - Middle segment] "
                context += "Provide a summary of the discussion points and any action items covered in THIS segment only. "
                context += "This is a middle portion of a longer recording - only summarize what happens in this part. "

            return context + original_prompt
        else:
            # For other types, use the original approach
            context = f"[PART {chunk_num} of {total_chunks}] "

            if chunk_num == 1:
                context += "This is the first segment of a longer video. "
            elif chunk_num == total_chunks:
                context += "This is the final segment continuing from previous parts. "
            else:
                context += "This is a middle segment continuing from previous parts. "

            return context + original_prompt

    def process_video_auto(self, video_path: str, prompt: str,
                          user_email: str = "anonymous") -> Dict[str, Any]:
        """
        Process a video of any length, choosing the pipeline automatically.

        Asks the video splitter whether the file needs splitting and routes it
        to process_long_video or process_video accordingly.

        Args:
            video_path: Path to the video file
            prompt: Text prompt to use for video analysis
            user_email: Email of the user processing the video (for usage tracking)

        Returns:
            Dictionary with processing result or error
        """
        logger.info(f"Auto-processing video: {video_path}")

        # Short videos take the direct path
        if not self.video_splitter.needs_splitting(video_path):
            logger.info("Video is within single-chunk limit, using standard processing")
            return self.process_video(video_path, prompt, user_email)

        logger.info("Video requires splitting, using long video processing")
        return self.process_long_video(video_path, prompt, user_email)

    def process_video_batch(self, video_paths: List[str], filenames: List[str],
                           prompt: str, user_email: str, batch_id: str) -> Dict[str, Any]:
        """
        Analyze several videos as one continuous recording.

        Measures every video's duration, then routes the batch to chunked
        processing when the combined length, or any single video, exceeds the
        splitter's chunk duration; otherwise the videos are processed directly.
        Any error is logged and re-raised to the caller.

        Args:
            video_paths: List of paths to video files (in order)
            filenames: List of original filenames, paired positionally with video_paths
            prompt: Text prompt to use for analysis
            user_email: Email of the user
            batch_id: Unique identifier for this batch

        Returns:
            Dictionary with unified result
        """
        logger.info(f"Batch {batch_id}: Processing {len(video_paths)} videos")

        try:
            # Collect per-video info and the running total length
            video_infos = []
            total_duration_seconds = 0

            for path, name in zip(video_paths, filenames):
                try:
                    length_seconds = self.video_splitter.get_video_duration(path)
                    video_infos.append({
                        'path': path,
                        'filename': name,
                        'duration': length_seconds
                    })
                    total_duration_seconds += length_seconds
                    logger.info(f"Batch {batch_id}: {name} - {length_seconds/60:.1f} minutes")
                except Exception as e:
                    logger.error(f"Batch {batch_id}: Failed to get duration for {name}: {e}")
                    raise

            total_duration_minutes = total_duration_seconds / 60
            chunk_duration_minutes = self.video_splitter.chunk_duration_seconds / 60

            logger.info(f"Batch {batch_id}: Total duration: {total_duration_minutes:.1f} minutes")

            # Chunk when the batch as a whole, or any one video, is too long
            exceeds_total = total_duration_minutes > chunk_duration_minutes
            needs_chunking = exceeds_total or any(
                info['duration'] > self.video_splitter.chunk_duration_seconds
                for info in video_infos
            )

            if needs_chunking:
                logger.info(f"Batch {batch_id}: Using chunked processing")
                handler = self._process_batch_with_chunking
            else:
                logger.info(f"Batch {batch_id}: Processing directly (no chunking needed)")
                handler = self._process_batch_direct

            return handler(video_infos, prompt, user_email, batch_id)

        except Exception as e:
            logger.error(f"Batch {batch_id}: Processing error: {str(e)}")
            raise

    def _process_batch_direct(self, video_infos: List[Dict], prompt: str,
                             user_email: str, batch_id: str) -> Dict[str, Any]:
        """
        Process a batch without splitting any video, using two-stage synthesis.

        Stage 1 produces one summary per video; stage 2 merges the summaries
        through _synthesize_final_result. A video that fails, whether
        process_video raises or returns an unsuccessful result, contributes an
        "[Error processing ...]" placeholder so the remaining videos are still
        synthesized and summaries stay aligned with the videos.

        Args:
            video_infos: One dict per video, in order; 'path' and 'filename' are read here
            prompt: Original user prompt
            user_email: User email passed through to processing and synthesis
            batch_id: Batch identifier used as a log prefix

        Returns:
            Dictionary with 'content', 'total_chunks' and 'synthesis_stages'
        """
        logger.info(f"Batch {batch_id}: [Stage 1] Direct processing of {len(video_infos)} videos")
        stage1_start = time.time()

        # Process each video separately first (stage 1: summaries)
        summaries = []
        for i, video_info in enumerate(video_infos, 1):
            filename = video_info['filename']
            logger.info(f"Batch {batch_id}: [Stage 1] Processing video {i}/{len(video_infos)}: {filename}")

            summary_prompt = self._create_chunk_summary_prompt(
                original_prompt=prompt,
                chunk_number=i,
                total_chunks=len(video_infos),
                video_name=filename
            )

            # Log prompt if configured
            if self.log_prompts:
                logger.debug(f"Batch {batch_id}: [Stage 1] Prompt for video {i}:\n{summary_prompt[:300]}...")

            try:
                video_start = time.time()
                result = self.process_video(video_info['path'], summary_prompt, user_email)
                video_time = time.time() - video_start
            except Exception as e:
                logger.error(f"Batch {batch_id}: [Stage 1] Failed to process {filename}: {e}")
                summaries.append(f"[Error processing {filename}: {str(e)}]")
                continue

            if not result.get('success'):
                # An unsuccessful result carries no content; record the reason
                # instead of silently feeding an empty summary to stage 2.
                message = result.get('message', 'Unknown error')
                logger.error(f"Batch {batch_id}: [Stage 1] Failed to process {filename}: {message}")
                summaries.append(f"[Error processing {filename}: {message}]")
                continue

            # Treat missing or None content as an empty summary
            summary = result.get('content') or ''
            summaries.append(summary)

            logger.info(f"Batch {batch_id}: [Stage 1] Video {i} complete: {len(summary)} chars in {video_time:.2f}s")

            # Log summary preview if configured
            if self.log_summaries:
                logger.debug(f"Batch {batch_id}: [Stage 1] Video {i} summary preview:\n{summary[:300]}...")

        stage1_time = time.time() - stage1_start
        logger.info(f"Batch {batch_id}: [Stage 1] Complete - {len(summaries)} summaries in {stage1_time:.2f}s")

        # Log traceability
        logger.info(f"Batch {batch_id}: [Traceability] Video-to-summary mapping:")
        for i, video_info in enumerate(video_infos, 1):
            logger.info(f"Batch {batch_id}:   - Video {i}: {video_info['filename']} → Summary {i}")

        # Stage 2: Synthesize all summaries
        stage2_start = time.time()
        logger.info(f"Batch {batch_id}: [Stage 2] Synthesizing {len(summaries)} summaries")
        chunk_metadata = [{'video_name': v['filename'], 'video_idx': i}
                         for i, v in enumerate(video_infos)]

        final_content = self._synthesize_final_result(
            summaries=summaries,
            chunk_metadata=chunk_metadata,
            original_prompt=prompt,
            user_email=user_email
        )

        stage2_time = time.time() - stage2_start
        total_time = stage1_time + stage2_time

        # Log performance metrics
        logger.info(f"Batch {batch_id}: [Metrics] Stage 1: {stage1_time:.2f}s, Stage 2: {stage2_time:.2f}s, Total: {total_time:.2f}s")
        if video_infos:
            # Guarded so an empty batch cannot raise ZeroDivisionError here
            logger.info(f"Batch {batch_id}: [Metrics] Avg time per video: {stage1_time/len(video_infos):.2f}s")

        return {
            'content': final_content,
            'total_chunks': len(video_infos),
            'synthesis_stages': 2
        }
    def _process_batch_with_chunking(self, video_infos: List[Dict], prompt: str,
                                    user_email: str, batch_id: str) -> Dict[str, Any]:
        """
        Split over-long batch videos into chunks and process with two-stage synthesis.
        Treats all videos as one continuous sequence.

        Videos longer than the splitter's chunk duration are split; shorter ones
        are used whole. Chunk files created by splitting are deleted afterwards,
        including when splitting or processing fails part-way; original videos
        are never deleted here.

        Args:
            video_infos: One dict per video, in order, with 'path', 'filename' and 'duration'
            prompt: Original user prompt
            user_email: User email passed through to processing
            batch_id: Batch identifier used as a log prefix

        Returns:
            The result dictionary produced by _process_chunks_two_stage

        Raises:
            Exception: Whatever split_video or _process_chunks_two_stage raises
        """
        logger.info(f"Batch {batch_id}: Starting chunked processing")

        all_chunks = []
        chunk_metadata = []

        # Splitting happens inside the try so that chunk files already written
        # for earlier videos are still removed when a later split fails.
        try:
            # Step 1: Split all videos into chunks
            for video_idx, video_info in enumerate(video_infos):
                video_path = video_info['path']
                filename = video_info['filename']
                duration = video_info['duration']

                if duration > self.video_splitter.chunk_duration_seconds:
                    # Split long video
                    logger.info(f"Batch {batch_id}: Splitting {filename} ({duration/60:.1f} min)")
                    try:
                        chunks = self.video_splitter.split_video(video_path)
                    except Exception as e:
                        logger.error(f"Batch {batch_id}: Failed to split {filename}: {e}")
                        raise

                    for chunk_idx, chunk_path in enumerate(chunks):
                        all_chunks.append(chunk_path)
                        chunk_metadata.append({
                            'video_idx': video_idx,
                            'video_name': filename,
                            'chunk_idx': chunk_idx,
                            'is_split': True
                        })
                else:
                    # Use whole video as one chunk
                    all_chunks.append(video_path)
                    chunk_metadata.append({
                        'video_idx': video_idx,
                        'video_name': filename,
                        'chunk_idx': 0,
                        'is_split': False
                    })

            logger.info(f"Batch {batch_id}: Split into {len(all_chunks)} total chunks")

            # Step 2: Process chunks with two-stage synthesis
            result = self._process_chunks_two_stage(
                all_chunks,
                chunk_metadata,
                prompt,
                user_email,
                batch_id
            )
        finally:
            # Step 3: Cleanup split chunks (not original videos)
            for chunk_path, metadata in zip(all_chunks, chunk_metadata):
                if not metadata['is_split']:
                    continue
                try:
                    if os.path.exists(chunk_path):
                        os.remove(chunk_path)
                        logger.info(f"Batch {batch_id}: Cleaned up chunk: {os.path.basename(chunk_path)}")
                except Exception as e:
                    # Best effort: a failed delete must not mask the real outcome
                    logger.warning(f"Batch {batch_id}: Cleanup failed for {chunk_path}: {e}")

        return result

    def _process_chunks_two_stage(self, chunk_paths: List[str], chunk_metadata: List[Dict],
                                  prompt: str, user_email: str, batch_id: str) -> Dict[str, Any]:
        """
        Two-stage synthesis:
        Stage 1: Each chunk → concise summary
        Stage 2: All summaries → final unified result

        Stage 1 runs on a thread pool when self.max_parallel_chunks > 1 and
        sequentially otherwise. A chunk that fails, whether by raising or by
        returning an unsuccessful result, contributes an "[Error: ...]"
        placeholder stored under its own index, so summaries stay aligned with
        chunk_metadata.

        Args:
            chunk_paths: Paths of the chunks, in order
            chunk_metadata: One dict per chunk; 'video_name' is required, and
                'chunk_idx' / 'is_split' are read for the traceability log
            prompt: Original user prompt
            user_email: User email passed through to processing and synthesis
            batch_id: Batch identifier used as a log prefix

        Returns:
            Dictionary with 'content', 'total_chunks' and 'synthesis_stages'
        """
        total_chunks = len(chunk_paths)
        logger.info(f"Batch {batch_id}: [Stage 1] Generating summaries for {total_chunks} chunks")
        stage1_start = time.time()

        def summary_from(result: Dict[str, Any]) -> str:
            # Turn a process_video result dict into summary text or an error marker
            if result.get('success'):
                return result.get('content') or ''
            return f"[Error: {result.get('message', 'Unknown error')}]"

        # Keyed by chunk index so completion order cannot misplace a summary
        summary_by_index = {}

        if self.max_parallel_chunks > 1:
            # Parallel processing
            logger.info(f"Batch {batch_id}: [Stage 1] Using parallel processing with {self.max_parallel_chunks} workers")
            with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor:
                future_to_index = {}

                for i, (chunk_path, metadata) in enumerate(zip(chunk_paths, chunk_metadata)):
                    summary_prompt = self._create_chunk_summary_prompt(
                        original_prompt=prompt,
                        chunk_number=i + 1,
                        total_chunks=total_chunks,
                        video_name=metadata['video_name']
                    )

                    # Log prompt if configured
                    if self.log_prompts:
                        logger.debug(f"Batch {batch_id}: [Stage 1] Prompt for chunk {i+1} ({metadata['video_name']}):\n{summary_prompt[:300]}...")

                    future = executor.submit(
                        self._process_single_chunk,
                        (i, chunk_path, summary_prompt, total_chunks, user_email)
                    )
                    future_to_index[future] = i

                # Collect results
                completed_count = 0
                for future in as_completed(future_to_index):
                    # Index recorded at submit time; valid even if the future raised
                    chunk_idx = future_to_index[future]
                    try:
                        _, result = future.result()
                        summary = summary_from(result)
                    except Exception as e:
                        logger.error(f"Batch {batch_id}: [Stage 1] Failed to process chunk {chunk_idx + 1}: {e}")
                        summary_by_index[chunk_idx] = f"[Error: {str(e)}]"
                        continue

                    summary_by_index[chunk_idx] = summary
                    completed_count += 1

                    logger.info(f"Batch {batch_id}: [Stage 1] Chunk {chunk_idx + 1}/{total_chunks} complete ({completed_count}/{total_chunks} total)")

                    # Log summary preview if configured
                    if self.log_summaries and isinstance(summary, str) and not summary.startswith('[Error'):
                        logger.debug(f"Batch {batch_id}: [Stage 1] Chunk {chunk_idx + 1} summary preview:\n{summary[:300]}...")

        else:
            # Sequential processing
            logger.info(f"Batch {batch_id}: [Stage 1] Using sequential processing")
            for i, (chunk_path, metadata) in enumerate(zip(chunk_paths, chunk_metadata)):
                logger.info(f"Batch {batch_id}: [Stage 1] Processing chunk {i+1}/{total_chunks} from {metadata['video_name']}")

                summary_prompt = self._create_chunk_summary_prompt(
                    original_prompt=prompt,
                    chunk_number=i + 1,
                    total_chunks=total_chunks,
                    video_name=metadata['video_name']
                )

                # Log prompt if configured
                if self.log_prompts:
                    logger.debug(f"Batch {batch_id}: [Stage 1] Prompt for chunk {i+1}:\n{summary_prompt[:300]}...")

                try:
                    chunk_start = time.time()
                    result = self.process_video(chunk_path, summary_prompt, user_email)
                    chunk_time = time.time() - chunk_start
                    summary = summary_from(result)
                except Exception as e:
                    logger.error(f"Batch {batch_id}: [Stage 1] Failed to process chunk {i + 1}: {e}")
                    summary_by_index[i] = f"[Error: {str(e)}]"
                    continue

                summary_by_index[i] = summary

                if result.get('success'):
                    logger.info(f"Batch {batch_id}: [Stage 1] Chunk {i + 1} complete: {len(summary)} chars in {chunk_time:.2f}s")

                    # Log summary preview if configured
                    if self.log_summaries:
                        logger.debug(f"Batch {batch_id}: [Stage 1] Chunk {i+1} summary preview:\n{summary[:300]}...")
                else:
                    # Same error-marker treatment as the parallel branch
                    logger.error(f"Batch {batch_id}: [Stage 1] Chunk {i + 1} failed: {result.get('message', 'Unknown error')}")

        # Restore chunk order
        summaries = [summary_by_index[idx] for idx in sorted(summary_by_index)]

        stage1_time = time.time() - stage1_start
        logger.info(f"Batch {batch_id}: [Stage 1] Complete - {len(summaries)} summaries generated in {stage1_time:.2f}s")

        # Log traceability
        logger.info(f"Batch {batch_id}: [Traceability] Chunk-to-summary mapping:")
        for i, metadata in enumerate(chunk_metadata):
            chunk_info = f"chunk {metadata.get('chunk_idx', 0)+1}" if metadata.get('is_split') else "whole video"
            logger.info(f"Batch {batch_id}:   - Chunk {i+1}: {metadata['video_name']} ({chunk_info}) → Summary {i+1}")

        # Stage 2: Synthesize all summaries
        stage2_start = time.time()
        logger.info(f"Batch {batch_id}: [Stage 2] Synthesizing {len(summaries)} summaries into final result")

        final_content = self._synthesize_final_result(
            summaries=summaries,
            chunk_metadata=chunk_metadata,
            original_prompt=prompt,
            user_email=user_email
        )

        stage2_time = time.time() - stage2_start
        total_time = stage1_time + stage2_time

        # Log performance metrics
        logger.info(f"Batch {batch_id}: [Metrics] Stage 1: {stage1_time:.2f}s, Stage 2: {stage2_time:.2f}s, Total: {total_time:.2f}s")
        if total_chunks:
            # Guarded so an empty chunk list cannot raise ZeroDivisionError here
            logger.info(f"Batch {batch_id}: [Metrics] Avg time per chunk: {stage1_time/total_chunks:.2f}s")
        logger.info(f"Batch {batch_id}: [Metrics] Total API calls: {total_chunks + 1}")  # +1 for synthesis

        return {
            'content': final_content,
            'total_chunks': total_chunks,
            'synthesis_stages': 2
        }

    def _create_chunk_summary_prompt(self, original_prompt: str, chunk_number: int,
                                    total_chunks: int, video_name: str) -> str:
        """
        Create a prompt that asks for a summary suitable for later synthesis.
        """
        summary_prompt = f"""You are analyzing segment {chunk_number} of {total_chunks} from video "{video_name}".

Original user request:
{original_prompt}

Your task: Provide a CONCISE SUMMARY of this segment that captures:
1. Key information relevant to the user's request
2. Important details, facts, or insights
3. Any diagrams, charts, or structured data (preserve Mermaid syntax if applicable)
4. Chronological context if relevant

Keep the summary focused and information-dense. This summary will be combined with {total_chunks - 1} other summaries to create a final unified response.

Do NOT mention "this is segment X" or "this chunk contains". Just provide the factual content.
"""
        return summary_prompt

    def _detect_prompt_type(self, prompt: str, summaries: List[str]) -> str:
        """
        Detect the type of prompt to apply specialized synthesis strategy.

        Args:
            prompt: Original user prompt
            summaries: List of summaries (to check content)

        Returns:
            Prompt type: "meeting_summary", "documentation", "documentation_with_charts", or "generic"
        """
        prompt_lower = prompt.lower()

        # Check for meeting-related keywords
        if any(keyword in prompt_lower for keyword in ["meeting", "discussion", "action item", "agenda"]):
            return "meeting_summary"

        # Check for documentation keywords
        if any(keyword in prompt_lower for keyword in ["documentation", "process", "training", "knowledge base", "step by step"]):
            # Check if it also includes charts/diagrams
            if any(keyword in prompt_lower for keyword in ["diagram", "chart", "mermaid", "workflow"]):
                return "documentation_with_charts"
            return "documentation"

        # Default to generic
        return "generic"

    def _synthesize_final_result(self, summaries: List[str], chunk_metadata: List[Dict],
                                 original_prompt: str, user_email: str) -> str:
        """
        Synthesize all chunk summaries into single cohesive result using Gemini.
        Uses prompt type detection to apply specialized synthesis strategies.

        Args:
            summaries: Stage-1 summaries, in chunk order
            chunk_metadata: One dict per summary, each with a 'video_name' key
            original_prompt: The user's original request
            user_email: Accepted for interface symmetry; not used by this method

        Returns:
            The synthesized text, or the output of _fallback_concatenation when
            the model returns no text or the request raises
        """
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # video list in the prompt is deterministic and follows input order
        video_names = list(dict.fromkeys(m['video_name'] for m in chunk_metadata))
        num_videos = len(video_names)

        # Prepare summaries text
        summary_sections = []
        total_summary_chars = 0
        for i, summary in enumerate(summaries, 1):
            video_name = chunk_metadata[i-1]['video_name']
            summary_sections.append(f"\n\n--- Summary {i} (from {video_name}) ---\n{summary.strip()}\n")
            total_summary_chars += len(summary)
        summaries_text = "".join(summary_sections)

        logger.info(f"[Stage 2] Combined summaries: {len(summaries)} summaries, {total_summary_chars} total chars")

        # Detect prompt type for specialized synthesis
        prompt_type = self._detect_prompt_type(original_prompt, summaries)
        logger.info(f"[Stage 2] Detected prompt type: {prompt_type}")

        # Check for Mermaid diagrams
        has_diagrams = any('```mermaid' in s for s in summaries)

        # Create synthesis prompt based on type.
        # NOTE(review): "documentation_with_charts" has no dedicated branch; it
        # reaches the diagram prompt only when a summary contains a mermaid
        # block, otherwise the generic prompt is used. Confirm this is intended.
        if prompt_type == "meeting_summary":
            build_prompt = self._create_synthesis_prompt_meeting
        elif prompt_type == "documentation":
            build_prompt = self._create_synthesis_prompt_documentation
        elif has_diagrams:
            build_prompt = self._create_synthesis_prompt_with_diagrams
        else:
            build_prompt = self._create_synthesis_prompt_generic

        synthesis_prompt = build_prompt(
            summaries_text, original_prompt, num_videos, video_names
        )

        # Log synthesis prompt if configured
        if self.log_prompts:
            logger.debug(f"[Stage 2] Synthesis prompt preview:\n{synthesis_prompt[:500]}...")

        # Send to Gemini for final synthesis
        logger.info(f"[Stage 2] Sending synthesis request to Gemini API (model: {self.synthesis_model})")

        # Pause for two seconds while holding the shared lock.
        # NOTE(review): presumably spaces out API calls across threads; confirm.
        with self._rate_limit_lock:
            time.sleep(2)

        synthesis_start = time.time()
        try:
            response = self.client.models.generate_content(
                model=self.synthesis_model,
                contents=[{"text": synthesis_prompt}]
            )

            synthesis_time = time.time() - synthesis_start

            # A part may have no text attribute, or have it set to None;
            # skip those instead of raising on string concatenation.
            text_pieces = []
            for part in (response.parts or []):
                text = getattr(part, 'text', None)
                if text:
                    text_pieces.append(text)
            synthesized_content = "".join(text_pieces)

            if not synthesized_content:
                logger.warning("[Stage 2] Synthesis returned empty, falling back to concatenation")
                return self._fallback_concatenation(summaries, chunk_metadata)

            logger.info(f"[Stage 2] Synthesis complete: {len(synthesized_content)} chars in {synthesis_time:.2f}s")

            # Log synthesis result preview if configured
            if self.log_summaries:
                logger.debug(f"[Stage 2] Synthesized result preview:\n{synthesized_content[:500]}...")

            return synthesized_content

        except Exception as e:
            # Best effort: any API or parsing failure degrades to plain concatenation
            logger.error(f"[Stage 2] Synthesis failed: {str(e)}, using fallback")
            return self._fallback_concatenation(summaries, chunk_metadata)

    def _create_synthesis_prompt_generic(self, summaries_text: str, original_prompt: str,
                                         num_videos: int, video_names: List[str]) -> str:
        """
        Generic synthesis prompt for all content types.
        """
        if num_videos > 1:
            video_context = f"{num_videos} videos: {', '.join(video_names)}"
        else:
            video_context = f"one video: {video_names[0]}"

        prompt = f"""You are creating a FINAL UNIFIED RESPONSE by synthesizing multiple segment summaries.

Context:
- Source: {video_context}
- The video(s) were split into segments for processing
- Below are summaries from each segment

Original user request:
"{original_prompt}"

Segment summaries:
{summaries_text}

Your task: Create ONE cohesive, unified response that:

1. FULFILLS the original user request completely
2. INTEGRATES information from all segments naturally
3. DOES NOT mention segments, chunks, or parts
4. MAINTAINS any requested format (lists, tables, structure)
5. CONSOLIDATES duplicate information
6. PRESERVES chronological flow if relevant
7. APPEARS as if analyzing the complete video in one pass

Quality requirements:
- No phrases like "In segment 1", "The first part", "Chunk 2 discusses"
- Natural transitions between topics
- Unified narrative or structure
- Professional, coherent final product

Begin your unified response:
"""
        return prompt

    def _create_synthesis_prompt_meeting(self, summaries_text: str, original_prompt: str,
                                         num_videos: int, video_names: List[str]) -> str:
        """
        Specialized synthesis prompt for meeting summaries.
        """
        if num_videos > 1:
            video_context = f"{num_videos} videos: {', '.join(video_names)}"
        else:
            video_context = f"one video: {video_names[0]}"

        prompt = f"""You are creating a FINAL UNIFIED MEETING SUMMARY by synthesizing multiple segment summaries.

Context:
- Source: {video_context}
- The video(s) were split into segments for processing
- Below are summaries from each segment

Original user request:
"{original_prompt}"

Segment summaries:
{summaries_text}

Your task: Create ONE cohesive meeting summary that:

1. MEETING OVERVIEW: Provide a high-level summary of the meeting
2. DISCUSSION POINTS: Consolidate all discussion topics into logical sections
   - Group related discussions together
   - Maintain chronological flow where relevant
   - Capture key decisions made
3. ACTION ITEMS: Create a MASTER LIST of all action items
   - Format: "Action item - Owner (if mentioned) - Due date (if mentioned)"
   - Consolidate duplicates
   - Remove redundant items
4. KEY OUTCOMES: Summarize main conclusions and next steps

Quality requirements:
- Professional meeting summary format
- No phrases like "In segment 1", "The first part", "Chunk 2 discusses"
- Natural transitions between topics
- One unified document that reads as if from single analysis
- Clear, actionable items with owners where possible

Begin your unified meeting summary:
"""
        return prompt

    def _create_synthesis_prompt_documentation(self, summaries_text: str, original_prompt: str,
                                               num_videos: int, video_names: List[str]) -> str:
        """
        Specialized synthesis prompt for process documentation.
        """
        if num_videos > 1:
            video_context = f"{num_videos} videos: {', '.join(video_names)}"
        else:
            video_context = f"one video: {video_names[0]}"

        prompt = f"""You are creating FINAL UNIFIED PROCESS DOCUMENTATION by synthesizing multiple segment summaries.

Context:
- Source: {video_context}
- The video(s) were split into segments for processing
- Below are summaries from each segment

Original user request:
"{original_prompt}"

Segment summaries:
{summaries_text}

Your task: Create ONE comprehensive process documentation that:

1. OVERVIEW: Provide a high-level description of the process
2. PREREQUISITES: List any requirements or setup needed (if mentioned)
3. STEP-BY-STEP INSTRUCTIONS: Combine all steps into one sequential guide
   - Number steps sequentially (Step 1, Step 2, etc.)
   - Include sub-steps where appropriate
   - Be clear and detailed for someone new to the process
4. TIPS & BEST PRACTICES: Consolidate helpful tips
5. TROUBLESHOOTING: Include common issues and solutions (if mentioned)

Quality requirements:
- Clear, sequential flow from start to finish
- No phrases like "In segment 1", "The first part", "Chunk 2 shows"
- Professional documentation format
- Easy to follow for training or reference
- One unified guide that reads naturally

Begin your unified process documentation:
"""
        return prompt

    def _create_synthesis_prompt_with_diagrams(self, summaries_text: str, original_prompt: str,
                                                num_videos: int, video_names: List[str]) -> str:
        """
        Synthesis prompt specifically for content with Mermaid diagrams.
        """
        prompt = f"""You are creating a FINAL UNIFIED RESPONSE by synthesizing multiple segment summaries that contain Mermaid diagrams.

Original user request:
"{original_prompt}"

Segment summaries (containing diagrams):
{summaries_text}

Your task: Create ONE unified response with a SINGLE MERGED DIAGRAM.

Requirements:
1. MERGE all Mermaid diagrams into ONE comprehensive diagram
2. Combine nodes, relationships, and flows intelligently
3. Remove duplicate nodes/edges
4. Maintain logical structure and connections
5. Synthesize accompanying text naturally
6. DO NOT mention segments or parts

Diagram merging strategy:
- If multiple flowcharts: combine into single flowchart with logical flow
- If multiple architecture diagrams: create unified architecture
- If sequential diagrams: show complete sequence
- Use clear labels and grouping where appropriate

Output format:
```mermaid
[Your merged diagram here]
```

[Unified explanatory text here]

Begin your unified response with merged diagram:
"""
        return prompt

    def _fallback_concatenation(self, summaries: List[str], chunk_metadata: List[Dict]) -> str:
        """
        Fallback method when AI synthesis fails.
        Simple concatenation with section markers.
        """
        logger.info("Using fallback concatenation method")

        result = "# Combined Analysis\n\n"
        result += "*Note: This is a combined analysis from multiple video segments.*\n\n"

        for i, summary in enumerate(summaries, 1):
            video_name = chunk_metadata[i-1]['video_name']
            result += f"## Segment {i} - {video_name}\n\n"
            result += summary.strip() + "\n\n"

        return result