# video-query/backend/app.py

import os
import tempfile
import uuid
import logging
import sys
import base64
import json
import re
import io
import cairosvg
import pathlib
from PIL import Image, ImageDraw, ImageFont
from flask import Flask, request, jsonify, send_from_directory, send_file
from werkzeug.utils import secure_filename
from werkzeug.exceptions import RequestEntityTooLarge
from dotenv import load_dotenv
from flask_cors import CORS
from chunked_upload import chunked_upload_bp
from auth import require_auth, lenient_auth
import pdfkit
from pdfkit.configuration import Configuration
from bs4 import BeautifulSoup

# Route every log record at INFO and above to stdout, stamped with
# time, logger name and level.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-wide logger used by every handler in this file.
logger = logging.getLogger('video_query')

# Pull settings from a local .env file into the process environment.
# This runs before the video_processor import that follows
# (presumably so that module can read them at import time — confirm).
load_dotenv()

from video_processor import VideoProcessor

app = Flask(__name__)

# CORS policy for the /api/* routes: only the sandbox front-end origin is
# allowed, credentials may be sent, and the Authorization header is both
# accepted on requests and exposed (with Content-Disposition) on responses.
CORS(
    app,
    resources={
        r"/api/*": dict(
            origins=["https://ai-sandbox.oliver.solutions"],
            supports_credentials=True,
            methods=["GET", "POST", "OPTIONS"],
            allow_headers=["Content-Type", "X-Requested-With", "Authorization"],
        ),
    },
    expose_headers=["Content-Disposition", "Authorization"],
)

# Mount the chunked-upload routes on this app.
app.register_blueprint(chunked_upload_bp)

# Configuration
# Largest request body accepted: 5GB.
MAX_CONTENT_LENGTH = 5 * 1024 ** 3

# Directly uploaded videos are written here before processing.
UPLOAD_FOLDER = os.path.join(tempfile.gettempdir(), 'video_query_uploads')

# Scratch directories for diagram images used during PDF generation.
# (The former persistent PNG/SVG output directories and their public
# base URLs are no longer needed and have been retired.)
TEMP_PNG_DIR = os.path.join(tempfile.gettempdir(), 'video_query_png_temp')
TEMP_SVG_DIR = os.path.join(tempfile.gettempdir(), 'video_query_svg_temp')

# Make sure every working directory exists at import time.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(TEMP_PNG_DIR, exist_ok=True)
os.makedirs(TEMP_SVG_DIR, exist_ok=True)

# Configure the app
# NOTE(review): MAX_CONTENT_PATH does not look like a key Flask itself
# reads; it is kept because other modules may consult it — confirm.
app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    MAX_CONTENT_LENGTH=MAX_CONTENT_LENGTH,
    MAX_CONTENT_PATH=MAX_CONTENT_LENGTH,  # 5GB
)

# Single shared processor instance used by the request handlers.
video_processor = VideoProcessor()

# Video file extensions (lower-case, without the dot) accepted for upload
ALLOWED_EXTENSIONS = {'mp4', 'avi', 'mov', 'wmv', 'mkv', 'webm'}

def allowed_file(filename):
    """Return True when the text after the last dot is an allowed extension.

    The comparison is case-insensitive; names without a dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS

def _request_user_email():
    """Return the caller's email for attribution, or "anonymous".

    ``request.user`` is only used when it is a dict; the ``email`` entry is
    preferred, then ``preferred_username``.
    """
    user = getattr(request, "user", None)
    if not isinstance(user, dict):
        return "anonymous"
    return user.get("email", user.get("preferred_username", "anonymous"))


def _discard_upload(file_path):
    """Best-effort removal of a temporary upload; failures are only logged."""
    try:
        os.remove(file_path)
        logger.info(f"Cleaned up temporary file: {file_path}")
    except Exception as cleanup_error:
        # Deliberately broad: this runs from ``finally`` blocks and must never
        # replace the response (or the original error) with a cleanup failure.
        logger.warning(f"Could not remove temporary file {file_path}: {str(cleanup_error)}")


def _processing_result_response(result):
    """Turn the processor's result dict into the endpoint's JSON response."""
    if result['success']:
        content_length = len(result['content']) if result['content'] else 0
        logger.info(f"Returning successful response with {content_length} characters")
        return jsonify({
            'success': True,
            'content': result['content']
        })

    logger.error(f"Processing failed: {result['message']}")
    return jsonify({
        'success': False,
        'message': result['message']
    }), 500


def _payload_too_large_response():
    """Build the 413 response sent when the request body exceeds the limit."""
    logger.error(f"File too large: {request.content_length} bytes")
    return jsonify({
        'success': False,
        'message': 'The uploaded file is too large (max 5GB)'
    }), 413


def _unexpected_error_response(error):
    """Log *error* with its traceback and build the generic 500 response.

    Must be called from inside an ``except`` block so the traceback is
    available to ``logger.exception``.
    """
    logger.exception(f"Error processing video: {str(error)}")
    return jsonify({
        'success': False,
        'message': f'An unexpected error occurred: {str(error)}'
    }), 500


def _stream_upload_to_disk(file, file_path, chunk_size=1024 * 1024,
                           progress_step=50 * 1024 * 1024):
    """Copy the uploaded *file* to *file_path* in chunks.

    Returns the number of bytes written. Progress is logged each time another
    ``progress_step`` bytes have been written; a running threshold is used so
    the log does not depend on every read returning exactly ``chunk_size``.
    """
    total_bytes = 0
    next_progress_mark = progress_step
    try:
        with open(file_path, 'wb') as f:
            while True:
                chunk = file.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)
                total_bytes += len(chunk)
                if total_bytes >= next_progress_mark:
                    logger.info(f"Upload progress: {total_bytes / (1024 * 1024):.2f} MB")
                    next_progress_mark += progress_step
    except Exception as chunk_error:
        logger.error(f"Error during chunked upload: {str(chunk_error)}")
        raise
    return total_bytes


def _process_chunked_upload():
    """Handle the JSON variant: the video already exists on disk at ``file_path``."""
    data = request.get_json()
    if not isinstance(data, dict):
        # e.g. a JSON list or null: there is nothing to read the fields from.
        logger.error("JSON body is not an object")
        return jsonify({'success': False, 'message': 'JSON object required'}), 400

    file_path = data.get('file_path')
    filename = data.get('filename')
    prompt = data.get('prompt')

    # NOTE(review): SECURITY — file_path is supplied by the client and the file
    # is processed and then deleted. It should be confined to the directory the
    # chunked-upload blueprint writes to; that directory is not visible from
    # this module, so the restriction is flagged here rather than enforced.
    if not isinstance(file_path, str) or not file_path or not os.path.isfile(file_path):
        logger.error(f"File path not found: {file_path}")
        return jsonify({'success': False, 'message': 'Uploaded file not found'}), 400

    if not prompt:
        logger.error("No prompt provided")
        return jsonify({'success': False, 'message': 'No prompt provided'}), 400

    user_email = _request_user_email()
    logger.info(f"Processing chunked upload from {file_path} ({filename}) for user: {user_email}")

    try:
        result = video_processor.process_video(file_path, prompt, user_email)
        return _processing_result_response(result)
    except Exception as e:
        return _unexpected_error_response(e)
    finally:
        # Runs on success and on failure, so a crashing processor no longer
        # leaves the uploaded video behind.
        _discard_upload(file_path)


def _process_direct_upload():
    """Handle the multipart variant: the video arrives in the ``video`` form field."""
    try:
        # Accessing the parsed form/files is where the body is actually read,
        # so an over-limit upload is expected to surface here as
        # RequestEntityTooLarge (see Flask's MAX_CONTENT_LENGTH behaviour).
        uploaded_files = request.files
        prompt = request.form.get('prompt', '')
    except RequestEntityTooLarge:
        return _payload_too_large_response()

    # Check if a file was uploaded
    if 'video' not in uploaded_files:
        logger.error("No video file in request")
        return jsonify({'success': False, 'message': 'No video file provided'}), 400

    file = uploaded_files['video']

    logger.info(f"Received file: {file.filename}")
    logger.info(f"Prompt length: {len(prompt)} characters")

    # Check for empty (or missing) filename
    if not file.filename:
        logger.error("Empty filename provided")
        return jsonify({'success': False, 'message': 'No video selected'}), 400

    if not prompt:
        logger.error("No prompt provided")
        return jsonify({'success': False, 'message': 'No prompt provided'}), 400

    # Check file extension
    if not allowed_file(file.filename):
        logger.error(f"Invalid file type: {file.filename}")
        return jsonify({
            'success': False,
            # sorted() keeps the message stable; set order is arbitrary.
            'message': f'Invalid file type. Allowed types: {", ".join(sorted(ALLOWED_EXTENSIONS))}'
        }), 400

    file_path = None
    try:
        # Make sure upload directory exists
        os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
        logger.info(f"Upload directory: {app.config['UPLOAD_FOLDER']}")

        # Generate a unique filename to prevent collisions
        original_filename = secure_filename(file.filename)
        unique_filename = f"{uuid.uuid4()}_{original_filename}"
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], unique_filename)
        logger.info(f"Writing to: {file_path}")

        total_bytes = _stream_upload_to_disk(file, file_path)
        logger.info(f"File saved: {file_path} ({total_bytes} bytes)")

        user_email = _request_user_email()

        # Process the video
        logger.info(f"Starting video processing for user: {user_email}...")
        result = video_processor.process_video(file_path, prompt, user_email)
        logger.info(f"Processing result: success={result['success']}")

        return _processing_result_response(result)

    except RequestEntityTooLarge:
        return _payload_too_large_response()
    except Exception as e:
        return _unexpected_error_response(e)
    finally:
        # Remove the temporary copy whether processing succeeded or failed.
        if file_path is not None and os.path.exists(file_path):
            _discard_upload(file_path)


@app.route('/api/process', methods=['POST'])
@lenient_auth
def process_video():
    """Process an uploaded video with the supplied prompt.

    Two request shapes are accepted:

    * JSON ``{"file_path", "filename", "prompt"}`` — the video was already
      stored on the server (chunked upload) and ``file_path`` points at it.
    * multipart form with a ``video`` file and a ``prompt`` field — the video
      is streamed to the upload folder first (direct upload, for small files).

    Returns JSON ``{"success": True, "content": ...}`` on success, or
    ``{"success": False, "message": ...}`` with status 400 (invalid request),
    413 (body too large) or 500 (processing failed). The on-disk video is
    deleted once processing has been attempted, whether or not it succeeded.
    """
    logger.info("API request received: /api/process")
    logger.info(f"Content-Type: {request.content_type}")
    logger.info(f"Content-Length: {request.content_length}")

    # Handle chunked upload case
    if request.is_json:
        return _process_chunked_upload()

    # Standard direct upload method (for small files)
    return _process_direct_upload()

# Test route to verify authentication
@app.route('/api/auth-test', methods=['GET'])
@lenient_auth
def auth_test():
    """Report what the auth layer attached to the current request.

    Returns JSON with:

    * ``authenticated`` — always True (hard-coded).
      NOTE(review): confirm that passing ``lenient_auth`` really implies a
      validated caller; otherwise this flag is misleading.
    * ``user`` — the ``name`` entry of ``request.user`` ("Anonymous" if that
      entry is missing), or "Unknown" when no user dict is attached.
    * ``token_present`` — whether an Authorization header was sent.
    * ``token_info`` — the ``name`` / ``preferred_username`` / ``email``
      entries present on ``request.user`` (empty when there is no user dict).
    """
    # Only treat request.user as claims when it is a dict, matching the guard
    # used by process_video; a missing or non-dict value previously raised
    # AttributeError on ``.get``.
    user = getattr(request, "user", None)
    if isinstance(user, dict):
        display_name = user.get("name", "Anonymous")
        token_info = {
            key: user[key]
            for key in ("name", "preferred_username", "email")
            if key in user
        }
    else:
        display_name = "Unknown"
        token_info = {}

    user_info = {
        "authenticated": True,
        "user": display_name,
        "token_present": "Authorization" in request.headers,
        "token_info": token_info,
    }
    logger.info(f"Auth test: {user_info}")
    return jsonify(user_info)

# Handle PDF generation
@app.route('/api/generate-pdf', methods=['POST'])
@lenient_auth
def generate_pdf():
    """Generate a PDF from HTML content with mermaid diagrams"""
    logger.info("API request received: /api/generate-pdf")

    if not request.is_json:
        logger.error("Request is not JSON")
        return jsonify({'success': False, 'message': 'JSON request required'}), 400

    data = request.get_json()
    html_content = data.get('html')
    text_diagrams = data.get('textDiagrams', {})
    svg_diagrams = data.get('svgDiagrams', {})
    diagram_png_data_urls = data.get('diagramPngs', {})
    video_file_name = data.get('videoFileName', '')

    # Log detailed request information
    logger.info(f"Request data: HTML content length: {len(html_content) if html_content else 0}")
    logger.info(f"Text diagrams received: {len(text_diagrams)}")
    logger.info(f"SVG diagrams received: {len(svg_diagrams)}")
    logger.info(f"Diagram PNGs received: {len(diagram_png_data_urls)}")
    logger.info(f"Video filename received: {video_file_name if video_file_name else 'None'}")

    # Comment out full HTML content logging
    # logger.info("HTML CONTENT RECEIVED START -------------------")
    # logger.info(html_content)
    # logger.info("HTML CONTENT RECEIVED END ---------------------")

    if text_diagrams:
        logger.info(f"Text diagram keys: {list(text_diagrams.keys())}")

    if svg_diagrams:
        logger.info(f"SVG diagram keys: {list(svg_diagrams.keys())}")
        for key, value in svg_diagrams.items():
            logger.info(f"SVG diagram {key}: starts with data:image/svg+xml;base64: {value.startswith('data:image/svg+xml;base64,') if value else False}")

    if diagram_png_data_urls:
        logger.info(f"Diagram PNG keys: {list(diagram_png_data_urls.keys())}")
        for key, value in diagram_png_data_urls.items():
            logger.info(f"Diagram PNG {key}: starts with data:image/png;base64: {value.startswith('data:image/png;base64,') if value else False} (length: {len(value) if value else 0})")

    if not html_content:
        logger.error("No HTML content provided")
        return jsonify({'success': False, 'message': 'No HTML content provided'}), 400

    try:
        # Create a temporary directory for PDF and HTML file, not necessarily for images
        temp_dir_for_pdf = tempfile.mkdtemp()
        pdf_path = os.path.join(temp_dir_for_pdf, f"response_{uuid.uuid4()}.pdf")

        # Process HTML to replace mermaid divs with image tags
        processed_html = html_content
        processed_svg_ids = set()

        # Decide whether to use web URLs or file URIs for pdfkit
        # Always use file:/// URIs with enable-local-file-access
        # USE_WEB_URLS_FOR_PDFKIT = False  # This is no longer needed
        # We now use temp directories and local file paths for all images

        # Create a subdirectory for images in the temp dir (for the HTML structure)
        img_dir = os.path.join(temp_dir_for_pdf, "images")
        os.makedirs(img_dir, exist_ok=True)

        logger.info("HTML content before processing:")
        logger.info(f"HTML contains '.mermaid' class: {'class=mermaid' in html_content}")
        logger.info(f"HTML contains mermaid code blocks: {'```mermaid' in html_content or 'graph TD' in html_content}")

        # First approach: Manually look for the mermaid pattern in the HTML before any processing
        pattern1 = r'<div[^>]*class=.?mermaid.?[^>]*>(.*?)</div>'
        pattern2 = r'<pre><code.*?>(graph\s+TD.*?)</code></pre>'
        pattern3 = r'graph\s+TD'

        mermaid_matches1 = re.findall(pattern1, html_content, re.DOTALL)
        mermaid_matches2 = re.findall(pattern2, html_content, re.DOTALL)
        mermaid_matches3 = re.findall(pattern3, html_content, re.DOTALL)

        logger.info(f"Mermaid div matches: {len(mermaid_matches1)}")
        if mermaid_matches1:
            for i, m in enumerate(mermaid_matches1):
                logger.info(f"Mermaid div content {i} (first 100 chars): {m[:100]}")

        logger.info(f"Mermaid code block matches: {len(mermaid_matches2)}")
        if mermaid_matches2:
            for i, m in enumerate(mermaid_matches2):
                logger.info(f"Mermaid code content {i} (first 100 chars): {m[:100]}")

        logger.info(f"Mermaid graph TD matches: {len(mermaid_matches3)}")

        # First, prioritize using the frontend-generated PNGs if available
        if diagram_png_data_urls:
            logger.info(f"Processing {len(diagram_png_data_urls)} PNG diagrams provided by frontend.")

            # Parse the HTML with BeautifulSoup ONCE before the loop
            soup = BeautifulSoup(processed_html, 'html.parser')

            for diagram_id, png_data_url in diagram_png_data_urls.items():
                unique_png_filename = f"{diagram_id}_{uuid.uuid4()}.png"
                temp_png_path = os.path.join(TEMP_PNG_DIR, unique_png_filename)

                image_source_for_pdfkit = None

                try:
                    if not png_data_url.startswith('data:image/png;base64,'):
                        logger.warning(f"Unsupported PNG data URL format for {diagram_id}")
                        raise ValueError("Unsupported PNG data URL format")

                    base64_png_content = png_data_url.split(',', 1)[1]
                    png_bytes = base64.b64decode(base64_png_content)

                    with open(temp_png_path, 'wb') as f:
                        f.write(png_bytes)

                    if not os.path.exists(temp_png_path) or os.path.getsize(temp_png_path) == 0:
                        logger.error(f"PNG for {diagram_id} (from frontend PNG) was not saved or is empty at {temp_png_path}.")
                        raise ValueError("PNG saving failed or empty")

                    logger.info(f"Saved frontend-generated PNG for {diagram_id} to: {temp_png_path} (size: {os.path.getsize(temp_png_path)} bytes)")

                    # We no longer use web URLs, always use local file path
                    image_source_for_pdfkit = pathlib.Path(temp_png_path).as_uri()

                    alt_text = f"Diagram: {text_diagrams.get(diagram_id, diagram_id)[:50].replace('<', '&lt;').replace('>', '&gt;')}..."

                    # --- MODIFIED REPLACEMENT using BeautifulSoup ---
                    target_div = soup.find('div', id=diagram_id)
                    if target_div:
                        # Create the new img tag as a BeautifulSoup object
                        new_img_tag_soup = soup.new_tag('img', src=image_source_for_pdfkit, alt=alt_text)
                        new_img_tag_soup['style'] = "max-width:100%; margin:20px auto; display:block; border:1px solid #eee;"

                        # Replace the target div with our new img tag
                        target_div.replace_with(new_img_tag_soup)
                        logger.info(f"Replaced div with id='{diagram_id}' using its frontend-generated PNG (src: {image_source_for_pdfkit}) via BeautifulSoup.")
                        processed_svg_ids.add(diagram_id)
                    else:
                        logger.warning(f"PNG_WARN: Could not find div with id='{diagram_id}' in the HTML to replace with frontend PNG using BeautifulSoup.")
                        # Fallback to replacing code block if div with ID isn't found
                        original_code_for_png = text_diagrams.get(diagram_id)
                        if original_code_for_png:
                            # Try to find a pre/code block with matching content
                            code_blocks = soup.find_all('pre')
                            for code_block in code_blocks:
                                code_el = code_block.find('code')
                                if code_el and original_code_for_png.strip() in code_el.text.strip():
                                    # Create new img tag
                                    new_img_tag_soup_fallback = soup.new_tag('img', src=image_source_for_pdfkit, alt=alt_text)
                                    new_img_tag_soup_fallback['style'] = "max-width:100%; margin:20px auto; display:block; border:1px solid #eee;"

                                    # Replace the code block with the img tag
                                    code_block.replace_with(new_img_tag_soup_fallback)
                                    logger.info(f"PNG_WARN_RECOVERY: Replaced a code block matching content of diagram {diagram_id} with its frontend-PNG img tag via BeautifulSoup.")
                                    processed_svg_ids.add(diagram_id)
                                    break
                            else:
                                logger.warning(f"PNG_WARN_FAIL: Also failed to find a code block for diagram {diagram_id} content for frontend-PNG replacement with BeautifulSoup.")

                except Exception as e_png_proc:
                    logger.error(f"Error processing provided PNG for diagram_id '{diagram_id}': {str(e_png_proc)}")
                    # Create a placeholder image indicating the error for this specific diagram
                    try:
                        img_err = Image.new('RGB', (500, 150), color=(255, 230, 230)) # Light red
                        draw_err = ImageDraw.Draw(img_err)
                        # Consider ImageFont.truetype for specific fonts/sizes if default is too small
                        title_font = ImageFont.load_default()
                        text_font = ImageFont.load_default()
                        draw_err.text((10, 10), f"Error rendering diagram:", fill=(128, 0, 0), font=title_font)
                        draw_err.text((10, 30), f"ID: {diagram_id}", fill=(100, 0, 0), font=text_font)
                        draw_err.text((10, 50), f"Details: {str(e_png_proc)[:80]}", fill=(100, 0, 0), font=text_font)
                        if text_diagrams.get(diagram_id):
                            draw_err.text((10,70), f"Code: {text_diagrams[diagram_id][:60]}...", fill=(100,0,0), font=text_font)

                        with open(temp_png_path, 'wb') as f_err: # Save error image with the same name pattern
                            img_err.save(f_err, 'PNG')
                        logger.info(f"Created error placeholder image for {diagram_id} at {temp_png_path}")

                        # We no longer use web URLs, always use local file path
                        image_source_for_pdfkit = pathlib.Path(temp_png_path).as_uri()

                        # Find and replace the target div with the error image
                        target_div_err = soup.find('div', id=diagram_id)
                        if target_div_err:
                            new_err_img_tag = soup.new_tag('img', src=image_source_for_pdfkit, alt=f"Error rendering diagram {diagram_id}")
                            new_err_img_tag['style'] = "max-width:100%; margin:20px auto; display:block; border: 2px solid red;"
                            target_div_err.replace_with(new_err_img_tag)
                            logger.info(f"Replaced div with id='{diagram_id}' using an error placeholder image via BeautifulSoup.")
                            processed_svg_ids.add(diagram_id)
                        else:
                            logger.error(f"Could not find div with id='{diagram_id}' to replace with error placeholder image.")
                    except Exception as e_placeholder_img:
                        logger.error(f"Failed to create error placeholder image for {diagram_id}: {str(e_placeholder_img)}")
                        # Try to insert a simple error paragraph if div is found
                        target_div_err2 = soup.find('div', id=diagram_id)
                        if target_div_err2:
                            error_p = soup.new_tag('p')
                            error_p['style'] = "color:red; border:1px solid red; padding:10px;"
                            error_p.string = f"[Error processing diagram: {diagram_id} - {str(e_png_proc)[:50]}]"
                            target_div_err2.replace_with(error_p)
                            logger.info(f"Replaced div with id='{diagram_id}' with a simple error message via BeautifulSoup.")
                            processed_svg_ids.add(diagram_id)

            # After processing all PNG diagrams, update processed_html
            processed_html = str(soup)
            logger.info("Completed BeautifulSoup processing of all PNG diagrams")

        # Fallback to using SVG diagrams if provided
        if svg_diagrams:
            logger.info(f"Processing {len(svg_diagrams)} SVG diagrams provided by frontend.")

            # Ensure we're working with a BeautifulSoup object
            if 'soup' not in locals() or not isinstance(soup, BeautifulSoup):
                soup = BeautifulSoup(processed_html, 'html.parser')

            for diagram_id, svg_data_url in svg_diagrams.items():
                # Skip if this diagram ID was already processed in the PNG section
                if diagram_id in processed_svg_ids:
                    logger.info(f"Skipping SVG for diagram_id '{diagram_id}' as it was already processed in PNG section.")
                    continue

                # Generate a unique filename for the persistent storage to avoid collisions
                unique_png_filename = f"{diagram_id}_{uuid.uuid4()}.png"
                temp_png_path = os.path.join(TEMP_PNG_DIR, unique_png_filename)

                image_source_for_pdfkit = None

                try:
                    logger.info(f"Processing diagram ID: {diagram_id}")

                    if not svg_data_url.startswith('data:image/svg+xml;base64,'):
                        logger.warning(f"Unsupported SVG data URL format for {diagram_id}: {svg_data_url[:30]}...")
                        raise ValueError("Unsupported SVG data URL format")

                    # Extract base64 content
                    base64_data = svg_data_url.split(',', 1)[1]
                    logger.info(f"Base64 data length: {len(base64_data)}")

                    # Decode the base64 data
                    svg_bytes = base64.b64decode(base64_data)
                    logger.info(f"Decoded SVG data length: {len(svg_bytes)}")

                    # Save the SVG data to the temporary SVG directory
                    temp_svg_filename = f"{diagram_id}_{uuid.uuid4()}.svg"
                    temp_svg_path = os.path.join(TEMP_SVG_DIR, temp_svg_filename)
                    with open(temp_svg_path, 'wb') as f:
                        f.write(svg_bytes)
                    logger.info(f"Saved SVG data to {temp_svg_path} (size: {len(svg_bytes)} bytes)")

                    # Convert SVG to PNG using cairosvg with white background
                    png_data = cairosvg.svg2png(bytestring=svg_bytes, scale=2.0, background_color="white")
                    with open(temp_png_path, 'wb') as f:
                        f.write(png_data)

                    if not os.path.exists(temp_png_path) or os.path.getsize(temp_png_path) == 0:
                        logger.error(f"PNG for {diagram_id} (from SVG) was not created or is empty at {temp_png_path}.")
                        raise ValueError("PNG creation failed or empty")

                    logger.info(f"Generated PNG for {diagram_id} from SVG: {temp_png_path} (size: {os.path.getsize(temp_png_path)} bytes)")

                    # We no longer use web URLs, always use local file path
                    image_source_for_pdfkit = pathlib.Path(temp_png_path).as_uri()

                    alt_text = f"Mermaid Diagram: {text_diagrams.get(diagram_id, diagram_id)[:50].replace('<', '&lt;').replace('>', '&gt;')}..."

                    # --- MODIFIED REPLACEMENT using BeautifulSoup ---
                    target_div_svg = soup.find('div', id=diagram_id)
                    if target_div_svg:
                        # Create the new img tag as a BeautifulSoup object
                        new_img_tag_soup_svg = soup.new_tag('img', src=image_source_for_pdfkit, alt=alt_text)
                        new_img_tag_soup_svg['style'] = "max-width:100%; margin:20px auto; display:block; border:1px solid #eee;"

                        # Replace the target div with our new img tag
                        target_div_svg.replace_with(new_img_tag_soup_svg)
                        logger.info(f"Replaced div with id='{diagram_id}' using its SVG-generated PNG via BeautifulSoup.")
                        processed_svg_ids.add(diagram_id)
                    else:
                        logger.warning(f"SVG_WARN: Could not find div with id='{diagram_id}' for SVG replacement using BeautifulSoup.")
                        # Try to find a code block with matching content from textDiagrams
                        original_code_for_svg = text_diagrams.get(diagram_id)
                        if original_code_for_svg and os.path.exists(temp_png_path):
                            # Try to find matching code blocks
                            code_blocks = soup.find_all('pre')
                            for code_block in code_blocks:
                                code_el = code_block.find('code')
                                if code_el and original_code_for_svg.strip() in code_el.text.strip():
                                    # Create new img tag
                                    new_img_tag_soup_svg_fallback = soup.new_tag('img', src=image_source_for_pdfkit, alt=alt_text)
                                    new_img_tag_soup_svg_fallback['style'] = "max-width:100%; margin:20px auto; display:block; border:1px solid #eee;"

                                    # Replace the code block with the img tag
                                    code_block.replace_with(new_img_tag_soup_svg_fallback)
                                    logger.info(f"SVG_WARN_RECOVERY: Replaced a code block matching content for diagram {diagram_id} with its SVG-PNG img tag via BeautifulSoup.")
                                    processed_svg_ids.add(diagram_id)
                                    break
                            else:
                                logger.warning(f"SVG_WARN_FAIL: Failed to find a matching code block for SVG diagram {diagram_id} with BeautifulSoup.")

                except Exception as e_svg_proc:
                    logger.error(f"Error processing provided SVG for diagram_id '{diagram_id}': {str(e_svg_proc)}")
                    # Create a placeholder image indicating the error for this specific diagram
                    try:
                        img_err = Image.new('RGB', (500, 150), color=(255, 230, 230)) # Light red
                        draw_err = ImageDraw.Draw(img_err)
                        # Consider ImageFont.truetype for specific fonts/sizes if default is too small
                        title_font = ImageFont.load_default()
                        text_font = ImageFont.load_default()
                        draw_err.text((10, 10), f"Error rendering diagram:", fill=(128, 0, 0), font=title_font)
                        draw_err.text((10, 30), f"ID: {diagram_id}", fill=(100, 0, 0), font=text_font)
                        draw_err.text((10, 50), f"Details: {str(e_svg_proc)[:80]}", fill=(100, 0, 0), font=text_font)
                        if text_diagrams.get(diagram_id):
                            draw_err.text((10,70), f"Code: {text_diagrams[diagram_id][:60]}...", fill=(100,0,0), font=text_font)

                        with open(temp_png_path, 'wb') as f_err: # Save error image with the same name pattern
                            img_err.save(f_err, 'PNG')
                        logger.info(f"Created error placeholder image for SVG diagram {diagram_id} at {temp_png_path}")

                        # We no longer use web URLs, always use local file path
                        image_source_for_pdfkit = pathlib.Path(temp_png_path).as_uri()

                        # Find and replace the target div with the error image
                        target_div_svg_err = soup.find('div', id=diagram_id)
                        if target_div_svg_err:
                            new_err_img_tag_svg = soup.new_tag('img', src=image_source_for_pdfkit, alt=f"Error rendering SVG diagram {diagram_id}")
                            new_err_img_tag_svg['style'] = "max-width:100%; margin:20px auto; display:block; border: 2px solid red;"
                            target_div_svg_err.replace_with(new_err_img_tag_svg)
                            logger.info(f"Replaced div with id='{diagram_id}' using an SVG error placeholder image via BeautifulSoup.")
                            processed_svg_ids.add(diagram_id)
                        else:
                            logger.error(f"Could not find div with id='{diagram_id}' to replace with SVG error placeholder image.")
                    except Exception as e_placeholder_img:
                        logger.error(f"Failed to create SVG error placeholder image for {diagram_id}: {str(e_placeholder_img)}")
                        # Try to insert a simple error paragraph if div is found
                        target_div_svg_err2 = soup.find('div', id=diagram_id)
                        if target_div_svg_err2:
                            error_p_svg = soup.new_tag('p')
                            error_p_svg['style'] = "color:red; border:1px solid red; padding:10px;"
                            error_p_svg.string = f"[Error processing SVG diagram: {diagram_id} - {str(e_svg_proc)[:50]}]"
                            target_div_svg_err2.replace_with(error_p_svg)
                            logger.info(f"Replaced div with id='{diagram_id}' with a simple SVG error message via BeautifulSoup.")
                            processed_svg_ids.add(diagram_id)

            # After processing all SVG diagrams, update processed_html
            processed_html = str(soup)
            logger.info("Completed BeautifulSoup processing of all SVG diagrams")

        # Fallback for any mermaid code blocks/divs *not* covered by processed_svg_ids
        # This typically means the frontend didn't send an SVG for them, or all replacement attempts above failed.

        # Fallback for remaining <div class="mermaid"> (that might not have had a corresponding SVG)
        logger.info("Fallback: Looking for any remaining <div class='mermaid'> not already handled.")
        temp_processed_html_list = []
        last_end = 0
        # More specific regex for class="mermaid" and also capturing ID if present
        div_fallback_pattern = r'(<div[^>]*class\s*=\s*["\']?[^"\']*mermaid[^"\']*["\']?[^>]*>(?:.*?)</div>)'

        for match_obj in re.finditer(div_fallback_pattern, processed_html, flags=re.DOTALL | re.IGNORECASE):
            start, end = match_obj.span()
            div_html_segment = match_obj.group(1)

            # Check if this div has an ID that was already processed
            id_in_div_match = re.search(r'\bid\s*=\s*["\']?([^"\s\'<>]+)["\']?', div_html_segment, re.IGNORECASE)
            current_div_id = None
            if id_in_div_match:
                current_div_id = id_in_div_match.group(1)
                if current_div_id in processed_svg_ids:
                    # Skip this div as it was already processed by svgDiagrams
                    temp_processed_html_list.append(processed_html[last_end:end])
                    last_end = end
                    logger.info(f"Fallback Div: Skipping div with id='{current_div_id}' as it's in processed_svg_ids.")
                    continue

            # This div was not handled by a provided SVG. Generate text-based placeholder.
            logger.warning(f"Fallback Div: Processing <div class='mermaid'> at {start}-{end} (ID: {current_div_id}) not in processed_svg_ids. Generating text placeholder.")

            # Try to extract diagram code from the div
            soup_div = BeautifulSoup(div_html_segment, 'html.parser')
            diagram_text_content = soup_div.get_text(separator='\n', strip=True) or "No text in div"

            # Also check if we have this in textDiagrams
            if current_div_id and current_div_id in text_diagrams:
                diagram_text_content = text_diagrams[current_div_id]

            # Generate a unique ID for this fallback image
            fallback_uuid = str(uuid.uuid4())[:8]
            placeholder_img_name = f"fallback_div_{fallback_uuid}.png"
            placeholder_path = os.path.join(TEMP_PNG_DIR, placeholder_img_name)

            try:
                img = Image.new('RGB', (800, 300), color=(240, 240, 240))
                draw = ImageDraw.Draw(img)
                draw.text((10, 10), "Mermaid Diagram (Fallback Render)", fill=(50, 50, 50))
                draw.text((10, 30), f"ID: {current_div_id or 'N/A'}", fill=(50, 50, 50))
                y_pos = 50
                for i, line in enumerate(diagram_text_content.split('\n')[:15]):  # Show more lines
                    draw.text((10, y_pos), line[:80], fill=(50, 50, 50))
                    y_pos += 15
                with open(placeholder_path, 'wb') as f:
                    img.save(f, 'PNG')

                # We no longer use web URLs, always use local file path
                fallback_image_src = pathlib.Path(placeholder_path).as_uri()
                img_tag = f'<img src="{fallback_image_src}" alt="Fallback Mermaid Diagram" style="max-width:100%; margin:20px auto; display:block; border:1px dashed #ccc;">'
            except Exception as e_pil:
                logger.error(f"Fallback Div: Error creating image for {current_div_id}: {e_pil}")
                img_tag = f"<pre style='color:#333; background:#f5f5f5; padding:10px; border:1px dashed #ccc;'>[Mermaid Diagram Code]:\n{diagram_text_content[:500]}</pre>"

            temp_processed_html_list.append(processed_html[last_end:start])
            temp_processed_html_list.append(img_tag)
            last_end = end
            if current_div_id:
                processed_svg_ids.add(current_div_id)  # Mark as handled

        # Add the remaining content after the last match
        temp_processed_html_list.append(processed_html[last_end:])
        processed_html = "".join(temp_processed_html_list)

        # Process any remaining mermaid code blocks that weren't already handled
        logger.info("Fallback: Looking for any remaining mermaid code blocks not explicitly handled by ID.")
        # More specific pattern for <pre><code class="language-mermaid">...</code> or similar structures
        # Avoid overly broad patterns like raw 'graph TD'
        # This pattern tries to capture the code within a language-mermaid block
        code_block_pattern = r'(<pre[^>]*>\s*<code[^>]*class\s*=\s*["\']?[^"\']*language-mermaid[^"\']*["\']?[^>]*>([\s\S]*?)</code>\s*</pre>)'

        temp_processed_html_list_codeblocks = []
        last_end_codeblocks = 0

        for match_obj in re.finditer(code_block_pattern, processed_html, flags=re.DOTALL | re.IGNORECASE):
            start, end = match_obj.span()
            full_match_html = match_obj.group(1)  # The whole <pre><code>...</code></pre>
            diagram_content = match_obj.group(2).strip() # Just the code

            logger.info(f"Found potential unhandled mermaid code block. Content starts: {diagram_content[:50]}...")

            # Try to find if this diagram_content matches any ID in textDiagrams
            # And if that ID has *already* been processed (i.e., an <img> tag was made)
            is_already_processed_by_id = False
            matched_original_id = None
            for diag_id, original_code_from_textdiagrams in text_diagrams.items():
                # Simple check: if the extracted diagram_content is very similar to original_code
                # This might need a more sophisticated similarity check.
                if diagram_content == original_code_from_textdiagrams.strip():
                    matched_original_id = diag_id
                    if diag_id in processed_svg_ids:
                        is_already_processed_by_id = True
                        logger.info(f"Code block content matches diagram ID '{diag_id}' which is in processed_svg_ids. Skipping fallback.")
                        break
                    else:
                        logger.info(f"Code block content matches diagram ID '{diag_id}' which was NOT in processed_svg_ids. Will attempt SVG render if available.")
                        break # Found a match, even if not processed by ID yet

            temp_processed_html_list_codeblocks.append(processed_html[last_end_codeblocks:start]) # Content before this match

            if is_already_processed_by_id:
                # This code block corresponds to an image already inserted.
                # The original <pre><code> block should be removed or replaced by the image if it wasn't already.
                # Since the image replacement by ID targets <div id="...">, this <pre> might still be there.
                # For safety, if it was already processed, we should ensure this <pre> block is GONE.
                # However, the primary image replacement should have taken care of the visual aspect.
                # If the pre block is still there, it's a problem with the primary replacement not being thorough.
                # For now, let's assume if is_already_processed_by_id, we don't want to add anything new here.
                # We might actually want to ensure this 'full_match_html' is *removed* if its corresponding img is present.
                # This gets complex. Let's first focus on not *adding* duplicates.
                # If an image was already made, we effectively want to remove this <pre><code> block.
                # So, we append nothing here for this specific match.
                logger.info(f"Skipping rendering for code block of diagram {matched_original_id} as it was already processed by ID.")
                # Effectively, this removes the <pre><code> block if its content was for an already-rendered image.
            else:
                # This code block was NOT processed by ID (or didn't match any known ID).
                # Try to render it now.
                img_tag_for_code_block = None
                # Check if we have an SVG for it (if matched_original_id was found but not in processed_svg_ids)
                if matched_original_id and matched_original_id in svg_diagrams and svg_diagrams[matched_original_id].startswith('data:image/svg+xml;base64,'):
                    # Generate PNG from SVG
                    try:
                        base64_data = svg_diagrams[matched_original_id].split(',')[1]
                        svg_data_decoded = base64.b64decode(base64_data)
                        uuid_value = uuid.uuid4()

                        # Save SVG data
                        temp_svg_filename = f"{matched_original_id}_{uuid_value}.svg"
                        temp_svg_path = os.path.join(TEMP_SVG_DIR, temp_svg_filename)
                        with open(temp_svg_path, 'wb') as f:
                            f.write(svg_data_decoded)
                        logger.info(f"Saved SVG data for code block to {temp_svg_path} (size: {len(svg_data_decoded)} bytes)")

                        # Save PNG data
                        temp_png_filename = f"{matched_original_id}_{uuid_value}.png"
                        temp_png_path = os.path.join(TEMP_PNG_DIR, temp_png_filename)

                        png_data = cairosvg.svg2png(bytestring=svg_data_decoded, scale=2.0, background_color="white")
                        with open(temp_png_path, 'wb') as f:
                            f.write(png_data)

                        # We no longer use web URLs, always use local file path
                        img_src = pathlib.Path(temp_png_path).as_uri()

                        img_tag_for_code_block = f'<img src="{img_src}" alt="Mermaid Diagram" style="max-width:100%; margin:20px auto; display:block; border:1px solid #eee;">'
                        logger.info(f"Used SVG render for code block: {matched_original_id}")
                        processed_svg_ids.add(matched_original_id)
                    except Exception as e:
                        logger.error(f"Error converting SVG to PNG: {str(e)}")

                if not img_tag_for_code_block: # No SVG or SVG processing failed
                    logger.info(f"No specific SVG found for this code block, creating PIL fallback image or pre.")
                    # Create a fallback image if no matching SVG was found
                    fallback_uuid_code = str(uuid.uuid4())[:8]
                    placeholder_img_name_code = f"code_block_pil_{fallback_uuid_code}.png"
                    placeholder_path_code = os.path.join(TEMP_PNG_DIR, placeholder_img_name_code)

                    try:
                        # Create an image with the diagram code
                        img = Image.new('RGB', (800, 400), color=(245, 245, 245))
                        draw = ImageDraw.Draw(img)
                        draw.text((10, 10), "Mermaid Diagram (Fallback)", fill=(50, 50, 50))

                        # Add the diagram code content
                        y_pos = 40
                        for line_idx, line in enumerate(diagram_content.split('\n')[:20]):
                            draw.text((10, y_pos), line[:80], fill=(50, 50, 50))
                            y_pos += 15

                        with open(placeholder_path_code, 'wb') as f:
                            img.save(f, 'PNG')

                        # We no longer use web URLs, always use local file path
                        img_src_code = pathlib.Path(placeholder_path_code).as_uri()

                        img_tag_for_code_block = f'<img src="{img_src_code}" alt="Mermaid Diagram (Code Fallback)" style="max-width:100%; margin:20px auto; display:block; border:1px dashed #ccc;">'
                    except Exception as e_img:
                        logger.error(f"Error creating fallback image: {str(e_img)}")
                        # IMPORTANT: Avoid just dumping the diagram_content here if that's the source of the problem.
                        # Use a more generic placeholder instead
                        img_tag_for_code_block = f'<p style="color:red; border:1px solid red; padding:10px;">[Mermaid diagram code could not be rendered here. Content: {diagram_content[:80]}...]</p>'

                temp_processed_html_list_codeblocks.append(img_tag_for_code_block or "") # Append the new image/placeholder

            last_end_codeblocks = end

        temp_processed_html_list_codeblocks.append(processed_html[last_end_codeblocks:])
        processed_html = "".join(temp_processed_html_list_codeblocks)

        # Configure PDF options
        options = {
            'page-size': 'Letter',
            'margin-top': '0.75in',
            'margin-right': '0.75in',
            'margin-bottom': '0.75in',
            'margin-left': '0.75in',
            'encoding': 'UTF-8',
            # 'no-outline': None, # Removed - not supported in unpatched Qt
            'enable-local-file-access': True  # Still needed for local file access
            # 'load-error-handling': 'skip', # or 'ignore' - might hide issues but prevent PDF failure
            # 'load-media-error-handling': 'skip',
        }

        # The server has an unpatched version of wkhtmltopdf which doesn't support
        # the 'enable-remote-images' option. We're using file:/// URIs with enable-local-file-access instead

        # Add custom CSS for better formatting
        css = """
        body {
            font-family: Arial, sans-serif;
            font-size: 12pt;
            line-height: 1.6;
        }
        img {
            max-width: 100%;
            height: auto;
            margin: 20px auto;
            display: block;
        }
        h1, h2, h3, h4, h5, h6 {
            color: #333;
            margin-top: 20px;
            margin-bottom: 10px;
        }
        pre {
            background-color: #f5f5f5;
            padding: 10px;
            border-radius: 5px;
            overflow-x: auto;
        }
        code {
            font-family: 'Courier New', Courier, monospace;
            font-size: 11pt;
        }
        table {
            border-collapse: collapse;
            width: 100%;
            margin: 20px 0;
        }
        table, th, td {
            border: 1px solid #ddd;
        }
        th, td {
            padding: 8px;
            text-align: left;
        }
        th {
            background-color: #f2f2f2;
        }
        /* Special handling for pre containing mermaid code */
        pre.mermaid-source {
            display: none;
        }
        """

        # Comment out final HTML content logging
        # logger.info("====================================================")
        # logger.info("FINAL HTML CONTENT BEING SENT TO PDFKIT:")
        # logger.info(processed_html)
        # logger.info("====================================================")

        # Create an index.html file in the temp directory for PDF generation
        index_html_path = os.path.join(temp_dir_for_pdf, "index.html")
        with open(index_html_path, 'w', encoding='utf-8') as f:
            f.write(f"""
            <!DOCTYPE html>
            <html>
            <head>
                <meta charset="UTF-8">
                <title>Video Query Result</title>
                <style>{css}</style>
            </head>
            <body>
                {processed_html}
            </body>
            </html>
            """)

        # Log the final processed HTML for debugging
        logger.info(f"Final HTML length: {len(processed_html)}")
        logger.info("Final HTML contains image tags: " + str('<img src=' in processed_html))

        # Check if processed HTML still contains mermaid divs or code blocks
        contains_mermaid_div = 'class=mermaid' in processed_html
        contains_mermaid_code = 'graph TD' in processed_html
        logger.info(f"Final HTML still contains mermaid divs: {contains_mermaid_div}")
        logger.info(f"Final HTML still contains mermaid code: {contains_mermaid_code}")

        # Commented out listing of files as we no longer keep persistent files
        # logger.info("Files in temporary PNG directory:")
        # for file_name in os.listdir(TEMP_PNG_DIR):
        #     file_path = os.path.join(TEMP_PNG_DIR, file_name)
        #     file_size = os.path.getsize(file_path)
        #     logger.info(f"  - {file_name}: {file_size} bytes")
        #
        # logger.info("Files in temporary SVG directory:")
        # for file_name in os.listdir(TEMP_SVG_DIR):
        #     file_path = os.path.join(TEMP_SVG_DIR, file_name)
        #     file_size = os.path.getsize(file_path)
        #     logger.info(f"  - {file_name}: {file_size} bytes")

        # Generate PDF from the file to properly handle references to image files
        logger.info(f"Generating PDF to {pdf_path}")

        # Log the image sources being used in the HTML
        img_src_pattern = r'<img\s+[^>]*src\s*=\s*["\']([^"\']+)["\']'
        img_srcs = re.findall(img_src_pattern, processed_html)
        logger.info(f"Found {len(img_srcs)} image sources in the HTML")
        for i, src in enumerate(img_srcs):
            logger.info(f"Image {i+1} src: {src}")

        # Find wkhtmltopdf on the system
        try:
            import subprocess
            which_result = subprocess.run(['which', 'wkhtmltopdf'], capture_output=True, text=True)
            if which_result.returncode == 0:
                wkhtmltopdf_which_path = which_result.stdout.strip()
                logger.info(f"wkhtmltopdf found at: {wkhtmltopdf_which_path}")
            else:
                logger.warning(f"wkhtmltopdf not found in PATH: {which_result.stderr}")
                # Try another approach with `whereis`
                whereis_result = subprocess.run(['whereis', 'wkhtmltopdf'], capture_output=True, text=True)
                logger.info(f"whereis wkhtmltopdf result: {whereis_result.stdout}")
        except Exception as e:
            logger.warning(f"Error while trying to locate wkhtmltopdf: {str(e)}")

        try:
            # Configure pdfkit with the path to wkhtmltopdf
            wkhtmltopdf_path = '/usr/bin/wkhtmltopdf'  # Common location on Linux servers

            # If we found the path with 'which', use that
            if 'wkhtmltopdf_which_path' in locals() and os.path.exists(wkhtmltopdf_which_path):
                wkhtmltopdf_path = wkhtmltopdf_which_path
                logger.info(f"Using wkhtmltopdf path from 'which': {wkhtmltopdf_path}")

            # Check if wkhtmltopdf is available at the specified path
            if os.path.exists(wkhtmltopdf_path):
                logger.info(f"Using wkhtmltopdf at: {wkhtmltopdf_path}")
                pdfkit_config = Configuration(wkhtmltopdf=wkhtmltopdf_path)
                pdfkit.from_file(index_html_path, pdf_path, options=options, configuration=pdfkit_config)
            else:
                # Try alternate paths
                alternate_paths = [
                    '/usr/local/bin/wkhtmltopdf',
                    '/opt/bin/wkhtmltopdf',
                    '/snap/bin/wkhtmltopdf'
                ]

                found_path = None
                for path in alternate_paths:
                    if os.path.exists(path):
                        found_path = path
                        break

                if found_path:
                    logger.info(f"Using wkhtmltopdf at alternate path: {found_path}")
                    pdfkit_config = Configuration(wkhtmltopdf=found_path)
                    pdfkit.from_file(index_html_path, pdf_path, options=options, configuration=pdfkit_config)
                else:
                    # Try with default config, which may use PATH environment variable
                    logger.warning("wkhtmltopdf not found at expected paths, trying with default configuration")
                    pdfkit.from_file(index_html_path, pdf_path, options=options)

            logger.info(f"PDF generated successfully, file size: {os.path.getsize(pdf_path)} bytes")
        except Exception as pdf_error:
            logger.error(f"Error generating PDF: {str(pdf_error)}")
            import traceback
            logger.error(traceback.format_exc())

            # Try with direct HTML content as fallback
            logger.info("Trying fallback PDF generation directly from HTML string")
            try:
                # Strip out any remaining mermaid divs or code blocks that might be causing problems
                final_html = processed_html
                problem_patterns = [
                    r'<div[^>]*class=.?mermaid.?[^>]*>.*?</div>',
                    r'<pre><code>graph\s+TD.*?</code></pre>',
                    r'<pre><code class=.?language-mermaid.?>.*?</code></pre>',
                    r'```mermaid\s+[\s\S]*?```',
                    r'graph\s+TD[^;]*;'
                ]

                logger.info("Stripping any remaining problematic elements before final fallback")
                for pattern in problem_patterns:
                    before_len = len(final_html)
                    final_html = re.sub(pattern, '<p>[Diagram placeholder]</p>', final_html, flags=re.DOTALL)
                    after_len = len(final_html)
                    if before_len != after_len:
                        logger.info(f"Removed pattern, length before: {before_len}, after: {after_len}")

                fallback_options = {
                    'page-size': 'Letter',
                    'margin-top': '0.75in',
                    'margin-right': '0.75in',
                    'margin-bottom': '0.75in',
                    'margin-left': '0.75in',
                    'encoding': 'UTF-8',
                    'enable-local-file-access': True
                }

                # Try to locate wkhtmltopdf for fallback method too
                if os.path.exists(wkhtmltopdf_path):
                    pdfkit_config = Configuration(wkhtmltopdf=wkhtmltopdf_path)
                    pdfkit.from_string(f"""
                    <!DOCTYPE html>
                    <html>
                    <head>
                        <meta charset="UTF-8">
                        <title>Video Query Result</title>
                        <style>{css}</style>
                    </head>
                    <body>
                        {final_html}
                    </body>
                    </html>
                    """, pdf_path, options=fallback_options, configuration=pdfkit_config)
                else:
                    logger.warning("Using default configuration for fallback PDF generation")
                    pdfkit.from_string(f"""
                    <!DOCTYPE html>
                    <html>
                    <head>
                        <meta charset="UTF-8">
                        <title>Video Query Result</title>
                        <style>{css}</style>
                    </head>
                    <body>
                        {final_html}
                    </body>
                    </html>
                    """, pdf_path, options=fallback_options)

                logger.info("Fallback PDF generation succeeded")
            except Exception as fallback_error:
                logger.error(f"Fallback PDF generation also failed: {str(fallback_error)}")

        # Read the generated PDF file
        if os.path.exists(pdf_path):
            with open(pdf_path, 'rb') as file:
                pdf_data = file.read()

            # Encode the PDF as base64
            pdf_base64 = base64.b64encode(pdf_data).decode('utf-8')
            logger.info(f"Encoded PDF data length: {len(pdf_base64)}")
        else:
            logger.error("PDF file does not exist after generation")
            return jsonify({'success': False, 'message': 'PDF generation failed'}), 500

        # Clean up temporary PDF generation files and temp images
        try:
            # Clean up PDF temp directory
            for root, dirs, files in os.walk(temp_dir_for_pdf, topdown=False):
                for file in files:
                    os.remove(os.path.join(root, file))
                for dir_name in dirs: # dir is a reserved word
                    os.rmdir(os.path.join(root, dir_name))
            os.rmdir(temp_dir_for_pdf)
            logger.info(f"Cleaned up temporary PDF directory: {temp_dir_for_pdf}")

            # Clean up temporary PNG and SVG files
            for file in os.listdir(TEMP_PNG_DIR):
                os.remove(os.path.join(TEMP_PNG_DIR, file))
            for file in os.listdir(TEMP_SVG_DIR):
                os.remove(os.path.join(TEMP_SVG_DIR, file))
            logger.info("Cleaned up temporary PNG and SVG files")

        except Exception as cleanup_error:
            logger.warning(f"Could not remove all temporary files: {str(cleanup_error)}")

        # Generate PDF filename from video filename
        if video_file_name:
            # Remove video extension and add .pdf
            base_name = os.path.splitext(video_file_name)[0]
            pdf_filename = f"{base_name}.pdf"
            logger.info(f"Generated PDF filename from video name: {pdf_filename}")
        else:
            pdf_filename = 'video_query_result.pdf'
            logger.info("No video filename provided, using default PDF name")

        return jsonify({
            'success': True,
            'pdf': pdf_base64,
            'filename': pdf_filename
        })

    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        logger.error(f"Error generating PDF: {str(e)}")
        logger.error(error_trace)
        return jsonify({
            'success': False,
            'message': f'An unexpected error occurred: {str(e)}'
        }), 500

# Handle CORS preflight requests for all API routes
@app.route('/api/<path:path>', methods=['OPTIONS'])
def handle_options(path):
    """Reply to a CORS preflight (OPTIONS) request for any ``/api/...`` URL.

    Args:
        path: The portion of the URL after ``/api/``. It is captured by the
            route rule but is not used when building the reply.

    Returns:
        An empty JSON response carrying the Access-Control-* headers below.
    """
    # (header name, header value) pairs attached to every preflight reply.
    cors_preflight_headers = (
        ('Access-Control-Allow-Origin', 'https://ai-sandbox.oliver.solutions'),
        ('Access-Control-Allow-Headers', 'Content-Type,Authorization,X-Requested-With'),
        ('Access-Control-Allow-Methods', 'GET,POST,OPTIONS'),
        ('Access-Control-Max-Age', '86400'),  # 24 hours, in seconds
        ('Access-Control-Allow-Credentials', 'true'),
    )

    preflight_reply = jsonify({})
    for header_name, header_value in cors_preflight_headers:
        preflight_reply.headers.add(header_name, header_value)
    return preflight_reply

# No longer need to serve frontend from the backend
# Frontend will be hosted at https://ai-sandbox.oliver.solutions/video_query

if __name__ == '__main__':
    # Development entry point only - use Hypercorn in production.
    # Settings for Flask's built-in server when this file is run directly.
    dev_server_settings = {'debug': True, 'port': 5000}
    app.run(**dev_server_settings)