#!/usr/bin/env python3
"""
API server for Visual AI QC application.
Provides API endpoints for visual quality control checks without web UI.

Importing this module has side effects: it loads an environment-specific
``.env`` file, extends ``sys.path``, creates the Flask ``app`` object, and
creates the upload/output directories.
"""

import os
import sys
import json
import base64
import collections
import html
import importlib
import traceback
import re
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta
from pathlib import Path
from flask import Flask, request, jsonify, Response, make_response, g, redirect
from dotenv import load_dotenv


# Determine environment and load appropriate config
def load_environment_config():
    """Load environment-specific configuration.

    The environment name comes from the ``ENVIRONMENT`` variable and defaults
    to ``'development'``. The function looks for ``config/<environment>.env``
    next to this file; if that is missing it falls back to a legacy
    ``config.env`` in the same directory and, in that case, reports the
    environment as ``'production'``.

    Returns:
        The environment name. When neither config file exists nothing is
        loaded and the name read from ``ENVIRONMENT`` is returned as-is.
    """
    # Check for environment variable first
    environment = os.environ.get('ENVIRONMENT', 'development')

    # Determine config file path based on environment
    base_dir = os.path.dirname(os.path.abspath(__file__))

    # Try new config structure first
    config_path = os.path.join(base_dir, 'config', f'{environment}.env')

    # Fall back to old config.env if new structure doesn't exist
    if not os.path.exists(config_path):
        old_config_path = os.path.join(base_dir, 'config.env')
        if os.path.exists(old_config_path):
            config_path = old_config_path
            environment = 'production'  # Assume production for backward compatibility
            print(f"Using legacy config file: {config_path}")
        else:
            # Nothing to load: report both locations that were tried and bail out.
            print(f"No configuration file found. Checked: {config_path} and {old_config_path}")
            return environment

    # Load the configuration
    load_dotenv(config_path)

    # Diagnostics only report whether the API keys are present, never their values.
    print(f"Environment: {environment}")
    print(f"Loaded configuration from: {config_path}")
    print(f"OPENAI_API_KEY set: {'OPENAI_API_KEY' in os.environ}")
    print(f"GOOGLE_API_KEY set: {'GOOGLE_API_KEY' in os.environ}")
    print(f"Port: {os.environ.get('PORT', 'not set')}")

    return environment


# Load environment configuration (must run before the settings below read os.environ)
current_environment = load_environment_config()

# Add this file's own directory to the Python path so the project-local
# imports below resolve regardless of the current working directory
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import QC utilities and model configuration
from visual_qc_apps.utils import get_image_from_asset
from llm_config import run_visual_qc, get_model_info
from profile_config import QC_CHECKS, PROFILES, get_profile, get_check_llm_map
from brand_guidelines_db import BrandGuidelinesDB
from auth_middleware import AuthMiddleware
from technical_check import inspect as technical_inspect, format_for_llm_prompt as technical_to_prompt
import box_jwt_client
from PIL import Image
import io

# Create Flask app
app = Flask(__name__)

# Configure app based on environment
upload_folder = os.environ.get('UPLOAD_FOLDER', 'uploads')
output_folder = os.environ.get('OUTPUT_FOLDER', 'output')
# DEBUG_MODE is enabled only by the literal string "true" (case-insensitive)
debug_mode = os.environ.get('DEBUG_MODE', 'false').lower() == 'true'

app.config['UPLOAD_FOLDER'] = upload_folder
app.config['OUTPUT_FOLDER'] = output_folder
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB max file size
# NOTE(review): a hard-coded fallback secret is used when SECRET_KEY is unset.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'default-secret-change-this')
app.debug = debug_mode

print(f"Upload folder: {upload_folder}")
print(f"Output folder: {output_folder}")
print(f"Debug mode: {debug_mode}")

# Ensure directories exist
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
os.makedirs(app.config['OUTPUT_FOLDER'], exist_ok=True)

# Initialize authentication middleware
auth = AuthMiddleware(app)

# Initialize brand guidelines database
brand_db = BrandGuidelinesDB()

# Initialize media plans storage
MEDIA_PLANS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'media_plans')
os.makedirs(MEDIA_PLANS_DIR, exist_ok=True)
MEDIA_PLANS_DB_FILE = os.path.join(MEDIA_PLANS_DIR, 'plans_db.json')


def _load_media_plans_db():
    """Return the media-plan index stored in MEDIA_PLANS_DB_FILE.

    An empty dict is returned when the file does not exist, vanishes between
    the existence check and the open, or does not contain valid JSON.
    """
    if not os.path.exists(MEDIA_PLANS_DB_FILE):
        return {}
    try:
        with open(MEDIA_PLANS_DB_FILE, 'r') as index_file:
            plans_index = json.load(index_file)
    except (json.JSONDecodeError, FileNotFoundError):
        return {}
    return plans_index


def _save_media_plans_db(db):
    """Overwrite MEDIA_PLANS_DB_FILE with *db* serialised as indented JSON."""
    with open(MEDIA_PLANS_DB_FILE, 'w') as index_file:
        json.dump(db, index_file, indent=2)


def _get_active_media_plan(client_id):
    """Get the parsed media plan data for a client, or None.

    None is returned when the client has no index entry, the entry has no
    usable 'json_path', or the referenced file cannot be parsed as JSON.
    """
    plan_entry = _load_media_plans_db().get(client_id)
    if not plan_entry:
        return None

    plan_json_path = plan_entry.get('json_path')
    if not plan_json_path or not os.path.exists(plan_json_path):
        return None

    try:
        with open(plan_json_path, 'r') as plan_file:
            return json.load(plan_file)
    except (json.JSONDecodeError, FileNotFoundError):
        return None


# Global progress tracking
progress_tracker = {}

# Dictionary to store QC app instances and prompts
qc_apps = {}

# Define QC checks that require reference assets
REFERENCE_ASSET_REQUIRED_CHECKS = {'brand_assets_visibility', 'visual_hierarchy', 'logo_visibility'}
# Fenced blocks explicitly labelled as JSON (```json ... ```); the label is
# matched case-insensitively so ```JSON is accepted too.
_JSON_FENCE_RE = re.compile(r'```json\s*(.*?)\s*```', re.DOTALL | re.IGNORECASE)
# Any fenced block, labelled or not.
_ANY_FENCE_RE = re.compile(r'```.*?```', re.DOTALL)


def _scan_json_objects(text):
    """Return every top-level JSON object decodable from *text*, in order.

    Decoding is attempted at each '{'; on success the scan resumes after the
    decoded object, so nested objects are not reported separately.
    """
    decoder = json.JSONDecoder()
    found = []
    pos = text.find('{')
    while pos != -1:
        try:
            obj, end = decoder.raw_decode(text, pos)
        except (ValueError, RecursionError):
            # Not JSON at this brace (e.g. "{brand}" in prose): try the next one.
            pos = text.find('{', pos + 1)
            continue
        if isinstance(obj, dict):
            found.append(obj)
        pos = text.find('{', end)
    return found


def _merge_json_objects(objects):
    """Merge dict objects left to right (later keys win); ignore non-dicts."""
    merged = {}
    for obj in objects:
        if isinstance(obj, dict):
            merged.update(obj)
    return merged


def extract_json_from_response(response_text):
    """Extract JSON objects from the LLM's response.

    Sources are tried in priority order, and the first that yields any data
    wins:

    1. Fenced blocks labelled ``json``. Multiple blocks are merged, with later
       blocks overriding earlier ones. Blocks that fail to parse are reported
       and skipped; blocks that parse to something other than an object
       (e.g. a list) are skipped instead of raising.
    2. JSON objects embedded in the text outside any fenced block.
    3. JSON objects anywhere in the text, which covers unlabelled fences.

    Args:
        response_text: Raw model output. ``None``, non-strings and empty
            strings are treated as "no JSON".

    Returns:
        A dict with the merged data, or ``{}`` if nothing could be extracted.
    """
    if not response_text or not isinstance(response_text, str):
        return {}

    # 1. Explicit ```json blocks
    fenced_objects = []
    for match in _JSON_FENCE_RE.finditer(response_text):
        try:
            fenced_objects.append(json.loads(match.group(1).strip()))
        except (ValueError, RecursionError) as e:
            print(f"Could not parse JSON block: {e}")
    merged = _merge_json_objects(fenced_objects)
    if merged:
        return merged

    # 2. Bare JSON in the prose, ignoring whatever sits inside code fences
    text_outside_fences = _ANY_FENCE_RE.sub('', response_text)
    merged = _merge_json_objects(_scan_json_objects(text_outside_fences))
    if merged:
        return merged

    # 3. Last resort: scan everything, so JSON inside an unlabelled fence
    #    is still recovered.
    return _merge_json_objects(_scan_json_objects(response_text))
def detect_and_crop_main_element(image_path, file_type_hint=None):
    """Ask the vision model for the main marketing region of a POS asset.

    The model is only consulted when *file_type_hint* contains "pos"
    (case-insensitive). Returns the 'crop_coordinates' object from the model's
    JSON answer, or None when the hint does not match, no main element is
    reported, or anything goes wrong along the way.
    """
    try:
        hint_mentions_pos = bool(file_type_hint) and 'pos' in file_type_hint.lower()
        if not hint_mentions_pos:
            return None

        # Prompt asking the model to locate the main marketing element.
        # NOTE(review): line breaks inside this literal were reconstructed from a
        # whitespace-collapsed source - confirm against the original layout.
        crop_detection_prompt = """
        This appears to be a POS (Point of Sale) material. Please identify the main marketing element that should be used for QC analysis.

        Look for the primary branded content area (usually the center panel or main marketing message) and ignore peripheral elements like:
        - Side panels with fine print
        - Edge decorations
        - Background patterns
        - Border elements

        Respond with JSON format:
        {
            "main_element_detected": true/false,
            "crop_coordinates": {
                "x": left_position_percentage,
                "y": top_position_percentage,
                "width": width_percentage,
                "height": height_percentage
            },
            "description": "description of the main element found"
        }

        Coordinates should be percentages (0-100) of the total image dimensions.
        """

        # Run the detection through the vision model
        llm_result = run_visual_qc(
            prompt=crop_detection_prompt,
            asset_path=image_path,
            model_name="Gemini"
        )

        # Pull the crop rectangle out of the reply
        parsed = extract_json_from_response(llm_result['response'])
        if not parsed.get('main_element_detected') or 'crop_coordinates' not in parsed:
            return None
        return parsed['crop_coordinates']

    except Exception as err:
        print(f"Error detecting main element: {err}")
        return None


def apply_crop_to_analysis(image_path, crop_coordinates):
    """Crop *image_path* to a percentage-based rectangle and save the result.

    *crop_coordinates* holds 'x', 'y', 'width' and 'height' as percentages of
    the image size. The crop is written next to the source as
    ``<name>_cropped<ext>`` and that path is returned. The original
    *image_path* is returned when no coordinates are given or the crop fails.
    """
    if not crop_coordinates:
        return image_path

    try:
        from PIL import Image
        import os

        with Image.open(image_path) as source_img:
            img_w, img_h = source_img.size

            # Percentages -> pixels
            left = int((crop_coordinates['x'] / 100) * img_w)
            top = int((crop_coordinates['y'] / 100) * img_h)
            box_w = int((crop_coordinates['width'] / 100) * img_w)
            box_h = int((crop_coordinates['height'] / 100) * img_h)

            # Keep the origin inside the image and the box inside what is left
            left = max(0, min(left, img_w))
            top = max(0, min(top, img_h))
            box_w = min(box_w, img_w - left)
            box_h = min(box_h, img_h - top)

            region = source_img.crop((left, top, left + box_w, top + box_h))

            # Store the crop beside the original file
            stem, suffix = os.path.splitext(image_path)
            cropped_path = f"{stem}_cropped{suffix}"
            region.save(cropped_path)

            return cropped_path

    except Exception as err:
        print(f"Error applying crop: {err}")
        return image_path
_UNILEVER_PROFILE_NAME = 'Unilever Key Visual'

# check name -> (JSON flag that must not be False, wording used in the log line)
_UNILEVER_PRESENCE_FLAGS = {
    'face_visibility': ('face_present', "No face detected"),
    'new_visibility': ('new_present', "No 'new' element detected"),
    'face_gaze_direction': ('face_present', "No face detected"),
}

# A number at the start of a string score such as "8", "7.5" or "8/10".
_LEADING_NUMBER_RE = re.compile(r'\s*(-?\d+(?:\.\d+)?)')


def _is_unilever_key_visual(profile_config):
    """True when *profile_config* (object or mapping) names the Unilever KV profile."""
    if not profile_config:
        return False
    if hasattr(profile_config, 'name') and profile_config.name == _UNILEVER_PROFILE_NAME:
        return True
    return hasattr(profile_config, 'get') and profile_config.get('name') == _UNILEVER_PROFILE_NAME


def _coerce_score(value):
    """Normalise a JSON 'score' value to a number, or None if it is unusable.

    Numbers pass through unchanged. Strings yield their leading number
    ("8/10" -> 8, "7.5" -> 7.5). Anything else (None, lists, text without a
    leading number) yields None so the caller can fall back to text heuristics.
    """
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        match = _LEADING_NUMBER_RE.match(value)
        if match:
            number = match.group(1)
            return float(number) if '.' in number else int(number)
    return None


def _score_from_text(response_text):
    """Derive a score from free text; always returns a number."""
    # A JSON-looking "score": N pair anywhere in the text
    score_match = re.search(r'["\']score["\']\s*:\s*(\d+)', response_text)
    if score_match:
        score = int(score_match.group(1))
        print(f"Extracted score from regex: {score}")
        return score

    # Descriptive phrasing such as "score is 7" / "score of 7 out of 10"
    descriptive_score_pattern = r'score(?:\s+is|\s*:\s*|\s+of\s+)(?:\s*)(\d+)(?:\s*out\s*of\s*10)?'
    descriptive_match = re.search(descriptive_score_pattern, response_text.lower())
    if descriptive_match:
        score = int(descriptive_match.group(1))
        print(f"Extracted score from descriptive text: {score}")
        return score

    # Legacy mode: derive the score from a PASS/FAIL keyword
    upper_text = response_text.upper()
    if "PASS" in upper_text:
        print("Detected PASS keyword, setting score to 10")
        return 10  # Pass = 10/10
    if "FAIL" in upper_text:
        print("Detected FAIL keyword, setting score to 3")
        return 3  # Fail = 3/10
    print(f"Could not extract score, using default of 5")
    return 5  # Default middle score


def extract_score_from_result(result, profile_config=None, check_name=None):
    """Extract score from LLM result response with Unilever-specific logic.

    Args:
        result: Mapping with the model output under 'response'.
        profile_config: Profile object or mapping. When its name is
            'Unilever Key Visual', the presence checks score 0 as soon as the
            model reports the relevant element as absent.
        check_name: Name of the QC check being scored.

    Returns:
        A numeric score. The JSON 'score' field is preferred and is normalised
        to a number, so a value such as "8" or "8/10" no longer reaches callers
        as a string and breaks their ``score * weight``. When there is no
        usable JSON score, the text is searched for a score, then for
        PASS/FAIL keywords, and finally 5 is used. Any error while parsing
        also yields 5.
    """
    score = None

    try:
        response_text = result['response']
        json_data = extract_json_from_response(response_text)

        # Unilever Key Visual: a missing element is an automatic zero
        if _is_unilever_key_visual(profile_config):
            presence_rule = _UNILEVER_PRESENCE_FLAGS.get(check_name)
            if presence_rule is not None:
                flag_key, wording = presence_rule
                # "== False" rather than "is False" on purpose: a JSON 0 keeps
                # counting as "absent", exactly as it did before.
                if json_data.get(flag_key) == False:  # noqa: E712
                    print(f"Unilever profile: {wording} for {check_name}, setting score to 0")
                    return 0

        # Standard scoring logic
        if 'score' in json_data:
            score = _coerce_score(json_data.get('score'))
            if score is not None:
                print(f"Extracted score from JSON block: {score}")

        # Nothing usable in the JSON: fall back to text heuristics
        if score is None:
            score = _score_from_text(response_text)

    except Exception as parse_error:
        print(f"Error parsing score from response: {parse_error}")
        score = 5  # Default to middle score

    return score if score is not None else 5
def determine_grade(overall_score):
    """Map an overall score to 'Pass' or 'Fail'.

    The overall score is first scaled down by ten to the per-check range;
    6 or more on that scale is a pass.
    """
    per_check_average = overall_score / 10
    return 'Pass' if per_check_average >= 6 else 'Fail'


def _run_dj_file_naming_check(check_name, file_path, profile_weights):
    """
    Deterministic file-naming check for the Dow Jones / OLIVER convention.

    No LLM is involved: the base name of *file_path* is handed to
    ``validate_filename`` and the outcome is wrapped in a dict with the same
    keys an LLM-backed check produces, so it fits the existing scoring and
    report pipeline.
    """
    from file_naming_validator import validate_filename

    if file_path:
        base_name = os.path.basename(file_path)
    else:
        base_name = ''
    outcome = validate_filename(base_name)

    check_weight = profile_weights.get(check_name, 0.1)
    raw_score = outcome['score']
    weighted = raw_score * check_weight

    report = {
        'check_name': check_name,
        'status': 'success',
        'response': outcome['explanation'],
        'json_data': outcome,
        'score': raw_score,
        'weight': check_weight,
        'weighted_score': weighted,
        'model_used': {'model': 'deterministic', 'provider': 'internal'},
        'token_usage': {'input_tokens': 0, 'output_tokens': 0},
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'display_name': 'DJ File Naming',
        'brand_guidelines_status': None,
        'requires_brand_guidelines': False,
    }
    return report
def process_single_check(check_name, qc_apps, profile_config, profile_weights, file_path, analysis_reference_asset, brand_db, progress_tracker, session_id, check_index, total_checks, model_version=None, media_plan_context=None, ocr_context=None):
    """Process a single QC check - designed to run in parallel.

    Builds the final prompt for *check_name*, sends it to the model through
    ``run_visual_qc`` and wraps the reply in a result dict.

    Prompt assembly order (each later step wraps the result of the earlier
    ones): check prompt -> pre-analysis instructions prepended -> reference
    asset content prepended -> media-plan context appended -> OCR context
    appended (only for checks listed in ``OCR_RELEVANT_CHECKS``) -> technical
    pre-flight report prepended.

    Args:
        check_name: Key into *qc_apps* and *profile_weights*.
        qc_apps: Mapping whose entries provide the check's 'prompt'.
        profile_config: Object providing ``get_check_llm(check_name)`` and
            ``pre_analysis_instructions``.
        profile_weights: Mapping of check name to weight (0.1 when absent).
        file_path: Asset handed to the model.
        analysis_reference_asset: Optional reference asset identifier.
        brand_db: Object providing ``get_brand_guidelines(brand)``.
        progress_tracker: Mapping of session id to progress state; only its
            'technical_report' entry is read here.
        session_id: Key into *progress_tracker*.
        check_index: Zero-based position of this check (used for logging).
        total_checks: Total number of checks (used for logging).
        model_version: Forwarded to ``run_visual_qc``.
        media_plan_context: Optional text appended to the prompt.
        ocr_context: Optional text appended for OCR-relevant checks.

    Returns:
        A result dict. 'status' is 'success' for completed checks and
        'error' when any exception was raised; error results carry score 0.
    """
    try:
        # Deterministic file-naming check — short-circuit before any LLM dispatch.
        if check_name == 'dj_file_naming':
            return _run_dj_file_naming_check(check_name, file_path, profile_weights)

        # Check if this check requires a reference asset but none is provided
        if check_name in REFERENCE_ASSET_REQUIRED_CHECKS and not analysis_reference_asset:
            # Return automatic fail with score 0
            # NOTE(review): unlike the other return paths, this dict has no
            # 'weight' / 'weighted_score' keys, and 'result' is a string here
            # but the raw model result dict below — confirm consumers cope.
            fail_response = f"Reference asset is required for the '{check_name}' QC check but was not provided."
            return {
                'check_name': check_name,
                'status': 'success',
                'score': 0,
                'result': 'Fail',
                'response': fail_response,
                'json_data': {},
                'requires_brand_guidelines': False,
                'brand_guidelines_status': None
            }

        check_prompt = qc_apps[check_name]['prompt']
        llm_model = profile_config.get_check_llm(check_name)

        # Handle brand guidelines if needed: the check "requires" them when its
        # prompt mentions any of these phrases
        requires_brand_guidelines = any(keyword in check_prompt.lower() for keyword in [
            'brand guideline', 'brand standard', 'brand requirement',
            'brand specification', 'brand compliance', 'brand rule', 'brand policy'
        ])

        brand_guidelines_status = None
        detected_brand = None

        # Since we skip triage, we won't have detected_brand automatically
        # Could be enhanced with direct brand detection if needed
        # (detected_brand is always None at this point, so the branch below
        # never runs as the code stands.)
        if requires_brand_guidelines and detected_brand:
            brand_guidelines = brand_db.get_brand_guidelines(detected_brand)
            if brand_guidelines:
                brand_guidelines_status = f"Brand guidelines found for {detected_brand} ({len(brand_guidelines)} files)."
                # Add guidelines to prompt (simplified version)
                check_prompt += f"\n\nBrand Analysis Context: Using brand guidelines for {detected_brand}."
            else:
                brand_guidelines_status = f"INFO: Brand detected as '{detected_brand}' but no brand guidelines found."

        # Add pre-analysis instructions if available
        final_prompt = check_prompt
        if profile_config.pre_analysis_instructions:
            final_prompt = profile_config.pre_analysis_instructions + "\n\n" + check_prompt

        # Add reference asset content if selected
        reference_image_path = None
        if analysis_reference_asset:
            reference_content = get_reference_asset_content(analysis_reference_asset)
            if reference_content:
                final_prompt = reference_content + "\n\n" + final_prompt
                print(f"Added reference asset {analysis_reference_asset} to {check_name} prompt")

            # Also get the actual reference image path for LLM
            # NOTE(review): nesting reconstructed from whitespace-collapsed
            # source — confirm this sits under `if analysis_reference_asset`
            # rather than under `if reference_content`.
            reference_image_path = get_reference_asset_image_path(analysis_reference_asset)

        # Add media plan context if available
        if media_plan_context:
            final_prompt = final_prompt + "\n" + media_plan_context

        # Add OCR measurement context for checks that evaluate spatial layout
        # (the ocr_measurement module is optional; without it OCR context is skipped)
        try:
            from ocr_measurement import OCR_RELEVANT_CHECKS as _ocr_checks
            ocr_enabled = check_name in _ocr_checks
        except ImportError:
            ocr_enabled = False
        if ocr_context and ocr_enabled:
            final_prompt = final_prompt + "\n" + ocr_context

        # Prepend the technical pre-flight report stored for this session, if any
        tech_report = progress_tracker.get(session_id, {}).get('technical_report')
        if tech_report:
            try:
                final_prompt = technical_to_prompt(tech_report) + "\n\n" + final_prompt
            except Exception:
                pass  # Pre-flight context is best-effort; never block the check on it.

        print(f"Running check {check_index + 1}/{total_checks}: {check_name}")
        result = run_visual_qc(
            prompt=final_prompt,
            asset_path=file_path,
            reference_path=reference_image_path,
            model_name=llm_model,
            model_version=model_version
        )

        # Extract score and data
        json_data = extract_json_from_response(result['response'])
        score = extract_score_from_result(result, profile_config, check_name)
        weight = profile_weights.get(check_name, 0.1)
        weighted_score = score * weight if score is not None else 0

        return {
            'check_name': check_name,
            'status': 'success',
            'result': result,
            'response': result['response'],
            'brand_guidelines_status': brand_guidelines_status,
            'requires_brand_guidelines': requires_brand_guidelines,
            'json_data': json_data,
            'score': score,
            'weight': weight,
            'weighted_score': weighted_score,
            'model_used': result.get('model_info', {}),
            'token_usage': result.get('token_usage', {}),
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'display_name': check_name.replace('_', ' ').title()
        }

    except Exception as e:
        # Any failure becomes an 'error' result scored 0, so one bad check
        # does not abort the batch.
        print(f"Error in check {check_name}: {str(e)}")
        weight = profile_weights.get(check_name, 0.1)
        return {
            'check_name': check_name,
            'status': 'error',
            'error': str(e),
            'weight': weight,
            'score': 0,
            'weighted_score': 0
        }
def process_checks_in_batches(enabled_checks, qc_apps, profile_config, profile_weights, file_path, analysis_reference_asset, brand_db, progress_tracker, session_id, batch_size=15, media_plan_context=None, ocr_context=None):
    """Process QC checks in parallel batches.

    The checks are split into consecutive batches of *batch_size*. Each batch
    runs ``process_single_check`` for all of its checks on a thread pool and
    is fully collected before the next batch starts, with a short pause in
    between. ``progress_tracker[session_id]`` is updated at the start of every
    batch and after every finished check (success or failure); the percentage
    moves from 10 to 90 over the run.

    Args:
        enabled_checks: Sequence of check names to run.
        qc_apps, profile_config, profile_weights, file_path,
        analysis_reference_asset, brand_db, media_plan_context, ocr_context:
            Forwarded unchanged to ``process_single_check``.
        progress_tracker: Mapping of session id to a mutable progress dict.
        session_id: Key into *progress_tracker*.
        batch_size: Checks per batch; values below 1 are treated as 1.

    Returns:
        Dict mapping check name to its result dict. An empty *enabled_checks*
        yields ``{}``.
    """
    check_results = {}
    total_checks = len(enabled_checks)
    completed_checks = 0

    # A step of 0 would make range() raise; never run with fewer than one
    # check per batch.
    batch_size = max(1, batch_size)
    # Computed up front: it does not change per batch, and it must exist even
    # when there are no checks (the summary line below reads it).
    total_batches = (total_checks + batch_size - 1) // batch_size

    print(f"Processing {total_checks} checks in batches of {batch_size}")

    # Split checks into batches
    for batch_start in range(0, total_checks, batch_size):
        batch_end = min(batch_start + batch_size, total_checks)
        batch_checks = enabled_checks[batch_start:batch_end]
        batch_number = (batch_start // batch_size) + 1

        print(f"Processing batch {batch_number}/{total_batches} ({len(batch_checks)} checks)")

        # Update progress for batch start
        progress_tracker[session_id].update({
            'current_batch': batch_number,
            'total_batches': total_batches,
            'current_check': f"Batch {batch_number}",
            'current_check_display': f"Processing batch {batch_number}/{total_batches}",
            'percentage': 10 + ((completed_checks / total_checks) * 80)
        })

        # Process batch in parallel
        with ThreadPoolExecutor(max_workers=batch_size) as executor:
            # Submit all checks in the batch
            future_to_check = {}
            for i, check_name in enumerate(batch_checks):
                future = executor.submit(
                    process_single_check,
                    check_name, qc_apps, profile_config, profile_weights,
                    file_path, analysis_reference_asset, brand_db, progress_tracker,
                    session_id, batch_start + i, total_checks, None,
                    media_plan_context, ocr_context
                )
                future_to_check[future] = check_name

            # Collect results as they complete
            batch_results = {}
            for future in as_completed(future_to_check):
                check_name = future_to_check[future]
                succeeded = True
                try:
                    result = future.result()
                    result_key = result['check_name']
                except Exception as e:
                    succeeded = False
                    print(f"Error getting result for check {check_name}: {str(e)}")
                    weight = profile_weights.get(check_name, 0.1)
                    result_key = check_name
                    result = {
                        'check_name': check_name,
                        'status': 'error',
                        'error': str(e),
                        'weight': weight,
                        'score': 0,
                        'weighted_score': 0
                    }

                batch_results[result_key] = result
                completed_checks += 1

                # Update progress for each finished check, failed ones included,
                # so the percentage does not stall on errors
                progress_tracker[session_id].update({
                    'completed_checks': completed_checks,
                    'percentage': 10 + ((completed_checks / total_checks) * 80)
                })

                if succeeded:
                    print(f"Completed check: {check_name} ({completed_checks}/{total_checks})")

        # Add batch results to main results
        check_results.update(batch_results)

        print(f"Completed batch {batch_number}/{total_batches}")

        # Small delay between batches to avoid overwhelming the APIs
        if batch_number < total_batches:
            time.sleep(0.5)

    print(f"Completed all {total_checks} checks in {total_batches} batches")
    return check_results
def process_single_check_with_triage(check_name, qc_apps, profile_config, profile_weights, file_path, reference_asset, brand_db, progress_tracker, session_id, check_index, total_checks, model_version=None):
    """Process a single QC check with triage logic - designed to run in parallel.

    Builds the prompt for *check_name* (check prompt, optional brand-guideline
    or generic-analysis addendum, pre-analysis instructions, reference asset
    content), runs it through ``run_visual_qc`` and wraps the reply in a
    result dict.

    Args:
        check_name: Key into *qc_apps* and *profile_weights*.
        qc_apps: Mapping whose entries provide the check's 'prompt'.
        profile_config: Object providing ``get_check_llm(check_name)`` and
            ``pre_analysis_instructions``.
        profile_weights: Mapping of check name to weight (0.1 when absent).
        file_path: Asset handed to the model.
        reference_asset: Optional reference asset identifier.
        brand_db: Object providing ``get_brand_guidelines(brand)``.
        progress_tracker: Accepted but not read in this function.
        session_id: Accepted but not read in this function.
        check_index: Zero-based position of this check (used for logging).
        total_checks: Total number of checks (used for logging).
        model_version: Forwarded to ``run_visual_qc``.

    Returns:
        A result dict. Successful model runs use status 'completed' (the
        missing-reference shortcut uses 'success'); any exception produces
        status 'error' with score 0.
    """
    try:
        # Deterministic file-naming check — short-circuit before any LLM dispatch.
        if check_name == 'dj_file_naming':
            return _run_dj_file_naming_check(check_name, file_path, profile_weights)

        # Check if this check requires a reference asset but none is provided
        if check_name in REFERENCE_ASSET_REQUIRED_CHECKS and not reference_asset:
            # Return automatic fail with score 0
            # NOTE(review): this dict has no 'weight' / 'weighted_score' keys,
            # unlike the other return paths — confirm consumers cope.
            fail_response = f"Reference asset is required for the '{check_name}' QC check but was not provided."
            return {
                'check_name': check_name,
                'status': 'success',
                'score': 0,
                'result': 'Fail',
                'response': fail_response,
                'json_data': {},
                'requires_brand_guidelines': False,
                'brand_guidelines_status': None
            }

        check_prompt = qc_apps[check_name]['prompt']
        llm_model = profile_config.get_check_llm(check_name)

        # Check if this check requires brand guidelines: true when the prompt
        # mentions any of these phrases
        requires_brand_guidelines = any(keyword in check_prompt.lower() for keyword in [
            'brand guideline', 'brand standard', 'brand requirement',
            'brand specification', 'brand compliance', 'brand rule', 'brand policy'
        ])

        brand_guidelines_status = None

        # Try to extract brand from triage results or detection
        detected_brand = None

        # Since we skip triage, we won't have detected_brand automatically
        # Could be enhanced with direct brand detection if needed
        # (detected_brand is always None at this point, so for checks that
        # require brand guidelines only the generic-analysis branch at the
        # bottom runs as the code stands.)

        if requires_brand_guidelines:
            if detected_brand:
                brand_guidelines = brand_db.get_brand_guidelines(detected_brand)
                if not brand_guidelines:
                    brand_guidelines_status = f"INFO: Brand detected as '{detected_brand}' but no brand guidelines found. Performing generic brand analysis."
                    check_prompt += f"\n\nBrand Analysis Context: The detected brand is '{detected_brand}'. While specific brand guidelines are not available, please analyze the visual content for general brand consistency, professional appearance, and adherence to common branding best practices for this brand if you're familiar with it."
                else:
                    brand_guidelines_status = f"Brand guidelines found for {detected_brand} ({len(brand_guidelines)} files)."

                    # Add brand guidelines content to the prompt
                    guidelines_content = "\n\n=== BRAND GUIDELINES REFERENCE ===\n"
                    guidelines_content += f"The following brand guidelines have been provided for {detected_brand}:\n\n"

                    for guideline in brand_guidelines:
                        guidelines_content += f"**Guideline File: {guideline.get('original_filename', 'Unknown')}**\n"
                        if guideline.get('description'):
                            guidelines_content += f"Description: {guideline['description']}\n"
                        if guideline.get('tags'):
                            guidelines_content += f"Tags: {', '.join(guideline['tags'])}\n"

                        # Try to read file content if it's a text-based file
                        guideline_file_path = guideline.get('file_path')
                        if guideline_file_path and os.path.exists(guideline_file_path):
                            try:
                                file_ext = os.path.splitext(guideline_file_path)[1].lower()
                                if file_ext in ['.txt', '.md', '.json']:
                                    # NOTE(review): the statements after the read were
                                    # re-indented from collapsed source; whether they sit
                                    # inside the `with` does not change behaviour.
                                    with open(guideline_file_path, 'r', encoding='utf-8') as f:
                                        content = f.read()
                                        if len(content) > 2000:  # Limit content length
                                            content = content[:2000] + "... [content truncated]"
                                        guidelines_content += f"Content:\n{content}\n\n"
                                else:
                                    guidelines_content += f"[File type {file_ext} - content not directly readable, but file is available as reference]\n\n"
                            except Exception as e:
                                # A single unreadable file is noted in the prompt, not fatal
                                guidelines_content += f"[Error reading file content: {str(e)}]\n\n"
                        else:
                            guidelines_content += "[File path not found]\n\n"

                    guidelines_content += "Please use these brand guidelines as reference when performing your analysis. Pay special attention to brand colors, fonts, logo usage, tone of voice, and any specific requirements mentioned in the guidelines.\n"
                    guidelines_content += "=== END BRAND GUIDELINES REFERENCE ===\n"

                    check_prompt += guidelines_content
            else:
                brand_guidelines_status = "INFO: Brand could not be determined. Performing generic analysis."
                check_prompt += "\n\nGeneric Analysis: Since the brand could not be determined from the image, please analyze the visual content for general quality, professional appearance, and adherence to common design best practices."

        # Add pre-analysis instructions if available
        final_prompt = check_prompt
        if profile_config.pre_analysis_instructions:
            final_prompt = profile_config.pre_analysis_instructions + "\n\n" + check_prompt

        # Add reference asset content if selected
        reference_image_path = None
        if reference_asset:
            reference_content = get_reference_asset_content(reference_asset)
            if reference_content:
                final_prompt = reference_content + "\n\n" + final_prompt
                print(f"Added reference asset {reference_asset} to {check_name} prompt")

            # Also get the actual reference image path for LLM
            # NOTE(review): nesting reconstructed from whitespace-collapsed
            # source — confirm this sits under `if reference_asset` rather
            # than under `if reference_content`.
            reference_image_path = get_reference_asset_image_path(reference_asset)

        print(f"Running check {check_index + 1}/{total_checks}: {check_name}")
        result = run_visual_qc(
            prompt=final_prompt,
            asset_path=file_path,
            reference_path=reference_image_path,
            model_name=llm_model,
            model_version=model_version
        )

        # Extract score and data
        json_data = extract_json_from_response(result['response'])
        score = extract_score_from_result(result, profile_config, check_name)
        weight = profile_weights.get(check_name, 0.1)
        weighted_score = score * weight if score is not None else 0

        return {
            'check_name': check_name,
            'status': 'completed',
            'response': result['response'],
            'brand_guidelines_status': brand_guidelines_status,
            'requires_brand_guidelines': requires_brand_guidelines,
            'json_data': json_data,
            'score': score,
            'weight': weight,
            'weighted_score': weighted_score,
            'model_used': result.get('model_info', {}),
            'token_usage': result.get('token_usage', {}),
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        }

    except Exception as e:
        # Any failure becomes an 'error' result scored 0, so one bad check
        # does not abort the batch.
        print(f"Error in check {check_name}: {str(e)}")
        weight = profile_weights.get(check_name, 0.1)
        return {
            'check_name': check_name,
            'status': 'error',
            'error': str(e),
            'weight': weight,
            'score': 0,
            'weighted_score': 0
        }
def process_checks_in_batches_with_triage(enabled_checks, qc_apps, profile_config, profile_weights, file_path, reference_asset, brand_db, progress_tracker, session_id, batch_size=15, base_percentage=10, percentage_range=80):
    """Process QC checks in parallel batches with triage logic.

    The checks are split into consecutive batches of *batch_size*. Each batch
    runs ``process_single_check_with_triage`` for all of its checks on a
    thread pool and is fully collected before the next batch starts, with a
    short pause in between. ``progress_tracker[session_id]`` is updated at the
    start of every batch and after every finished check (success or failure).

    Args:
        enabled_checks: Sequence of check names to run.
        qc_apps, profile_config, profile_weights, file_path, reference_asset,
        brand_db: Forwarded unchanged to ``process_single_check_with_triage``.
        progress_tracker: Mapping of session id to a mutable progress dict.
        session_id: Key into *progress_tracker*.
        batch_size: Checks per batch; values below 1 are treated as 1.
        base_percentage: Progress percentage reported before any check is done.
        percentage_range: Span of the progress percentage covered by this run.

    Returns:
        Dict mapping check name to its result dict. An empty *enabled_checks*
        yields ``{}``.
    """
    check_results = {}
    total_checks = len(enabled_checks)
    completed_checks = 0

    # A step of 0 would make range() raise; never run with fewer than one
    # check per batch.
    batch_size = max(1, batch_size)
    # Computed up front: it does not change per batch, and it must exist even
    # when there are no checks (the summary line below reads it).
    total_batches = (total_checks + batch_size - 1) // batch_size

    def _percentage():
        # Linear progress within [base_percentage, base_percentage + percentage_range]
        return base_percentage + ((completed_checks / total_checks) * percentage_range)

    print(f"Processing {total_checks} checks in batches of {batch_size}")

    # Split checks into batches
    for batch_start in range(0, total_checks, batch_size):
        batch_end = min(batch_start + batch_size, total_checks)
        batch_checks = enabled_checks[batch_start:batch_end]
        batch_number = (batch_start // batch_size) + 1

        print(f"Processing batch {batch_number}/{total_batches} ({len(batch_checks)} checks)")

        # Update progress for batch start
        progress_tracker[session_id].update({
            'current_batch': batch_number,
            'total_batches': total_batches,
            'current_check': f"Batch {batch_number}",
            'current_check_display': f"Processing batch {batch_number}/{total_batches}",
            'percentage': _percentage()
        })

        # Process batch in parallel
        with ThreadPoolExecutor(max_workers=batch_size) as executor:
            # Submit all checks in the batch
            future_to_check = {}
            for i, check_name in enumerate(batch_checks):
                future = executor.submit(
                    process_single_check_with_triage,
                    check_name, qc_apps, profile_config, profile_weights,
                    file_path, reference_asset, brand_db, progress_tracker,
                    session_id, batch_start + i, total_checks
                )
                future_to_check[future] = check_name

            # Collect results as they complete
            batch_results = {}
            for future in as_completed(future_to_check):
                check_name = future_to_check[future]
                succeeded = True
                try:
                    result = future.result()
                    result_key = result['check_name']
                except Exception as e:
                    succeeded = False
                    print(f"Error getting result for check {check_name}: {str(e)}")
                    weight = profile_weights.get(check_name, 0.1)
                    result_key = check_name
                    result = {
                        'check_name': check_name,
                        'status': 'error',
                        'error': str(e),
                        'weight': weight,
                        'score': 0,
                        'weighted_score': 0
                    }

                batch_results[result_key] = result
                completed_checks += 1

                # Update progress for each finished check, failed ones included,
                # so the percentage does not stall on errors
                progress_tracker[session_id].update({
                    'completed_checks': completed_checks,
                    'percentage': _percentage()
                })

                if succeeded:
                    print(f"Completed check: {check_name} ({completed_checks}/{total_checks})")

        # Add batch results to main results
        check_results.update(batch_results)

        print(f"Completed batch {batch_number}/{total_batches}")

        # Small delay between batches to avoid overwhelming the APIs
        if batch_number < total_batches:
            time.sleep(0.5)

    print(f"Completed all {total_checks} checks in {total_batches} batches")
    return check_results
# Pillow image modes the JPEG writer accepts without conversion.
_JPEG_SAFE_MODES = ('1', 'L', 'RGB', 'CMYK', 'YCbCr')


def create_thumbnail_base64(file_path, max_size=(300, 300)):
    """Create a base64 encoded thumbnail of the input file.

    Args:
        file_path: Asset to render; loaded through ``get_image_from_asset``.
        max_size: ``(width, height)`` bounding box for the thumbnail.

    Returns:
        A ``data:image/jpeg;base64,...`` URI, or None when the asset yields
        no image or any step fails (the error is printed).
    """
    try:
        # Get the image using the existing utility
        pil_image = get_image_from_asset(file_path)
        if not pil_image:
            return None

        # Create thumbnail on a copy so the caller's image is left untouched
        thumbnail = pil_image.copy()
        thumbnail.thumbnail(max_size, Image.Resampling.LANCZOS)

        # JPEG has no alpha channel and cannot store palette images, so
        # normalise the mode before saving.
        has_alpha = thumbnail.mode in ('RGBA', 'LA', 'PA') or 'transparency' in thumbnail.info
        if has_alpha:
            # Flatten transparency onto white using the real alpha channel.
            # This also covers 'LA' and palette images, which previously were
            # pasted without a mask or failed to save.
            rgba = thumbnail.convert('RGBA')
            background = Image.new('RGB', rgba.size, (255, 255, 255))
            background.paste(rgba, mask=rgba.getchannel('A'))
            thumbnail = background
        elif thumbnail.mode not in _JPEG_SAFE_MODES:
            # e.g. 'P' (palette) or 'I;16': not writable as JPEG as-is
            thumbnail = thumbnail.convert('RGB')

        # Convert to base64
        buffer = io.BytesIO()
        thumbnail.save(buffer, format='JPEG', quality=85)
        img_str = base64.b64encode(buffer.getvalue()).decode()

        return f"data:image/jpeg;base64,{img_str}"
    except Exception as e:
        print(f"Error creating thumbnail: {e}")
        return None
# (profile-id prefix or tuple of prefixes, client) pairs, checked in order.
_CLIENT_PREFIXES = (
    ('loreal', 'loreal'),
    ('diageo', 'diageo'),
    ('unilever', 'unilever'),
    ('amazon', 'amazon'),
    ('boots', 'boots'),
    ('hp_', 'hp'),
    (('dow_jones', 'dj_', 'marketwatch', 'mw_', 'wsj'), 'dow_jones'),
)


def get_client_from_profile(profile_id):
    """Determine client from profile ID.

    The match is a case-insensitive prefix test; anything unrecognised
    (including an empty or missing id) maps to 'general'.
    """
    if not profile_id:
        return 'general'

    profile_lower = profile_id.lower()
    for prefixes, client in _CLIENT_PREFIXES:
        if profile_lower.startswith(prefixes):
            return client
    return 'general'


def ensure_client_output_folder(client):
    """Ensure client-specific output folder exists and return its path."""
    client_folder = os.path.join(app.config['OUTPUT_FOLDER'], client)
    os.makedirs(client_folder, exist_ok=True)
    return client_folder


def _remove_if_stale(path, cutoff_time):
    """Delete *path* if it is older than *cutoff_time*; return 1 if deleted, else 0.

    Failures on a single file are reported and swallowed so the remaining
    files are still processed.
    """
    try:
        # NOTE(review): getctime is the creation time on Windows but the last
        # metadata-change time on Unix — confirm that is the intended age.
        if os.path.getctime(path) < cutoff_time:
            os.remove(path)
            print(f"Deleted old file: {path}")
            return 1
    except OSError as e:
        print(f"Error during cleanup: {e}")
    return 0


def cleanup_old_files(max_age_days=14):
    """Delete files older than max_age_days from all client folders.

    Scans the configured output folder: files directly inside it (kept for
    migration from the old flat layout) and files one level down in each
    client sub-folder.

    Returns:
        The number of files deleted. Errors are printed, never raised; a file
        that cannot be inspected or removed is skipped.
    """
    cutoff_time = time.time() - (max_age_days * 24 * 60 * 60)
    deleted_count = 0

    try:
        output_folder = app.config['OUTPUT_FOLDER']

        # Check root output folder
        for item in os.listdir(output_folder):
            item_path = os.path.join(output_folder, item)

            if os.path.isdir(item_path):
                # Check files in client subfolders
                for filename in os.listdir(item_path):
                    file_path = os.path.join(item_path, filename)
                    if os.path.isfile(file_path):
                        deleted_count += _remove_if_stale(file_path, cutoff_time)
            elif os.path.isfile(item_path):
                # Check files in root output folder (for migration)
                deleted_count += _remove_if_stale(item_path, cutoff_time)

        if deleted_count > 0:
            print(f"Cleaned up {deleted_count} files older than {max_age_days} days")

    except Exception as e:
        print(f"Error during cleanup: {e}")

    return deleted_count


def _resolve_profile_id(report_data):
    """Return the report's profile id: 'profile_id', else the first of 'profiles', else None."""
    profile_id = report_data.get('profile_id')
    if profile_id:
        return profile_id
    profiles = report_data.get('profiles')
    # An empty or missing list must not raise; it simply means "no profile".
    if isinstance(profiles, (list, tuple)) and profiles:
        return profiles[0]
    return None


def save_results_to_file(report_data, filename, output_mode='html', session_id=None, file_path=None):
    """Save analysis results to file and return file path.

    Args:
        report_data: Report dict; its profile decides the client sub-folder.
        filename: Name of the analysed asset; spaces become underscores in the
            output file name.
        output_mode: 'html' writes ``<session>_<name>_report.html`` rendered by
            ``generate_html_content``; any other value writes
            ``<session>_<name>_data.json``.
        session_id: Prefix for the output name; defaults to a timestamp.
        file_path: Forwarded to ``generate_html_content`` in HTML mode.

    Returns:
        Path of the file that was written.
    """
    print(f"DEBUG: save_results_to_file called with output_mode: '{output_mode}'")
    if not session_id:
        session_id = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Determine client from profile
    profile_id = _resolve_profile_id(report_data)
    client = get_client_from_profile(profile_id)
    print(f"DEBUG: Detected client '{client}' from profile '{profile_id}'")

    # Ensure client-specific folder exists
    client_folder = ensure_client_output_folder(client)

    # Create filename base
    # NOTE(review): only spaces are normalised; if `filename` can come from an
    # upload, path separators in it should be rejected by the caller.
    base_filename = f"{session_id}_{filename.replace(' ', '_')}"

    if output_mode == 'html':
        print(f"DEBUG: Creating HTML file because output_mode == 'html'")
        # Save HTML file in client-specific folder
        output_filename = f"{base_filename}_report.html"
        output_path = os.path.join(client_folder, output_filename)

        html_content = generate_html_content(report_data, filename, file_path)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        return output_path

    print(f"DEBUG: Creating JSON file because output_mode != 'html' (it's '{output_mode}')")
    # Save JSON file in client-specific folder
    output_filename = f"{base_filename}_data.json"
    output_path = os.path.join(client_folder, output_filename)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(report_data, f, indent=2, ensure_ascii=False)

    return output_path
result with expandable sections check_results_html = "" for check_name, check_data in report_data['results'].items(): if check_data['status'] == 'success': display_name = check_data.get('display_name', check_name) score = check_data.get('score', 0) result_text, score_color = get_score_result(score) weight = check_data.get('weight', 0) weighted_score = check_data.get('weighted_score', 0) # Extract response text - try to get detailed info from JSON data first json_data = check_data.get('json_data', {}) response_text = "" # Structured findings (e.g. hp_copy_review) render as a table # instead of the default response-text block. If absent, falls # back to the existing text rendering below. findings = (json_data or {}).get('findings') if isinstance(json_data, dict) else None findings_html = _render_findings_table(findings) if findings is not None else None # Try to extract detailed analysis from JSON data if json_data: # Look for common detailed fields in the JSON analysis_details = json_data.get('analysis_details', '') explanation = json_data.get('explanation', '') issues_found = json_data.get('issues_found', []) recommendations = json_data.get('recommendations', '') elements_checked = json_data.get('elements_checked', {}) elements_found = json_data.get('elements_found', {}) marketing_text_found = json_data.get('marketing_text_found', []) recommended_adjustments = json_data.get('recommended_adjustments', '') # Build detailed response text from JSON fields if analysis_details: response_text += f"Analysis:
{analysis_details}

" elif explanation: response_text += f"Analysis:
{explanation}

" if elements_checked: response_text += "Elements Checked:
" for element, status in elements_checked.items(): response_text += f"• {element.replace('_', ' ').title()}: {status}
" response_text += "
" elif elements_found: response_text += "Elements Found:
" for element, found in elements_found.items(): status_icon = "Present" if found else "Missing" response_text += f"• {element.replace('_', ' ').title()}: {status_icon}
" response_text += "
" if marketing_text_found: response_text += "Marketing Text Found:
" for text in marketing_text_found: response_text += f"• {text}
" response_text += "
" if issues_found: response_text += "Issues Found:
" if isinstance(issues_found, list): for issue in issues_found: response_text += f"• {issue}
" else: response_text += f"{issues_found}
" response_text += "
" if recommendations: if isinstance(recommendations, list): response_text += "Recommendations:
" for rec in recommendations: response_text += f"• {rec}
" else: response_text += f"Recommendation:
{recommendations}
" if recommended_adjustments: if isinstance(recommended_adjustments, list): response_text += "
Suggested Adjustments:
" for adj in recommended_adjustments: response_text += f"• {adj}
" elif isinstance(recommended_adjustments, str) and recommended_adjustments.lower() not in ['none', 'n/a']: response_text += f"
Suggested Adjustments:
{recommended_adjustments}" # If still no response text from known fields, build summary from all JSON data if not response_text and json_data: skip_keys = {'score', 'weight', 'weighted_score', 'status'} summary_parts = [] for key, value in json_data.items(): if key in skip_keys: continue display_key = key.replace('_', ' ').title() if isinstance(value, bool): summary_parts.append(f"• {display_key}: {'Yes' if value else 'No'}") elif isinstance(value, dict): summary_parts.append(f"{display_key}:") for sub_key, sub_val in value.items(): sub_display = sub_key.replace('_', ' ').title() if isinstance(sub_val, bool): summary_parts.append(f"  • {sub_display}: {'Yes' if sub_val else 'No'}") else: summary_parts.append(f"  • {sub_display}: {sub_val}") elif isinstance(value, list): if value: summary_parts.append(f"{display_key}:") for item in value: summary_parts.append(f"  • {item}") elif isinstance(value, str) and value and value.lower() not in ['none', 'n/a', '']: summary_parts.append(f"• {display_key}: {value}") elif isinstance(value, (int, float)): summary_parts.append(f"• {display_key}: {value}") if summary_parts: response_text = "
".join(summary_parts) # If no detailed JSON data was found, fall back to original response if not response_text: response_text = check_data.get('response', '') # Remove JSON code blocks for cleaner reading response_text = re.sub(r'```json.*?```', '', response_text, flags=re.DOTALL) response_text = response_text.strip() # If still empty, provide a basic fallback if not response_text: score = check_data.get('score', 0) result_text, _ = get_score_result(score) if score == 0 and check_name in REFERENCE_ASSET_REQUIRED_CHECKS: response_text = f"Reference asset is required for the '{display_name}' QC check but was not provided." else: response_text = f"QC check result: {result_text} (Score: {score}/10)" # Create expandable section for each check check_results_html += f"""

{display_name}

{result_text}

Analysis Details:

{f'
{html.escape(json_data.get("summary", "") or "") if isinstance(json_data, dict) else ""}
{findings_html}' if findings_html is not None else f'
{response_text.replace(chr(10), "
")}
'}
""" # Get summary score result overall_score = report_data['summary']['overall_score'] overall_result, overall_color = get_score_result(overall_score/10) # Normalize to 0-10 scale # Determine the correct total score based on profile profile_id = report_data.get('profile_id', '') if profile_id == 'unilever_key_visual': score_total = 120 else: score_total = 100 technical_html = _render_technical_section_html(report_data.get('technical_report', {})) html_content = f""" Visual AI QC Results for {filename}

🤖 Visual AI QC Results

Analysis completed on: {report_data['timestamp']}

📎 Analyzed File

File thumbnail
{filename}
Original file processed for quality control analysis

📊 Analysis Summary

{overall_score}/{score_total}
Overall Score
{report_data['summary']['grade']}
Grade
{report_data['summary']['checks_count']}
Checks Performed
{report_data['profile_name']}
Profile Used
{'✅ Used' if reference_asset_used else '➖ None'}
Reference Asset
{technical_html}

🔍 Detailed Analysis Results

Click on any section below to expand and view detailed analysis

{check_results_html}
📄 Show/Hide Raw JSON Data
{json.dumps(report_data, indent=2)}
""" return html_content def generate_html_response(report_data, filename, save_to_file=False, session_id=None, file_path=None): """Generate HTML response for report data with optional file saving""" html_content = generate_html_content(report_data, filename, file_path) if save_to_file: # Save to file and return file path info output_path = save_results_to_file(report_data, filename, 'html', session_id, file_path) return Response(html_content, mimetype='text/html'), output_path else: return Response(html_content, mimetype='text/html') def _render_findings_table(findings): """Render an hp_copy_review-style findings array as an HTML table. Each finding dict is expected to carry: priority (high|medium|low), category, quote, issue, suggested_fix, source_reference. All string fields are HTML-escaped before interpolation. An empty/None findings list renders a friendly "clean copy" note instead of an empty table. """ if not findings: return '

No findings — copy is clean.

' rows = [] for f in findings: priority = (f.get('priority') or 'low').lower() pri_class = { 'high': 'priority-high', 'medium': 'priority-medium', 'low': 'priority-low', }.get(priority, 'priority-low') quote_raw = (f.get('quote') or '')[:200] rows.append( '' f'{html.escape(priority.upper())}' f'{html.escape(f.get("category", "") or "")}' f'{html.escape(quote_raw)}' f'{html.escape(f.get("issue", "") or "")}' f'{html.escape(f.get("suggested_fix", "") or "")}' f'{html.escape(f.get("source_reference", "") or "")}' '' ) return ( '' '' '' '' + ''.join(rows) + '
PriorityCategoryQuoteIssueSuggested fixSource
' ) def _render_technical_section_html(report): """Render the technical pre-flight report as an HTML block. Empty string if no report.""" if not report or report.get('kind') in (None, 'unknown'): return '' kind = report['kind'] rows = [] size_mb = report.get('file_size_mb') if size_mb is not None: rows.append(f'
File size: {size_mb} MB
') dims = report.get('dimensions') if dims: rows.append(f'
Dimensions: {dims["width"]} × {dims["height"]}
') fmt = report.get('format') if fmt: rows.append(f'
Format: {fmt}
') dpi = report.get('dpi') if dpi: rows.append(f'
DPI: {dpi[0]} × {dpi[1]}
') mode = report.get('mode') if mode: rows.append(f'
Color mode: {mode}
') pc = report.get('page_count') if pc is not None: rows.append(f'
Pages: {pc}
') pdf_ver = report.get('pdf_version') if pdf_ver: rows.append(f'
PDF version: {pdf_ver}
') duration = report.get('duration_seconds') if duration is not None: rows.append(f'
Duration: {duration}s
') codec = report.get('video_codec') if codec: rows.append(f'
Video codec: {codec}
') fps = report.get('fps') if fps: rows.append(f'
Frame rate: {fps} fps
') fonts = report.get('embedded_fonts') if fonts: suffix = ' …' if len(fonts) > 8 else '' rows.append(f'
Embedded fonts: {", ".join(fonts[:8])}{suffix}
') fm = report.get('filename_match') if fm: if fm['match']: badge = '✓ Matches filename' else: badge = '⚠ Filename mismatch' rows.append(f'
{badge} {fm["detail"]}
') errors = report.get('errors', []) if errors: rows.append(f'
Inspection notes: {"; ".join(errors)}
') if not rows: return '' return f'''

🔧 Technical Details (machine-inspected, no AI)

{''.join(rows)}
''' def generate_comprehensive_html_report(analysis_result, filename, file_path=None): """Generate comprehensive HTML report similar to the web UI format""" summary = analysis_result.get('summary', {}) qc_analysis = analysis_result.get('qc_analysis', {}) profile_selection = analysis_result.get('profile_selection', {}) check_results = qc_analysis.get('check_results', {}) timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') overall_score = summary.get('overall_score', 0) profile_name = profile_selection.get('suggested_profile', 'Unknown Profile') total_checks = qc_analysis.get('total_checks', 0) completed_checks = qc_analysis.get('completed_checks', 0) reference_asset = profile_selection.get('reference_asset', None) reference_asset_used = profile_selection.get('reference_asset_used', False) # Generate check results HTML check_results_html = '' for check_name, result in check_results.items(): if result.get('status') in ('success', 'completed'): score = result.get('score', 0) result_text = "Pass" if score >= 6 else "Fail" score_color = '#28a745' if score >= 6 else '#dc3545' response = result.get('response', 'No response available') display_name = check_name.replace('_', ' ').replace(chr(32).join([w.capitalize() for w in check_name.split('_')]), check_name.replace('_', ' ').title()) # Structured findings (e.g. hp_copy_review) render as a table # instead of the default response-text block. Fallback to the # existing response rendering when 'findings' is absent. 
json_data = result.get('json_data') if isinstance(result, dict) else None findings = json_data.get('findings') if isinstance(json_data, dict) else None findings_html = _render_findings_table(findings) if findings is not None else None # Remove JSON blocks for cleaner display and handle empty responses response = re.sub(r'```json.*?```', '', response, flags=re.DOTALL).strip() if not response: if score == 0 and check_name in REFERENCE_ASSET_REQUIRED_CHECKS: response = f"Reference asset is required for the '{display_name}' QC check but was not provided." else: response = f"QC check result: {result_text} (Score: {score}/10)" check_results_html += f'''

{display_name}

{result_text}

Analysis Details:

{f'
{html.escape(json_data.get("summary", "") or "") if isinstance(json_data, dict) else ""}
{findings_html}' if findings_html is not None else f'
{response.replace(chr(10), "
")}
'}
''' # Convert overall score to pass/fail based on average of individual check scores avg_individual_score = overall_score / 10 # Normalize to 1-10 scale grade_text = 'Pass' if avg_individual_score >= 6 else 'Fail' score_color = '#28a745' if avg_individual_score >= 6 else '#dc3545' technical_html = _render_technical_section_html(analysis_result.get('technical_report', {})) return f''' Visual AI QC Results for {filename}

🤖 Visual AI QC Results

Analysis completed on: {timestamp}

📎 Analyzed File

File thumbnail
{filename}
Original file processed for quality control analysis

📊 Analysis Summary

{overall_score}/100
Overall Score
{grade_text}
Grade
{completed_checks}
Checks Performed
{profile_name}
Profile Used
{'✅ Used' if reference_asset_used else '➖ None'}
Reference Asset
{technical_html}

🔍 Detailed Analysis Results

Click on any section below to expand and view detailed analysis

{check_results_html}
def get_reference_image_path(check_name):
    """Deprecated lookup for a per-check reference image; always returns None.

    Numbered criteria images are no longer used. Reference assets are handled
    through the brand guidelines system instead.
    """
    return None


def get_reference_asset_image_path(reference_asset_id):
    """
    Resolve a reference asset ID to an image file path that can be sent to the LLM.

    Args:
        reference_asset_id: ID of the reference asset to retrieve

    Returns:
        The stored path for image assets, a cover-image path for PDFs
        (pre-extracted, or extracted on the fly), or None when the asset is
        missing, is not an image/PDF, or any lookup step fails.
    """
    if not (reference_asset_id and reference_asset_id.strip()):
        return None

    raster_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.webp', '.gif', '.tiff')

    try:
        record = brand_db.db["files"].get(reference_asset_id)
        if not record:
            print(f"DEBUG: Reference asset not found: {reference_asset_id}")
            return None

        stored = record.get("stored_path", "")
        if not (stored and os.path.exists(stored)):
            print(f"DEBUG: Reference asset file not found at path: {stored}")
            return None

        stem, suffix = os.path.splitext(stored)
        suffix = suffix.lower()

        if suffix in raster_suffixes:
            print(f"DEBUG: Found reference image at: {stored}")
            return stored

        if suffix != '.pdf':
            print(f"DEBUG: Reference asset is not an image: {stored}")
            return None

        # PDF: prefer the cover image the brand DB already knows about.
        cover = brand_db.get_cover_image_path(reference_asset_id)
        if cover:
            print(f"DEBUG: Found PDF cover image at: {cover}")
            return cover

        # Otherwise try to extract the cover next to the PDF.
        try:
            from pdf_processor import extract_cover_image
            extracted = extract_cover_image(stored, stem + "_cover.png")
            if extracted:
                print(f"DEBUG: Extracted PDF cover on-the-fly: {extracted}")
                return extracted
        except Exception as e:
            print(f"DEBUG: Could not extract PDF cover: {e}")
        return None

    except Exception as e:
        print(f"DEBUG: Error getting reference asset image path: {e}")
        return None
def get_reference_asset_content(reference_asset_id):
    """
    Build the reference-asset section that is appended to QC prompts.

    The section opens with a header naming the brand and reference file, adds
    fixed instructions, then adds content that depends on the stored file's
    extension (text files, PDFs, Excel files, anything else), and closes with
    an end marker.

    Args:
        reference_asset_id: ID of the reference asset to retrieve

    Returns:
        The formatted section, or an empty string when the ID is blank, the
        asset is unknown, or retrieval fails.
    """
    if not (reference_asset_id and reference_asset_id.strip()):
        return ""

    try:
        record = brand_db.db["files"].get(reference_asset_id)
        if not record:
            print(f"Reference asset not found: {reference_asset_id}")
            return ""

        brand_name = record["brand_name"]
        description = record.get("description", "")
        stored_path = record.get("stored_path", "")
        original_filename = record.get("original_filename", "")

        # Collect the pieces in order and join once at the end.
        parts = [
            "\n\n=== REFERENCE ASSET GUIDELINES ===\n",
            f"Brand: {brand_name}\n",
            f"Reference File: {original_filename}\n",
        ]
        emit = parts.append

        if description:
            emit(f"Description: {description}\n")

        emit("\nPlease use this reference asset as your guideline for analysis. ")
        emit("Compare the uploaded image against these brand standards and requirements. ")
        emit("Pay special attention to brand consistency, visual standards, and any specific ")
        emit("requirements shown in the reference material.\n")

        if stored_path and os.path.exists(stored_path):
            try:
                ext = os.path.splitext(stored_path)[1].lower()

                if ext in ('.txt', '.md', '.json'):
                    with open(stored_path, 'r', encoding='utf-8') as fh:
                        text = fh.read()
                    if len(text) > 3000:
                        text = text[:3000] + "... [content truncated]"
                    emit(f"\nReference Content:\n{text}\n")

                elif ext == '.pdf':
                    summary_file = brand_db.get_summary_path(reference_asset_id)
                    if summary_file:
                        # A pre-computed summary exists: use it verbatim.
                        with open(summary_file, 'r', encoding='utf-8') as fh:
                            summary_text = fh.read()
                        emit(f"\nBrand Guidelines Summary (extracted from {original_filename}):\n{summary_text}\n")
                    else:
                        # No summary: fall back to inline text extraction.
                        try:
                            from pdf_processor import extract_text_from_pdf
                            pdf_text = extract_text_from_pdf(stored_path)
                            if pdf_text and len(pdf_text) > 100:
                                if len(pdf_text) > 5000:
                                    pdf_text = pdf_text[:5000] + "... [content truncated]"
                                emit(f"\nExtracted PDF Content:\n{pdf_text}\n")
                            else:
                                emit("\nReference PDF contains mostly images with limited extractable text.\n")
                        except Exception:
                            emit("\nReference file (.pdf) is available for visual comparison.\n")

                elif ext in ('.xlsx', '.xls'):
                    if record.get('asset_type') == 'localization_matrix':
                        # Only a short description here; the full matrix context
                        # is built separately via localization_processor.
                        messages = record.get('localization_messages', [])
                        countries = record.get('localization_countries', [])
                        emit(f"\nLocalization Matrix: Contains {', '.join(messages)} ")
                        emit(f"for {len(countries)} markets ({', '.join(countries[:10])}).\n")
                        emit("Expected copy will be cross-referenced with the media plan during analysis.\n")
                    elif record.get('summary_path'):
                        # Source-messaging Excel: inject its Markdown summary.
                        try:
                            with open(record['summary_path'], 'r', encoding='utf-8') as fh:
                                summary_text = fh.read()
                            emit(f"\nSource Messaging Summary (extracted from {original_filename}):\n{summary_text}\n")
                        except Exception as e:
                            print(f"Failed to read source-messaging summary at {record['summary_path']}: {e}")
                            emit(f"\nReference file ({ext}) uploaded but summary unreadable.\n")
                    else:
                        emit(f"\nReference file ({ext}) uploaded as reference.\n")

                else:
                    emit(f"\nReference file ({ext}) is available for visual comparison.\n")

            except Exception as e:
                print(f"Error reading reference asset content: {e}")
                emit("\n[Reference file available but content could not be read]\n")

        emit("=== END REFERENCE ASSET GUIDELINES ===\n")
        return "".join(parts)

    except Exception as e:
        print(f"Error retrieving reference asset {reference_asset_id}: {e}")
        return ""
def load_qc_apps():
    """Load all QC apps and register their prompts in ``qc_apps``.

    For every name in ``QC_CHECKS`` this imports
    ``visual_qc_apps.<check_name>.app``, picks the first attribute whose name
    ends in 'App' (other than 'FlaskAppTemplate'), instantiates it, and stores
    the class name, prompt, instance, display name and reference image under
    ``qc_apps[check_name]``.

    Failures are logged per check and never abort the loop: an ImportError
    skips the check, any other exception prints a traceback.
    """
    for check_name in QC_CHECKS:
        try:
            try:
                module_path = f"visual_qc_apps.{check_name}.app"
                module = importlib.import_module(module_path)
                # First attribute (dir() order) that looks like an app class.
                app_class_name = next(
                    (
                        attr for attr in dir(module)
                        if attr.endswith('App') and attr != 'FlaskAppTemplate'
                    ),
                    None,
                )
            except ImportError as e:
                print(f"Import error for {check_name}: {e}")
                continue

            if not app_class_name:
                print(f"No app class found in {module_path}")
                continue

            # Instantiate the app so its prompt can be read.
            app_instance = getattr(module, app_class_name)()
            reference_image = get_reference_image_path(check_name)

            qc_apps[check_name] = {
                "name": app_class_name,
                "prompt": app_instance.prompt,
                "instance": app_instance,
                "display_name": app_class_name.replace("App", "").replace("_", " "),
                "reference_image": reference_image
            }
            print(f"Loaded QC check: {check_name}" + (f" with reference image" if reference_image else ""))
        except Exception as e:
            print(f"Error loading {check_name} app: {e}")
            traceback.print_exc()


@app.route('/', methods=['GET'])
def serve_web_ui():
    """Serve the web UI.

    Reads ``web_ui.html`` from the parent of this file's directory and returns
    it as ``text/html``. Responds with a JSON 404 when the file is missing.
    """
    try:
        base_dir = os.path.dirname(os.path.abspath(__file__))
        web_ui_path = os.path.join(os.path.dirname(base_dir), 'web_ui.html')
        # Pin the encoding so decoding does not depend on the host locale,
        # matching every other text read/write in this module.
        with open(web_ui_path, 'r', encoding='utf-8') as f:
            html_content = f.read()
        return Response(html_content, mimetype='text/html')
    except FileNotFoundError:
        return jsonify({'error': 'Web UI not found'}), 404
@app.route('/health', methods=['GET'])
def health_check():
    """Simple health check endpoint: reports 'healthy' and the current timestamp."""
    return jsonify({
        'status': 'healthy',
        'timestamp': datetime.now().isoformat()
    })


def _probe_makedirs(path):
    """Try to create ``path``; return (succeeded, error message or None)."""
    try:
        os.makedirs(path, exist_ok=True)
    except Exception as e:
        return False, str(e)
    return True, None


@app.route('/api/health/folders', methods=['GET'])
def health_check_folders():
    """Check that the upload and output folders exist and can be written.

    The JSON response has two sections:
      * 'folders': the configured paths, whether each existed before the
        probe, whether ``os.makedirs`` succeeded for each (the '*_writable'
        flags), and whether a test file could be written to and removed from
        the upload folder.
      * 'errors': the error message for each failed probe, or null.

    An unexpected failure returns HTTP 500 with the message and traceback.
    """
    try:
        upload_folder = app.config.get('UPLOAD_FOLDER', 'uploads')
        output_folder = app.config.get('OUTPUT_FOLDER', 'output')

        # Record existence before the probes below create anything.
        upload_exists = os.path.exists(upload_folder)
        output_exists = os.path.exists(output_folder)

        upload_writable, upload_error = _probe_makedirs(upload_folder)
        output_writable, output_error = _probe_makedirs(output_folder)

        # Round-trip a small file through the upload folder.
        test_file_path = os.path.join(upload_folder, 'test_write.tmp')
        can_write_files = False
        write_error = None
        try:
            with open(test_file_path, 'w') as f:
                f.write('test')
            os.remove(test_file_path)
            can_write_files = True
        except Exception as e:
            write_error = str(e)

        return jsonify({
            'status': 'success',
            'folders': {
                'upload_folder': upload_folder,
                'upload_exists': upload_exists,
                'upload_writable': upload_writable,
                'output_folder': output_folder,
                'output_exists': output_exists,
                'output_writable': output_writable,
                'can_write_files': can_write_files
            },
            'errors': {
                'upload_error': upload_error,
                'output_error': output_error,
                'write_error': write_error
            }
        })
    except Exception as e:
        import traceback
        # NOTE(review): the traceback is returned to the client; kept for
        # backward compatibility, but consider logging it server-side only.
        return jsonify({
            'status': 'error',
            'message': str(e),
            'traceback': traceback.format_exc()
        }), 500


# The rule must declare the <session_id> variable: Flask passes URL variables
# to the view as keyword arguments, and this view requires one.
@app.route('/api/progress/<session_id>', methods=['GET'])
def get_progress(session_id):
    """Get current progress for a session.

    Returns the ``progress_tracker`` entry for ``session_id``, or a JSON 404
    when the session is unknown.
    """
    if session_id not in progress_tracker:
        return jsonify({'status': 'error', 'message': 'Session not found'}), 404

    return jsonify({
        'status': 'success',
        'progress': progress_tracker[session_id]
    })
# Profile-name prefixes that imply a client when the request does not name one.
_PROFILE_CLIENT_PREFIXES = (
    (('diageo_',), 'diageo'),
    (('unilever_',), 'unilever'),
    (('loreal_',), 'loreal'),
    (('amazon_',), 'amazon'),
    (('boots_',), 'boots'),
    (('dow_jones_', 'dj_', 'marketwatch_', 'mw_', 'wsj_'), 'dow_jones'),
)

# Profiles whose overall grade is forced to 'Fail' when any successful check scores below 6.
_STRICT_FAIL_PROFILES = frozenset({'loreal_static', 'amazon_static', 'boots_static'})

_ANONYMOUS_USER = {'user_id': 'unknown', 'email': 'unknown', 'name': 'unknown'}


def _new_session_id():
    """Return a session ID made of a sortable timestamp plus a random suffix.

    The timestamp alone has one-second resolution, so two uploads in the same
    second would share an upload folder and a progress_tracker entry.
    """
    import uuid
    return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"


def _safe_upload_name(raw_name):
    """Reduce a client-supplied filename to a bare basename ('' if unusable).

    Strips any directory components (both '/' and '\\' separators) so the name
    cannot escape the session folder when joined onto it.
    """
    name = os.path.basename((raw_name or '').replace('\\', '/')).strip()
    return '' if name in ('', '.', '..') else name


def _client_from_profile(profile):
    """Map a profile ID to its client via known prefixes; 'general' if none match."""
    for prefixes, client_name in _PROFILE_CLIENT_PREFIXES:
        if profile.startswith(prefixes):
            return client_name
    return 'general'


@app.route('/api/start_analysis', methods=['POST'])
@auth.require_auth
def start_analysis():
    """Start an asset analysis in a background thread and return its session ID.

    Reads the multipart field 'file' and the form fields 'profile', 'brand',
    'mode', 'reference_asset', 'use_media_plan' and 'client_id'/'client'.

    Returns:
        A JSON response ``{'status': 'success', 'session_id': ..., 'message': ...}``
        once the upload is stored and the worker thread is started.
        400 when the file part is missing, unnamed, or has an unusable name;
        whatever ``_require_client_access`` returns when it rejects the client;
        500 with the exception text on any other failure.

    Progress and the final result are written to ``progress_tracker[session_id]``
    by the worker thread.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'status': 'error', 'message': 'No file part'}), 400

        file = request.files['file']
        if not file.filename:
            return jsonify({'status': 'error', 'message': 'No selected file'}), 400

        # Never join the raw client-supplied name into a filesystem path.
        filename = _safe_upload_name(file.filename)
        if not filename:
            return jsonify({'status': 'error', 'message': 'Invalid filename'}), 400

        # Get parameters
        profile = request.form.get('profile', 'general').lower()
        brand = request.form.get('brand', 'general').lower()
        output_mode = request.form.get('mode', 'json').lower()
        reference_asset = request.form.get('reference_asset', '')
        use_media_plan = request.form.get('use_media_plan', 'false').lower() == 'true'

        # Use profile if provided, otherwise fall back to brand
        if profile and profile != 'general':
            brand = profile.split('_')[0]

        print(f"Starting analysis with profile: {profile}, brand: {brand}, mode: {output_mode}")
        print(f"DEBUG: Raw mode parameter from request: '{request.form.get('mode')}'")
        print(f"DEBUG: Processed output_mode: '{output_mode}'")

        # Derive client from profile if not provided
        client = request.form.get('client_id', request.form.get('client', 'general')).lower()
        if not client or client == 'general':
            client = _client_from_profile(profile)

        # Authorize BEFORE touching the disk so a rejected caller leaves no upload behind.
        access_err = _require_client_access(client)
        if access_err:
            return access_err

        # Create unique session ID and save file
        session_id = _new_session_id()
        session_folder = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
        os.makedirs(session_folder, exist_ok=True)
        file_path = os.path.join(session_folder, filename)
        file.save(file_path)

        # Machine-side technical pre-flight (PIL/PyMuPDF/ffprobe, no LLM).
        # Stored on progress_tracker so process_single_check can prepend it to
        # every LLM prompt, and surfaced in result_data for the UI.
        technical_report = technical_inspect(file_path)

        # g and request are request-scoped and unavailable inside the worker
        # thread, so everything the thread needs from them is captured here.
        user_info = getattr(g, 'user', dict(_ANONYMOUS_USER))
        script_name = request.environ.get('SCRIPT_NAME', '')

        # Log analysis start (best-effort: a logging failure must not fail the analysis)
        try:
            from usage_tracker import log_analysis_start
            file_info = {
                'filename': filename,
                'size': os.path.getsize(file_path) if os.path.exists(file_path) else 0
            }
            print(f"DEBUG: user_info = {user_info}")
            log_analysis_start(session_id, client, profile, user_info, file_info)
        except Exception as log_error:
            print(f"WARNING: Failed to log analysis start: {log_error}")
            traceback.print_exc()

        # Initialize progress tracking with an estimated total; the worker
        # replaces it with the real count once the profile is resolved.
        estimated_checks = 25  # Reasonable estimate for most profiles
        progress_tracker[session_id] = {
            'total_checks': estimated_checks,
            'completed_checks': 0,
            'current_check': 'Initializing',
            'current_check_display': 'Initializing Analysis',
            'stage': 'setup',
            'percentage': 0,
            'session_id': session_id,
            'status': 'started',
            'technical_report': technical_report,
        }

        def run_analysis(session_id, file_path, filename, brand, profile, output_mode, reference_asset, user_info):
            """Worker: run all enabled checks, score them, and publish the result.

            Also reads ``client``, ``use_media_plan`` and ``script_name`` from
            the enclosing scope. Any exception marks the session as 'error'.
            """
            print(f"Background thread started for session: {session_id}")
            print(f"Parameters: brand={brand}, profile={profile}, mode={output_mode}")
            try:
                # Force reload QC apps to ensure they're available
                if not qc_apps:
                    load_qc_apps()

                analysis_brand = brand if brand and brand.strip() else 'general'
                analysis_mode = output_mode
                analysis_reference_asset = reference_asset
                print(f"DEBUG: analysis_mode = '{analysis_mode}'")

                # Debug trace file; best-effort so an unwritable CWD cannot fail the analysis.
                try:
                    with open('debug_mode.txt', 'a') as f:
                        f.write(f"Session {session_id}: analysis_mode = '{analysis_mode}'\n")
                except OSError as debug_err:
                    print(f"WARNING: could not write debug_mode.txt: {debug_err}")

                # Validate output mode
                print(f"DEBUG: Before validation, analysis_mode = '{analysis_mode}'")
                if analysis_mode not in ['json', 'html']:
                    print(f"DEBUG: analysis_mode '{analysis_mode}' not valid, defaulting to 'json'")
                    analysis_mode = 'json'
                else:
                    print(f"DEBUG: analysis_mode '{analysis_mode}' is valid")
                print(f"DEBUG: After validation, analysis_mode = '{analysis_mode}'")

                # Use the directly specified profile (no triage needed)
                suggested_profile = profile if profile and profile.strip() else 'general'
                print(f"Using specified profile: {suggested_profile}")

                # Update progress to show starting QC analysis
                progress_tracker[session_id].update({
                    'stage': 'qc_analysis',
                    'current_check': 'initializing',
                    'current_check_display': 'Preparing Quality Analysis',
                    'completed_checks': 0,
                    'percentage': 5
                })

                # STEP 1: Run Quality Control Analysis
                print(f"Step 1: Running QC analysis with profile '{suggested_profile}'")
                profile_config = get_profile(suggested_profile)
                if not profile_config:
                    raise Exception(f'Profile {suggested_profile} not found')

                # Keep only the profile's enabled checks that exist in qc_apps
                profile_weights = profile_config.get_check_weights()
                enabled_checks = [
                    check for check in profile_config.get_enabled_checks()
                    if check in qc_apps
                ]
                if not enabled_checks:
                    raise Exception(f'No enabled checks found for profile {suggested_profile}')

                # Update progress tracker with total checks
                progress_tracker[session_id].update({
                    'total_checks': len(enabled_checks),
                    'stage': 'qc_analysis',
                    'percentage': 10
                })

                # Build media plan context if selected by user
                mp_context = None
                mp_match_data = None
                try:
                    mp_data = _get_active_media_plan(client) if use_media_plan else None
                    if mp_data:
                        from media_plan_processor import find_matching_asset, build_media_plan_context
                        mp_match = find_matching_asset(filename, mp_data)
                        if mp_match:
                            mp_match_data = mp_match['match']
                            mp_context = build_media_plan_context(mp_match_data)
                except Exception as mp_err:
                    print(f"Media plan context build error: {mp_err}")

                # Build localization context if reference asset is a localization matrix
                localization_context = ""
                try:
                    if analysis_reference_asset and mp_match_data:
                        file_record = brand_db.db.get("files", {}).get(analysis_reference_asset)
                        if file_record and file_record.get('asset_type') == 'localization_matrix':
                            loc_path = file_record.get('localization_path')
                            if loc_path and os.path.exists(loc_path):
                                with open(loc_path, 'r', encoding='utf-8') as f:
                                    loc_data = json.load(f)
                                from localization_processor import detect_message_type, build_localization_context
                                creative_name = mp_match_data.get('creative_name', '')
                                msg_type = detect_message_type(creative_name)
                                country_code = mp_match_data.get('country', '')
                                if msg_type and country_code:
                                    localization_context = build_localization_context(loc_data, msg_type, country_code)
                                    if localization_context:
                                        print(f"Localization context built: {msg_type} / {country_code}")
                                    else:
                                        print(f"No localization data for {msg_type} / {country_code}")
                                else:
                                    print(f"No message type detected from creative_name: '{creative_name}'")
                except Exception as loc_err:
                    print(f"Localization context build error: {loc_err}")

                # Combine media plan and localization contexts
                if localization_context and mp_context:
                    mp_context = mp_context + "\n" + localization_context
                elif localization_context:
                    mp_context = localization_context

                # OCR layout measurement - provides pixel-level data to supplement LLM visual checks
                ocr_ctx = None
                try:
                    from ocr_measurement import OCR_RELEVANT_CHECKS
                    ocr_check_names = set(enabled_checks) & set(OCR_RELEVANT_CHECKS)
                    if ocr_check_names:
                        from ocr_measurement import run_ocr_measurement
                        print(f"Running OCR layout measurement for {len(ocr_check_names)} checks...")
                        ocr_result = run_ocr_measurement(file_path)
                        if ocr_result and ocr_result.get('context'):
                            ocr_ctx = ocr_result['context']
                            print("OCR measurements computed successfully")
                            print(ocr_ctx)
                        else:
                            print("OCR measurement returned no results")
                except Exception as ocr_err:
                    print(f"OCR measurement error (non-fatal, continuing without): {ocr_err}")

                # Run QC checks in parallel batches
                check_results = process_checks_in_batches(
                    enabled_checks, qc_apps, profile_config, profile_weights,
                    file_path, analysis_reference_asset, brand_db,
                    progress_tracker, session_id,
                    batch_size=15,
                    media_plan_context=mp_context,
                    ocr_context=ocr_ctx
                )

                # STEP 4: Calculate Overall Score
                print("Step 4: Calculating overall score")
                total_weighted_score = 0
                total_weight = 0
                completed_checks = 0
                failed_checks = 0
                for check_name, result in check_results.items():
                    weight = result.get('weight', 0.1)
                    total_weight += weight  # failed checks still count toward total weight
                    if result['status'] == 'success':
                        completed_checks += 1
                        score = result.get('score')
                        if score is not None:
                            total_weighted_score += score * weight
                    else:
                        failed_checks += 1

                # For profiles with total_weight = 10.0 (like General Check), use direct weighted score
                # For profiles with total_weight = 1.0, multiply by 10 to scale to 100
                if total_weight >= 10.0:
                    overall_score = min(total_weighted_score, 100)  # Cap at 100
                else:
                    overall_score = min(total_weighted_score * 10, 100)  # Scale to 100-point system, cap at 100

                # STEP 5: Prepare Combined Response
                print("Step 5: Preparing response")
                result_data = {
                    'status': 'success',
                    'session_id': session_id,
                    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    'filename': filename,
                    'profile': suggested_profile,
                    'profile_id': suggested_profile,
                    'profile_name': profile_config.name,
                    'model': 'Profile-based selection',
                    'results': check_results,
                    'triage_analysis': {
                        'status': 'skipped',
                        'results': {
                            'primary_format': 'unknown',
                            'specific_type': 'user_selected',
                            'confidence_score': 10,
                            'recommended_qc_profile': suggested_profile
                        },
                        'raw_response': 'Triage skipped - using user-selected profile directly'
                    },
                    'profile_selection': {
                        'selected_profile': suggested_profile,
                        'profile_source': 'user_selected',
                        'brand': analysis_brand,
                        'format_suffix': suggested_profile,
                        'reference_asset': analysis_reference_asset if analysis_reference_asset else None,
                        'reference_asset_used': bool(analysis_reference_asset)
                    },
                    'qc_analysis': {
                        'profile_used': suggested_profile,
                        'total_checks': len(enabled_checks),
                        'completed_checks': completed_checks,
                        'failed_checks': failed_checks,
                        'check_results': check_results
                    },
                    'summary': {
                        'overall_score': round(overall_score, 1),
                        'profile': profile_config.name,
                        'checks_count': completed_checks,
                        'total_checks': len(enabled_checks),
                        'total_weighted_score': total_weighted_score,
                        'total_weight': total_weight,
                        'grade': determine_grade(overall_score)
                    },
                    'technical_report': progress_tracker[session_id].get('technical_report', {}),
                }

                # L'Oreal / Amazon / Boots Static override: fail if ANY
                # successful individual check fails (score < 6)
                if suggested_profile in _STRICT_FAIL_PROFILES:
                    for cd in check_results.values():
                        if cd.get('status') != 'success':
                            continue
                        cs = cd.get('score', 0)
                        if cs is not None and cs < 6:
                            result_data['summary']['grade'] = 'Fail'
                            break

                # Log analysis completion (best-effort, same as the start log:
                # a usage-tracking failure must not discard a finished analysis)
                try:
                    from usage_tracker import log_analysis_complete
                    results_summary = {
                        'checks_completed': completed_checks,
                        'overall_score': overall_score,
                        'status': 'success',
                        'check_results': check_results
                    }
                    log_analysis_complete(session_id, client, suggested_profile, user_info, results_summary)
                except Exception as log_err:
                    print(f"WARNING: Failed to log analysis completion: {log_err}")
                print("Analysis completed successfully")

                # Save results to file
                try:
                    print(f"DEBUG: Saving file with mode: '{analysis_mode}'")
                    output_path = save_results_to_file(result_data, filename, analysis_mode, session_id, file_path)
                    # script_name was captured in the request thread; flask.request
                    # cannot be used here (no request context in this thread).
                    result_data['output_file'] = {
                        'path': output_path,
                        'filename': os.path.basename(output_path),
                        'url': f'{script_name}/output/{os.path.basename(output_path)}'
                    }
                    print(f"Results saved to: {output_path}")
                except Exception as save_error:
                    print(f"Error saving results to file: {save_error}")

                # Store result in progress tracker
                print(f"Analysis result status: {result_data.get('status')}")
                if session_id in progress_tracker:
                    progress_tracker[session_id].update({
                        'result': result_data,
                        'status': 'completed',
                        'stage': 'complete',
                        'percentage': 100,
                    })
                    print(f"Results stored in progress tracker for session: {session_id}")
                else:
                    print(f"ERROR: Session {session_id} not found in progress tracker!")
            except Exception as e:
                print(f"ERROR in background thread for session {session_id}: {str(e)}")
                print(f"Exception type: {type(e)}")
                print(f"Traceback: {traceback.format_exc()}")
                if session_id in progress_tracker:
                    progress_tracker[session_id].update({
                        'status': 'error',
                        'stage': 'error',
                        'error': str(e),
                        'current_check': 'Error',
                        'current_check_display': 'Analysis Failed',
                    })

        # Start background thread with explicit parameters
        threading.Thread(
            target=run_analysis,
            args=(session_id, file_path, filename, brand, profile, output_mode, reference_asset, user_info),
            daemon=True
        ).start()

        # Return session ID immediately
        return jsonify({
            'status': 'success',
            'session_id': session_id,
            'message': 'Analysis started'
        })
    except Exception as e:
        print(f"ERROR in start_analysis: {type(e).__name__}: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500


@app.route('/api/document/start_analysis', methods=['POST'])
@auth.require_auth
def start_document_analysis():
    """Start a document-mode analysis (multi-page PDF as QC target).

    Mirrors /api/start_analysis but routes through document_mode.dispatcher.
    Reuses the same progress_tracker dict, output folder convention, and
    /api/progress polling endpoint, so the frontend integration is minimal.

    Reads the multipart field 'file' (must end in .pdf) and the form fields
    'profile' (required), 'mode' ('json' | 'html' | 'both', default 'both'),
    'reference_asset' and 'client_id'/'client' (required).

    Returns:
        A JSON response with 'session_id' and ``'mode': 'document'`` once the
        worker thread is started. 400 for a missing/invalid file, profile or
        client, or a profile whose ``mode`` is not 'document'; 404 for an
        unknown profile; whatever ``_require_client_access`` returns when it
        rejects the client; 500 with the exception text otherwise.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'status': 'error', 'message': 'No file part'}), 400
        file = request.files['file']
        if not file.filename:
            return jsonify({'status': 'error', 'message': 'No selected file'}), 400

        # Never join the raw client-supplied name into a filesystem path.
        filename = _safe_upload_name(file.filename)
        if not filename:
            return jsonify({'status': 'error', 'message': 'Invalid filename'}), 400
        if not filename.lower().endswith('.pdf'):
            return jsonify({'status': 'error', 'message': 'Document mode requires a PDF file'}), 400

        profile_id = request.form.get('profile', '').lower()
        if not profile_id:
            return jsonify({'status': 'error', 'message': 'profile is required'}), 400

        output_mode = request.form.get('mode', 'both').lower()
        if output_mode not in ('json', 'html', 'both'):
            output_mode = 'both'

        reference_asset = request.form.get('reference_asset', '') or None

        client = request.form.get('client_id', request.form.get('client', '')).lower()
        if not client:
            return jsonify({'status': 'error', 'message': 'client is required for document mode'}), 400
        access_err = _require_client_access(client)
        if access_err:
            return access_err

        # Resolve and validate the profile is document-mode
        profile_config = get_profile(profile_id)
        if not profile_config:
            return jsonify({'status': 'error', 'message': f'Profile "{profile_id}" not found'}), 404
        if getattr(profile_config, 'mode', 'asset') != 'document':
            return jsonify({
                'status': 'error',
                'message': f'Profile "{profile_id}" is not a document-mode profile. Use /api/start_analysis instead.'
            }), 400

        # Save the uploaded PDF
        session_id = _new_session_id()
        session_folder = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
        os.makedirs(session_folder, exist_ok=True)
        file_path = os.path.join(session_folder, filename)
        file.save(file_path)

        # Machine-side technical pre-flight (PyMuPDF for PDFs, no LLM).
        technical_report = technical_inspect(file_path)

        # g is request-scoped, so resolve the user before handing off to the thread.
        user_info = getattr(g, 'user', dict(_ANONYMOUS_USER))

        # Log start (best-effort)
        try:
            from usage_tracker import log_analysis_start
            file_info = {
                'filename': filename,
                'size': os.path.getsize(file_path) if os.path.exists(file_path) else 0,
                'mode': 'document',
            }
            log_analysis_start(session_id, client, profile_id, user_info, file_info)
        except Exception as log_error:
            print(f"WARNING: Failed to log document analysis start: {log_error}")

        # Initialize progress
        progress_tracker[session_id] = {
            'total_checks': 0,
            'completed_checks': 0,
            'current_check': 'Initializing',
            'current_check_display': 'Initializing document analysis',
            'stage': 'setup',
            'percentage': 0,
            'session_id': session_id,
            'status': 'started',
            'mode': 'document',
            'technical_report': technical_report,
        }

        def run_document(session_id, file_path, filename, profile_id, client, output_mode, reference_asset, user_info):
            """Worker: run the document analysis, write reports, publish the result.

            Any exception marks the session as 'error' in progress_tracker.
            """
            try:
                if not qc_apps:
                    load_qc_apps()
                profile_config_local = get_profile(profile_id)

                # Document-mode checks may live in qc_apps (legacy LLM checks)
                # OR in document_mode.checks registry (new deterministic checks).
                # Accept either as a valid enabled check.
                from document_mode.checks import is_document_scope_check
                enabled_checks = [
                    c for c in profile_config_local.get_enabled_checks()
                    if c in qc_apps or is_document_scope_check(c)
                ]
                profile_weights = profile_config_local.get_check_weights()
                if not enabled_checks:
                    raise Exception(f'No enabled checks found for profile {profile_id}')

                from document_mode.dispatcher import run_document_analysis
                from document_mode.ingest import ingest_pdf
                from document_mode.result_writer import write_document_report

                doc_result = run_document_analysis(
                    pdf_path=file_path,
                    profile_config=profile_config_local,
                    profile_id=profile_id,
                    profile_weights=profile_weights,
                    enabled_checks=enabled_checks,
                    qc_apps=qc_apps,
                    brand_db=brand_db,
                    analysis_reference_asset=reference_asset,
                    media_plan_context=None,
                    ocr_context=None,
                    progress_tracker=progress_tracker,
                    session_id=session_id,
                    process_checks_in_batches=process_checks_in_batches,
                    ingest_pdf_fn=ingest_pdf,
                    # Per-page PNGs are rendered alongside the uploaded PDF
                    pages_output_dir=os.path.join(os.path.dirname(file_path), 'pages'),
                )

                # Write JSON + HTML to the client-scoped output folder
                client_folder = ensure_client_output_folder(client)
                paths = write_document_report(
                    doc_result,
                    original_filename=filename,
                    session_id=session_id,
                    output_dir=client_folder,
                    output_mode=output_mode,
                )

                # Wrap doc_result in a frontend-compatible envelope
                summary = doc_result.get('document_summary', {})
                result_data = {
                    'status': 'success',
                    'session_id': session_id,
                    'timestamp': doc_result.get('timestamp'),
                    'filename': filename,
                    'profile': profile_id,
                    'profile_id': profile_id,
                    'profile_name': profile_config_local.name,
                    'mode': 'document',
                    'document_result': doc_result,
                    'summary': {
                        'overall_score': summary.get('overall_score', 0),
                        'profile': profile_config_local.name,
                        'grade': summary.get('grade', ''),
                        'pages_processed': doc_result.get('pages_processed', 0),
                        'page_count': doc_result.get('page_count', 0),
                    },
                    'technical_report': progress_tracker[session_id].get('technical_report', {}),
                }
                if paths.get('html'):
                    result_data['output_file'] = {
                        'path': paths['html'],
                        'filename': os.path.basename(paths['html']),
                        'url': f'/output/{client}/{os.path.basename(paths["html"])}',
                    }
                if paths.get('json'):
                    result_data['output_data_file'] = {
                        'path': paths['json'],
                        'filename': os.path.basename(paths['json']),
                        'url': f'/output/{client}/{os.path.basename(paths["json"])}',
                    }

                # Log completion (best-effort)
                try:
                    from usage_tracker import log_analysis_complete
                    completed = sum(
                        1
                        for page in doc_result.get('pages', [])
                        for r in (page.get('results') or {}).values()
                        if r.get('status') == 'success'
                    )
                    log_analysis_complete(
                        session_id, client, profile_id, user_info,
                        {
                            'checks_completed': completed,
                            'overall_score': summary.get('overall_score', 0),
                            'status': 'success',
                            'mode': 'document',
                            'pages_processed': doc_result.get('pages_processed', 0),
                        },
                    )
                except Exception as log_err:
                    print(f"WARNING: failed to log doc-mode analysis completion: {log_err}")

                progress_tracker[session_id].update({
                    'result': result_data,
                    'status': 'completed',
                    'stage': 'complete',
                    'percentage': 100,
                })
            except Exception as e:
                print(f"ERROR in document run for session {session_id}: {e}")
                traceback.print_exc()
                if session_id in progress_tracker:
                    progress_tracker[session_id].update({
                        'status': 'error',
                        'stage': 'error',
                        'error': str(e),
                        'current_check_display': 'Document analysis failed',
                    })

        threading.Thread(
            target=run_document,
            args=(session_id, file_path, filename, profile_id, client, output_mode, reference_asset, user_info),
            daemon=True,
        ).start()

        return jsonify({
            'status': 'success',
            'session_id': session_id,
            'mode': 'document',
            'message': 'Document analysis started',
        })
    except Exception as e:
        print(f"ERROR in start_document_analysis: {type(e).__name__}: {e}")
        traceback.print_exc()
        return jsonify({'status': 'error', 'message': str(e)}), 500
@app.route('/api/document/start_diff', methods=['POST'])
@auth.require_auth
def start_document_diff_analysis():
    """Start an old-vs-new PDF diff analysis (Phase 3).

    Multipart fields:
        old_file, new_file: the two PDFs to compare (both required).
    Form fields:
        profile: profile id (required); its ``mode`` must be 'document_diff'.
        client_id / client: client id (required); checked with
            ``_require_client_access``.
        mode: 'json', 'html' or 'both'; any other value falls back to 'both'.

    The uploads are saved under ``UPLOAD_FOLDER/<session_id>/`` and the diff
    runs on a daemon thread through
    ``document_mode.diff_engine.run_document_diff_analysis``. Progress and the
    final result are published in ``progress_tracker[session_id]``.

    Returns:
        JSON ``{'status': 'success', 'session_id': ..., 'mode':
        'document_diff', ...}`` once the worker has been started, a 400/404
        JSON error for invalid input, or a 500 JSON error on an unexpected
        failure.
    """
    try:
        if 'old_file' not in request.files or 'new_file' not in request.files:
            return jsonify({'status': 'error', 'message': "Both 'old_file' and 'new_file' are required"}), 400

        old_file = request.files['old_file']
        new_file = request.files['new_file']
        if not old_file.filename or not new_file.filename:
            return jsonify({'status': 'error', 'message': 'Both files must be selected'}), 400
        if not old_file.filename.lower().endswith('.pdf') or not new_file.filename.lower().endswith('.pdf'):
            return jsonify({'status': 'error', 'message': 'Both old and new must be PDFs'}), 400

        # The filename is client-controlled: keep only its last path component
        # (for either separator style) so it can never point outside the
        # session folder. Plain strings are captured here so the worker thread
        # does not touch request-bound upload objects after the response.
        old_name = os.path.basename(old_file.filename.replace('\\', '/'))
        new_name = os.path.basename(new_file.filename.replace('\\', '/'))
        pair_label = f'{old_name} vs {new_name}'

        profile_id = request.form.get('profile', '').lower()
        if not profile_id:
            return jsonify({'status': 'error', 'message': 'profile is required'}), 400

        output_mode = request.form.get('mode', 'both').lower()
        if output_mode not in ('json', 'html', 'both'):
            output_mode = 'both'

        client = request.form.get('client_id', request.form.get('client', '')).lower()
        if not client:
            return jsonify({'status': 'error', 'message': 'client is required for diff mode'}), 400
        access_err = _require_client_access(client)
        if access_err:
            return access_err

        profile_config = get_profile(profile_id)
        if not profile_config:
            return jsonify({'status': 'error', 'message': f'Profile "{profile_id}" not found'}), 404
        if getattr(profile_config, 'mode', 'asset') != 'document_diff':
            return jsonify({
                'status': 'error',
                'message': f'Profile "{profile_id}" is not a document_diff profile.'
            }), 400

        # NOTE(review): the session id has one-second resolution, so two
        # requests in the same second share a folder and tracker entry.
        session_id = datetime.now().strftime('%Y%m%d_%H%M%S')
        session_folder = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
        os.makedirs(session_folder, exist_ok=True)
        old_path = os.path.join(session_folder, f'old_{old_name}')
        new_path = os.path.join(session_folder, f'new_{new_name}')
        old_file.save(old_path)
        new_file.save(new_path)

        # Resolved once; used for both the start and the completion log.
        user_info = getattr(g, 'user', {'user_id': 'unknown', 'email': 'unknown', 'name': 'unknown'})

        # Usage logging is best-effort and must never block the analysis.
        try:
            from usage_tracker import log_analysis_start
            log_analysis_start(session_id, client, profile_id, user_info, {
                'filename': pair_label,
                'mode': 'document_diff',
            })
        except Exception as log_error:
            print(f"WARNING: failed to log diff start: {log_error}")

        progress_tracker[session_id] = {
            'total_checks': 0,
            'completed_checks': 0,
            'current_check': 'Initializing diff',
            'current_check_display': 'Initializing diff analysis',
            'stage': 'setup',
            'percentage': 0,
            'session_id': session_id,
            'status': 'started',
            'mode': 'document_diff',
        }

        def run_diff():
            """Worker: run the diff, write the reports, publish the result."""
            try:
                profile_config_local = get_profile(profile_id)

                from document_mode.diff_engine import run_document_diff_analysis
                from document_mode.ingest import ingest_pdf
                from document_mode.diff_report_writer import write_diff_report
                from llm_config import call_gemini_vision

                doc_result = run_document_diff_analysis(
                    old_pdf_path=old_path,
                    new_pdf_path=new_path,
                    old_filename=old_name,
                    new_filename=new_name,
                    profile_config=profile_config_local,
                    profile_id=profile_id,
                    progress_tracker=progress_tracker,
                    session_id=session_id,
                    ingest_pdf_fn=ingest_pdf,
                    call_gemini_vision_fn=call_gemini_vision,
                    pages_output_dir_old=os.path.join(session_folder, 'pages_old'),
                    pages_output_dir_new=os.path.join(session_folder, 'pages_new'),
                )

                client_folder = ensure_client_output_folder(client)
                paths = write_diff_report(
                    doc_result,
                    old_filename=old_name,
                    new_filename=new_name,
                    session_id=session_id,
                    output_dir=client_folder,
                    output_mode=output_mode,
                )

                # Wrap the engine result in the envelope the frontend expects.
                summary = doc_result.get('document_summary', {})
                result_data = {
                    'status': 'success',
                    'session_id': session_id,
                    'timestamp': doc_result.get('timestamp'),
                    'filename': pair_label,
                    'profile': profile_id,
                    'profile_id': profile_id,
                    'profile_name': profile_config_local.name,
                    'mode': 'document_diff',
                    'document_result': doc_result,
                    'summary': {
                        'overall_score': summary.get('overall_score', 0),
                        'profile': profile_config_local.name,
                        'grade': summary.get('grade', ''),
                    },
                }
                if paths.get('html'):
                    html_name = os.path.basename(paths['html'])
                    result_data['output_file'] = {
                        'path': paths['html'],
                        'filename': html_name,
                        'url': f'/output/{client}/{html_name}',
                    }
                if paths.get('json'):
                    json_name = os.path.basename(paths['json'])
                    result_data['output_data_file'] = {
                        'path': paths['json'],
                        'filename': json_name,
                        'url': f'/output/{client}/{json_name}',
                    }

                try:
                    from usage_tracker import log_analysis_complete
                    log_analysis_complete(
                        session_id, client, profile_id, user_info,
                        {
                            'checks_completed': len(doc_result.get('pair_diffs', {})),
                            'overall_score': summary.get('overall_score', 0),
                            'status': 'success',
                            'mode': 'document_diff',
                            'token_usage': doc_result.get('token_usage', {}),
                        },
                    )
                except Exception as log_err:
                    print(f"WARNING: failed to log diff completion: {log_err}")

                progress_tracker[session_id]['result'] = result_data
                progress_tracker[session_id]['status'] = 'completed'
                progress_tracker[session_id]['stage'] = 'complete'
                progress_tracker[session_id]['percentage'] = 100
            except Exception as e:
                print(f"ERROR in diff run for session {session_id}: {e}")
                traceback.print_exc()
                # The tracker entry may have been removed while we were running.
                if session_id in progress_tracker:
                    progress_tracker[session_id]['status'] = 'error'
                    progress_tracker[session_id]['stage'] = 'error'
                    progress_tracker[session_id]['error'] = str(e)
                    progress_tracker[session_id]['current_check_display'] = 'Diff analysis failed'

        threading.Thread(target=run_diff, daemon=True).start()

        return jsonify({
            'status': 'success',
            'session_id': session_id,
            'mode': 'document_diff',
            'message': 'Diff analysis started',
        })
    except Exception as e:
        print(f"ERROR in start_document_diff_analysis: {type(e).__name__}: {e}")
        traceback.print_exc()
        return jsonify({'status': 'error', 'message': str(e)}), 500
def _collect_client_output_files(client_folder, client_name, url_prefix):
    """Return listing entries for the .html/.json reports in one client folder.

    Each entry carries 'filename', 'size', 'created' (formatted from
    ``st_ctime``), the raw 'created_timestamp' used for sorting, 'url' and
    'client'.
    """
    entries = []
    for filename in os.listdir(client_folder):
        if not filename.endswith(('.html', '.json')):
            continue
        file_path = os.path.join(client_folder, filename)
        if not os.path.isfile(file_path):
            continue
        file_stats = os.stat(file_path)
        entries.append({
            'filename': filename,
            'size': file_stats.st_size,
            'created': datetime.fromtimestamp(file_stats.st_ctime).strftime('%Y-%m-%d %H:%M:%S'),
            'created_timestamp': file_stats.st_ctime,
            'url': f'{url_prefix}/output/{client_name}/{filename}',
            'client': client_name
        })
    return entries


@app.route('/api/output_files', methods=['GET'])
@auth.require_auth
def list_output_files():
    """List saved output files filtered by client, sorted by creation date (newest first).

    Query parameters:
        client: optional client id. When given, only that client's folder is
            listed (after ``_require_client_access``). When omitted, every
            client folder the caller passes ``_require_client_access`` for is
            listed.

    Files older than 14 days are cleaned up before listing.

    Returns:
        JSON ``{'files': [...]}`` or a 500 JSON error.
    """
    try:
        client_filter = request.args.get('client', None)
        print(f"DEBUG: list_output_files called with client_filter='{client_filter}'")

        if client_filter:
            access_err = _require_client_access(client_filter)
            if access_err:
                return access_err

        # Run cleanup of files older than 14 days
        cleanup_old_files(max_age_days=14)

        files = []
        output_folder = app.config['OUTPUT_FOLDER']
        url_prefix = request.environ.get("SCRIPT_NAME", "")

        if os.path.isdir(output_folder):
            if client_filter:
                client_folder = os.path.join(output_folder, client_filter)
                if os.path.isdir(client_folder):
                    files.extend(_collect_client_output_files(client_folder, client_filter, url_prefix))
            else:
                # No filter: walk every client folder, but apply the same
                # access rule the filtered path and the /output/ download
                # route use, so callers only see files they could open.
                for client_name in os.listdir(output_folder):
                    client_folder = os.path.join(output_folder, client_name)
                    if not os.path.isdir(client_folder):
                        continue
                    if _require_client_access(client_name):
                        continue
                    files.extend(_collect_client_output_files(client_folder, client_name, url_prefix))

        # Sort files by creation time (newest first)
        files.sort(key=lambda entry: entry['created_timestamp'], reverse=True)

        # The raw timestamp was only needed for sorting.
        for entry in files:
            del entry['created_timestamp']

        print(f"DEBUG: Returning {len(files)} files for client_filter='{client_filter}'")
        if files:
            print(f"DEBUG: Sample file clients: {[f.get('client', 'unknown') for f in files[:3]]}")

        return jsonify({'files': files})
    except Exception as e:
        print(f"Error listing output files: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


@app.route('/api/delete_output_files', methods=['POST'])
@auth.require_auth
def delete_output_files():
    """Delete selected output files from one client's output folder.

    JSON body:
        filenames: non-empty list of file names to delete.
        client: id of the client whose folder holds the files.

    The caller must pass ``_require_client_access`` for ``client``. Each
    name is reduced to its basename before use.

    Returns:
        JSON ``{'deleted': [...], 'errors': [...], 'deleted_count': n}``,
        a 400 JSON error for a malformed request, the access error produced
        by ``_require_client_access``, or a 500 JSON error.
    """
    try:
        # silent=True: a missing or non-JSON body becomes a 400 below instead
        # of an exception that surfaces as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        filenames = data.get('filenames', [])
        client = data.get('client', '')

        if not filenames or not client:
            return jsonify({'error': 'Missing filenames or client'}), 400
        if not isinstance(filenames, list) or not isinstance(client, str):
            return jsonify({'error': 'Missing filenames or client'}), 400

        # The client id is joined into a filesystem path, so it must be a
        # single path component.
        if client in ('.', '..') or os.path.basename(client.replace('\\', '/')) != client:
            return jsonify({'error': 'Invalid client'}), 400

        # Same per-client gate as the listing and download endpoints; without
        # it any signed-in user could delete another client's reports.
        access_err = _require_client_access(client)
        if access_err:
            return access_err

        output_folder = app.config['OUTPUT_FOLDER']
        deleted = []
        errors = []

        for filename in filenames:
            if not isinstance(filename, str):
                errors.append({'filename': str(filename), 'error': 'File not found'})
                continue

            # Sanitize filename to prevent path traversal
            safe_filename = os.path.basename(filename)
            file_path = os.path.join(output_folder, client, safe_filename)

            if not os.path.isfile(file_path):
                errors.append({'filename': safe_filename, 'error': 'File not found'})
                continue

            try:
                os.remove(file_path)
            except Exception as e:
                errors.append({'filename': safe_filename, 'error': str(e)})
            else:
                deleted.append(safe_filename)
                print(f"Deleted output file: {file_path}")

        return jsonify({
            'deleted': deleted,
            'errors': errors,
            'deleted_count': len(deleted)
        })
    except Exception as e:
        print(f"Error deleting output files: {e}")
        return jsonify({'error': str(e)}), 500
parameter is provided client_id = request.args.get('client', None) # Force reload profiles to ensure they're up to date from profile_config import load_profiles load_profiles() profiles_summary = get_profile_summary() # Filter by client if specified (using visibility-aware function) if client_id: allowed_profiles = get_profiles_with_visibility(client_id) profiles_summary = { pid: pinfo for pid, pinfo in profiles_summary.items() if pid in allowed_profiles } # Group profiles by type triage_profiles = {} format_profiles = {} # Build detailed profile data including checks all_profiles_detailed = {} for profile_id, profile_info in profiles_summary.items(): # Get full profile data try: profile = get_profile(profile_id) # Load the raw profile JSON to get weight_scale if it exists profile_path = os.path.join('profiles', f'{profile_id}.json') weight_scale = 100 # Default to 100 if os.path.exists(profile_path): with open(profile_path, 'r') as f: profile_data = json.load(f) weight_scale = profile_data.get('weight_scale', 100) all_profiles_detailed[profile_id] = { 'name': profile.name, 'description': profile.description, 'enabled_checks': profile.get_enabled_checks(), 'total_checks': len(profile.checks), 'enabled_count': len(profile.get_enabled_checks()), 'weight_scale': weight_scale, 'mode': getattr(profile, 'mode', 'asset'), 'checks': {check_name: {'weight': config.weight, 'llm': config.llm, 'enabled': config.enabled} for check_name, config in profile.checks.items()} } except: # Fallback to summary if full profile can't be loaded all_profiles_detailed[profile_id] = profile_info if '_triage' in profile_id: brand = profile_id.replace('_triage', '') triage_profiles[brand] = { 'id': profile_id, 'name': profile_info['name'], 'description': profile_info['description'] } elif any(suffix in profile_id for suffix in ['_print', '_digital', '_ooh', '_packaging', '_event', '_indoor']): format_profiles[profile_id] = { 'id': profile_id, 'name': profile_info['name'], 'description': 
@app.route('/api/clients', methods=['GET'])
def get_clients_endpoint():
    """Get clients visible to the current user. Admins see all; others see only their grants.

    Returns:
        JSON ``{'status': 'success', 'clients': {...}, 'is_admin': bool}``.
        Unauthenticated callers receive an empty ``clients`` mapping.
        A 500 JSON error is returned on an unexpected failure.
    """
    from client_config import get_all_clients
    from user_access import get_user_clients, is_admin

    try:
        all_clients = get_all_clients()

        # Resolve the current user's email if authenticated. Unauthenticated
        # callers get an empty list so the UI can prompt for sign-in without
        # leaking the full client catalogue.
        user_email = ''
        try:
            auth_result = app.auth_middleware.is_authenticated()
            if auth_result.get('authenticated'):
                user_email = auth_result.get('user', {}).get('email', '')
        except Exception as auth_err:
            # Best-effort: an auth failure is treated as "not signed in",
            # but it is logged rather than swallowed silently.
            print(f"WARNING: could not resolve user for /api/clients: {auth_err}")

        if not user_email:
            return jsonify({'status': 'success', 'clients': {}, 'is_admin': False})

        allowed_ids = get_user_clients(user_email)
        filtered = {cid: all_clients[cid] for cid in allowed_ids if cid in all_clients}

        return jsonify({
            'status': 'success',
            'clients': filtered,
            'is_admin': is_admin(user_email)
        })
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500


@app.route('/api/all_clients', methods=['GET'])
@auth.require_auth
def list_all_clients_endpoint():
    """
    Full client catalogue (auth required). Used by the Request Access form
    so users can pick clients they don't currently have access to.
    """
    try:
        from client_config import get_all_clients
        return jsonify({'status': 'success', 'clients': get_all_clients()})
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)}), 500


# The route rule must declare <client_id>: Flask passes URL variables to the
# view as keyword arguments, and this view requires a ``client_id`` argument.
@app.route('/api/clients/<client_id>/default_profile', methods=['GET'])
@auth.require_auth
def get_client_default_profile_endpoint(client_id):
    """Return the effective default profile for a client (override or static).

    Auth-required (any signed-in user with access to the client) — read-only,
    no admin gate.

    Returns:
        JSON with 'client_id', 'profiles', 'default_profile' and
        'static_default'; a 404 JSON error for an unknown client; the access
        error from ``_require_client_access``; or a 500 JSON error.
    """
    try:
        from client_config import get_all_clients, get_default_profile, get_client_profiles

        clients = get_all_clients()
        if client_id not in clients:
            return jsonify({'status': 'error', 'message': f'unknown client: {client_id}'}), 404

        access_err = _require_client_access(client_id)
        if access_err:
            return access_err

        return jsonify({
            'status': 'success',
            'client_id': client_id,
            'profiles': get_client_profiles(client_id),
            'default_profile': get_default_profile(client_id),
            'static_default': clients[client_id].get('default_profile'),
        })
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)}), 500


@app.route('/api/clients/<client_id>/default_profile', methods=['PUT'])
@auth.require_auth
def set_client_default_profile_endpoint(client_id):
    """Admin-only: set the default profile for a client (persisted as a runtime override).

    Body: {"profile_id": "<profile id>"}. The profile must already be in the
    client's `profiles` list — we don't allow defaulting to a profile the
    client can't see. Posts to backend/client_defaults.json so a bad write
    can never break server boot.

    Returns:
        JSON with 'client_id' and 'default_profile' on success; the error
        from ``_require_admin``; a 400 JSON error when 'profile_id' is missing
        or ``set_default_profile`` rejects it; or a 500 JSON error.
    """
    actor_email, err = _require_admin()
    if err:
        return err

    try:
        body = request.get_json(silent=True) or {}
        profile_id = (body.get('profile_id') or '').strip()
        if not profile_id:
            return jsonify({'status': 'error', 'message': 'profile_id is required'}), 400

        from client_config import set_default_profile
        ok, reason = set_default_profile(client_id, profile_id)
        if not ok:
            return jsonify({'status': 'error', 'message': reason}), 400

        print(f'Admin {actor_email}: set default_profile for {client_id} → {profile_id}')
        return jsonify({
            'status': 'success',
            'client_id': client_id,
            'default_profile': profile_id,
        })
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)}), 500


@app.route('/api/clients/<client_id>/default_profile', methods=['DELETE'])
@auth.require_auth
def clear_client_default_profile_endpoint(client_id):
    """Admin-only: clear the runtime override so the static default applies again.

    Returns:
        JSON with 'client_id' and the now-effective 'default_profile'; the
        error from ``_require_admin``; or a 500 JSON error.
    """
    actor_email, err = _require_admin()
    if err:
        return err

    try:
        from client_config import clear_default_profile_override, get_default_profile

        clear_default_profile_override(client_id)
        print(f'Admin {actor_email}: cleared default_profile override for {client_id}')
        return jsonify({
            'status': 'success',
            'client_id': client_id,
            'default_profile': get_default_profile(client_id),
        })
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)}), 500
""" from html import escape as _html_escape try: payload = request.get_json(silent=True) or {} requested_clients = payload.get('clients') or [] reason = (payload.get('reason') or '').strip() if not isinstance(requested_clients, list) or not requested_clients: return jsonify({'status': 'error', 'message': 'Select at least one client'}), 400 # Always source identity from the verified session, not the body. user_email = getattr(g, 'user', {}).get('email', '') user_name = getattr(g, 'user', {}).get('name', '') or user_email if not user_email: return jsonify({'status': 'error', 'message': 'Authentication required'}), 401 from client_config import get_all_clients all_clients = get_all_clients() invalid = [c for c in requested_clients if c not in all_clients] if invalid: return jsonify({'status': 'error', 'message': f'Unknown client(s): {", ".join(invalid)}'}), 400 client_labels = [all_clients[c].get('display_name', c) for c in requested_clients] from user_access import list_access_entries access_data = list_access_entries() admin_recipients = [e['email'] for e in access_data.get('entries', []) if e.get('is_admin') and e.get('email')] if not admin_recipients: return jsonify({'status': 'error', 'message': 'No admin recipients configured'}), 500 subject = f'AI QC: Client access request from {user_name}' text_body_lines = [ f'{user_name} ({user_email}) has requested access to:', '', ] + [f' - {label}' for label in client_labels] if reason: text_body_lines += ['', 'Reason / context:', reason] text_body_lines += ['', 'Review and grant access via the Admin panel > User Access tab.'] text_body = '\n'.join(text_body_lines) html_clients = ''.join(f'
  • {_html_escape(label)}
  • ' for label in client_labels) html_reason = '' if reason: html_reason = ( '

    Reason / context:
    ' + _html_escape(reason).replace('\n', '
    ') + '

    ' ) html_body = ( f'

    {_html_escape(user_name)} ' f'({_html_escape(user_email)}) has requested access to:

    ' f'
      {html_clients}
    ' f'{html_reason}' '

    Review and grant access via the Admin panel > User Access tab.

    ' ) from email_service import send_email ok, err = send_email( to_addresses=admin_recipients, subject=subject, body=text_body, html_body=html_body, reply_to=user_email, ) try: from usage_tracker import log_access_request log_access_request({ 'user_email': user_email, 'user_name': user_name, 'requested_clients': requested_clients, 'reason': reason, 'recipients': admin_recipients, 'email_sent': ok, 'email_error': err, }) except Exception as log_err: print(f'[access_request] log failed: {log_err}') if not ok: return jsonify({'status': 'error', 'message': f'Could not send email: {err}'}), 502 return jsonify({ 'status': 'success', 'message': 'Request sent — an admin will review it shortly.', 'recipients_count': len(admin_recipients), }) except Exception as e: print(f'Error in /api/access_request: {e}') import traceback traceback.print_exc() return jsonify({'status': 'error', 'message': str(e)}), 500 @app.route('/api/models', methods=['GET']) def get_models_endpoint(): """Get all available LLM models for selection""" from llm_config import get_available_models, get_model_info try: available_models = get_available_models() model_info = get_model_info() return jsonify({ 'status': 'success', 'models': available_models, 'model_info': model_info }) except Exception as e: return jsonify({ 'status': 'error', 'message': str(e) }), 500 @app.route('/api/run_check', methods=['POST']) def run_check(): """API endpoint to run a single QC check""" data = request.json # Validate inputs if not data or 'check_name' not in data or 'file_path' not in data: return jsonify({'status': 'error', 'message': 'Missing required parameters'}), 400 check_name = data['check_name'] file_path = data['file_path'] model_name = data.get('model_name', 'Gemini') # Default to Gemini if not specified output_mode = data.get('mode', 'json').lower() # Get output mode, default to JSON # Validate check exists if check_name not in qc_apps: return jsonify({'status': 'error', 'message': f'QC check "{check_name}" 
not found'}), 404 # Validate file exists if not os.path.exists(file_path): return jsonify({'status': 'error', 'message': 'File not found'}), 404 try: # Get the prompt prompt = qc_apps[check_name]['prompt'] # Get reference image if available reference_path = qc_apps[check_name].get('reference_image') # Run the QC check with reference image if available result = run_visual_qc( prompt=prompt, asset_path=file_path, reference_path=reference_path, model_name=model_name ) # Extract score from result if possible score = None try: # Use our extraction function to get score from JSON blocks json_data = extract_json_from_response(result['response']) if 'score' in json_data: score = json_data.get('score') print(f"Extracted score from JSON block: {score}") # If we still don't have a score, look for any score in text if score is None: # Try to find a score pattern in the text score_pattern = r'["\']score["\']\s*:\s*(\d+)' score_match = re.search(score_pattern, result['response']) if score_match: score = int(score_match.group(1)) print(f"Extracted score from regex: {score}") else: # Look for visual evidence of actual scores in text like "score: 7", "score is 8", "score of 9 out of 10" descriptive_score_pattern = r'score(?:\s+is|\s*:\s*|\s+of\s+)(?:\s*)(\d+)(?:\s*out\s*of\s*10)?' 
descriptive_match = re.search(descriptive_score_pattern, result['response'].lower()) if descriptive_match: score = int(descriptive_match.group(1)) print(f"Extracted score from descriptive text: {score}") else: # Try to determine score from pass/fail status (legacy mode) result_text = result.get('response', '').upper() if "PASS" in result_text: score = 10 # Pass = 10/10 print("Detected PASS keyword, setting score to 10") elif "FAIL" in result_text: score = 3 # Fail = 3/10 print("Detected FAIL keyword, setting score to 3") else: print(f"Could not extract score, using default of 5") score = 5 # Default middle score except Exception as parse_error: print(f"Error parsing score from response: {parse_error}") score = 5 # Default to middle score # Add the score to the result result['score'] = score if score is not None else 5 # Default to middle score if extraction fails # Process result for JSON mode if output_mode == 'json': # For JSON mode, we update the response to be ONLY the JSON part json_data = extract_json_from_response(result['response']) if json_data: # If we found JSON in the response, replace the full text with just the extracted JSON result['original_response'] = result['response'] # Save original for debugging result['response'] = json.dumps(json_data, indent=2) # Pretty print the JSON # Add metadata but don't save to file output_json = { "timestamp": datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "check_name": check_name, "display_name": qc_apps[check_name]['display_name'], "model": model_name, "file_analyzed": os.path.basename(file_path), "result": result, "score": result['score'], "has_reference": reference_path is not None } return jsonify(output_json) except Exception as e: return jsonify({ 'status': 'error', 'message': str(e), 'traceback': traceback.format_exc() }), 500 @app.route('/api/triage_file', methods=['POST']) def api_triage_file(): """API endpoint to perform file type triage detection""" try: # Force reload QC apps to ensure triage app is 
@app.route('/api/process_triaged_file', methods=['POST'])
@auth.require_auth
def api_process_triaged_file():
    """API endpoint to process a file that has already been triaged.

    JSON body:
        session_id: id returned by the triage step; names the upload folder
            under ``UPLOAD_FOLDER`` (required).
        profile: profile id; must be a key of ``PROFILES`` (required).
        mode: 'json' (default) or 'html'.

    Every enabled check of the profile is run against the first file in the
    session folder, scores are weighted with the profile's weights, a grade
    is determined and the report is saved with ``save_results_to_file``.

    Returns:
        The HTML response from ``generate_html_response`` in 'html' mode,
        otherwise the report as JSON (including 'output_file'). Invalid input
        yields a 400/404 JSON error; unexpected failures a 500 JSON error
        with traceback.
    """
    try:
        # silent=True: a missing or non-JSON body yields the 400 below.
        data = request.get_json(silent=True)

        # Validate required parameters
        if not isinstance(data, dict) or 'session_id' not in data or 'profile' not in data:
            return jsonify({'status': 'error', 'message': 'Missing required parameters: session_id and profile'}), 400

        session_id = data['session_id']
        profile_name = data['profile']
        output_mode = str(data.get('mode', 'json')).lower()

        # session_id is joined into a filesystem path, so it must be a plain
        # single path component — otherwise a value such as '../..' would
        # let the caller point the analysis at an arbitrary directory.
        if (not isinstance(session_id, str) or not session_id
                or session_id in ('.', '..')
                or os.path.basename(session_id.replace('\\', '/')) != session_id):
            return jsonify({'status': 'error', 'message': 'Invalid session_id'}), 400

        # Reconstruct file path from session
        session_folder = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
        if not os.path.isdir(session_folder):
            return jsonify({'status': 'error', 'message': 'Session not found'}), 404

        # Only regular files qualify; sub-folders are not assets.
        files = [
            entry for entry in os.listdir(session_folder)
            if os.path.isfile(os.path.join(session_folder, entry))
        ]
        if not files:
            return jsonify({'status': 'error', 'message': 'No file found in session'}), 404

        filename = files[0]  # Use the first file found
        file_path = os.path.join(session_folder, filename)

        # Validate profile exists
        if profile_name not in PROFILES:
            available_profiles = list(PROFILES.keys())
            return jsonify({
                'status': 'error',
                'message': f'Invalid profile: {profile_name}. Available profiles: {available_profiles}'
            }), 400

        # Get profile and process checks
        profile = get_profile(profile_name)
        selected_checks = profile.get_enabled_checks()
        profile_weights = profile.get_check_weights()

        if not selected_checks:
            return jsonify({'status': 'error', 'message': 'No QC checks available for the selected profile'}), 400

        check_results = {}
        overall_weighted_score = 0
        total_weight = 0
        # Checks without a configured weight share the weight equally.
        default_weight = 1.0 / len(selected_checks)

        for position, check_name in enumerate(selected_checks, start=1):
            print(f"Processing check {position}/{len(selected_checks)}: {check_name}")

            # Skip if check is not available
            if check_name not in qc_apps:
                check_results[check_name] = {
                    'status': 'error',
                    'message': f'QC check not found'
                }
                continue

            check_app = qc_apps[check_name]
            ai_model = profile.get_check_llm(check_name)
            prompt = check_app['prompt']
            reference_path = check_app.get('reference_image')

            try:
                result = run_visual_qc(
                    prompt=prompt,
                    asset_path=file_path,
                    reference_path=reference_path,
                    model_name=ai_model
                )

                result['score'] = extract_score_from_result(result, profile, check_name)

                # In JSON mode keep only the extracted JSON as the response.
                if output_mode == 'json':
                    json_data = extract_json_from_response(result['response'])
                    if json_data:
                        result['original_response'] = result['response']
                        result['response'] = json.dumps(json_data, indent=2)

                weight = profile_weights.get(check_name, default_weight)
                weighted_score = result['score'] * weight
                overall_weighted_score += weighted_score
                total_weight += weight

                check_results[check_name] = {
                    'status': 'success',
                    'display_name': check_app['display_name'],
                    'score': result['score'],
                    'weight': weight,
                    'weighted_score': weighted_score,
                    'has_reference': reference_path is not None,
                    'result': result
                }
            except Exception as e:
                # One failing check must not abort the remaining checks.
                check_results[check_name] = {
                    'status': 'error',
                    'message': str(e),
                    'traceback': traceback.format_exc()
                }

        successful_checks = [r for r in check_results.values() if r.get('status') == 'success']

        # Calculate overall score
        overall_score = 0
        if total_weight > 0:
            # Scores are 0-10 per check, so the maximum is 10 * total_weight.
            # The Unilever key visual profile is reported out of 120, every
            # other profile out of 100.
            scale = 120 if profile_name == 'unilever_key_visual' else 100
            max_possible_score = 10 * total_weight
            overall_score = min(round((overall_weighted_score / max_possible_score) * scale, 1), scale)
        elif successful_checks:
            # No usable weights: fall back to the plain average, scaled to 100.
            sum_scores = sum(r.get('score', 0) for r in successful_checks)
            overall_score = min(round((sum_scores / len(successful_checks)) * 10, 1), 100)

        # Generate report data
        report_data = {
            'session_id': session_id,
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'filename': filename,
            'profile': profile_name,
            'profile_id': profile_name,  # Store the profile ID for HTML detection
            'profile_name': profile.name,
            'model': 'Profile-based selection',
            'results': check_results,
            'summary': {
                'overall_score': overall_score,
                'profile': profile.name,
                'checks_count': len(successful_checks),
                'total_checks': len(check_results),
                'total_weighted_score': overall_weighted_score,
                'total_weight': total_weight
            }
        }

        # Determine overall grade
        grade = determine_grade(overall_score)

        # L'Oreal Static / Boots Static override: fail if ANY individual
        # successful check fails (score < 6).
        if profile_name in ('loreal_static', 'boots_static'):
            for check_data in successful_checks:
                check_score = check_data.get('score', 0)
                if check_score is not None and check_score < 6:
                    grade = 'Fail'
                    break

        report_data['summary']['grade'] = grade

        # Save results to file regardless of output mode
        output_path = save_results_to_file(report_data, filename, output_mode, session_id, file_path)

        # Return results based on output mode
        if output_mode == 'html':
            return generate_html_response(report_data, filename)

        # For JSON mode, return the data with file path info
        output_name = os.path.basename(output_path)
        report_data['output_file'] = {
            'path': output_path,
            'filename': output_name,
            'url': f'{request.environ.get("SCRIPT_NAME", "")}/output/{output_name}'
        }
        return jsonify(report_data)
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e),
            'traceback': traceback.format_exc()
        }), 500
selected_profiles = request.form.getlist('profiles[]') elif 'profile' in request.form: selected_profiles = [request.form.get('profile')] if not selected_profiles: selected_profiles = ['default'] # Use default if none specified # Validate profiles for profile_id in selected_profiles: if profile_id != 'custom' and profile_id not in PROFILES: available_profiles = list(PROFILES.keys()) return jsonify({ 'status': 'error', 'message': f'Invalid profile: {profile_id}. Available profiles: {available_profiles}' }), 400 # Track all selected checks and weights from all profiles all_selected_checks = set() combined_profile_weights = {} selected_profile_names = [] custom_selection = False # Process each selected profile for profile_id in selected_profiles: if profile_id == 'custom': # Custom selection custom_checks = request.form.getlist('checks') if not custom_checks: return jsonify({'status': 'error', 'message': 'Custom profile selected but no checks were specified'}), 400 all_selected_checks.update(custom_checks) custom_selection = True else: # Standard profile profile = get_profile(profile_id) selected_profile_names.append(profile.name) profile_checks = profile.get_enabled_checks() all_selected_checks.update(profile_checks) # Add profile weights (we'll normalize later) profile_weights = profile.get_check_weights() for check, weight in profile_weights.items(): if check in combined_profile_weights: # If a check is in multiple profiles, use the higher weight combined_profile_weights[check] = max(combined_profile_weights[check], weight) else: combined_profile_weights[check] = weight # Convert to list for consistency with the rest of the code selected_checks = list(all_selected_checks) profile_weights = combined_profile_weights if not selected_checks: return jsonify({'status': 'error', 'message': 'No QC checks available for the selected profile'}), 400 # Create unique session ID and save file session_id = datetime.now().strftime('%Y%m%d_%H%M%S') # Create session folder and save 
file session_folder = os.path.join(app.config['UPLOAD_FOLDER'], session_id) os.makedirs(session_folder, exist_ok=True) file_path = os.path.join(session_folder, file.filename) file.save(file_path) # Process each check check_results = {} overall_weighted_score = 0 total_weight = 0 # Track progress for client progress = { 'total_checks': len(selected_checks), 'completed_checks': 0, 'current_check': '' } for i, check_name in enumerate(selected_checks): # Update progress progress['current_check'] = check_name progress['completed_checks'] = i print(f"API Progress: {i+1}/{len(selected_checks)} - Processing {check_name}") # Skip if check is not available if check_name not in qc_apps: check_results[check_name] = { 'status': 'error', 'message': f'QC check not found' } continue # Determine which LLM to use based on the selected profiles # Default to Gemini ai_model = 'Gemini' # Check each profile's LLM preference for this check for profile_id in selected_profiles: if profile_id != 'custom': # Skip custom profile check_llm_map = get_check_llm_map(profile_id) if check_name in check_llm_map: # If any profile uses OpenAI for this check, prioritize OpenAI if check_llm_map[check_name] == 'OpenAI': ai_model = 'OpenAI' break # Get the prompt prompt = qc_apps[check_name]['prompt'] # Get reference image if available reference_path = qc_apps[check_name].get('reference_image') try: # Run the QC check result = run_visual_qc( prompt=prompt, asset_path=file_path, reference_path=reference_path, model_name=ai_model ) # Extract score from result score = None try: # Use our extraction function to get score from JSON blocks json_data = extract_json_from_response(result['response']) if 'score' in json_data: score = json_data.get('score') print(f"Extracted score from JSON block: {score}") # If we still don't have a score, look for any score in text if score is None: # Try to find a score pattern in the text score_pattern = r'["\']score["\']\s*:\s*(\d+)' score_match = re.search(score_pattern, 
result['response']) if score_match: score = int(score_match.group(1)) print(f"Extracted score from regex: {score}") else: # Look for descriptive scores in text descriptive_score_pattern = r'score(?:\s+is|\s*:\s*|\s+of\s+)(?:\s*)(\d+)(?:\s*out\s*of\s*10)?' descriptive_match = re.search(descriptive_score_pattern, result['response'].lower()) if descriptive_match: score = int(descriptive_match.group(1)) print(f"Extracted score from descriptive text: {score}") else: # Try to determine score from pass/fail status (legacy mode) result_text = result.get('response', '').upper() if "PASS" in result_text: score = 10 # Pass = 10/10 print("Detected PASS keyword, setting score to 10") elif "FAIL" in result_text: score = 3 # Fail = 3/10 print("Detected FAIL keyword, setting score to 3") else: score = 5 # Default middle score print(f"Could not extract score, using default of 5") except Exception as parse_error: print(f"Error parsing score from response: {parse_error}") score = 5 # Default to middle score # Add the score to the result result['score'] = score if score is not None else 5 # Process result for JSON mode if output_mode == 'json': # For JSON mode, we update the response to be ONLY the JSON part json_data = extract_json_from_response(result['response']) if json_data: # If we found JSON in the response, replace the full text with just the extracted JSON result['original_response'] = result['response'] # Save original for debugging result['response'] = json.dumps(json_data, indent=2) # Pretty print the JSON # Calculate weighted score weight = profile_weights.get(check_name, 0) if weight == 0 and ('default' in selected_profiles or len(selected_profiles) == 0): weight = 1.0 / len(selected_checks) weighted_score = result['score'] * weight overall_weighted_score += weighted_score total_weight += weight # Store result check_results[check_name] = { 'status': 'success', 'display_name': qc_apps[check_name]['display_name'], 'score': result['score'], 'weight': weight, 
'weighted_score': weighted_score, 'has_reference': reference_path is not None, 'result': result } # Update progress after successful check progress['completed_checks'] = i + 1 except Exception as e: check_results[check_name] = { 'status': 'error', 'message': str(e), 'traceback': traceback.format_exc() } # Calculate overall score overall_score = 0 if total_weight > 0: # Special case for Unilever key visual profile - show percentage of 120 if len(selected_profiles) == 1 and selected_profiles[0] == 'unilever_key_visual': # For Unilever profile, calculate as percentage of 120 # Maximum possible score is 10 * 1.2 = 12, so scale to 120 max_possible_score = 10 * total_weight # 10 * 1.2 = 12 overall_score = min(round((overall_weighted_score / max_possible_score) * 120, 1), 120) else: # Maximum possible score is 10 * total_weight, so normalize to 100% max_possible_score = 10 * total_weight overall_score = min(round((overall_weighted_score / max_possible_score) * 100, 1), 100) else: # Simple average if no weights successful_checks = [r for r in check_results.values() if r.get('status') == 'success'] if successful_checks: sum_scores = sum(r.get('score', 0) for r in successful_checks) overall_score = min(round((sum_scores / len(successful_checks)) * 10, 1), 100) # Generate report data report_data = { 'session_id': session_id, 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'filename': file.filename, 'profiles': selected_profiles, 'profile_id': selected_profiles[0] if len(selected_profiles) == 1 else None, # Store single profile ID for HTML detection 'profile_name': ", ".join(selected_profile_names) or "Multiple Profiles", 'model': 'Profile-based selection', 'results': check_results, 'summary': { 'overall_score': overall_score, 'profile': ", ".join(selected_profile_names) or "Multiple Profiles", 'checks_count': len([r for r in check_results.values() if r.get('status') == 'success']), 'total_checks': len(check_results), 'total_weighted_score': overall_weighted_score, 
'total_weight': total_weight } } # Determine overall grade based on score (Pass/Fail) avg_individual_score = overall_score / 10 # Convert to 1-10 scale grade = 'Pass' if avg_individual_score >= 6 else 'Fail' # Add grade to summary report_data['summary']['grade'] = grade # Save results to file regardless of output mode output_path = save_results_to_file(report_data, file.filename, output_mode, session_id, file_path) # Return data based on output mode if output_mode == 'html': # Create a more interactive HTML view with formatted results # Define a function to get color based on score def get_score_result(score): if score >= 6: return "Pass", "#28a745" # Green for pass else: return "Fail", "#dc3545" # Red for fail # Build HTML for each check result check_results_html = "" for check_name, check_data in report_data['results'].items(): if check_data['status'] == 'success': display_name = check_data.get('display_name', check_name) score = check_data.get('score', 0) result_text, score_color = get_score_result(score) # Extract response text (strip JSON blocks for cleaner display) response_text = check_data['response'] # Remove JSON code blocks for cleaner reading import re response_text = re.sub(r'```json.*?```', '', response_text, flags=re.DOTALL) response_text = response_text.strip() check_results_html += f"""

    {display_name} {result_text}

    {response_text.replace(chr(10), '
    ')}

    """ # Get summary score result overall_score = report_data['summary']['overall_score'] overall_result, overall_color = get_score_result(overall_score/10) # Normalize to 0-10 scale html_content = f""" Visual AI QC Results for {file.filename}

    Visual AI QC Results

    File analyzed: {file.filename} | Timestamp: {report_data['timestamp']}

    Summary

    Overall Result: {overall_result}

    Status: {report_data['summary']['grade']}

    Profile: {report_data['profile_name']}

    Total Checks: {report_data['summary']['checks_count']}

    Detailed Results

    {check_results_html}

    Show/Hide Raw JSON Data

    {json.dumps(report_data, indent=2)}
    """ # For HTML mode, redirect to the saved file return Response(html_content, mimetype='text/html') else: # For JSON mode, return the data with file path info report_data['output_file'] = { 'path': output_path, 'filename': os.path.basename(output_path), 'url': f'{request.environ.get("SCRIPT_NAME", "")}/output/{os.path.basename(output_path)}' } return jsonify(report_data) except Exception as e: # Include progress information in error response error_progress = progress.get('completed_checks', 0) if 'progress' in locals() else 0 total_checks = progress.get('total_checks', 0) if 'progress' in locals() else 0 if not total_checks and 'selected_checks' in locals(): total_checks = len(selected_checks) error_data = { 'status': 'error', 'message': str(e), 'traceback': traceback.format_exc(), 'progress': { 'total_checks': total_checks, 'completed_checks': error_progress, 'percentage': (error_progress / total_checks * 100) if total_checks else 0 } } # Get output mode if possible, default to JSON if not defined or in case of error output_mode = request.form.get('mode', 'json').lower() if 'mode' in request.form else 'json' if output_mode == 'html': # Create a more user-friendly error page html_content = f""" Visual AI QC Error

    Error Processing Request

    Error Message

    {error_data['message']}

    Processing Progress

    Completed {error_data['progress']['completed_checks']} of {error_data['progress']['total_checks']} checks

    Show/Hide Technical Details

    {json.dumps(error_data, indent=2)}
    """ return Response(html_content, mimetype='text/html'), 500 else: # Default JSON mode return jsonify(error_data), 500 @app.route('/analyze', methods=['POST']) @app.route('/api/analyze', methods=['POST']) @auth.require_auth def api_analyze_with_triage(): """ Smart analysis endpoint that combines triage and profile-based QC. This endpoint: 1. Runs triage to determine file type 2. Automatically selects appropriate profile based on brand + file type 3. Runs full QC analysis with selected profile 4. Returns combined results showing both triage and QC analysis Parameters: - file: Image file to analyze - brand: Brand name (diageo, unilever, general) - defaults to 'general' - mode: Output mode (json, html) - defaults to 'json' - return_file: Whether to return file info (true/false) - defaults to 'false' """ try: # Force reload QC apps to ensure they're available if not qc_apps: load_qc_apps() # Check if file is in request if 'file' not in request.files: return jsonify({'status': 'error', 'message': 'No file part'}), 400 file = request.files['file'] # Check if file was selected if file.filename == '': return jsonify({'status': 'error', 'message': 'No selected file'}), 400 # Get parameters brand = request.form.get('brand', 'general').lower() profile = request.form.get('profile', '').lower() output_mode = request.form.get('mode', 'json').lower() return_file = request.form.get('return_file', 'false').lower() == 'true' # Validate brand - allow any brand name now that users can create custom profiles # The old validation was too restrictive for custom profiles if not brand or brand.strip() == '': brand = 'general' # Validate output mode if output_mode not in ['json', 'html']: output_mode = 'json' # Create unique session ID and save file session_id = datetime.now().strftime('%Y%m%d_%H%M%S') session_folder = os.path.join(app.config['UPLOAD_FOLDER'], session_id) os.makedirs(session_folder, exist_ok=True) file_path = os.path.join(session_folder, file.filename) 
file.save(file_path) # Initialize progress tracking for this session progress_tracker[session_id] = { 'total_checks': 0, 'completed_checks': 0, 'current_check': 'Initializing', 'current_check_display': 'Initializing Analysis', 'stage': 'setup', 'percentage': 0, 'session_id': session_id } # STEP 1: Run Triage Analysis print(f"Step 1: Running triage analysis for {file.filename}") # Update progress progress_tracker[session_id].update({ 'stage': 'triage', 'current_check': 'file_type_triage', 'current_check_display': 'File Type Detection', 'percentage': 10 }) if 'file_type_triage' not in qc_apps: return jsonify({'status': 'error', 'message': 'File type triage app not available'}), 500 # Get the triage prompt and run triage triage_prompt = qc_apps['file_type_triage']['prompt'] triage_result = run_visual_qc( prompt=triage_prompt, asset_path=file_path, model_name='Gemini' ) # Extract triage results triage_data = extract_json_from_response(triage_result['response']) if not triage_data: return jsonify({ 'status': 'error', 'message': 'Could not extract triage results', 'raw_triage_response': triage_result['response'] }), 500 # STEP 2: Determine Appropriate Profile print(f"Step 2: Determining profile based on {'direct profile parameter' if profile else 'triage results'}") if profile and profile.strip(): # Use the directly specified profile suggested_profile = profile print(f"Using directly specified profile: {suggested_profile}") else: # Get format suffix from triage results format_suffix = triage_data.get('recommended_qc_profile', '_digital') # Construct profile name suggested_profile = f"{brand}{format_suffix}" # Check if the suggested profile exists, fallback to main brand profile if not if suggested_profile not in PROFILES: suggested_profile = brand if suggested_profile not in PROFILES: suggested_profile = 'general' # Final fallback print(f"Profile constructed from triage: {suggested_profile}") print(f"Final selected profile: {suggested_profile}") # STEP 3: Run Full QC 
Analysis with Selected Profile print(f"Step 3: Running QC analysis with profile '{suggested_profile}'") # Get the profile configuration profile_config = get_profile(suggested_profile) if not profile_config: return jsonify({ 'status': 'error', 'message': f'Profile {suggested_profile} not found' }), 400 # Get enabled checks from profile using Profile object methods enabled_checks = profile_config.get_enabled_checks() profile_weights = profile_config.get_check_weights() # Filter to only include checks that exist in qc_apps enabled_checks = [check for check in enabled_checks if check in qc_apps] if not enabled_checks: return jsonify({ 'status': 'error', 'message': f'No enabled checks found for profile {suggested_profile}' }), 400 # Run QC checks in parallel batches # Update progress tracker with total checks progress_tracker[session_id].update({ 'total_checks': len(enabled_checks), 'stage': 'qc_analysis', 'percentage': 20 }) # Get reference asset from form for this endpoint reference_asset = request.form.get('reference_asset', '') # Use the parallel processing function check_results = process_checks_in_batches_with_triage( enabled_checks, qc_apps, profile_config, profile_weights, file_path, reference_asset, brand_db, progress_tracker, session_id, batch_size=15, base_percentage=20, percentage_range=70 ) # STEP 4: Calculate Overall Score print(f"Step 4: Calculating overall score") total_weighted_score = 0 total_weight = 0 completed_checks = 0 failed_checks = 0 for check_name, result in check_results.items(): weight = result.get('weight', 0.1) total_weight += weight if result['status'] == 'completed': completed_checks += 1 score = result.get('score') if score is not None: total_weighted_score += score * weight else: failed_checks += 1 # Calculate overall score - sum of weighted scores scaled to 100 # For profiles with total_weight = 10.0 (like General Check), use direct weighted score # For profiles with total_weight = 1.0, multiply by 10 to scale to 100 if total_weight 
>= 10.0: overall_score = min(total_weighted_score, 100) # Cap at 100 elif total_weight > 0: overall_score = min(total_weighted_score * 10, 100) # Scale to 100-point system, cap at 100 else: overall_score = 0 # STEP 5: Prepare Combined Response print(f"Step 5: Preparing response") # Update progress to completion progress_tracker[session_id].update({ 'stage': 'complete', 'current_check': 'Finalizing', 'current_check_display': 'Finalizing Report', 'percentage': 100, 'completed_checks': len(enabled_checks) }) # Since triage is skipped, set default triage data triage_data = { 'primary_format': 'user_specified', 'specific_type': 'profile_based_analysis', 'confidence_score': 10, 'format_indicators': 'User selected profile directly', 'secondary_format': '', 'recommended_qc_profile': suggested_profile } triage_result = {'response': 'Triage skipped - using user-selected profile directly'} # Build comprehensive response response_data = { 'status': 'success', 'session_id': session_id, 'filename': file.filename, 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), # Triage Results Section 'triage_analysis': { 'primary_format': triage_data.get('primary_format', 'unknown'), 'specific_type': triage_data.get('specific_type', 'unknown'), 'confidence_score': triage_data.get('confidence_score', 0), 'format_indicators': triage_data.get('format_indicators', ''), 'secondary_format': triage_data.get('secondary_format', ''), 'recommended_qc_profile': triage_data.get('recommended_qc_profile', ''), 'full_triage_response': triage_result['response'] }, # Profile Selection Section 'profile_selection': { 'requested_brand': brand, 'suggested_profile': suggested_profile, 'profile_used': suggested_profile, 'profile_name': profile_config.name, 'profile_description': profile_config.description, 'reference_asset': reference_asset if reference_asset else None, 'reference_asset_used': bool(reference_asset) }, # QC Analysis Results Section 'qc_analysis': { 'overall_score': round(overall_score, 2), 
'total_checks': len(enabled_checks), 'completed_checks': completed_checks, 'failed_checks': failed_checks, 'total_weight': round(total_weight, 2), 'check_results': check_results }, # Summary Section 'summary': { 'file_type_detected': f"{triage_data.get('primary_format', 'unknown')} - {triage_data.get('specific_type', 'unknown')}", 'profile_applied': profile_config.name, 'overall_score': round(overall_score, 2), 'score_percentage': f"{round(overall_score, 1)}%", 'checks_passed': completed_checks, 'checks_failed': failed_checks, 'confidence': triage_data.get('confidence_score', 0) } } # Add file info if requested if return_file: response_data['file_info'] = { 'path': file_path, 'size': os.path.getsize(file_path), 'session_folder': session_folder } # Media plan matching try: media_plan_data = _get_active_media_plan(client) if media_plan_data: from media_plan_processor import find_matching_asset, validate_asset_specs match_result = find_matching_asset(file.filename, media_plan_data) if match_result: validation = validate_asset_specs(file_path, match_result['match']) response_data['media_plan_match'] = { 'matched': True, 'confidence': match_result['confidence'], 'match_type': match_result['match_type'], 'asset_id': match_result['match']['asset_id'], 'spec': match_result['match'], 'validation': validation, } else: response_data['media_plan_match'] = {'matched': False} except Exception as mp_err: print(f"Media plan matching error: {mp_err}") # Auto-save HTML report to output directory (regardless of output_mode) try: # Generate comprehensive HTML report using the same format as the web UI html_report_content = generate_comprehensive_html_report(response_data, file.filename, file_path) # Create output filename in client-specific folder safe_filename = re.sub(r'[^a-zA-Z0-9.-]', '_', file.filename) output_filename = f"{session_id}_{safe_filename}_report.html" client_output_folder = ensure_client_output_folder(client) output_path = os.path.join(client_output_folder, 
output_filename) # Save HTML report to output directory with open(output_path, 'w', encoding='utf-8') as f: f.write(html_report_content) print(f"HTML report auto-saved to: {output_path}") # Add output file info to response response_data['output_file'] = { 'path': output_path, 'filename': output_filename, 'auto_saved': True } except Exception as e: print(f"Error auto-saving HTML report: {str(e)}") # Don't fail the entire request if auto-save fails # Return appropriate format if output_mode == 'html': # Create HTML response with both triage and QC results triage_html = f"""

    📋 File Type Analysis (Triage)

    Detected Format: {triage_data.get('primary_format', 'unknown').title()} - {triage_data.get('specific_type', 'unknown')}
    Confidence: {triage_data.get('confidence_score', 0)}/10
    Key Indicators: {triage_data.get('format_indicators', 'N/A')}
    Recommended Profile: {triage_data.get('recommended_qc_profile', 'N/A')}
    """ profile_html = f"""

    🎯 Profile Selection

    Brand: {brand.title()}
    Profile Used: {profile_config.name}
    Description: {profile_config.description}
    """ # Generate QC results HTML check_results_html = "" for check_name, result in check_results.items(): score = result.get('score', 0) if result.get('score') is not None else 0 status_class = "pass" if result['status'] == 'completed' and score >= 6 else "fail" result_display = "Pass" if score >= 6 else "Fail" if score > 0 else "N/A" check_results_html += f"""

    {check_name.replace('_', ' ').title()}

    Result: {result_display}
    Weight: {result.get('weight', 0.1)}
    {result.get('response', 'No response')[:500]}...
    """ html_content = f""" Visual AI QC Analysis Results

    🤖 Visual AI QC Analysis Results

    File: {file.filename} | Analyzed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

    📊 Summary

    Overall Result: {'Pass' if overall_score/10 >= 6 else 'Fail'}
    File Type: {triage_data.get('primary_format', 'unknown').title()}
    Profile Used: {profile_config.name}
    Checks Performed: {completed_checks}/{len(enabled_checks)}
    {triage_html} {profile_html}

    🔍 Quality Control Analysis

    {check_results_html}

    Show/Hide Raw JSON Data

    {json.dumps(response_data, indent=2)}
    """ return Response(html_content, mimetype='text/html') else: # Add session_id to response for progress tracking response_data['session_id'] = session_id # Return JSON response return jsonify(response_data) except Exception as e: # Update progress to error state if session exists if 'session_id' in locals() and session_id in progress_tracker: progress_tracker[session_id].update({ 'stage': 'error', 'current_check': 'Error', 'current_check_display': 'Analysis Failed', 'percentage': 0 }) return jsonify({ 'status': 'error', 'message': str(e), 'traceback': traceback.format_exc() }), 500 finally: # Schedule cleanup of progress tracker after 5 minutes import threading import time def cleanup_progress(): time.sleep(300) # 5 minutes if 'session_id' in locals() and session_id in progress_tracker: del progress_tracker[session_id] if 'session_id' in locals(): threading.Thread(target=cleanup_progress, daemon=True).start() def _update_client_config_for_profile(profile_id, client_ids, action='add'): """ Update client_config.py to add or remove a profile from client configurations Args: profile_id: Profile ID to add/remove client_ids: List of client IDs to update action: 'add' or 'remove' """ try: client_config_path = os.path.join(os.path.dirname(__file__), 'client_config.py') # Read current client_config.py with open(client_config_path, 'r') as f: config_content = f.read() # Parse CLIENT_PROFILES dictionary # This is a simple approach - we'll reload the module and update it from client_config import CLIENT_PROFILES import copy updated_config = copy.deepcopy(CLIENT_PROFILES) # Update each client for client_id in client_ids: if client_id in updated_config: current_profiles = updated_config[client_id].get('profiles', []) if action == 'add' and profile_id not in current_profiles: current_profiles.append(profile_id) updated_config[client_id]['profiles'] = current_profiles elif action == 'remove' and profile_id in current_profiles: current_profiles.remove(profile_id) 
updated_config[client_id]['profiles'] = current_profiles # Write updated config back to file config_lines = [ "#!/usr/bin/env python3", '"""', "Client configuration module for managing client-profile relationships", '"""', "", "CLIENT_PROFILES = {", ] for client_id, client_data in updated_config.items(): config_lines.append(f" '{client_id}': {{") config_lines.append(f" 'name': '{client_data['name']}',") profiles_str = ", ".join([f"'{p}'" for p in client_data['profiles']]) config_lines.append(f" 'profiles': [{profiles_str}],") config_lines.append(f" 'display_name': '{client_data['display_name']}',") config_lines.append(f" 'description': '{client_data['description']}'") config_lines.append(" },") config_lines.append("}") config_lines.append("") config_lines.append("def get_client_profiles(client_id):") config_lines.append(' """Get profiles available for a specific client"""') config_lines.append(" return CLIENT_PROFILES.get(client_id, {}).get('profiles', [])") config_lines.append("") config_lines.append("def get_all_clients():") config_lines.append(' """Get all available clients"""') config_lines.append(" return CLIENT_PROFILES") config_lines.append("") config_lines.append("def validate_client_profile(client_id, profile_id):") config_lines.append(' """Validate that a profile belongs to a client"""') config_lines.append(" client_profiles = get_client_profiles(client_id)") config_lines.append(" return profile_id in client_profiles") config_lines.append("") with open(client_config_path, 'w') as f: f.write('\n'.join(config_lines)) # Reload the client_config module to pick up changes import importlib import client_config importlib.reload(client_config) return True except Exception as e: print(f"Error updating client_config: {str(e)}") return False @app.route('/api/profiles', methods=['POST']) @auth.require_auth def create_profile(): """Create a new QC profile""" try: data = request.get_json() if not data: return jsonify({'status': 'error', 'message': 'No data provided'}), 
400 profile_name = data.get('name', '').strip() if not profile_name: return jsonify({'status': 'error', 'message': 'Profile name is required'}), 400 # Create profile filename (sanitize name) import re safe_name = re.sub(r'[^a-zA-Z0-9_-]', '_', profile_name.lower()) profile_filename = f"{safe_name}.json" profile_path = os.path.join('profiles', profile_filename) # Check if profile already exists if os.path.exists(profile_path): return jsonify({'status': 'error', 'message': 'Profile already exists'}), 400 # Get visibility settings visibility = data.get('visibility', 'all') # 'all' or 'client_specific' visible_to_clients = data.get('visible_to_clients', []) # List of client IDs # Create profile data structure profile_data = { "name": profile_name, "description": data.get('description', ''), "pass_threshold": data.get('pass_threshold', 85), "visibility": visibility, "visible_to_clients": visible_to_clients, "created_at": datetime.now().isoformat(), "created_by": g.user.get('email', 'unknown'), "version": 1, "checks": data.get('checks', {}) } # Save profile to file with open(profile_path, 'w') as f: json.dump(profile_data, f, indent=2) # Update client_config if client_specific visibility if visibility == 'client_specific' and visible_to_clients: _update_client_config_for_profile(safe_name, visible_to_clients, action='add') elif visibility == 'all': # Add to all clients in client_config from client_config import get_all_clients all_clients = list(get_all_clients().keys()) _update_client_config_for_profile(safe_name, all_clients, action='add') return jsonify({ 'status': 'success', 'message': f'Profile "{profile_name}" created successfully', 'profile_id': safe_name, 'profile_path': profile_path, 'visibility': visibility }) except Exception as e: return jsonify({ 'status': 'error', 'message': str(e) }), 500 @app.route('/api/profiles/', methods=['PUT']) @auth.require_auth def update_profile(profile_id): """Update an existing QC profile by creating a new version""" try: data = 
request.get_json() if not data: return jsonify({'status': 'error', 'message': 'No data provided'}), 400 profile_filename = f"{profile_id}.json" profile_path = os.path.join('profiles', profile_filename) # Check if profile exists if not os.path.exists(profile_path): return jsonify({'status': 'error', 'message': 'Profile not found'}), 404 # Load existing profile with open(profile_path, 'r') as f: existing_profile = json.load(f) # Determine if profile has been modified (check if any field changed) has_changes = False fields_to_check = ['name', 'description', 'pass_threshold', 'checks', 'visibility', 'visible_to_clients'] for field in fields_to_check: if field in data and data[field] != existing_profile.get(field): has_changes = True break if not has_changes: return jsonify({ 'status': 'success', 'message': 'No changes detected', 'profile_id': profile_id }) # Find next version number current_version = existing_profile.get('version', 1) next_version = current_version + 1 # Create new versioned profile ID # Remove any existing version suffix (e.g., my_profile_v2 -> my_profile) base_profile_id = re.sub(r'_v\d+$', '', profile_id) new_profile_id = f"{base_profile_id}_v{next_version}" new_profile_filename = f"{new_profile_id}.json" new_profile_path = os.path.join('profiles', new_profile_filename) # Check if new version already exists (shouldn't happen, but be safe) counter = 0 while os.path.exists(new_profile_path) and counter < 100: next_version += 1 new_profile_id = f"{base_profile_id}_v{next_version}" new_profile_filename = f"{new_profile_id}.json" new_profile_path = os.path.join('profiles', new_profile_filename) counter += 1 # Create new profile version with updated data new_profile_data = existing_profile.copy() new_profile_data.update({ "name": data.get('name', existing_profile.get('name')), "description": data.get('description', existing_profile.get('description', '')), "pass_threshold": data.get('pass_threshold', existing_profile.get('pass_threshold', 85)), "checks": 
data.get('checks', existing_profile.get('checks', {})), "visibility": data.get('visibility', existing_profile.get('visibility', 'all')), "visible_to_clients": data.get('visible_to_clients', existing_profile.get('visible_to_clients', [])), "version": next_version, "modified_at": datetime.now().isoformat(), "modified_by": g.user.get('email', 'unknown'), "previous_version": profile_id }) # Save new versioned profile with open(new_profile_path, 'w') as f: json.dump(new_profile_data, f, indent=2) # Update client_config to use new version visibility = new_profile_data.get('visibility', 'all') visible_to_clients = new_profile_data.get('visible_to_clients', []) # Remove old version from client configs from client_config import get_all_clients all_clients = list(get_all_clients().keys()) _update_client_config_for_profile(profile_id, all_clients, action='remove') # Add new version to appropriate client configs if visibility == 'client_specific' and visible_to_clients: _update_client_config_for_profile(new_profile_id, visible_to_clients, action='add') elif visibility == 'all': _update_client_config_for_profile(new_profile_id, all_clients, action='add') return jsonify({ 'status': 'success', 'message': f'Profile updated to version {next_version}. 
@app.route('/api/profiles/<profile_id>', methods=['DELETE'])
@auth.require_auth
def delete_profile(profile_id):
    """Delete a QC profile.

    The profile is looked up as ``profiles/<profile_id>.json``.

    Returns:
        A success payload naming the deleted profile, 404 when the profile
        file does not exist, or 500 with the error text on any other failure.
    """
    # NOTE(review): the profile update handler removes the old profile id from
    # client configs; nothing equivalent happens here. Confirm whether deleted
    # profiles should also be removed from client configs.
    try:
        profile_path = os.path.join('profiles', f"{profile_id}.json")

        if not os.path.exists(profile_path):
            return jsonify({'status': 'error', 'message': 'Profile not found'}), 404

        # The file is read only to obtain a display name for the response.
        # A corrupt or unexpected file must not make the profile undeletable,
        # so fall back to the id instead of failing the request.
        profile_name = profile_id
        try:
            with open(profile_path, 'r') as f:
                profile_data = json.load(f)
            if isinstance(profile_data, dict):
                profile_name = profile_data.get('name', profile_id)
        except ValueError:
            # Invalid JSON / undecodable bytes: keep the id as the name.
            pass

        os.remove(profile_path)

        return jsonify({
            'status': 'success',
            'message': f'Profile "{profile_name}" deleted successfully'
        })

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500


@app.route('/api/brand_guidelines', methods=['GET'])
def get_brand_guidelines():
    """Get all brand guidelines, optionally filtered by brand or client.

    Query parameters:
        brand: When given, return that brand's guidelines together with the
            list of all brands.
        client: Used only when ``brand`` is absent; return the brands and
            files filtered for that client.

    With neither parameter the full ``brands`` / ``files`` structure of the
    brand database is returned.
    """
    try:
        brand_name = request.args.get('brand')
        client_id = request.args.get('client')

        if brand_name:
            payload = {
                'guidelines': brand_db.get_brand_guidelines(brand_name),
                'brands': brand_db.get_all_brands(),
            }
        elif client_id:
            filtered = brand_db.get_guidelines_by_client(client_id)
            payload = {'brands': filtered['brands'], 'files': filtered['files']}
        else:
            # Full database structure that the frontend expects.
            payload = {'brands': brand_db.db['brands'], 'files': brand_db.db['files']}

        return jsonify({'status': 'success', **payload})

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500


@app.route('/api/brand_guidelines', methods=['POST'])
@auth.require_auth
def upload_brand_guideline():
    """Upload a new brand guideline file and register it in the brand database.

    Form fields:
        file: The uploaded document (required).
        brand_name: Brand the guideline belongs to (required).
        description: Optional free text.
        tags: Optional comma-separated list; blank entries are dropped.
        client_id: Optional, defaults to ``general``.

    PDF and Excel uploads start a daemon thread that processes the stored
    file; in that case ``file_record['processing_status']`` is set to
    ``'processing'`` in the response.

    Returns:
        A success payload containing ``file_record``, 400 for a missing file
        or brand name, or 500 with the error text on any other failure.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'status': 'error', 'message': 'No file provided'}), 400

        file = request.files['file']
        # The filename is client-controlled. Keep only its final path
        # component (for either separator style) so the temporary path built
        # below cannot point outside the upload folder.
        upload_name = os.path.basename((file.filename or '').replace('\\', '/'))
        if not upload_name:
            return jsonify({'status': 'error', 'message': 'No file selected'}), 400

        brand_name = request.form.get('brand_name', '').strip()
        description = request.form.get('description', '').strip()
        tags = [tag.strip() for tag in request.form.get('tags', '').split(',') if tag.strip()]
        client_id = request.form.get('client_id', 'general').strip()

        if not brand_name:
            return jsonify({'status': 'error', 'message': 'Brand name is required'}), 400

        # Save the upload temporarily; the brand database receives this path.
        temp_filename = f"temp_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{upload_name}"
        temp_path = os.path.join(app.config['UPLOAD_FOLDER'], temp_filename)
        file.save(temp_path)

        try:
            file_record = brand_db.add_brand_guideline(
                brand_name=brand_name,
                file_path=temp_path,
                description=description,
                tags=tags,
                client_id=client_id
            )
        finally:
            # The temporary copy is not needed after registration, whether or
            # not registration succeeded.
            if os.path.exists(temp_path):
                os.remove(temp_path)

        def _process_pdf_bg(fid, spath, bname, fdir):
            """Process a stored PDF and write the result to its file record."""
            try:
                from pdf_processor import process_pdf_guideline
                result = process_pdf_guideline(spath, fid, bname, fdir)
                brand_db.update_file_record(fid, result)
                print(f"PDF processing complete for {fid}")
            except Exception as e:
                print(f"PDF processing failed for {fid}: {e}")
                brand_db.update_file_record(fid, {
                    'processed': 'error',
                    'processing_error': str(e)
                })

        def _process_excel_bg(fid, spath, fdir):
            """Process a stored workbook.

            Tries the localization matrix parser first (existing clients) and
            falls back to a Source Messaging summary (HP and similar).
            """
            try:
                from localization_processor import parse_localization_matrix
                parsed = parse_localization_matrix(spath)
                if parsed:
                    json_path = os.path.join(fdir, f"{fid}_localization.json")
                    with open(json_path, 'w', encoding='utf-8') as f:
                        json.dump(parsed, f, indent=2, ensure_ascii=False)
                    brand_db.update_file_record(fid, {
                        'processed': True,
                        'processed_at': datetime.now().isoformat(),
                        'localization_path': json_path,
                        'localization_messages': list(parsed.get('messages', {}).keys()),
                        'localization_countries': parsed.get('countries', []),
                        'asset_type': 'localization_matrix',
                    })
                    print(f"Localization matrix parsing complete for {fid}: "
                          f"{len(parsed.get('messages', {}))} messages, "
                          f"{len(parsed.get('countries', []))} countries")
                    return

                # Not a localization matrix: process as Source Messaging.
                from excel_processor import process_excel_file
                summary_text, summary_path = process_excel_file(spath, fid)
                brand_db.update_file_record(fid, {
                    'processed': True,
                    'processed_at': datetime.now().isoformat(),
                    'summary_path': summary_path,
                    'summary_length': len(summary_text),
                    'cover_image_path': None,
                    'asset_type': 'source_messaging',
                })
                print(f"Source-messaging summary complete for {fid}: "
                      f"{len(summary_text)} chars")
            except Exception as e:
                print(f"Excel processing failed for {fid}: {e}")
                brand_db.update_file_record(fid, {
                    'processed': 'error',
                    'processing_error': str(e),
                })

        # Pick the background job (if any) from the recorded file type.
        file_type = file_record.get('file_type')
        job = None
        if file_type == '.pdf':
            job = (_process_pdf_bg,
                   (file_record['id'], file_record['stored_path'],
                    brand_name, str(brand_db.files_dir)))
        elif file_type in ('.xlsx', '.xls'):
            job = (_process_excel_bg,
                   (file_record['id'], file_record['stored_path'],
                    str(brand_db.files_dir)))

        if job is not None:
            target, args = job
            threading.Thread(target=target, args=args, daemon=True).start()
            file_record['processing_status'] = 'processing'

        return jsonify({
            'status': 'success',
            'message': f'Brand guideline uploaded successfully for {brand_name}',
            'file_record': file_record
        })

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500


@app.route('/api/brand_guidelines/<file_id>', methods=['DELETE'])
@auth.require_auth
def delete_brand_guideline(file_id):
    """Delete a brand guideline file.

    Returns a success payload, 404 when ``brand_db.delete_guideline`` reports
    that nothing was deleted, or 500 with the error text on failure.
    """
    try:
        if brand_db.delete_guideline(file_id):
            return jsonify({
                'status': 'success',
                'message': 'Brand guideline deleted successfully'
            })
        return jsonify({
            'status': 'error',
            'message': 'Brand guideline not found'
        }), 404

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500


@app.route('/api/brand_guidelines/<file_id>/status', methods=['GET'])
def get_guideline_processing_status(file_id):
    """Check processing status of a brand guideline.

    Returns the processing-related fields of the file record, or 404 when no
    record exists for ``file_id``.
    """
    file_record = brand_db.db["files"].get(file_id)
    if not file_record:
        return jsonify({'status': 'error', 'message': 'Not found'}), 404

    return jsonify({
        'status': 'success',
        'processed': file_record.get('processed', False),
        'processing_error': file_record.get('processing_error'),
        'page_count': file_record.get('page_count'),
        'summary_length': file_record.get('summary_length'),
        'processed_at': file_record.get('processed_at')
    })
def _remove_media_plan_files(plan_info, keep=()):
    """Best-effort removal of the Excel/JSON files referenced by a plan record.

    Args:
        plan_info: Mapping that may hold ``excel_path`` and ``json_path``.
        keep: Paths that must not be removed even if the record names them.
    """
    for key in ('excel_path', 'json_path'):
        path = plan_info.get(key)
        if not path or path in keep or not os.path.exists(path):
            continue
        try:
            os.remove(path)
        except OSError as e:
            # A file that cannot be removed must not fail the request.
            print(f"Could not remove media plan file {path}: {e}")


@app.route('/api/media_plan', methods=['POST'])
@auth.require_auth
def upload_media_plan():
    """Upload a media plan Excel file for a client.

    Form fields:
        file: The Excel workbook (required).
        client_id: Target client, lower-cased; defaults to ``general``.
        display_name: Optional label; the uploaded filename is used when blank.

    The workbook is saved under ``MEDIA_PLANS_DIR`` and parsed, and the parsed
    result is stored next to it as JSON. The client's entry in the media plans
    DB is then replaced and the previous plan's files are removed.

    Returns:
        A success payload with ``total_assets`` and ``channels``, 400 for
        missing input, the access-check response when the user may not use
        the client, or 500 with the error text on failure.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'status': 'error', 'message': 'No file provided'}), 400

        file = request.files['file']
        if not file.filename:
            return jsonify({'status': 'error', 'message': 'No file selected'}), 400

        client_id = request.form.get('client_id', 'general').strip().lower()
        if not client_id:
            return jsonify({'status': 'error', 'message': 'client_id is required'}), 400

        access_err = _require_client_access(client_id)
        if access_err:
            return access_err

        display_name = (request.form.get('display_name') or '').strip()

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        safe_name = re.sub(r'[^a-zA-Z0-9._-]', '_', file.filename)
        excel_path = os.path.join(MEDIA_PLANS_DIR, f"{client_id}_{timestamp}_{safe_name}")
        json_path = os.path.join(MEDIA_PLANS_DIR, f"{client_id}_{timestamp}.json")
        file.save(excel_path)

        try:
            from media_plan_processor import parse_media_plan
            parsed = parse_media_plan(excel_path)

            # Parsed JSON is stored for fast lookup.
            with open(json_path, 'w') as f:
                json.dump(parsed, f, indent=2)

            # Build the record before touching the previous plan so that a
            # malformed parse result cannot leave the DB pointing at deleted
            # files.
            record = {
                'client_id': client_id,
                'original_filename': file.filename,
                'display_name': display_name or file.filename,
                'excel_path': excel_path,
                'json_path': json_path,
                'upload_date': datetime.now().isoformat(),
                'total_assets': parsed['total_assets'],
                'channels': parsed['channels'],
            }
        except Exception:
            # Do not leave the just-uploaded files orphaned on disk.
            _remove_media_plan_files({'excel_path': excel_path, 'json_path': json_path})
            raise

        db = _load_media_plans_db()
        old_plan = db.get(client_id)
        if old_plan:
            # `keep` protects the new files if an old record names the same path.
            _remove_media_plan_files(old_plan, keep=(excel_path, json_path))

        db[client_id] = record
        _save_media_plans_db(db)

        return jsonify({
            'status': 'success',
            'message': f'Media plan uploaded with {parsed["total_assets"]} assets across {len(parsed["channels"])} channels',
            'total_assets': parsed['total_assets'],
            'channels': parsed['channels'],
        })

    except Exception as e:
        print(f"Error uploading media plan: {e}")
        traceback.print_exc()
        return jsonify({'status': 'error', 'message': str(e)}), 500


@app.route('/api/media_plan', methods=['GET'])
@auth.require_auth
def get_media_plan():
    """Get the active media plan for a client.

    Query parameters:
        client: Client id (required, lower-cased before lookup).

    Returns ``plan: None`` when the client has no plan, otherwise the plan's
    filename, upload date, asset count and channels.
    """
    client_id = request.args.get('client', '').strip().lower()
    if not client_id:
        return jsonify({'status': 'error', 'message': 'client parameter required'}), 400

    access_err = _require_client_access(client_id)
    if access_err:
        return access_err

    plan_info = _load_media_plans_db().get(client_id)
    if not plan_info:
        return jsonify({'status': 'success', 'plan': None})

    exposed_fields = ('original_filename', 'upload_date', 'total_assets', 'channels')
    return jsonify({
        'status': 'success',
        'plan': {field: plan_info.get(field) for field in exposed_fields}
    })


@app.route('/api/media_plan/<client_id>', methods=['DELETE'])
@auth.require_auth
def delete_media_plan(client_id):
    """Delete the media plan for a client.

    Returns a success payload, the access-check response when the user may
    not use the client, or 404 when the client has no media plan.
    """
    # Plans are stored under the lower-cased client id by the upload handler,
    # so normalise the same way before the access check and lookup.
    client_id = client_id.strip().lower()

    access_err = _require_client_access(client_id)
    if access_err:
        return access_err

    db = _load_media_plans_db()
    plan_info = db.get(client_id)
    if not plan_info:
        return jsonify({'status': 'error', 'message': 'No media plan found'}), 404

    _remove_media_plan_files(plan_info)

    del db[client_id]
    _save_media_plans_db(db)

    return jsonify({'status': 'success', 'message': 'Media plan deleted'})
AI analysis""" try: if 'file' not in request.files: return jsonify({'status': 'error', 'message': 'No file provided'}), 400 file = request.files['file'] if file.filename == '': return jsonify({'status': 'error', 'message': 'No file selected'}), 400 # Save file temporarily for analysis temp_filename = f"brand_detect_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{file.filename}" temp_path = os.path.join(app.config['UPLOAD_FOLDER'], temp_filename) file.save(temp_path) try: # Get available brands available_brands = brand_db.get_all_brands() # Create brand detection prompt - works whether brands are available or not if available_brands: brands_list = ', '.join(available_brands) brand_detection_prompt = f""" Analyze this image and determine: 1. Which brand this belongs to from the following options: {brands_list} 2. Whether this is a Key Visual or POS (Point of Sale) material 3. Your confidence level (0-100%) Look for brand logos, colors, typography, and other brand elements. If the brand is not in the provided list, still attempt to identify it. Respond with JSON format: {{ "detected_brand": "brand_name or null if uncertain", "file_type": "Key Visual" or "POS", "confidence": confidence_percentage, "reasoning": "explanation of detection" }} """ else: brand_detection_prompt = """ Analyze this image and determine: 1. What brand this belongs to (identify any visible brand names, logos, or distinctive brand elements) 2. Whether this is a Key Visual or POS (Point of Sale) material 3. Your confidence level (0-100%) Look for brand logos, colors, typography, and other brand elements. Even if no brand guidelines are available, attempt to identify the brand from visual elements. 
@app.route('/api/profile_usage_stats', methods=['GET'])
def get_profile_usage_stats():
    """Get usage statistics for a specific profile over a time period.

    Query parameters:
        profile: Profile id to report on (required).
        days: Size of the window in days, counted back from now (default 30).

    Only ``analysis_complete`` log entries whose ``profile_id`` matches are
    counted.

    Returns:
        Totals (analyses, checks, cost), the average score and the 50 most
        recent analyses; 400 for a missing profile or a non-integer ``days``;
        500 with the error text on any other failure.
    """
    # NOTE(review): the response includes user emails, but unlike
    # /api/client_usage_stats this route is not wrapped in auth.require_auth.
    # Confirm that is intended.
    try:
        profile_id = request.args.get('profile', '')
        if not profile_id:
            return jsonify({'status': 'error', 'message': 'Profile parameter required'}), 400

        try:
            days = int(request.args.get('days', 30))
        except ValueError:
            # A malformed query value is a client error, not a server fault.
            return jsonify({'status': 'error', 'message': 'days must be an integer'}), 400

        from generate_usage_report import load_logs

        now = datetime.now()
        entries = load_logs(
            start_date=(now - timedelta(days=days)).strftime('%Y-%m-%d'),
            end_date=now.strftime('%Y-%m-%d'),
        )

        profile_entries = [
            e for e in entries
            if e.get('event') == 'analysis_complete'
            and e.get('profile_id', '') == profile_id
        ]

        # Log fields may be present with a None value; treat that like a
        # missing field so arithmetic, rounding and sorting cannot raise.
        def checks_of(entry):
            return entry.get('checks_run', entry.get('checks_completed', 0)) or 0

        def cost_of(entry):
            return entry.get('total_cost_usd', entry.get('estimated_cost_usd', 0)) or 0

        def timestamp_of(entry):
            return entry.get('timestamp') or ''

        total_checks = sum(checks_of(e) for e in profile_entries)
        total_cost = sum(cost_of(e) for e in profile_entries)

        scores = [
            e['overall_score'] for e in profile_entries
            if e.get('overall_score') is not None
        ]
        avg_score = round(sum(scores) / len(scores), 1) if scores else 0

        # Most recent 50 analyses, newest first.
        newest_first = sorted(profile_entries, key=timestamp_of, reverse=True)[:50]
        recent = [
            {
                'date': timestamp_of(e)[:19],
                'user': e.get('user_email', 'Unknown'),
                'client': e.get('client', ''),
                'checks': checks_of(e),
                'score': round(e.get('overall_score') or 0, 2),
                'cost': round(cost_of(e), 4)
            }
            for e in newest_first
        ]

        return jsonify({
            'status': 'success',
            'total_analyses': len(profile_entries),
            'total_checks': total_checks,
            'total_cost_usd': round(total_cost, 4),
            'avg_score': avg_score,
            'recent': recent
        })

    except Exception as e:
        print(f"Error getting profile usage stats: {e}")
        traceback.print_exc()
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
sum(e.get('checks_run', e.get('checks_completed', 0)) for e in client_entries) total_cost = sum(e.get('total_cost_usd', e.get('estimated_cost_usd', 0)) for e in client_entries) scores = [e.get('overall_score', 0) for e in client_entries if e.get('overall_score') is not None] avg_score = round(sum(scores) / len(scores), 1) if scores else 0 total_input_tokens = 0 total_output_tokens = 0 by_provider = {} for e in client_entries: tu = e.get('token_usage') or {} total_input_tokens += tu.get('total_prompt_tokens', 0) total_output_tokens += tu.get('total_completion_tokens', 0) for provider, pstats in (tu.get('by_provider') or {}).items(): agg = by_provider.setdefault(provider, { 'input_tokens': 0, 'output_tokens': 0, 'cost_usd': 0.0 }) agg['input_tokens'] += pstats.get('prompt_tokens', 0) agg['output_tokens'] += pstats.get('completion_tokens', 0) agg['cost_usd'] += pstats.get('cost', 0) for agg in by_provider.values(): agg['cost_usd'] = round(agg['cost_usd'], 4) # Build recent analyses list (last 50) recent = [] for e in sorted(client_entries, key=lambda x: x.get('timestamp', ''), reverse=True)[:50]: tu = e.get('token_usage') or {} recent.append({ 'date': e.get('timestamp', '')[:19], 'user': e.get('user_email', 'Unknown'), 'profile': e.get('profile_id', e.get('profile', '')), 'checks': e.get('checks_run', e.get('checks_completed', 0)), 'score': round(e.get('overall_score', 0), 2), 'cost': round(e.get('total_cost_usd', e.get('estimated_cost_usd', 0)), 4), 'input_tokens': tu.get('total_prompt_tokens', 0), 'output_tokens': tu.get('total_completion_tokens', 0) }) return jsonify({ 'status': 'success', 'client': client, 'start_date': start_date, 'end_date': end_date, 'total_analyses': len(client_entries), 'unique_users': len(unique_users), 'total_checks': total_checks, 'estimated_cost_usd': round(total_cost, 4), 'total_input_tokens': total_input_tokens, 'total_output_tokens': total_output_tokens, 'by_provider': by_provider, 'avg_score': avg_score, 'recent': recent }) except 
@app.route('/api/admin/check', methods=['GET'])
@auth.require_auth
def check_admin():
    """Report whether the current user is an admin.

    Reads the email from ``g.user`` (empty string when unavailable) and
    returns ``{'is_admin': <bool>}`` as decided by ``user_access.is_admin``.
    """
    from user_access import is_admin

    current_user = getattr(g, 'user', {})
    admin_flag = is_admin(current_user.get('email', ''))
    return jsonify({'is_admin': admin_flag})
def _require_admin():
    """Return (user_email, None) if admin, else (None, error_response).

    ``error_response`` is a ``(response, 403)`` tuple that the calling
    endpoint can return directly.
    """
    from user_access import is_admin

    user_email = getattr(g, 'user', {}).get('email', '')
    if not is_admin(user_email):
        return None, (jsonify({'status': 'error', 'message': 'Admin access required'}), 403)
    return user_email, None


def _require_client_access(client_id):
    """Validate the authed user can access `client_id`.

    Returns None on success, or a (response, status) tuple to short-circuit
    the endpoint: 401 when no user email is available on ``g.user``, 403 when
    the client is not among the user's allowed clients. A falsy ``client_id``
    means the endpoint did not scope by client and always passes.
    """
    if not client_id:
        return None

    from user_access import get_user_clients

    user_email = getattr(g, 'user', {}).get('email', '')
    if not user_email:
        return jsonify({'status': 'error', 'message': 'Authentication required'}), 401

    if client_id not in get_user_clients(user_email):
        return jsonify({
            'status': 'error',
            'code': 'client_access_denied',
            'message': f'You do not have access to client "{client_id}"'
        }), 403
    return None


@app.route('/api/admin/user_access', methods=['GET'])
@auth.require_auth
def get_user_access_list():
    """List all users with their client grants (admin only).

    Joins users seen in the usage logs with users that have an explicit
    grant. Users without an explicit grant are reported with the default
    clients and ``is_admin: False``. Admins sort first, then by email.
    """
    _, err = _require_admin()
    if err:
        return err

    try:
        from user_access import list_access_entries
        from generate_usage_report import load_logs

        access = list_access_entries()
        entries_by_email = {e['email'].lower(): e for e in access['entries']}

        # Name and last-active timestamp come from the most recent log entry
        # per (case-insensitive) email.
        login_users = {}
        for log_entry in load_logs():
            email = log_entry.get('user_email')
            if not email:
                continue
            lower = email.lower()
            ts = log_entry.get('timestamp', '')
            known = login_users.get(lower)
            if known is None or ts > known.get('last_active', ''):
                login_users[lower] = {
                    'email': email,
                    'name': log_entry.get('user_name', ''),
                    'last_active': ts
                }

        # Everyone in the logs plus everyone with an explicit grant.
        merged = []
        for lower in set(login_users) | set(entries_by_email):
            login = login_users.get(lower, {})
            entry = entries_by_email.get(lower, {
                'email': login.get('email', lower),
                'clients': access['default_clients'],
                'is_admin': False,
                'updated_at': None,
                'updated_by': None
            })
            merged.append({
                **entry,
                'name': login.get('name', ''),
                'last_active': login.get('last_active', ''),
                'has_explicit_grant': lower in entries_by_email
            })

        merged.sort(key=lambda x: (not x.get('is_admin'), x.get('email', '').lower()))

        return jsonify({
            'status': 'success',
            'default_clients': access['default_clients'],
            'users': merged
        })
    except Exception as e:
        print(f"Error listing user access: {e}")
        traceback.print_exc()
        return jsonify({'status': 'error', 'message': str(e)}), 500


@app.route('/api/admin/user_access/<target_email>', methods=['PUT'])
@auth.require_auth
def set_user_access(target_email):
    """Set the list of clients a user can see (admin only).

    Body: {"clients": ["general", ...]}

    Returns the audit record produced by ``set_user_clients``; 400 when
    ``clients`` is not a list or the change is rejected with ``ValueError``.
    """
    actor_email, err = _require_admin()
    if err:
        return err

    data = request.get_json(silent=True) or {}
    clients = data.get('clients')
    if not isinstance(clients, list):
        return jsonify({'status': 'error', 'message': 'clients must be a list'}), 400

    try:
        from user_access import set_user_clients
        from usage_tracker import log_access_change

        audit = set_user_clients(target_email, clients, actor_email)
        log_access_change(audit)
        return jsonify({'status': 'success', 'audit': audit})
    except ValueError as ve:
        return jsonify({'status': 'error', 'message': str(ve)}), 400
    except Exception as e:
        print(f"Error setting user access: {e}")
        return jsonify({'status': 'error', 'message': str(e)}), 500


@app.route('/api/admin/user_access/<target_email>/promote', methods=['POST'])
@auth.require_auth
def promote_user_admin(target_email):
    """Promote a user to admin (admin only).

    Returns the audit record produced by ``promote_admin``; 400 when the
    change is rejected with ``ValueError``.
    """
    actor_email, err = _require_admin()
    if err:
        return err

    try:
        from user_access import promote_admin
        from usage_tracker import log_access_change

        audit = promote_admin(target_email, actor_email)
        log_access_change(audit)
        return jsonify({'status': 'success', 'audit': audit})
    except ValueError as ve:
        return jsonify({'status': 'error', 'message': str(ve)}), 400
    except Exception as e:
        # Log unexpected failures the same way set_user_access does.
        print(f"Error promoting user to admin: {e}")
        return jsonify({'status': 'error', 'message': str(e)}), 500
Blocked if it would leave zero admins.""" actor_email, err = _require_admin() if err: return err try: from user_access import demote_admin from usage_tracker import log_access_change audit = demote_admin(target_email, actor_email) log_access_change(audit) return jsonify({'status': 'success', 'audit': audit}) except ValueError as ve: return jsonify({'status': 'error', 'message': str(ve)}), 400 except Exception as e: return jsonify({'status': 'error', 'message': str(e)}), 500 @app.route('/api/consolidate_reports', methods=['POST']) @auth.require_auth def consolidate_reports(): """Generate a consolidated HTML summary from multiple saved reports""" try: data = request.get_json() if not data or 'files' not in data: return jsonify({'status': 'error', 'message': 'files list required'}), 400 files_list = data['files'] if len(files_list) < 2: return jsonify({'status': 'error', 'message': 'At least 2 files required'}), 400 output_folder = app.config['OUTPUT_FOLDER'] reports = [] for file_info in files_list: filename = file_info.get('filename', '') client = file_info.get('client', '') file_path = os.path.join(output_folder, client, filename) if client else os.path.join(output_folder, filename) if not os.path.exists(file_path) or not file_path.endswith('.html'): continue with open(file_path, 'r') as f: html_content = f.read() # Extract embedded JSON from
    ...
    json_match = re.search(r'
    ]*>\s*
    (.*?)
    \s*
    ', html_content, re.DOTALL) if json_match: try: report_data = json.loads(json_match.group(1)) reports.append({ 'filename': filename, 'client': client, 'data': report_data }) except json.JSONDecodeError: continue if len(reports) < 2: return jsonify({'status': 'error', 'message': 'Could not extract data from enough reports (need at least 2)'}), 400 # Build consolidated HTML timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') # Summary table rows summary_rows = '' all_failed_checks = [] for r in reports: rd = r['data'] summary = rd.get('summary', {}) score = summary.get('overall_score', 0) # Use the grade from the report (respects profile-specific grading overrides) grade = summary.get('grade', rd.get('grade', determine_grade(score) if score else 'N/A')) profile_name = rd.get('profile_name', rd.get('profile', 'Unknown')) fname = rd.get('filename', r['filename']) pass_fail_color = '#2e7d32' if grade == 'Pass' else '#c62828' summary_rows += f''' {fname} {profile_name} {round(score, 1) if score else 'N/A'} {grade} ''' # Collect failed checks results = rd.get('results', {}) for check_name, check_data in results.items(): if check_data.get('status') != 'success': continue check_score = check_data.get('score', 0) if check_score is not None and check_score < 6: # Extract human-readable explanation from json_data json_data = check_data.get('json_data', {}) explanation = json_data.get('explanation', '') recommendations = json_data.get('recommendations', []) # Build issue summary from explanation + recommendations issue_parts = [] if explanation: issue_parts.append(explanation) if recommendations: rec_text = ' | '.join(r for r in recommendations if r) if rec_text: issue_parts.append(f'Recommendations: {rec_text}') issue_summary = ' '.join(issue_parts) if issue_parts else 'No details available' # Truncate if very long if len(issue_summary) > 500: issue_summary = issue_summary[:497] + '...' 
all_failed_checks.append({ 'filename': fname, 'check': check_data.get('display_name', check_name.replace('_', ' ').title()), 'score': check_score, 'summary': issue_summary }) # Failed checks rows failed_rows = '' if all_failed_checks: for fc in all_failed_checks: failed_rows += f''' {fc['filename']} {fc['check']} {fc['score']}/10 {fc['summary']} ''' else: failed_rows = 'No failed checks found across all reports!' consolidated_html = f''' Consolidated QC Report - {timestamp}

    Consolidated QC Report

    Generated: {timestamp} | Reports: {len(reports)}

    Summary

    {summary_rows}
    File Profile Score Result

    Failed Checks

    {failed_rows}
    File Check Score Issue Summary
    ''' # Save consolidated report consolidated_filename = f"consolidated_{datetime.now().strftime('%Y%m%d_%H%M%S')}.html" # Determine client folder - use the first report's client or 'general' client_folder = reports[0].get('client', 'general') or 'general' client_output_dir = os.path.join(output_folder, client_folder) os.makedirs(client_output_dir, exist_ok=True) consolidated_path = os.path.join(client_output_dir, consolidated_filename) with open(consolidated_path, 'w') as f: f.write(consolidated_html) url = f"{request.environ.get('SCRIPT_NAME', '')}/output/{client_folder}/{consolidated_filename}" return jsonify({ 'status': 'success', 'filename': consolidated_filename, 'url': url }) except Exception as e: print(f"Error consolidating reports: {e}") import traceback traceback.print_exc() return jsonify({ 'status': 'error', 'message': str(e) }), 500 @app.route('/api/qc-apps', methods=['GET']) def get_qc_apps(): """Get list of all QC applications with their metadata""" try: # User-friendly descriptions for each QC tool tool_descriptions = { 'accessibility': 'Evaluates color contrast, text legibility, and accessibility compliance to ensure content is usable by people with visual impairments and meets WCAG standards.', 'animation_transitions': 'Checks animation quality, smoothness, timing, and transitions in video or animated content to ensure professional motion design.', 'aspect_ratio': 'Verifies the image or video aspect ratio matches intended display formats (16:9, 4:3, 1:1, etc.) 
to prevent stretching or cropping issues.', 'background_contrast': 'Analyzes contrast between background and foreground elements to ensure text and important content remain clearly visible and readable.', 'brand_assets_visibility': 'Validates that required brand assets (logos, trademarks, brand elements) are present, visible, and properly positioned according to brand guidelines.', 'call_to_action': 'Evaluates the presence, clarity, and prominence of call-to-action (CTA) elements to ensure they effectively guide user behavior.', 'color_format': 'Checks color space and format (RGB, CMYK, etc.) to ensure correct color reproduction for the intended output medium (digital, print).', 'crop_marks': 'Verifies presence and correct placement of crop marks, bleeds, and trim marks for print-ready files to ensure accurate cutting and finishing.', 'curved_edges': 'Checks for smooth, professional curved edges and rounded corners in design elements, ensuring consistent styling across the creative.', 'curved_edges_digital': 'Evaluates curved edges specifically for digital displays, checking for anti-aliasing and smooth rendering on screens.', 'curved_edges_print': 'Validates curved edges for print output, ensuring clean vector paths and proper resolution for physical reproduction.', 'dark_mode_legibility': 'Tests content legibility in dark mode or low-light environments, ensuring text remains readable with appropriate contrast.', 'element_alignment': 'Checks alignment of design elements (text, images, buttons) to ensure professional layout and visual consistency.', 'face_gaze_direction': 'Analyzes the direction people in images are looking to ensure gaze guides viewer attention toward key content or CTAs.', 'face_visibility': 'Checks for clear, well-lit, and unobscured faces in imagery, ensuring people are properly featured when human connection is important.', 'file_naming': 'Validates file naming conventions for proper organization, version control, and asset management in 
production workflows.', 'image_resolution': 'Verifies image resolution and quality to ensure crisp, professional output without pixelation or blur at intended display size.', 'imperative_verb': 'Checks that CTAs use strong imperative verbs ("Get," "Start," "Join") to create clear, action-oriented messaging.', 'inclusive': 'Evaluates representation and inclusivity in imagery and messaging to ensure diverse, respectful, and welcoming content.', 'layer_organization': 'Reviews file layer structure and organization for clean, maintainable design files that other team members can easily work with.', 'logo_visibility': 'Ensures brand logos are present, properly sized, clearly visible, and positioned according to brand guidelines.', 'lowercase_text': 'Checks for improper use of all-lowercase text where standard capitalization would improve readability and professionalism.', 'new_visibility': 'Validates presence and visibility of "NEW" badges or indicators when promoting new products, features, or offerings.', 'print_bleed': 'Verifies correct bleed area setup for print files to prevent white edges and ensure full-coverage printing to the trim edge.', 'product_visibility': 'Ensures product imagery is clear, well-lit, properly featured, and showcases key product attributes effectively.', 'responsiveness': 'Checks how design elements adapt across different screen sizes and devices to ensure consistent experience on mobile, tablet, and desktop.', 'safety_area': 'Validates that important content (text, logos, CTAs) stays within safe zones to avoid cutoff on various display formats and aspect ratios.', 'supporting_images': 'Evaluates quality, relevance, and composition of supporting images to ensure they enhance the message and maintain visual interest.', 'text_readability': 'Analyzes text size, font choice, spacing, and contrast to ensure copy is easily readable across different viewing conditions.', 'visual_elements_count': 'Checks for appropriate number of visual elements to 
@app.route('/api/generate_report', methods=['POST'])
def api_generate_report():
    """Generate an HTML report from the results already saved for a session.

    Reads ``session_id`` from the POSTed form, loads the last
    ``*_results.json`` file (by sorted name) in that session's upload folder
    and renders it with ``generate_html_content``.

    Returns:
        The rendered HTML with a ``text/html`` content type on success, or a
        JSON error body with status 400 (missing/invalid session id),
        404 (session, results or original file not found) or 500.
    """
    try:
        session_id = request.form.get('session_id')
        if not session_id:
            return jsonify({'status': 'error', 'message': 'Session ID required'}), 400

        # session_id is client-supplied and becomes a path component: refuse
        # anything that could step outside the upload folder.
        if session_id in ('.', '..') or '/' in session_id or '\\' in session_id:
            return jsonify({'status': 'error', 'message': 'Invalid session ID'}), 400

        session_folder = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
        # isdir (not exists): a plain file with that name is not a session.
        if not os.path.isdir(session_folder):
            return jsonify({'status': 'error', 'message': 'Session not found'}), 404

        entries = os.listdir(session_folder)

        # Find the analysis result file
        result_files = [f for f in entries if f.endswith('_results.json')]
        if not result_files:
            return jsonify({'status': 'error', 'message': 'No analysis results found for session'}), 404

        # Load the most recent results file (names sort chronologically)
        result_path = os.path.join(session_folder, max(result_files))
        with open(result_path, 'r') as f:
            report_data = json.load(f)

        # Find the original file; sorted so the pick does not depend on the
        # arbitrary order os.listdir returns.
        files = sorted(f for f in entries if not f.endswith('_results.json'))
        if not files:
            return jsonify({'status': 'error', 'message': 'Original file not found'}), 404

        original_file = files[0]
        file_path = os.path.join(session_folder, original_file)

        # Generate HTML using existing function
        html_content = generate_html_content(report_data, original_file, file_path)
        return html_content, 200, {'Content-Type': 'text/html'}

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500


# Authentication endpoints
@app.route('/auth/login', methods=['POST'])
def auth_login():
    """Process authentication tokens from MSAL popup.

    Expects a JSON object containing a ``token`` key and hands the token to
    ``auth.set_auth_token``, whose response is returned unchanged.

    Returns:
        The response from ``auth.set_auth_token``; a 400 JSON error when the
        body is not a JSON object with a ``token`` key; a 500 JSON error if
        token handling raises.
    """
    try:
        # silent=True: a malformed or non-JSON body yields None here instead
        # of raising, so the client gets the 400 below rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'token' not in data:
            return jsonify({
                'success': False,
                'error': 'Token is required',
                'authenticated': False
            }), 400

        # Validate and set authentication token - returns response with cookie set
        return auth.set_auth_token(data['token'])

    except Exception as e:
        return jsonify({
            'success': False,
            'error': f'Authentication failed: {str(e)}',
            'authenticated': False
        }), 500


@app.route('/auth/logout', methods=['POST'])
def auth_logout():
    """Clear authentication session.

    Returns:
        The response from ``auth.clear_auth_token``, or a 500 JSON error if
        it raises.
    """
    try:
        return auth.clear_auth_token()
    except Exception as e:
        return jsonify({
            'success': False,
            'error': f'Logout failed: {str(e)}'
        }), 500


@app.route('/auth/status', methods=['GET'])
def auth_status():
    """Get current authentication status and log user visit.

    Returns:
        The dict from ``auth.get_auth_status`` as JSON, or a 500 JSON error
        with ``authenticated: False`` if the status check raises.
    """
    try:
        status = auth.get_auth_status()

        # Log user visit if authenticated. Logging is best-effort: a failure
        # is printed as a warning and never affects the status response.
        if status.get('authenticated') and status.get('user'):
            try:
                from usage_tracker import log_user_login
                log_user_login(status['user'])
            except Exception as log_err:
                print(f"Warning: Failed to log user visit: {log_err}")

        return jsonify(status)
    except Exception as e:
        return jsonify({
            'authenticated': False,
            'error': f'Status check failed: {str(e)}'
        }), 500
""" explicit = (os.environ.get('BOX_REDIRECT_URI') or '').strip() if explicit: return explicit forwarded_host = request.headers.get('X-Forwarded-Host') if forwarded_host: host = forwarded_host.split(',')[0].strip() proto = request.headers.get('X-Forwarded-Proto', 'https') return f'{proto}://{host}/ai_qc/auth/box/callback' host = (request.host or '').strip() is_local = (not host) or 'localhost' in host or host.startswith('127.0.0.1') if not is_local: # Behind the optical-dev / optical-prod Apache proxy, mounted at /ai_qc/. return f'https://{host}/ai_qc/auth/box/callback' return f'{request.scheme}://{host}/auth/box/callback' # ---------- Box JWT service-account: webhook ingestion + workflow helper ---------- # Bounded in-memory dedup for box-delivery-id. Box uses at-least-once delivery; # a 200 from us tells it not to retry, so duplicates are rare. The maxlen keeps # memory tiny while still catching the common retry window. _box_recent_deliveries = collections.deque(maxlen=500) _box_recent_deliveries_set = set() _box_recent_deliveries_lock = threading.Lock() # Extensions accepted from a Box upload. Keeps the webhook from kicking off QC # on Word docs, ZIPs, etc. Mirrors what technical_check.inspect knows how to read. _BOX_QC_EXTS = { '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp', '.pdf', '.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', } def _box_remember_delivery(delivery_id): """Return True if first time seeing this delivery_id; False if duplicate.""" with _box_recent_deliveries_lock: if delivery_id in _box_recent_deliveries_set: return False _box_recent_deliveries.append(delivery_id) _box_recent_deliveries_set.add(delivery_id) if len(_box_recent_deliveries_set) > _box_recent_deliveries.maxlen: _box_recent_deliveries_set.clear() _box_recent_deliveries_set.update(_box_recent_deliveries) return True def _run_box_triggered_analysis(client_id, profile_id, file_id, filename, session_id): """Background worker for the Box webhook flow. 
Downloads the Box file, runs the technical pre-flight + LLM check pipeline, writes the HTML report to disk under output//, and uploads the same report back to the client's box_reports_folder_id (or box_folder_id as a fallback). Uses a synthetic 'box_webhook' user for usage tracking. Skips media-plan and localization context — those are user-UI concepts that don't have a meaningful source in a webhook-triggered run. """ try: from client_config import get_all_clients as _get_all_clients client_cfg = _get_all_clients().get(client_id, {}) session_folder = os.path.join(app.config['UPLOAD_FOLDER'], session_id) os.makedirs(session_folder, exist_ok=True) file_path = os.path.join(session_folder, filename) # 1. Download the asset from Box. box_jwt_client.download_file(file_id, file_path) print(f'Box webhook: downloaded {filename} → {file_path}') # 2. Technical pre-flight (same as user-uploaded flow). technical_report = technical_inspect(file_path) # 3. Init progress tracker. progress_tracker[session_id] = { 'total_checks': 25, 'completed_checks': 0, 'current_check': 'Initializing', 'current_check_display': 'Box-triggered analysis', 'stage': 'setup', 'percentage': 0, 'session_id': session_id, 'status': 'started', 'source': 'box_webhook', 'box_file_id': file_id, 'technical_report': technical_report, } # 4. Log analysis start with a synthetic system user. try: from usage_tracker import log_analysis_start log_analysis_start( session_id, client_id, profile_id, {'user_id': 'box_webhook', 'email': 'box_webhook@system', 'name': 'Box Webhook'}, {'filename': filename, 'size': os.path.getsize(file_path)}, ) except Exception as log_err: print(f'WARNING: usage log_analysis_start failed: {log_err}') # 5. Resolve profile + enabled checks. 
profile_config = get_profile(profile_id) if not profile_config: raise Exception(f'Profile {profile_id} not found') enabled_checks = [c for c in profile_config.get_enabled_checks() if c in qc_apps] if not enabled_checks: raise Exception(f'No enabled checks for profile {profile_id}') profile_weights = profile_config.get_check_weights() progress_tracker[session_id].update({ 'total_checks': len(enabled_checks), 'stage': 'qc_analysis', 'percentage': 10, }) # 6. Run check batches (no media plan / localization / OCR in webhook MVP). check_results = process_checks_in_batches( enabled_checks, qc_apps, profile_config, profile_weights, file_path, None, brand_db, progress_tracker, session_id, batch_size=15, media_plan_context=None, ocr_context=None, ) # 7. Score aggregation. total_weighted_score = 0 total_weight = 0 completed_checks = 0 failed_checks = 0 for _check_name, result in check_results.items(): w = result.get('weight', 0.1) total_weight += w if result.get('status') == 'success': completed_checks += 1 s = result.get('score') if s is not None: total_weighted_score += s * w else: failed_checks += 1 if total_weight >= 10.0: overall_score = min(total_weighted_score, 100) else: overall_score = min(total_weighted_score * 10, 100) # 8. Result envelope matching the user-flow shape. 
result_data = { 'status': 'success', 'session_id': session_id, 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'filename': filename, 'profile': profile_id, 'profile_id': profile_id, 'profile_name': profile_config.name, 'model': 'Profile-based selection', 'results': check_results, 'profile_selection': { 'selected_profile': profile_id, 'profile_source': 'box_webhook', 'brand': client_id, 'format_suffix': profile_id, 'reference_asset': None, 'reference_asset_used': False, }, 'qc_analysis': { 'profile_used': profile_id, 'total_checks': len(enabled_checks), 'completed_checks': completed_checks, 'failed_checks': failed_checks, 'check_results': check_results, }, 'summary': { 'overall_score': round(overall_score, 1), 'profile': profile_config.name, 'checks_count': completed_checks, 'total_checks': len(enabled_checks), 'total_weighted_score': total_weighted_score, 'total_weight': total_weight, 'grade': determine_grade(overall_score), }, 'technical_report': progress_tracker[session_id].get('technical_report', {}), 'source': 'box_webhook', 'box_file_id': file_id, } # Strict-grade override applies the same way for webhook-triggered runs. if getattr(profile_config, 'strict_grade', False): for _cn, cd in check_results.items(): if cd.get('status') == 'success': cs = cd.get('score', 0) if cs is not None and cs < 6: result_data['summary']['grade'] = 'Fail' break # 9. Write HTML report to disk so the UI's saved-files listing shows it. report_filename = f'QC_Report_{session_id}_{os.path.splitext(filename)[0]}.html' client_folder = ensure_client_output_folder(client_id) report_path = os.path.join(client_folder, report_filename) with open(report_path, 'w', encoding='utf-8') as f: f.write(generate_comprehensive_html_report(result_data, filename, file_path=file_path)) result_data['output_file'] = { 'path': report_path, 'filename': report_filename, 'url': f'/output/{client_id}/{report_filename}', } # 10. Upload the report back to Box. 
Prefer the dedicated reports folder if # configured; fall back to the same folder the source lived in. reports_folder = client_cfg.get('box_reports_folder_id') or client_cfg.get('box_folder_id') report_uploaded_ok = False if reports_folder: try: uploaded = box_jwt_client.upload_file(report_path, str(reports_folder), name=report_filename) result_data['box_report_upload'] = { 'box_file_id': uploaded.get('id'), 'box_file_name': uploaded.get('name'), 'box_folder_id': str(reports_folder), } report_uploaded_ok = True print(f"Box webhook: uploaded report {report_filename} → folder {reports_folder} (id={uploaded.get('id')})") except Exception as up_err: print(f'WARNING: failed to upload report to Box: {up_err}') result_data['box_report_upload_error'] = str(up_err) else: print(f'Box webhook: no box_reports_folder_id (or box_folder_id) on client {client_id}; report stays local only') # 10b. Move the source file out of INCOMING into a `_PROCESSED` subfolder so the # next upload of the same filename triggers a fresh FILE.UPLOADED event (Box's # V2 webhook doesn't fire on same-name version replacements; freeing the name # is the cleanest workaround). Only runs if the report made it back to Box — # if upload failed, we want the source to stay so the user can retry by simply # re-uploading. Failures here are non-fatal: log, record, continue. 
@app.route('/api/box/webhook', methods=['POST'])
def box_webhook():
    """Receive a Box V2 webhook. Authenticated by HMAC signature on every request.

    Box expects a 200 within ~10 seconds. We verify the signature, ack, and
    run the analysis on a background thread.

    Returns:
        503 when no signing key is configured, 401 on a bad signature,
        400 for unparseable or malformed payloads, and 200 otherwise
        (including duplicates, ignored triggers, unsupported extensions and
        folders with no client mapping). A dispatched run additionally
        returns its ``session_id``, ``client_id`` and ``profile_id``.
    """
    raw_body = request.get_data(cache=True)
    headers = {k.lower(): v for k, v in request.headers.items()}

    primary_key = os.environ.get('BOX_WEBHOOK_PRIMARY_KEY')
    secondary_key = os.environ.get('BOX_WEBHOOK_SECONDARY_KEY')
    if not primary_key and not secondary_key:
        print('Box webhook: no signing keys in env (set BOX_WEBHOOK_PRIMARY_KEY); refusing all deliveries')
        return jsonify({'status': 'error', 'message': 'webhook signing not configured'}), 503

    if not box_jwt_client.verify_webhook_signature(raw_body, headers, primary_key, secondary_key):
        print('Box webhook: signature verification failed')
        return jsonify({'status': 'error', 'message': 'invalid signature'}), 401

    delivery_id = headers.get('box-delivery-id', '')
    if delivery_id and not _box_remember_delivery(delivery_id):
        return jsonify({'status': 'ok', 'message': 'duplicate'}), 200

    try:
        payload = json.loads(raw_body.decode('utf-8'))
    except ValueError:
        # Covers both UnicodeDecodeError and json.JSONDecodeError.
        return jsonify({'status': 'error', 'message': 'invalid JSON'}), 400
    if not isinstance(payload, dict):
        # Valid JSON but not an object (e.g. a list): nothing to read.
        return jsonify({'status': 'error', 'message': 'malformed payload'}), 400

    trigger = payload.get('trigger', '')
    if trigger != 'FILE.UPLOADED':
        return jsonify({'status': 'ok', 'message': f'ignored trigger {trigger}'}), 200

    source = payload.get('source') or {}
    if not isinstance(source, dict) or source.get('type') != 'file':
        return jsonify({'status': 'ok', 'message': 'not a file event'}), 200

    # `or ''` before str(): a JSON null must count as missing, not become
    # the truthy string 'None'.
    file_id = str(source.get('id') or '')
    filename = source.get('name') or ''
    parent = source.get('parent')
    parent_folder_id = str(parent.get('id') or '') if isinstance(parent, dict) else ''
    if not file_id or not parent_folder_id or not filename:
        return jsonify({'status': 'error', 'message': 'malformed payload'}), 400

    ext = os.path.splitext(filename)[1].lower()
    if ext not in _BOX_QC_EXTS:
        print(f'Box webhook: skipping non-QC extension {ext} ({filename})')
        return jsonify({'status': 'ok', 'message': f'unsupported extension {ext}'}), 200

    from client_config import get_client_by_box_folder, get_all_clients, get_default_profile
    client_id = get_client_by_box_folder(parent_folder_id)
    if not client_id:
        print(f'Box webhook: no client configured for Box folder {parent_folder_id}')
        return jsonify({'status': 'ok', 'message': 'no client mapping'}), 200

    client_cfg = get_all_clients().get(client_id, {})
    profile_id = get_default_profile(client_id) or (client_cfg.get('profiles') or ['static_general'])[0]

    # A timestamp alone has one-second resolution, so two files uploaded in
    # the same second would share a session folder and progress entry.
    # Suffixing the Box file id keeps concurrent sessions distinct.
    session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{file_id}"
    print(f'Box webhook: dispatching session={session_id} client={client_id} profile={profile_id} file_id={file_id} name={filename}')

    threading.Thread(
        target=_run_box_triggered_analysis,
        args=(client_id, profile_id, file_id, filename, session_id),
        daemon=True,
    ).start()

    return jsonify({
        'status': 'ok',
        'session_id': session_id,
        'client_id': client_id,
        'profile_id': profile_id,
    }), 200


@app.route('/auth/box/login', methods=['GET'])
@auth.require_auth
def box_login():
    """Kick off the Box OAuth flow for the signed-in user.

    Returns:
        A redirect to the Box authorize URL; 503 when the Box integration is
        not configured; 401 when no email is available on ``g.user``.
    """
    import box_client

    if not box_client.is_configured():
        return jsonify({'status': 'error', 'message': 'Box integration is not configured (BOX_CLIENT_ID / BOX_CLIENT_SECRET missing).'}), 503

    # `or {}` so a g.user that is present but None yields the 401 below
    # instead of an AttributeError.
    user_email = (getattr(g, 'user', None) or {}).get('email', '')
    if not user_email:
        return jsonify({'status': 'error', 'message': 'Authentication required'}), 401

    redirect_uri = _box_redirect_uri()
    state = box_client.make_state(user_email)
    return redirect(box_client.build_authorize_url(state, redirect_uri))


@app.route('/auth/box/callback', methods=['GET'])
def box_callback():
    """Handle Box's redirect back after the user approves the app.

    Verifies the ``state`` parameter, exchanges ``code`` for tokens, tries to
    look up the Box user, and stores the tokens via ``box_tokens.save_tokens``.

    Returns:
        An HTML page from ``_box_callback_html``: 400 for a Box-reported
        error, missing parameters or a bad state; 502 when the token
        exchange fails; 200 on success.
    """
    import box_client
    import box_tokens

    # `detail` values are interpolated into the callback page as-is, and
    # several of them originate outside this server (query parameters,
    # upstream error text), so they are HTML-escaped here before rendering.
    error = request.args.get('error')
    if error:
        # User cancelled, or Box rejected the request — surface a friendly page.
        description = request.args.get('error_description', '')
        return _box_callback_html('Box connection cancelled or failed.', detail=html.escape(f'{error}: {description}'), success=False), 400

    code = request.args.get('code')
    state = request.args.get('state')
    if not code or not state:
        return _box_callback_html('Missing code or state in callback.', success=False), 400

    user_email = box_client.verify_state(state)
    if not user_email:
        return _box_callback_html('Invalid or expired state — please start the connection from the QC tool again.', success=False), 400

    redirect_uri = _box_redirect_uri()
    try:
        tokens = box_client.exchange_code_for_tokens(code, redirect_uri)
    except Exception as e:
        print(f'[box_callback] code exchange failed: {e}')
        return _box_callback_html('Could not complete the Box token exchange.', detail=html.escape(str(e)), success=False), 502

    box_user = None
    try:
        box_user = box_client.get_box_user(tokens['access_token'])
    except Exception as e:
        # Non-fatal — we still got tokens, just couldn't read the Box identity.
        print(f'[box_callback] /users/me failed: {e}')

    box_tokens.save_tokens(user_email, tokens, box_user=box_user)
    connected_as = (box_user or {}).get('login') or user_email
    return _box_callback_html('Box connected.', detail=html.escape(str(connected_as)), success=True)

    {detail}

    ' if detail else '' body = f""" Box connection

    {message}

    {detail_html}

    Return to AI QC →

    """ return Response(body, mimetype='text/html') @app.route('/api/box/status', methods=['GET']) @auth.require_auth def box_status(): """Whether the current user has connected their Box account.""" import box_client import box_tokens user_email = getattr(g, 'user', {}).get('email', '') record = box_tokens.get_tokens(user_email) if not record: return jsonify({ 'status': 'success', 'connected': False, 'configured': box_client.is_configured(), }) return jsonify({ 'status': 'success', 'connected': True, 'configured': True, 'box_user_login': record.get('box_user_login'), 'box_user_name': record.get('box_user_name'), 'connected_at': record.get('connected_at'), 'access_token_expires_at': record.get('access_token_expires_at'), }) @app.route('/api/box/disconnect', methods=['POST']) @auth.require_auth def box_disconnect(): """Forget the current user's Box tokens. Best-effort revoke at Box too.""" import box_client import box_tokens user_email = getattr(g, 'user', {}).get('email', '') record = box_tokens.get_tokens(user_email) if record and record.get('refresh_token'): try: box_client.revoke_tokens(record['refresh_token']) except Exception as e: print(f'[box_disconnect] revoke failed for {user_email}: {e}') box_tokens.delete_tokens(user_email) return jsonify({'status': 'success', 'message': 'Box disconnected'}) @app.route('/api/box/test_folder', methods=['GET']) @auth.require_auth def box_test_folder(): """ Smoke-test endpoint: list a Box folder's items using the current user's stored tokens. Used to prove the OAuth round-trip works end-to-end before we wire the watcher in PR3. Folder ID '0' is the user's All Files root. 
""" import box_client user_email = getattr(g, 'user', {}).get('email', '') folder_id = (request.args.get('folder_id') or '0').strip() access_token = box_client.get_valid_access_token(user_email) if not access_token: return jsonify({'status': 'error', 'code': 'box_not_connected', 'message': 'Connect your Box account first via /auth/box/login'}), 401 try: result = box_client.list_folder_items(access_token, folder_id) except Exception as e: print(f'[box_test_folder] list failed: {e}') return jsonify({'status': 'error', 'message': str(e)}), 502 items = [ { 'id': it.get('id'), 'name': it.get('name'), 'type': it.get('type'), 'size': it.get('size'), 'created_at': it.get('created_at'), 'created_by_login': (it.get('created_by') or {}).get('login'), } for it in (result.get('entries') or []) ] return jsonify({ 'status': 'success', 'folder_id': folder_id, 'total_count': result.get('total_count'), 'items': items, }) @app.route('/api/usage/stats', methods=['GET']) @auth.require_auth def get_usage_stats_endpoint(): """Get usage statistics (admin endpoint)""" from usage_tracker import get_usage_stats # Get query parameters start_date = request.args.get('start_date') end_date = request.args.get('end_date') client = request.args.get('client') user_id = request.args.get('user_id') try: stats = get_usage_stats( start_date=start_date, end_date=end_date, client=client, user_id=user_id ) return jsonify({ 'status': 'success', 'stats': stats }) except Exception as e: return jsonify({ 'status': 'error', 'message': str(e) }), 500 @app.route('/api/usage/export', methods=['GET']) @auth.require_auth def export_usage_logs_endpoint(): """Export usage logs as CSV (admin endpoint)""" import csv import io from usage_tracker import USAGE_LOG_DIR # Get date range start_date = request.args.get('start_date') end_date = request.args.get('end_date') try: # Collect all log entries import glob log_files = glob.glob(os.path.join(USAGE_LOG_DIR, '*.jsonl')) # Create CSV output = io.StringIO() fieldnames = 
['timestamp', 'event', 'session_id', 'client', 'profile', 'user_email', 'checks_completed', 'overall_score', 'estimated_cost_usd'] writer = csv.DictWriter(output, fieldnames=fieldnames) writer.writeheader() for log_file in sorted(log_files): with open(log_file, 'r') as f: for line in f: try: entry = json.loads(line.strip()) if entry.get('event') == 'analysis_complete': writer.writerow({ 'timestamp': entry.get('timestamp'), 'event': entry.get('event'), 'session_id': entry.get('session_id'), 'client': entry.get('client'), 'profile': entry.get('profile'), 'user_email': entry.get('user_email'), 'checks_completed': entry.get('checks_completed'), 'overall_score': entry.get('overall_score'), 'estimated_cost_usd': entry.get('estimated_cost_usd') }) except: continue # Return as downloadable CSV output.seek(0) return Response( output.getvalue(), mimetype='text/csv', headers={'Content-Disposition': 'attachment; filename=usage_export.csv'} ) except Exception as e: return jsonify({ 'status': 'error', 'message': str(e) }), 500 @app.route('/api/debug/test_file_upload', methods=['POST']) @auth.require_auth def debug_test_file_upload(): """Debug endpoint to test file upload handling""" try: # Check if file is in request if 'file' not in request.files: return jsonify({'status': 'error', 'message': 'No file part', 'files_in_request': list(request.files.keys())}), 400 file = request.files['file'] # Check if file was selected if file.filename == '': return jsonify({'status': 'error', 'message': 'No selected file'}), 400 # Get parameters profile = request.form.get('profile', 'general').lower() # Create session ID session_id = datetime.now().strftime('%Y%m%d_%H%M%S') # Check upload folder configuration upload_folder = app.config.get('UPLOAD_FOLDER', 'uploads') return jsonify({ 'status': 'success', 'message': 'File upload test successful', 'session_id': session_id, 'filename': file.filename, 'profile': profile, 'upload_folder': upload_folder, 'upload_folder_exists': 
# Initialize application: load the QC apps when none are registered yet
if not qc_apps:
    load_qc_apps()

print(f"Initialized app with {len(qc_apps)} QC apps")
print(f"Brand Guidelines DB initialized: {len(brand_db.get_all_brands())} brands")


# Backfill: process any existing unprocessed PDF guidelines
def _backfill_unprocessed_pdfs():
    """Queue background processing for stored PDF guidelines not yet processed.

    Scans the ``"files"`` mapping of ``brand_db.db`` for records whose
    ``file_type`` is ``".pdf"``, whose ``processed`` value is falsy, and whose
    ``stored_path`` is non-empty and exists on disk. If any are found, a
    single daemon thread runs ``pdf_processor.process_pdf_guideline`` on each
    in turn and stores the result through ``brand_db.update_file_record``.
    A failure on one file is printed and recorded on that file's record as
    ``processed='error'``; the loop then continues with the next file.

    Returns ``None`` immediately; the work itself happens on the thread.
    """
    import threading

    def _pending_pdfs():
        # Yield (file_id, stored_path, brand_name, files_dir) per candidate.
        for file_id, record in brand_db.db.get("files", {}).items():
            if record.get("file_type") != ".pdf" or record.get("processed"):
                continue
            stored_path = record.get("stored_path", "")
            if not stored_path or not os.path.exists(stored_path):
                continue
            yield (file_id, stored_path, record.get("brand_name", ""), str(brand_db.files_dir))

    pending = list(_pending_pdfs())
    if not pending:
        return

    print(f"Found {len(pending)} unprocessed PDF guidelines - starting background processing")

    def _process_all():
        # Runs on the background thread; handles the files sequentially.
        for pdf_id, pdf_path, brand, files_dir in pending:
            try:
                from pdf_processor import process_pdf_guideline
                outcome = process_pdf_guideline(pdf_path, pdf_id, brand, files_dir)
                brand_db.update_file_record(pdf_id, outcome)
                print(f"Backfill complete for {pdf_id}")
            except Exception as exc:
                print(f"Backfill failed for {pdf_id}: {exc}")
                brand_db.update_file_record(
                    pdf_id,
                    {'processed': 'error', 'processing_error': str(exc)},
                )

    worker = threading.Thread(target=_process_all, daemon=True)
    worker.start()


_backfill_unprocessed_pdfs()
def _parse_cli_args():
    """Parse the command-line options used when the module is run as a script.

    Defaults come from the environment: the port from ``PORT`` (7183 when
    unset) and the debug flag from the module-level ``debug_mode`` setting.

    Returns:
        The ``argparse.Namespace`` with ``host``, ``port`` and ``debug``.
    """
    import argparse

    # Get default port from environment, fallback to 7183
    env_port = int(os.environ.get('PORT', 7183))
    # Use the debug setting from environment
    env_debug = debug_mode

    cli = argparse.ArgumentParser(description='Run Visual AI QC API server')
    cli.add_argument(
        '--host',
        type=str,
        default='0.0.0.0',
        help='Host to bind to',
    )
    cli.add_argument(
        '--port',
        type=int,
        default=env_port,
        help=f'Port to listen on (default: {env_port} from environment)',
    )
    cli.add_argument(
        '--debug',
        action='store_true',
        default=env_debug,
        help=f'Run in debug mode (default: {env_debug} from environment)',
    )
    return cli.parse_args()


# When run directly
if __name__ == "__main__":
    args = _parse_cli_args()

    print(f"Environment: {current_environment}")
    print(f"Starting Flask API server on {args.host}:{args.port}")
    print(f"Debug mode: {args.debug}")

    app.run(host=args.host, port=args.port, debug=args.debug)