Initial commit — AC Tool unified application

Merges ac-helper (PHP Activation Calendar) and brief-extractor (Python AI)
into a single Docker app with React/TypeScript frontend.

Features:
- Brief upload → AI extraction → review → Activation Calendar import
- Handsontable v17 spreadsheet with dependent dropdowns (148 categories)
- AI natural language commands via Gemini (YOLO mode, voice input)
- Azure AD MSAL SPA PKCE authentication, user roles (user/admin)
- CSV Activation Calendar export
- Real-time WebSocket job progress
- Admin: user management, dropdown Excel upload
- Multi-stage Dockerfile, docker-compose, nginx proxy instructions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-03-23 13:24:46 +00:00
commit 72c50b2c92
107 changed files with 15547 additions and 0 deletions

34
.gitignore vendored Normal file
View file

@ -0,0 +1,34 @@
# Python
__pycache__/
*.py[cod]
*.so
.env
.env.*
venv/
.venv/
*.egg-info/
dist/
build/
# Node / Frontend
node_modules/
frontend/dist/
frontend/.vite/
# Data (user data, uploads — never commit)
data/uploads/
data/outputs/
data/sheets/
data/*.json
# Logs
*.log
# OS
.DS_Store
Thumbs.db
# IDE
.idea/
.vscode/
*.swp

40
Dockerfile Normal file
View file

@ -0,0 +1,40 @@
# ── Stage 1: Build React frontend ────────────────────────────────────────────
# This stage only produces /app/frontend/dist; Stage 2 copies that directory out.
FROM node:22-alpine AS frontend-builder
WORKDIR /app/frontend
# Package manifests are copied and installed before the rest of the frontend
# source is added.
COPY frontend/package*.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build
# ── Stage 2: Python runtime ───────────────────────────────────────────────────
FROM python:3.11-slim
# System deps for document processing
# (the apt package lists are deleted in the same RUN step)
RUN apt-get update && apt-get install -y --no-install-recommends \
libmagic1 \
libreoffice-core \
libreoffice-writer \
libreoffice-impress \
poppler-utils \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Install Python dependencies
# (requirements.txt is copied on its own, ahead of the backend source)
COPY backend/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# Copy backend source
COPY backend/ ./
# Copy built frontend into static directory
COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
# Create data directory (will be mounted as volume in production)
RUN mkdir -p data/uploads data/outputs data/sheets
EXPOSE 8000
# Start Hypercorn with the asyncio worker, bound to all interfaces on port 8000;
# the application object comes from calling create_app() in server.app.
CMD ["python", "-m", "hypercorn", "server.app:create_app()", "--bind", "0.0.0.0:8000", "--worker-class", "asyncio"]

14
backend/.gitignore vendored Normal file
View file

@ -0,0 +1,14 @@
.venv/
__pycache__/
*.pyc
*.pyo
.env
# Data files — never commit user data
data/uploads/*
data/outputs/*
data/sheets/*
data/*.json
!data/uploads/.gitkeep
!data/outputs/.gitkeep
!data/sheets/.gitkeep

0
backend/core/__init__.py Normal file
View file

148
backend/core/config.py Executable file
View file

@ -0,0 +1,148 @@
"""
Configuration management for Enhanced Brief Processing System
Loads environment variables and provides configuration validation
"""
import os
from typing import List, Dict, Any, Optional
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class Config:
    """Centralized configuration management.

    Every setting is read from the process environment while the class body
    executes (i.e. at import time) and stored as a class attribute. Numeric
    settings are converted with ``int``/``float`` immediately, so a malformed
    value raises ``ValueError`` during import.
    """

    # API Keys (an empty string means the key was not provided)
    OPENAI_API_KEY: str = os.getenv('OPENAI_API_KEY', '')
    ANTHROPIC_API_KEY: str = os.getenv('ANTHROPIC_API_KEY', '')
    GOOGLE_API_KEY: str = os.getenv('GOOGLE_API_KEY', '')
    LLAMACLOUD_API_KEY: str = os.getenv('LLAMACLOUD_API_KEY', '')

    # OpenAI Configuration
    OPENAI_MODEL: str = os.getenv('OPENAI_MODEL', 'gpt-5.1')
    OPENAI_REASONING_EFFORT: str = os.getenv('OPENAI_REASONING_EFFORT', 'medium')
    OPENAI_TIMEOUT: int = int(os.getenv('OPENAI_TIMEOUT', '3600'))
    OPENAI_MAX_RETRIES: int = int(os.getenv('OPENAI_MAX_RETRIES', '2'))

    # Google Configuration
    GOOGLE_MODEL: str = os.getenv('GOOGLE_MODEL', 'gemini-3.1-pro-preview')
    GOOGLE_TEMPERATURE: float = float(os.getenv('GOOGLE_TEMPERATURE', '0.1'))
    GOOGLE_MAX_OUTPUT_TOKENS: int = int(os.getenv('GOOGLE_MAX_OUTPUT_TOKENS', '8192'))
    GOOGLE_THINKING_BUDGET: int = int(os.getenv('GOOGLE_THINKING_BUDGET', '12000'))
    GOOGLE_TIMEOUT: int = int(os.getenv('GOOGLE_TIMEOUT', '300'))

    # Anthropic Configuration
    ANTHROPIC_MODEL_OPUS: str = os.getenv('ANTHROPIC_MODEL_OPUS', 'claude-opus-4-5-20251101')
    ANTHROPIC_MODEL_SONNET: str = os.getenv('ANTHROPIC_MODEL_SONNET', 'claude-sonnet-4-5-20250929')
    ANTHROPIC_TEMPERATURE: float = float(os.getenv('ANTHROPIC_TEMPERATURE', '0.1'))
    ANTHROPIC_MAX_TOKENS: int = int(os.getenv('ANTHROPIC_MAX_TOKENS', '32000'))
    ANTHROPIC_THINKING_BUDGET: int = int(os.getenv('ANTHROPIC_THINKING_BUDGET', '12000'))
    ANTHROPIC_TIMEOUT: int = int(os.getenv('ANTHROPIC_TIMEOUT', '300'))

    # Processing Configuration
    DEFAULT_PRIMARY_MODELS: str = os.getenv('DEFAULT_PRIMARY_MODELS', 'openai-gpt51,anthropic-sonnet45,google-gemini31')
    DEFAULT_CONSOLIDATION_MODEL: str = os.getenv('DEFAULT_CONSOLIDATION_MODEL', 'openai-gpt51')
    MINIMUM_SUCCESS_THRESHOLD: int = int(os.getenv('MINIMUM_SUCCESS_THRESHOLD', '1'))
    ENABLE_COST_ESTIMATION: bool = os.getenv('ENABLE_COST_ESTIMATION', 'true').lower() == 'true'
    MAX_PROCESSING_COST_USD: float = float(os.getenv('MAX_PROCESSING_COST_USD', '10.00'))

    # Model Pricing (per 1M tokens)
    PRICING = {
        'openai-gpt51': {
            'input': 1.25,
            'cached_input': 0.625,
            'output': 10.00
        },
        'anthropic-opus45': {
            'input': 5.00,
            'output': 25.00
        },
        'anthropic-sonnet45': {
            'input': 3.00,
            'output': 15.00
        },
        'google-gemini31': {
            'input': 1.25,
            'output': 5.00
        }
    }

    # Model mappings for CLI compatibility: model key -> (provider, model name)
    MODEL_MAPPINGS = {
        'openai-gpt51': ('openai', OPENAI_MODEL),
        'anthropic-opus45': ('anthropic', ANTHROPIC_MODEL_OPUS),
        'anthropic-sonnet45': ('anthropic', ANTHROPIC_MODEL_SONNET),
        'google-gemini31': ('google', GOOGLE_MODEL)
    }

    @classmethod
    def validate_api_keys(cls) -> Dict[str, bool]:
        """Report, per provider, whether an API key is set.

        A key counts as set when it is non-empty and differs from the
        ``your-<provider>-api-key-here`` placeholder.

        Returns:
            Mapping of provider name to ``True``/``False``.
        """
        return {
            'openai': bool(cls.OPENAI_API_KEY and cls.OPENAI_API_KEY != 'your-openai-api-key-here'),
            'anthropic': bool(cls.ANTHROPIC_API_KEY and cls.ANTHROPIC_API_KEY != 'your-anthropic-api-key-here'),
            'google': bool(cls.GOOGLE_API_KEY and cls.GOOGLE_API_KEY != 'your-google-api-key-here'),
            'llamacloud': bool(cls.LLAMACLOUD_API_KEY and cls.LLAMACLOUD_API_KEY != 'your-llamacloud-api-key-here')
        }

    @classmethod
    def get_provider_config(cls, provider: str) -> Dict[str, Any]:
        """Get configuration for a specific provider.

        Args:
            provider: One of ``'openai'``, ``'google'`` or ``'anthropic'``.

        Returns:
            Dictionary with the provider's API key, model name(s) and tuning
            parameters.

        Raises:
            ValueError: If ``provider`` is not one of the names above.
        """
        if provider == 'openai':
            return {
                'api_key': cls.OPENAI_API_KEY,
                'model': cls.OPENAI_MODEL,
                'reasoning_effort': cls.OPENAI_REASONING_EFFORT,
                'timeout': cls.OPENAI_TIMEOUT,
                'max_retries': cls.OPENAI_MAX_RETRIES
            }
        elif provider == 'google':
            return {
                'api_key': cls.GOOGLE_API_KEY,
                'model': cls.GOOGLE_MODEL,
                'temperature': cls.GOOGLE_TEMPERATURE,
                'max_output_tokens': cls.GOOGLE_MAX_OUTPUT_TOKENS,
                'thinking_budget': cls.GOOGLE_THINKING_BUDGET,
                'timeout': cls.GOOGLE_TIMEOUT
            }
        elif provider == 'anthropic':
            return {
                'api_key': cls.ANTHROPIC_API_KEY,
                'model_opus': cls.ANTHROPIC_MODEL_OPUS,
                'model_sonnet': cls.ANTHROPIC_MODEL_SONNET,
                'temperature': cls.ANTHROPIC_TEMPERATURE,
                'max_tokens': cls.ANTHROPIC_MAX_TOKENS,
                'thinking_budget': cls.ANTHROPIC_THINKING_BUDGET,
                'timeout': cls.ANTHROPIC_TIMEOUT
            }
        else:
            raise ValueError(f"Unknown provider: {provider}")

    @classmethod
    def get_default_primary_models(cls) -> List[str]:
        """Get default list of primary analysis models.

        ``DEFAULT_PRIMARY_MODELS`` is a comma-separated string. Surrounding
        whitespace is stripped from each entry and empty entries are dropped,
        so values such as ``"a, b"`` or a trailing comma still produce keys
        that can be looked up in ``MODEL_MAPPINGS``.

        Returns:
            List of model keys in the order they were configured.
        """
        return [
            key.strip()
            for key in cls.DEFAULT_PRIMARY_MODELS.split(',')
            if key.strip()
        ]

    @classmethod
    def get_model_info(cls, model_key: str) -> tuple:
        """Get provider and model name for a model key.

        Args:
            model_key: A key of ``MODEL_MAPPINGS`` (e.g. ``'openai-gpt51'``).

        Returns:
            ``(provider, model_name)`` tuple.

        Raises:
            ValueError: If ``model_key`` is not in ``MODEL_MAPPINGS``.
        """
        if model_key not in cls.MODEL_MAPPINGS:
            raise ValueError(f"Unknown model key: {model_key}. Available: {list(cls.MODEL_MAPPINGS.keys())}")
        return cls.MODEL_MAPPINGS[model_key]

    @classmethod
    def estimate_cost(cls, model_key: str, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
        """Estimate processing cost for a model.

        Args:
            model_key: A key of ``PRICING``.
            input_tokens: Number of input tokens billed at the 'input' rate.
            output_tokens: Number of output tokens.
            cached_tokens: Number of tokens billed at the 'cached_input' rate;
                models without that rate are billed at the 'input' rate.

        Returns:
            Sum of the three cost components, using per-1M-token prices, or
            ``0.0`` when ``model_key`` has no pricing entry.
        """
        if model_key not in cls.PRICING:
            return 0.0
        pricing = cls.PRICING[model_key]
        input_cost = (input_tokens / 1_000_000) * pricing['input']
        output_cost = (output_tokens / 1_000_000) * pricing['output']
        cached_cost = (cached_tokens / 1_000_000) * pricing.get('cached_input', pricing['input'])
        return input_cost + output_cost + cached_cost


# Global config instance
config = Config()

View file

@ -0,0 +1,353 @@
"""
Consolidation processor for merging multiple LLM analysis results
"""
import json
import logging
from typing import List, Dict, Any, Tuple
from dataclasses import dataclass
import os
from .llm_service import ProviderManager, LLMResponse
from .config import config
@dataclass
class ConsolidationResult:
    """Bundle of everything a consolidation run hands back to its caller."""

    # Merged base deliverables (BaseDeliverable instances)
    consolidated_deliverables: List[Any]
    # Individual assets produced by expanding the base deliverables (MarketingAsset instances)
    expanded_assets: List[Any]
    # Statistics describing the consolidation run
    consolidation_metadata: Dict[str, Any]
    # Warnings collected during the run
    warnings: List[str]
class ConsolidationProcessor:
"""Processes multiple LLM analysis results into a single consolidated output"""
def __init__(self):
    """Create the class-named logger and the provider manager used for consolidation."""
    class_name = self.__class__.__name__
    self.logger = logging.getLogger(class_name)
    self.provider_manager = ProviderManager()
async def consolidate_results(
    self,
    analysis_responses: List[LLMResponse],
    consolidation_model: str,
    document_content: str = ""
) -> ConsolidationResult:
    """
    Merge the primary-analysis outputs into one result via a consolidation model.

    Args:
        analysis_responses: LLM responses from the primary analysis step
        consolidation_model: Model key passed to the provider manager (e.g., 'anthropic-opus45')
        document_content: Original document content; accepted but not read by this method

    Returns:
        ConsolidationResult with base deliverables, expanded assets, metadata
        and the warnings reported by the expansion step

    Raises:
        Exception: Any error raised while calling the model, parsing its
            output or expanding deliverables is logged and re-raised
    """
    log = self.logger
    log.info(f"Starting consolidation with {len(analysis_responses)} model results using {consolidation_model}")

    # Report how many base deliverables each successful model produced
    per_model_counts = []
    for position, response in enumerate(analysis_responses, start=1):
        if not response.success:
            continue
        found = self._count_deliverables_in_response(response.content)
        per_model_counts.append(found)
        log.info(f"Model {position} ({response.provider} {response.model_used}): {found} base deliverables")

    if not per_model_counts:
        log.warning("No successful model responses to analyze")
    else:
        mean_count = sum(per_model_counts) / len(per_model_counts)
        log.info(f"Average deliverables across {len(per_model_counts)} models: {mean_count:.1f}")

    # Build the user prompt from the model outputs and fetch the system message
    models_section = self._format_model_results(analysis_responses)
    user_prompt = await self._prepare_consolidation_prompt(models_section)
    system_prompt = self._load_consolidation_system_prompt()

    try:
        provider = self.provider_manager.get_provider(consolidation_model)
        messages = provider.prepare_messages(system_prompt, user_prompt)

        # Structured output uses the universal base deliverable schema
        from .process_brief_enhanced import UNIVERSAL_BASE_DELIVERABLE_SCHEMA
        reply = await provider.generate_response(
            messages=messages,
            schema=UNIVERSAL_BASE_DELIVERABLE_SCHEMA
        )
        if not reply.success:
            raise Exception(f"Consolidation failed: {reply.error}")

        # Imported here (not at module level) to avoid a circular import
        from .process_brief_enhanced import BaseDeliverable, expand_deliverables

        try:
            parsed = json.loads(reply.content)
            if 'assets' not in parsed:
                # Unexpected shape: dump everything we know, then fail
                log.error(f"[CONSOLIDATION] ========== MISSING 'assets' KEY - VERBOSE DEBUG ==========")
                log.error(f"[CONSOLIDATION] Model: {consolidation_model}")
                log.error(f"[CONSOLIDATION] Response success: {reply.success}")
                log.error(f"[CONSOLIDATION] Response content length: {len(reply.content)} chars")
                log.error(f"[CONSOLIDATION] Response content type: {type(reply.content)}")
                log.error(f"[CONSOLIDATION] Full raw content: {reply.content}")
                log.error(f"[CONSOLIDATION] Parsed data type: {type(parsed)}")
                log.error(f"[CONSOLIDATION] Parsed data keys: {list(parsed.keys()) if isinstance(parsed, dict) else 'N/A'}")
                log.error(f"[CONSOLIDATION] Full parsed data: {parsed}")
                self._save_consolidation_debug(reply, parsed, analysis_responses)
                raise KeyError("Response missing 'assets' key")

            # Normal path: one summary line is enough
            log.info(f"Consolidation completed: {len(parsed['assets'])} base deliverables")
            base_deliverables = [BaseDeliverable(**entry) for entry in parsed['assets']]
        except json.JSONDecodeError as parse_error:
            log.error(f"[CONSOLIDATION] ========== JSON PARSE ERROR ==========")
            log.error(f"[CONSOLIDATION] Parse error: {parse_error}")
            log.error(f"[CONSOLIDATION] Full response content: {reply.content}")
            raise
        except KeyError:
            # KeyErrors pass through without the generic logging below
            raise
        except Exception as processing_error:
            log.error(f"[CONSOLIDATION] Error processing consolidation response: {processing_error}")
            log.error(f"[CONSOLIDATION] Full response content: {reply.content}")
            raise

        # Turn the consolidated base deliverables into individual assets
        expanded_assets, expansion_warnings = expand_deliverables(base_deliverables)
        log.info(f"Expansion completed: {len(expanded_assets)} individual assets")

        metadata = self._create_consolidation_metadata(
            analysis_responses,
            reply,
            base_deliverables,
            expanded_assets
        )
        return ConsolidationResult(
            consolidated_deliverables=base_deliverables,
            expanded_assets=expanded_assets,
            consolidation_metadata=metadata,
            warnings=expansion_warnings
        )
    except Exception as failure:
        log.error(f"Consolidation failed: {failure}")
        raise
def _count_deliverables_in_response(self, content: str) -> int:
"""Count the number of deliverables in a model's JSON response"""
try:
data = json.loads(content)
if isinstance(data, dict) and 'assets' in data:
return len(data['assets'])
return 0
except (json.JSONDecodeError, KeyError, TypeError):
return 0
def _format_model_results(self, responses: List[LLMResponse]) -> str:
    """Render the successful responses as fenced JSON sections for the prompt.

    Each successful response becomes a ``**MODEL n: PROVIDER model**`` header
    followed by its content in a json code fence; content that parses as JSON
    is re-serialised with two-space indentation, anything else is inserted
    verbatim. Failed responses are logged and skipped, but still consume
    their position number. Sections are separated by a blank line.
    """
    sections = []
    for number, response in enumerate(responses, start=1):
        if not response.success:
            self.logger.warning(f"Skipping failed response from {response.provider} {response.model_used}: {response.error}")
            continue

        header = f"**MODEL {number}: {response.provider.upper()} {response.model_used}**"
        try:
            # Round-trip through the parser to validate and pretty-print
            body = json.dumps(json.loads(response.content), indent=2)
        except json.JSONDecodeError:
            # Not valid JSON: fall back to the raw text
            body = response.content
        sections.append(f"{header}\n```json\n{body}\n```")

    return "\n\n".join(sections)
async def _prepare_consolidation_prompt(self, formatted_results: str) -> str:
    """Fill the consolidation prompt template with the formatted model results.

    The template is ``prompts/consolidation_analysis.txt``, located one
    directory above this module's directory. It is read through the running
    event loop's default executor and then formatted with
    ``models_results=formatted_results``. Errors are logged and re-raised.
    """
    import asyncio

    # prompts/ sits next to core/, so go up one level from this file's directory
    parent_dir = os.path.dirname(os.path.dirname(__file__))
    template_path = os.path.join(parent_dir, 'prompts', 'consolidation_analysis.txt')

    def _load_template():
        """Blocking file read, meant to run in the executor."""
        with open(template_path, 'r', encoding='utf-8') as handle:
            return handle.read()

    try:
        running_loop = asyncio.get_running_loop()
        template_text = await running_loop.run_in_executor(None, _load_template)
        return template_text.format(models_results=formatted_results)
    except FileNotFoundError:
        self.logger.error("Consolidation prompt template not found")
        raise
    except Exception as prompt_error:
        self.logger.error(f"Error preparing consolidation prompt: {prompt_error}")
        raise
def _load_consolidation_system_prompt(self) -> str:
"""Load system prompt for consolidation"""
return """You are an expert data consolidation specialist. Your task is to intelligently merge multiple LLM analysis results into the most complete and accurate dataset possible. Follow the consolidation strategy provided in the user prompt, with emphasis on completeness and thoroughness. Return only valid JSON in the specified format."""
def _save_consolidation_debug(self, consolidation_response, consolidated_data, analysis_responses):
    """Write a JSON snapshot of a failed consolidation to the temp directory.

    The file is named ``consolidation_debug_<YYYYmmdd_HHMMSS>.json`` and holds
    the consolidation response, the parsed data, token usage and a short
    summary of every primary analysis response. This is best-effort: any
    error while building or writing the snapshot is logged, never raised.
    """
    try:
        import tempfile
        from datetime import datetime

        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        target_path = os.path.join(tempfile.gettempdir(), f"consolidation_debug_{stamp}.json")

        # One summary entry per primary response, successful or not
        primary_summaries = []
        for result in analysis_responses:
            if result.success:
                deliverable_count = self._count_deliverables_in_response(result.content)
                preview = result.content[:500]
            else:
                deliverable_count = 0
                preview = result.error
            primary_summaries.append({
                "provider": result.provider,
                "model": result.model_used,
                "success": result.success,
                "deliverable_count": deliverable_count,
                "content_preview": preview
            })

        usage = consolidation_response.token_usage
        snapshot = {
            "timestamp": stamp,
            "consolidation_model": consolidation_response.model_used,
            "consolidation_provider": consolidation_response.provider,
            "raw_content": consolidation_response.content,
            "parsed_data": consolidated_data,
            "response_success": consolidation_response.success,
            "response_error": consolidation_response.error,
            "token_usage": {
                "input": usage.input_tokens,
                "output": usage.output_tokens,
                "total": usage.get_total()
            },
            "primary_analysis_results": primary_summaries
        }

        with open(target_path, 'w') as out_file:
            json.dump(snapshot, out_file, indent=2)
        self.logger.error(f"[CONSOLIDATION] Debug info saved to: {target_path}")
    except Exception as save_error:
        self.logger.error(f"[CONSOLIDATION] Failed to save debug info: {save_error}")
def _create_consolidation_metadata(
    self,
    analysis_responses: List[LLMResponse],
    consolidation_response: LLMResponse,
    base_deliverables: List[Any],
    expanded_assets: List[Any]
) -> Dict[str, Any]:
    """Create metadata about the consolidation process.

    Args:
        analysis_responses: All primary analysis responses, successful or not.
        consolidation_response: The response returned by the consolidation model.
        base_deliverables: Consolidated base deliverables (only counted here).
        expanded_assets: Expanded individual assets (only counted here).

    Returns:
        Dictionary with model/provider names, counts, per-model statistics,
        token usage, a cost breakdown (rounded to 4 decimals) and timings.
        Per-model statistics are keyed by ``"<provider>_<model_used>"``, so
        two responses from the same model share one entry (the later wins).
    """

    def _estimate_response_cost(response) -> float:
        """Best-effort cost of one response; 0.0 when it cannot be determined."""
        try:
            # Map (provider, model name) back to the configured model key
            for key in config.MODEL_MAPPINGS.keys():
                provider_name, model_name = config.get_model_info(key)
                if provider_name == response.provider and model_name == response.model_used:
                    provider = self.provider_manager.get_provider(key)
                    return provider.estimate_cost(
                        response.token_usage.input_tokens,
                        response.token_usage.output_tokens,
                        response.token_usage.cached_input_tokens
                    )
        except Exception as cost_error:
            # Cost figures are informational: report the problem and fall back
            # to 0.0 instead of failing the whole consolidation. Only Exception
            # is caught so KeyboardInterrupt/SystemExit still propagate.
            self.logger.warning(
                f"Cost estimation failed for {response.provider} {response.model_used}: {cost_error}"
            )
        return 0.0

    successful_responses = [r for r in analysis_responses if r.success]

    # Analyze model contributions
    model_stats = {}
    total_primary_tokens = 0
    total_primary_cost = 0.0
    for response in analysis_responses:
        model_key = f"{response.provider}_{response.model_used}"
        if response.success:
            tokens_used = response.token_usage.get_total()
            cost = _estimate_response_cost(response)
            total_primary_tokens += tokens_used
            total_primary_cost += cost
            model_stats[model_key] = {
                'tokens_used': tokens_used,
                'processing_time': response.processing_time,
                'success': True,
                'estimated_cost': cost
            }
        else:
            model_stats[model_key] = {
                'tokens_used': 0,
                'processing_time': response.processing_time,
                'success': False,
                'error': response.error,
                'estimated_cost': 0.0
            }

    # Consolidation model stats
    consolidation_cost = _estimate_response_cost(consolidation_response)
    consolidation_tokens = consolidation_response.token_usage.get_total()

    return {
        'consolidation_model': consolidation_response.model_used,
        'consolidation_provider': consolidation_response.provider,
        'primary_models_used': len(successful_responses),
        'total_models_attempted': len(analysis_responses),
        'base_deliverables_count': len(base_deliverables),
        'final_assets_count': len(expanded_assets),
        'model_statistics': model_stats,
        'token_usage': {
            'primary_analysis_total': total_primary_tokens,
            'consolidation_tokens': consolidation_tokens,
            'grand_total': total_primary_tokens + consolidation_tokens
        },
        'cost_breakdown': {
            'primary_analysis_cost': round(total_primary_cost, 4),
            'consolidation_cost': round(consolidation_cost, 4),
            'total_cost': round(total_primary_cost + consolidation_cost, 4)
        },
        'processing_times': {
            'consolidation_time': consolidation_response.processing_time,
            # max(1, ...) avoids dividing by zero when no primary model succeeded
            'primary_models_avg_time': sum(r.processing_time for r in successful_responses) / max(1, len(successful_responses))
        }
    }

View file

@ -0,0 +1,20 @@
"""
LLM Service module for Enhanced Brief Processing System
Provides abstracted access to multiple LLM providers
"""
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from .openai_provider import OpenAIProvider
from .google_provider import GoogleProvider
from .anthropic_provider import AnthropicProvider
from .provider_manager import ProviderManager
__all__ = [
'BaseLLMProvider',
'LLMResponse',
'TokenUsage',
'OpenAIProvider',
'GoogleProvider',
'AnthropicProvider',
'ProviderManager'
]

View file

@ -0,0 +1,375 @@
"""
Anthropic provider implementation for Claude Opus 4.5 and Sonnet 4.5
"""
import time
import json
import logging
from typing import List, Dict, Any, Optional
try:
from anthropic import AsyncAnthropic
anthropic = AsyncAnthropic # Keep reference for compatibility checks
except ImportError:
AsyncAnthropic = None
anthropic = None
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from ..config import config
class AnthropicProvider(BaseLLMProvider):
"""Anthropic Claude provider supporting Opus and Sonnet models"""
def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, **kwargs):
    """Configure the provider from explicit arguments, falling back to config.

    ``api_key`` and ``model_name`` default to the configured Anthropic values;
    without a ``model_name`` the model is picked from the ``model_variant``
    keyword (default ``'sonnet'``). ``temperature``, ``max_tokens``,
    ``thinking_budget`` and ``timeout`` may be overridden through keyword
    arguments. Raises ImportError when the anthropic package is missing.
    """
    if AsyncAnthropic is None:
        raise ImportError("anthropic package not installed. Run: pip install anthropic>=0.67.0")

    settings = config.get_provider_config('anthropic')
    resolved_key = api_key or settings['api_key']
    resolved_model = model_name or self._select_model(kwargs.get('model_variant', 'sonnet'), settings)
    super().__init__(api_key=resolved_key, model_name=resolved_model, **kwargs)

    # Per-instance tunables: keyword override first, configured value otherwise
    for option in ('temperature', 'max_tokens', 'thinking_budget', 'timeout'):
        setattr(self, option, kwargs.get(option, settings[option]))

    self.client = None
    self._setup_client()
def _select_model(self, variant: str, provider_config: Dict[str, Any]) -> str:
"""Select appropriate Claude model based on variant"""
if variant.lower() in ['opus', 'opus4', 'opus45']:
return provider_config['model_opus']
elif variant.lower() in ['sonnet', 'sonnet4', 'sonnet45']:
return provider_config['model_sonnet']
else:
# Default to Sonnet for better cost-performance ratio
return provider_config['model_sonnet']
def _setup_client(self):
    """Build the AsyncAnthropic client from this provider's key and timeout.

    A construction failure is logged and re-raised.
    """
    try:
        client_options = {'api_key': self.api_key, 'timeout': self.timeout}
        self.client = AsyncAnthropic(**client_options)
        self.logger.info(f"AsyncAnthropic client initialized - Model: {self.model_name}")
    except Exception as init_error:
        self.logger.error(f"Failed to initialize AsyncAnthropic client: {init_error}")
        raise
async def generate_response(
    self,
    messages: List[Dict[str, str]],
    schema: Optional[Dict[str, Any]] = None,
    **kwargs
) -> LLMResponse:
    """Generate a response with Claude, choosing the call pattern.

    When a thinking budget is configured and a schema is supplied, the request
    is split into two calls (analysis with thinking, then schema formatting).
    Otherwise a single call is made. Extra keyword arguments are forwarded.
    """
    started_at = time.time()

    use_two_calls = self.thinking_budget > 0 and schema
    if not use_two_calls:
        self.logger.info(f"Anthropic Single-Call Request - Model: {self.model_name}")
        return await self._single_call(messages, schema, started_at, **kwargs)

    self.logger.info(f"Anthropic Two-Call Request - Model: {self.model_name} (thinking: {self.thinking_budget} budget + schema)")
    return await self._two_call_with_thinking(messages, schema, started_at, **kwargs)
async def _two_call_with_thinking(
    self,
    messages: List[Dict[str, str]],
    schema: Dict[str, Any],
    start_time: float,
    **kwargs
) -> LLMResponse:
    """Execute two-call pattern: thinking analysis + schema formatting.

    Call A runs the conversation with extended thinking enabled and no tools,
    producing a free-text analysis. Call B feeds that analysis back without
    thinking and forces the ``extract_structured_data`` tool so the result
    follows ``schema``.

    Args:
        messages: Chat messages; a system message is split out for Call A.
        schema: JSON schema describing the structured output.
        start_time: ``time.time()`` value taken when the request started.
        **kwargs: Extra parameters forwarded to both API calls.

    Returns:
        LLMResponse whose content is the JSON-encoded tool input and whose
        token usage is the sum of both calls. Failures never raise; they are
        returned as an LLMResponse with ``success=False`` and ``error`` set.
    """
    try:
        # Prepare messages for Anthropic
        system_message, user_messages = self._prepare_messages(messages)

        # === CALL A: Analysis with Thinking (No Forced Tools) ===
        self.logger.info(" Call A: Analysis with thinking (no forced tools)")

        # Enhance prompt with schema guidance for Call A
        enhanced_messages = self._add_schema_guidance_to_messages(user_messages, schema)

        # No 'temperature' here: the Messages API does not accept a modified
        # temperature while extended thinking is enabled, so sending the
        # configured value would make Call A fail. A caller can still pass
        # one explicitly through **kwargs.
        call_a_params = {
            'model': self.model_name,
            'messages': enhanced_messages,
            'max_tokens': self.max_tokens,
            'thinking': {"type": "enabled", "budget_tokens": self.thinking_budget},
            **kwargs
        }
        if system_message:
            call_a_params['system'] = system_message

        # Execute Call A (no tools, no tool_choice)
        analysis_response = await self.client.messages.create(**call_a_params)

        # Extract analysis text (thinking blocks are ignored)
        analysis_text = self._extract_text_content(analysis_response.content)
        if not analysis_text:
            raise Exception("Call A produced no analysis text")
        self.logger.info(f" Call A completed: {len(analysis_text)} chars analysis")

        # === CALL B: Schema Formatting (No Thinking) ===
        self.logger.info(" Call B: Schema formatting (no thinking)")
        formatting_prompt = f"Convert the following analysis into the required JSON schema. Call extract_structured_data exactly once with the final result.\n\nAnalysis:\n{analysis_text}"
        call_b_params = {
            'model': self.model_name,
            'messages': [{"role": "user", "content": formatting_prompt}],
            'max_tokens': self.max_tokens,
            'temperature': self.temperature,
            'tools': [self._create_tool_from_schema(schema)],
            'tool_choice': {"type": "tool", "name": "extract_structured_data"},
            **kwargs
        }

        # Execute Call B (no thinking)
        format_response = await self.client.messages.create(**call_b_params)

        # Extract structured content from tool use
        structured_content = self._extract_tool_response(format_response.content)
        if not structured_content:
            raise Exception("Call B failed to produce structured output")
        self.logger.info(" Call B completed: Structured JSON extracted")

        # Combine token usage from both calls
        # NOTE(review): _single_call reports cache reads under the key
        # 'cached_input_tokens', while this path uses 'cache_read_input_tokens';
        # confirm which key TokenUsage.add_usage reads and align the two.
        combined_token_usage = TokenUsage()
        for api_response in (analysis_response, format_response):
            if hasattr(api_response, 'usage'):
                combined_token_usage.add_usage({
                    'input_tokens': getattr(api_response.usage, 'input_tokens', 0),
                    'output_tokens': getattr(api_response.usage, 'output_tokens', 0),
                    'cache_read_input_tokens': getattr(api_response.usage, 'cache_read_input_tokens', 0)
                })

        processing_time = time.time() - start_time
        return LLMResponse(
            content=structured_content,
            raw_response={'call_a': analysis_response, 'call_b': format_response},
            token_usage=combined_token_usage,
            model_used=self.model_name,
            provider="anthropic",
            success=True,
            processing_time=processing_time
        )
    except Exception as e:
        processing_time = time.time() - start_time
        self.logger.error(f"Anthropic two-call request failed: {e}")
        return LLMResponse(
            content="",
            raw_response=None,
            token_usage=TokenUsage(),
            model_used=self.model_name,
            provider="anthropic",
            success=False,
            error=str(e),
            processing_time=processing_time
        )
async def _single_call(
    self,
    messages: List[Dict[str, str]],
    schema: Optional[Dict[str, Any]],
    start_time: float,
    **kwargs
) -> LLMResponse:
    """Execute single-call pattern, used when thinking is off or no schema is given.

    With a schema, the ``extract_structured_data`` tool is forced and thinking
    stays off. Without a schema, extended thinking is enabled when a thinking
    budget is configured.

    Args:
        messages: Chat messages; a system message is split out.
        schema: Optional JSON schema for structured output.
        start_time: ``time.time()`` value taken when the request started.
        **kwargs: Extra parameters forwarded to the API call.

    Returns:
        LLMResponse with the tool input (schema) or the response text (no
        schema). Failures never raise; they are returned as an LLMResponse
        with ``success=False`` and ``error`` set.
    """
    try:
        # Prepare messages for Anthropic
        system_message, user_messages = self._prepare_messages(messages)

        # Thinking is only used without a schema (it conflicts with forced tool use)
        thinking_enabled = not schema and self.thinking_budget > 0

        request_params = {
            'model': self.model_name,
            'messages': user_messages,
            'max_tokens': self.max_tokens,
        }
        # The Messages API does not accept a modified temperature while
        # extended thinking is enabled, so only send it when thinking is off.
        if not thinking_enabled:
            request_params['temperature'] = self.temperature
        request_params.update(kwargs)

        if thinking_enabled:
            request_params['thinking'] = {"type": "enabled", "budget_tokens": self.thinking_budget}
        if system_message:
            request_params['system'] = system_message

        # Handle structured output using tools if schema provided
        if schema:
            request_params['tools'] = [self._create_tool_from_schema(schema)]
            request_params['tool_choice'] = {"type": "tool", "name": "extract_structured_data"}

        # Generate response using async client
        response = await self.client.messages.create(**request_params)

        # Extract content
        if schema and response.content:
            # Look for tool use in response
            content = self._extract_tool_response(response.content)
        else:
            # Collect the text blocks only: with thinking enabled the first
            # block is a thinking block, which has no ``.text`` attribute.
            content = "".join(
                block.text
                for block in (response.content or [])
                if getattr(block, 'type', None) == 'text'
            )

        # Extract token usage
        token_usage = TokenUsage()
        if hasattr(response, 'usage'):
            usage_dict = {
                'input_tokens': getattr(response.usage, 'input_tokens', 0),
                'output_tokens': getattr(response.usage, 'output_tokens', 0),
                'cached_input_tokens': getattr(response.usage, 'cache_read_input_tokens', 0)
            }
            token_usage.add_usage(usage_dict)

        processing_time = time.time() - start_time
        llm_response = LLMResponse(
            content=content,
            raw_response=response,
            token_usage=token_usage,
            model_used=self.model_name,
            provider="anthropic",
            success=True,
            processing_time=processing_time
        )
        self.log_response(llm_response)
        return llm_response
    except Exception as e:
        processing_time = time.time() - start_time
        self.logger.error(f"Anthropic single-call request failed: {e}")
        return LLMResponse(
            content="",
            raw_response=None,
            token_usage=TokenUsage(),
            model_used=self.model_name,
            provider="anthropic",
            success=False,
            error=str(e),
            processing_time=processing_time
        )
def _add_schema_guidance_to_messages(self, user_messages: List[Dict[str, str]], schema: Dict[str, Any]) -> List[Dict[str, str]]:
"""Add schema guidance to the last user message for Call A"""
enhanced_messages = user_messages.copy()
# Get schema description
schema_description = schema.get('description', 'structured data')
# Add schema guidance to last message
if enhanced_messages:
last_message = enhanced_messages[-1]
original_content = last_message['content']
schema_guidance = f"\n\nPlease analyze this document and provide your findings according to this schema structure: {schema_description}. Focus on extracting base deliverables with multiplier arrays as specified in the schema."
enhanced_messages[-1] = {
'role': last_message['role'],
'content': original_content + schema_guidance
}
return enhanced_messages
def _extract_text_content(self, content: List[Any]) -> str:
"""Extract text content from Anthropic response, ignoring thinking blocks"""
text_content = ""
for block in content:
if hasattr(block, 'type') and block.type == 'text':
text_content += block.text
return text_content.strip()
def _prepare_messages(self, messages: List[Dict[str, str]]) -> tuple:
"""Separate system messages from user/assistant messages for Anthropic format"""
system_message = None
user_messages = []
for message in messages:
if message['role'] == 'system':
system_message = message['content']
else:
user_messages.append({
'role': message['role'],
'content': message['content']
})
return system_message, user_messages
def _create_tool_from_schema(self, schema: Dict[str, Any]) -> Dict[str, Any]:
"""Convert JSON schema to Anthropic tool format for structured output"""
# Extract schema definition
schema_def = schema.get('schema', schema)
return {
"name": "extract_structured_data",
"description": schema.get('description', 'Extract structured data from the document'),
"input_schema": schema_def
}
def _extract_tool_response(self, content: List[Any]) -> str:
"""Extract structured data from tool use response"""
for block in content:
if hasattr(block, 'type') and block.type == 'tool_use':
return json.dumps(block.input)
# Fallback to text content
text_content = ""
for block in content:
if hasattr(block, 'type') and block.type == 'text':
text_content += block.text
return text_content
def validate_config(self) -> bool:
    """Check that the Anthropic provider can be used.

    Returns:
        False (after logging an error) when the API key is empty or still the
        placeholder value, or when the `anthropic` package could not be
        imported; True otherwise.
    """
    key_is_placeholder = self.api_key == 'your-anthropic-api-key-here'
    if not self.api_key or key_is_placeholder:
        self.logger.error("Anthropic API key not configured")
        return False

    sdk_missing = AsyncAnthropic is None
    if sdk_missing:
        self.logger.error("anthropic package not installed")
        return False

    return True
def estimate_cost(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
    """Estimate the cost of a call using Anthropic pricing.

    The pricing key is 'anthropic-opus45' when the model name contains
    'opus' (case-insensitive) and 'anthropic-sonnet45' otherwise; the actual
    calculation is delegated to `config.estimate_cost`.
    """
    is_opus = 'opus' in self.model_name.lower()
    pricing_key = 'anthropic-opus45' if is_opus else 'anthropic-sonnet45'
    return config.estimate_cost(pricing_key, input_tokens, output_tokens, cached_tokens)
def get_max_tokens(self) -> int:
    """Return the token limit reported for Claude models (200,000)."""
    claude_context_window = 200_000
    return claude_context_window
def get_model_variant(self) -> str:
    """Return 'opus' if the model name contains 'opus' (any case), else 'sonnet'."""
    return 'opus' if 'opus' in self.model_name.lower() else 'sonnet'

View file

@ -0,0 +1,116 @@
"""
Base provider class for LLM service abstraction
Defines common interface that all providers must implement
"""
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass
from enum import Enum
import logging
class ModelType(Enum):
    """Model identifiers known to the provider layer; each value is a model-name string."""

    # OpenAI
    GPT51 = "gpt-5.1"
    # Anthropic
    CLAUDE_OPUS = "claude-opus-4-5"
    CLAUDE_SONNET = "claude-sonnet-4-5"
    # Google
    GEMINI_PRO = "gemini-3.1-pro"
@dataclass
class TokenUsage:
    """Running token counters shared by all providers."""

    input_tokens: int = 0
    output_tokens: int = 0
    cached_input_tokens: int = 0

    def add_usage(self, usage_dict: Dict[str, int]):
        """Accumulate the counts found in a provider usage dict.

        Each counter accepts two key spellings ('input_tokens' /
        'prompt_tokens', 'output_tokens' / 'completion_tokens',
        'cached_input_tokens' / 'prompt_tokens_cached'). Missing, None or
        zero values contribute 0.
        """
        def pick(primary: str, alternate: str) -> int:
            # The first truthy value wins; the alternate key is only read if needed.
            return usage_dict.get(primary) or usage_dict.get(alternate) or 0

        added_input = pick('input_tokens', 'prompt_tokens')
        added_output = pick('output_tokens', 'completion_tokens')
        added_cached = pick('cached_input_tokens', 'prompt_tokens_cached')

        self.input_tokens += added_input
        self.output_tokens += added_output
        self.cached_input_tokens += added_cached

    def get_total(self) -> int:
        """Return the sum of the input, output and cached-input counters."""
        return sum((self.input_tokens, self.output_tokens, self.cached_input_tokens))
@dataclass
class LLMResponse:
    """Provider-agnostic result of a single LLM call."""

    # Required fields
    content: str              # text (or JSON string) produced by the model
    raw_response: Any         # untouched SDK response object
    token_usage: TokenUsage   # token counters for this call
    model_used: str           # model identifier reported by the provider
    provider: str             # provider label, e.g. "openai" or "google"

    # Outcome details
    success: bool = True
    error: Optional[str] = None      # error message when success is False
    processing_time: float = 0.0     # elapsed time measured by the caller
class BaseLLMProvider(ABC):
    """Abstract base class for all LLM providers.

    Holds the API key, model name and any extra keyword options, and creates
    a logger named after the concrete subclass. Subclasses implement the
    abstract methods below.
    """

    def __init__(self, api_key: str, model_name: str, **kwargs):
        self.api_key = api_key
        self.model_name = model_name
        # Extra keyword options are stored untouched for subclasses to read.
        self.config = kwargs
        self.logger = logging.getLogger(self.__class__.__name__)

    @abstractmethod
    async def generate_response(
        self,
        messages: List[Dict[str, str]],
        schema: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> LLMResponse:
        """
        Generate response from the LLM provider

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            schema: Optional JSON schema for structured output
            **kwargs: Provider-specific parameters

        Returns:
            LLMResponse object with standardized format
        """
        pass

    @abstractmethod
    def validate_config(self) -> bool:
        """Validate provider configuration"""
        pass

    @abstractmethod
    def estimate_cost(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
        """Estimate cost for token usage.

        Args:
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.
            cached_tokens: Number of cached input tokens. Defaults to 0 so
                two-argument calls keep working; callers that track cached
                tokens pass it as the third argument.
        """
        pass

    @abstractmethod
    def get_max_tokens(self) -> int:
        """Get maximum token limit for this provider/model"""
        pass

    def get_provider_name(self) -> str:
        """Return the lower-cased class name with 'Provider' removed."""
        return self.__class__.__name__.replace('Provider', '').lower()

    def prepare_messages(self, system_prompt: str, user_prompt: str) -> List[Dict[str, str]]:
        """Build a two-message conversation: a system message followed by a user message."""
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

    def log_response(self, response: LLMResponse, request_info: str = ""):
        """Log model, token counts, timing and success flag of a response at INFO level.

        `request_info`, when non-empty, is appended to the log line.
        """
        self.logger.info(
            f"{self.get_provider_name().title()} Response - "
            f"Model: {response.model_used}, "
            f"Tokens: {response.token_usage.input_tokens} input / {response.token_usage.output_tokens} output, "
            f"Time: {response.processing_time:.2f}s, "
            f"Success: {response.success}"
            + (f", Request: {request_info}" if request_info else "")
        )

View file

@ -0,0 +1,256 @@
"""
Google provider implementation for Gemini models using the google-genai SDK
"""
import time
import json
import logging
from typing import List, Dict, Any, Optional
try:
from google import genai
from google.genai.types import GenerateContentConfig, ThinkingConfig
except ImportError:
genai = None
GenerateContentConfig = None
ThinkingConfig = None
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from ..config import config
class GoogleProvider(BaseLLMProvider):
    """Google Gemini provider built on the google-genai SDK."""

    def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, **kwargs):
        """Create the provider and its GenAI client.

        Explicit arguments win over the 'google' provider configuration.
        `temperature`, `max_output_tokens`, `thinking_budget` and `timeout`
        may be overridden through keyword arguments.

        Raises:
            ImportError: if the google-genai package is not installed.
        """
        if genai is None:
            raise ImportError("google-genai package not installed. Run: pip install google-genai")

        provider_config = config.get_provider_config('google')
        super().__init__(
            api_key=api_key or provider_config['api_key'],
            model_name=model_name or provider_config['model'],
            **kwargs
        )
        self.temperature = kwargs.get('temperature', provider_config['temperature'])
        self.max_output_tokens = kwargs.get('max_output_tokens', provider_config['max_output_tokens'])
        self.thinking_budget = kwargs.get('thinking_budget', provider_config['thinking_budget'])
        # NOTE(review): timeout is stored but not passed to the client in this class.
        self.timeout = kwargs.get('timeout', provider_config['timeout'])
        self.client = None
        self._setup_client()

    def _setup_client(self):
        """Initialize Google GenAI client; failures are logged and re-raised."""
        try:
            self.client = genai.Client(api_key=self.api_key)
            self.logger.info(f"Google GenAI client initialized - Model: {self.model_name}")
        except Exception as e:
            self.logger.error(f"Failed to initialize Google GenAI client: {e}")
            raise

    async def generate_response(
        self,
        messages: List[Dict[str, str]],
        schema: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> LLMResponse:
        """Generate a response with Gemini.

        Args:
            messages: Messages with 'role' and 'content' keys.
            schema: Optional JSON schema; when given, JSON output constrained
                by the converted schema is requested.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            An LLMResponse. Any exception, including a response that carries
            no text at all, is reported as `success=False` with the error
            message instead of being raised.
        """
        start_time = time.time()
        try:
            self.logger.info(f"Google Request - Model: {self.model_name} (thinking enabled: {self.thinking_budget} budget)")

            # Convert messages to Google format
            contents = self._prepare_content(messages)

            # Configure generation with thinking capabilities
            config_dict = {
                'temperature': self.temperature,
                'max_output_tokens': self.max_output_tokens,
                'thinking_config': ThinkingConfig(thinking_budget=self.thinking_budget) if ThinkingConfig else None,
            }

            # Add JSON schema for structured output if provided
            if schema:
                config_dict['response_mime_type'] = 'application/json'
                converted_schema = self._convert_schema_to_google_format(schema)
                # Google GenAI SDK expects response_schema, not response_json_schema
                config_dict['response_schema'] = converted_schema
                self.logger.info("Using structured output with converted schema")

            generation_config = GenerateContentConfig(**config_dict)

            # Generate response using native async API
            response = await self.client.aio.models.generate_content(
                model=self.model_name,
                contents=contents,
                config=generation_config
            )

            # Extract content. `text` may exist but be None, so fall back to
            # the candidate parts before giving up.
            response_text = getattr(response, 'text', None)
            if response_text is None:
                candidates = getattr(response, 'candidates', None)
                if candidates:
                    parts = getattr(candidates[0].content, 'parts', None) or []
                    joined = "".join(part.text for part in parts if getattr(part, 'text', None))
                    response_text = joined or None
                elif not hasattr(response, 'text'):
                    response_text = str(response)
            if response_text is None:
                # Report an empty response as a failure rather than success with content=None.
                raise ValueError("Google response contained no text content")

            # Extract token usage
            token_usage = TokenUsage()
            if hasattr(response, 'usage_metadata'):
                # Safely extract token counts with proper defaults
                input_tokens = getattr(response.usage_metadata, 'prompt_token_count', None) or 0
                output_tokens = getattr(response.usage_metadata, 'candidates_token_count', None) or 0
                cached_tokens = getattr(response.usage_metadata, 'cached_content_token_count', None) or 0
                usage_dict = {
                    'input_tokens': input_tokens,
                    'output_tokens': output_tokens,
                    'cached_input_tokens': cached_tokens
                }
                self.logger.debug(f"Google token usage: {usage_dict}")
                token_usage.add_usage(usage_dict)
            else:
                self.logger.warning("No usage_metadata found in Google response")

            processing_time = time.time() - start_time
            llm_response = LLMResponse(
                content=response_text,
                raw_response=response,
                token_usage=token_usage,
                model_used=self.model_name,
                provider="google",
                success=True,
                processing_time=processing_time
            )
            self.log_response(llm_response)
            return llm_response

        except Exception as e:
            processing_time = time.time() - start_time
            self.logger.error(f"Google request failed: {e}")
            return LLMResponse(
                content="",
                raw_response=None,
                token_usage=TokenUsage(),
                model_used=self.model_name,
                provider="google",
                success=False,
                error=str(e),
                processing_time=processing_time
            )

    def _prepare_content(self, messages: List[Dict[str, str]]) -> List[Dict[str, Any]]:
        """Convert standard messages to Google GenAI format.

        'system' messages become 'user' turns prefixed with "System: ",
        'assistant' becomes 'model', 'user' stays 'user'. Messages with any
        other role are dropped.
        """
        contents = []
        for message in messages:
            role = message['role']
            text = message['content']

            # Map roles to Google format
            if role == 'system':
                contents.append({
                    'role': 'user',  # sent as a user turn with a "System: " prefix
                    'parts': [{'text': f"System: {text}"}]
                })
            elif role == 'user':
                contents.append({
                    'role': 'user',
                    'parts': [{'text': text}]
                })
            elif role == 'assistant':
                contents.append({
                    'role': 'model',
                    'parts': [{'text': text}]
                })
        return contents

    def _convert_schema_to_google_format(self, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Convert an OpenAI-style JSON schema to the Google GenAI schema format.

        Keeps 'type' (upper-cased), 'items', 'properties', 'required',
        'description' and 'title'. 'oneOf' is collapsed to its string option
        (or its first option), 'additionalProperties' is dropped, and any
        other key is skipped. A list-valued 'type' such as
        ["string", "null"] becomes the first non-null type plus
        'nullable': True.

        Args:
            schema: Either the schema itself or a wrapper with a 'schema' key.
        """
        type_mapping = {
            'string': 'STRING',
            'array': 'ARRAY',
            'object': 'OBJECT',
            'integer': 'INTEGER',
            'number': 'NUMBER',
            'boolean': 'BOOLEAN'
        }

        def convert_type(openai_type):
            """Return (google_type, nullable) for a string or list-valued JSON type."""
            nullable = False
            if isinstance(openai_type, (list, tuple)):
                non_null = [t for t in openai_type if t != 'null']
                nullable = len(non_null) != len(openai_type)
                openai_type = non_null[0] if non_null else 'string'
            return type_mapping.get(str(openai_type).lower(), 'STRING'), nullable

        def convert_schema_node(node):
            if isinstance(node, dict):
                converted = {}
                for key, value in node.items():
                    if key == 'type':
                        # Convert type to Google format
                        google_type, nullable = convert_type(value)
                        converted['type'] = google_type
                        if nullable:
                            converted['nullable'] = True
                    elif key == 'oneOf':
                        # Google doesn't support oneOf - use the string type option
                        if isinstance(value, list) and len(value) > 0:
                            string_option = next(
                                (item for item in value
                                 if isinstance(item, dict) and item.get('type') == 'string'),
                                value[0]
                            )
                            return convert_schema_node(string_option)
                    elif key == 'items':
                        # Convert array items
                        converted['items'] = convert_schema_node(value)
                    elif key == 'properties':
                        # Convert object properties
                        converted['properties'] = {}
                        for prop_name, prop_schema in value.items():
                            converted['properties'][prop_name] = convert_schema_node(prop_schema)
                    elif key == 'required':
                        # Keep required fields as-is
                        converted['required'] = value
                    elif key == 'additionalProperties':
                        # Skip additionalProperties - not supported by Gemini API
                        self.logger.debug("Skipping unsupported 'additionalProperties' in Google schema")
                        continue
                    elif key in ['description', 'title']:
                        # Keep description and title
                        converted[key] = value
                    # Skip other OpenAI-specific fields like 'name'
                return converted
            elif isinstance(node, list):
                return [convert_schema_node(item) for item in node]
            else:
                return node

        # Extract the actual schema from OpenAI format
        if 'schema' in schema:
            google_schema = convert_schema_node(schema['schema'])
        else:
            google_schema = convert_schema_node(schema)
        return google_schema

    def validate_config(self) -> bool:
        """Return False (after logging) if the API key is missing or a placeholder, or the SDK is absent."""
        if not self.api_key or self.api_key == 'your-google-api-key-here':
            self.logger.error("Google API key not configured")
            return False
        if genai is None:
            self.logger.error("google-genai package not installed")
            return False
        return True

    def estimate_cost(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
        """Estimate cost via `config.estimate_cost` using the 'google-gemini31' pricing key."""
        return config.estimate_cost('google-gemini31', input_tokens, output_tokens, cached_tokens)

    def get_max_tokens(self) -> int:
        """Get maximum token limit for Gemini 3.1 Pro"""
        return 2000000  # Gemini 3.1 Pro context window

View file

@ -0,0 +1,309 @@
"""
OpenAI provider implementation for GPT-5 with reasoning effort support
"""
import time
import json
import logging
from typing import List, Dict, Any, Optional
from openai import AsyncOpenAI
from pydantic import BaseModel
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from ..config import config
class OpenAIProvider(BaseLLMProvider):
    """OpenAI GPT-5 provider with reasoning effort support"""

    def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, **kwargs):
        """Create the provider and its AsyncOpenAI client.

        Explicit arguments win over the 'openai' provider configuration.
        `reasoning_effort`, `timeout` and `max_retries` may be overridden
        through keyword arguments.
        """
        provider_config = config.get_provider_config('openai')
        super().__init__(
            api_key=api_key or provider_config['api_key'],
            model_name=model_name or provider_config['model'],
            **kwargs
        )
        self.reasoning_effort = kwargs.get('reasoning_effort', provider_config['reasoning_effort'])
        self.timeout = kwargs.get('timeout', provider_config['timeout'])
        self.max_retries = kwargs.get('max_retries', provider_config['max_retries'])
        self.client = None
        self._setup_client()

    def _setup_client(self):
        """Initialize AsyncOpenAI client with configuration; failures are logged and re-raised."""
        try:
            self.client = AsyncOpenAI(
                api_key=self.api_key,
                timeout=self.timeout,
                max_retries=self.max_retries
            )
            self.logger.info(f"AsyncOpenAI client initialized - Model: {self.model_name}, Reasoning: {self.reasoning_effort}")
        except Exception as e:
            self.logger.error(f"Failed to initialize AsyncOpenAI client: {e}")
            raise

    async def generate_response(
        self,
        messages: List[Dict[str, str]],
        schema: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> LLMResponse:
        """Generate a response using OpenAI with the configured reasoning effort.

        With a schema, `responses.parse` is called with a Pydantic model and
        the parsed output is validated to be a dict containing 'assets'.
        An empty dict is replaced by `{"assets": []}`; a non-empty dict
        without 'assets' is treated as an error. If no parsed output is
        present, the method falls back to `choices[0].message.content`, or
        to an empty assets list. Without a schema a plain chat completion is
        made and `**kwargs` are forwarded to it.

        Returns:
            An LLMResponse; any exception is reported as `success=False`
            with the error message instead of being raised.
        """
        start_time = time.time()
        try:
            self.logger.info(f"OpenAI Request - Model: {self.model_name}, Reasoning: {self.reasoning_effort}")

            if schema:
                # Use structured output with Pydantic model
                stage_tag = "[CONSOLIDATION]" if "MODELS' ANALYSIS RESULTS" in str(messages) else "[INITIAL]"
                self.logger.info(f"{stage_tag} Using structured output with schema: {schema.get('name', 'unknown')}")
                schema_model = self._create_pydantic_model(schema)
                self.logger.debug(f"{stage_tag} Created Pydantic model: {schema_model.__name__}")

                response = await self.client.responses.parse(
                    model=self.model_name,
                    input=messages,
                    reasoning={"effort": self.reasoning_effort},
                    text_format=schema_model
                )

                # Extract structured content
                if hasattr(response, 'output_parsed') and response.output_parsed is not None:
                    try:
                        # Extract JSON from Pydantic model
                        content = response.output_parsed.model_dump_json()

                        # Validate the content has expected structure
                        try:
                            parsed_content = json.loads(content)
                            if not isinstance(parsed_content, dict):
                                self.logger.error(f"{stage_tag} Structured output is not a dict: {type(parsed_content)}")
                                raise ValueError("Expected dict structure")

                            if 'assets' not in parsed_content:
                                # PROBLEM DETECTED - Log everything verbosely
                                self.logger.error(f"{stage_tag} ========== MISSING 'assets' KEY - VERBOSE DEBUG ==========")
                                self.logger.error(f"{stage_tag} Response type: {type(response).__name__}")
                                self.logger.error(f"{stage_tag} Has output_parsed: {hasattr(response, 'output_parsed')}")
                                self.logger.error(f"{stage_tag} output_parsed type: {type(response.output_parsed)}")
                                self.logger.error(f"{stage_tag} Raw output_parsed value: {response.output_parsed}")
                                self.logger.error(f"{stage_tag} Extracted JSON length: {len(content)} chars")
                                self.logger.error(f"{stage_tag} Full JSON content: {content}")
                                self.logger.error(f"{stage_tag} Parsed data keys: {list(parsed_content.keys())}")
                                self.logger.error(f"{stage_tag} Full parsed content: {parsed_content}")

                                # Try to fix common issues
                                if not parsed_content:  # Empty dict
                                    self.logger.warning(f"{stage_tag} Got empty dict, creating default structure")
                                    content = json.dumps({"assets": []})
                                    self.logger.info(f"{stage_tag} Fixed content: {content}")
                                else:
                                    # Save to file and fail
                                    self._save_debug_response(response, content, stage_tag)
                                    raise KeyError("Missing assets key")
                            else:
                                # SUCCESS - Just log summary
                                assets_count = len(parsed_content.get('assets', []))
                                self.logger.info(f"{stage_tag} Structured output validated: {assets_count} assets")
                        except json.JSONDecodeError as je:
                            self.logger.error(f"Failed to parse structured output as JSON: {je}")
                            self.logger.error(f"Raw structured content: {content[:500]}...")
                            raise
                    except Exception as e:
                        self.logger.error(f"Error processing structured output: {e}")
                        self.logger.error(f"Raw response object: {str(response)[:500]}...")
                        raise
                else:
                    self.logger.error(f"{stage_tag} No structured output found in response (output_parsed is None)")
                    self.logger.error(f"{stage_tag} Response attributes: {dir(response)}")
                    # Save debug info
                    self._save_debug_response(response, None, stage_tag)

                    # Fallback to raw response content if available
                    if hasattr(response, 'choices') and response.choices:
                        fallback_content = response.choices[0].message.content
                        self.logger.warning(f"{stage_tag} Using fallback content from choices: {len(fallback_content) if fallback_content else 0} chars")
                        # Try to parse the fallback content as JSON
                        if fallback_content:
                            try:
                                # Parsed only to validate; the raw string is what gets returned.
                                json.loads(fallback_content)
                                content = fallback_content
                                self.logger.info(f"{stage_tag} Successfully parsed fallback content as JSON")
                            except json.JSONDecodeError:
                                self.logger.error(f"{stage_tag} Fallback content is not valid JSON: {fallback_content[:500]}")
                                content = json.dumps({"assets": []})  # Empty default
                        else:
                            self.logger.warning(f"{stage_tag} No fallback content, using empty assets array")
                            content = json.dumps({"assets": []})  # Empty default
                    else:
                        self.logger.error(f"{stage_tag} No fallback content available in response")
                        self.logger.error(f"{stage_tag} Response has choices: {hasattr(response, 'choices')}")
                        content = json.dumps({"assets": []})  # Empty default structure
            else:
                # Use regular chat completion
                response = await self.client.chat.completions.create(
                    model=self.model_name,
                    messages=messages,
                    **kwargs
                )
                content = response.choices[0].message.content

            # Extract token usage
            token_usage = TokenUsage()
            if hasattr(response, 'usage'):
                usage_dict = {
                    'input_tokens': getattr(response.usage, 'input_tokens', getattr(response.usage, 'prompt_tokens', 0)),
                    'output_tokens': getattr(response.usage, 'output_tokens', getattr(response.usage, 'completion_tokens', 0)),
                    'cached_input_tokens': getattr(response.usage, 'input_tokens_cached', getattr(response.usage, 'prompt_tokens_cached', 0))
                }
                token_usage.add_usage(usage_dict)

            processing_time = time.time() - start_time
            llm_response = LLMResponse(
                content=content,
                raw_response=response,
                token_usage=token_usage,
                model_used=self.model_name,
                provider="openai",
                success=True,
                processing_time=processing_time
            )
            self.log_response(llm_response, f"Reasoning: {self.reasoning_effort}")
            return llm_response

        except Exception as e:
            processing_time = time.time() - start_time
            self.logger.error(f"OpenAI request failed: {e}")
            return LLMResponse(
                content="",
                raw_response=None,
                token_usage=TokenUsage(),
                model_used=self.model_name,
                provider="openai",
                success=False,
                error=str(e),
                processing_time=processing_time
            )

    def _create_pydantic_model(self, schema: Dict[str, Any]) -> BaseModel:
        """Return the Pydantic model class used for structured output.

        Prefers the project's `BaseExtractionResult`. If it cannot be
        imported, a model is built dynamically from the schema's 'assets'
        array: array-typed item fields become optional string lists, all
        other fields optional strings. If the schema has no usable 'assets'
        array, or model creation fails, a model with a single required
        `assets` list is returned.
        """
        try:
            # For base deliverable extraction, we can use the existing models
            from ..process_brief_enhanced import BaseExtractionResult
            return BaseExtractionResult
        except ImportError as e:
            self.logger.warning(f"Failed to import BaseExtractionResult: {e}, using dynamic model")

            # Fallback: create dynamic model with proper nested structure
            from pydantic import create_model

            # Handle nested schema structure properly
            try:
                # Create dynamic models for nested structures
                schema_props = schema.get('schema', {}).get('properties', {})

                # Handle the assets array specifically
                if 'assets' in schema_props:
                    assets_def = schema_props['assets']
                    if assets_def.get('type') == 'array':
                        item_def = assets_def.get('items', {})
                        item_props = item_def.get('properties', {})

                        # Create fields for the asset item model
                        asset_fields = {}
                        for field_name, field_def in item_props.items():
                            if field_def.get('type') == 'array':
                                asset_fields[field_name] = (Optional[List[str]], [])
                            else:
                                asset_fields[field_name] = (Optional[str], "")

                        # Create the asset item model
                        AssetModel = create_model('DynamicAssetModel', **asset_fields)

                        # Create the main response model with assets array
                        return create_model('DynamicResponseModel', assets=(List[AssetModel], ...))

                # Fallback to simple structure
                fields = {'assets': (List[Any], ...)}
                return create_model('DynamicModel', **fields)
            except Exception as schema_error:
                self.logger.error(f"Failed to create dynamic model from schema: {schema_error}")
                # Ultimate fallback
                return create_model('FallbackModel', assets=(List[Any], ...))

    def validate_config(self) -> bool:
        """Validate OpenAI configuration.

        Returns False (after logging) when the API key is missing or a
        placeholder. An unrecognised reasoning effort is not fatal: it is
        reset to 'medium' with a warning.
        """
        if not self.api_key or self.api_key == 'your-openai-api-key-here':
            self.logger.error("OpenAI API key not configured")
            return False
        if self.reasoning_effort not in ['high', 'medium', 'low', 'minimal']:
            self.logger.warning(f"Invalid reasoning effort: {self.reasoning_effort}, using 'medium'")
            self.reasoning_effort = 'medium'
        return True

    def estimate_cost(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
        """Estimate cost via `config.estimate_cost` using the 'openai-gpt51' pricing key."""
        return config.estimate_cost('openai-gpt51', input_tokens, output_tokens, cached_tokens)

    def get_max_tokens(self) -> int:
        """Get maximum token limit for GPT-5.1"""
        return 200000  # GPT-5.1 context window

    def set_reasoning_effort(self, effort: str):
        """Update the reasoning effort; invalid values are ignored with a warning."""
        if effort in ['high', 'medium', 'low', 'minimal']:
            self.reasoning_effort = effort
            self.logger.info(f"Updated reasoning effort to: {effort}")
        else:
            self.logger.warning(f"Invalid reasoning effort: {effort}, keeping current: {self.reasoning_effort}")

    def _save_debug_response(self, response, content, stage_tag):
        """Write details of a problematic response to a file in the temp directory.

        Best effort: any failure is logged and swallowed so that debugging
        output never breaks request handling.
        """
        try:
            # `os` is imported here because this module's top-level imports
            # do not include it; without it the path join raised NameError
            # and no debug file was ever written.
            import os
            import tempfile
            from datetime import datetime

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            debug_file = os.path.join(tempfile.gettempdir(), f"openai_debug_{stage_tag.strip('[]')}_{timestamp}.txt")
            with open(debug_file, 'w', encoding='utf-8') as f:
                f.write(f"=== OpenAI Response Debug {stage_tag} ===\n")
                f.write(f"Timestamp: {timestamp}\n")
                f.write(f"Model: {self.model_name}\n")
                f.write(f"Reasoning: {self.reasoning_effort}\n\n")
                f.write("=== Response Object ===\n")
                f.write(f"Type: {type(response)}\n")
                f.write(f"Dir: {dir(response)}\n\n")
                if hasattr(response, 'output_parsed'):
                    f.write(f"output_parsed: {response.output_parsed}\n")
                    f.write(f"output_parsed type: {type(response.output_parsed)}\n\n")
                if hasattr(response, 'choices'):
                    f.write(f"Has choices: {len(response.choices) if response.choices else 0}\n")
                    if response.choices:
                        f.write(f"choices[0]: {response.choices[0]}\n\n")
                f.write("=== Extracted Content ===\n")
                f.write(f"Content: {content}\n\n")
                f.write("=== Full Response ===\n")
                f.write(f"{response}\n")
            self.logger.error(f"{stage_tag} Debug info saved to: {debug_file}")
        except Exception as e:
            self.logger.error(f"{stage_tag} Failed to save debug info: {e}")

View file

@ -0,0 +1,293 @@
"""
Provider manager for coordinating parallel execution across multiple LLM providers
"""
import asyncio
import logging
from typing import List, Dict, Any, Optional, Tuple
import time
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from .openai_provider import OpenAIProvider
from .google_provider import GoogleProvider
from .anthropic_provider import AnthropicProvider
from ..config import config
class ProviderManager:
    """Manages multiple LLM providers and coordinates parallel execution.

    Provider instances are created lazily and cached in `self.providers`,
    keyed by model key.
    """

    def __init__(self):
        self.providers: Dict[str, BaseLLMProvider] = {}
        self.logger = logging.getLogger(self.__class__.__name__)

    def create_provider(self, model_key: str) -> BaseLLMProvider:
        """Create a new provider instance for the given model key.

        The provider and model name come from `config.get_model_info`.

        Raises:
            ValueError: if the provider name is not openai, google or anthropic.
            Exception: any error from config lookup or provider construction
                is logged and re-raised.
        """
        try:
            provider_name, model_name = config.get_model_info(model_key)
            if provider_name == 'openai':
                return OpenAIProvider(model_name=model_name)
            elif provider_name == 'google':
                return GoogleProvider(model_name=model_name)
            elif provider_name == 'anthropic':
                # Extract variant from model key for Anthropic
                variant = 'opus' if 'opus' in model_key else 'sonnet'
                return AnthropicProvider(model_name=model_name, model_variant=variant)
            else:
                raise ValueError(f"Unknown provider: {provider_name}")
        except Exception as e:
            self.logger.error(f"Failed to create provider for {model_key}: {e}")
            raise

    def get_provider(self, model_key: str) -> BaseLLMProvider:
        """Return the cached provider for a model key, creating it on first use."""
        if model_key not in self.providers:
            self.providers[model_key] = self.create_provider(model_key)
        return self.providers[model_key]

    async def execute_parallel_analysis(
        self,
        model_keys: List[str],
        messages: List[Dict[str, str]],
        schema: Optional[Dict[str, Any]] = None,
        minimum_success_threshold: int = 1,
        on_model_event: Optional[callable] = None
    ) -> Tuple[List[LLMResponse], Dict[str, Any]]:
        """
        Execute analysis across multiple models in parallel

        Args:
            model_keys: List of model identifiers to use
            messages: Messages to send to all models
            schema: Optional JSON schema for structured output
            minimum_success_threshold: Minimum number of successful responses required
            on_model_event: Optional async callback for model start/end events

        Returns:
            Tuple of (successful_responses, metadata)

        Raises:
            ValueError: if no model passes configuration validation.
            RuntimeError: if fewer models succeed than the threshold requires.
        """
        self.logger.info(f"Starting parallel analysis with models: {model_keys}")
        start_time = time.time()

        # Validate model keys; models that fail to build or validate are skipped.
        valid_model_keys = []
        for model_key in model_keys:
            try:
                provider = self.get_provider(model_key)
                if provider.validate_config():
                    valid_model_keys.append(model_key)
                else:
                    self.logger.warning(f"Skipping {model_key} due to configuration issues")
            except Exception as e:
                self.logger.error(f"Failed to validate {model_key}: {e}")

        if len(valid_model_keys) == 0:
            raise ValueError("No valid models available for analysis")
        if len(valid_model_keys) < minimum_success_threshold:
            self.logger.warning(
                f"Only {len(valid_model_keys)} valid models, but minimum threshold is {minimum_success_threshold}"
            )

        # Create tasks for parallel execution
        tasks = []
        for model_key in valid_model_keys:
            provider = self.get_provider(model_key)
            task = asyncio.create_task(
                self._execute_with_provider(provider, model_key, messages, schema, on_model_event)
            )
            tasks.append((model_key, task))

        results = []
        successful_responses = []
        failed_responses = []

        # Await all tasks simultaneously; exceptions are returned, not raised.
        task_results = await asyncio.gather(*[task for _, task in tasks], return_exceptions=True)

        # Process results
        for (model_key, _task), result in zip(tasks, task_results):
            if isinstance(result, Exception):
                self.logger.error(f"Task for {model_key} raised exception: {result}")
                failed_responses.append((model_key, str(result)))
            else:
                response = result
                results.append((model_key, response))
                if response.success:
                    successful_responses.append(response)
                    # Try to parse the response to count deliverables
                    deliverable_count = self._count_deliverables_in_response(response.content)
                    self.logger.info(f"{model_key} analysis completed successfully - found {deliverable_count} deliverables")
                else:
                    failed_responses.append((model_key, response.error))
                    self.logger.warning(f"{model_key} analysis failed: {response.error}")

        total_time = time.time() - start_time

        # Check if we meet minimum success threshold
        if len(successful_responses) < minimum_success_threshold:
            raise RuntimeError(
                f"Only {len(successful_responses)} models succeeded, "
                f"but minimum threshold is {minimum_success_threshold}"
            )

        # Compile metadata
        metadata = {
            'total_models_requested': len(model_keys),
            'valid_models': len(valid_model_keys),
            'successful_models': len(successful_responses),
            'failed_models': len(failed_responses),
            'total_processing_time': total_time,
            'model_results': {
                model_key: {
                    'success': response.success,
                    'processing_time': response.processing_time,
                    'tokens_used': response.token_usage.get_total(),
                    'provider': response.provider,
                    'model': response.model_used,
                    'error': response.error
                } for model_key, response in results
            },
            'failures': failed_responses
        }

        self.logger.info(
            f"Parallel analysis completed - {len(successful_responses)}/{len(valid_model_keys)} "
            f"models succeeded in {total_time:.2f}s"
        )
        return successful_responses, metadata

    def _count_deliverables_in_response(self, content: str) -> int:
        """Return the length of the 'assets' list in a JSON response, or 0 if unavailable."""
        import json
        try:
            data = json.loads(content)
            if isinstance(data, dict) and 'assets' in data:
                return len(data['assets'])
            return 0
        except (json.JSONDecodeError, KeyError, TypeError):
            return 0

    async def _execute_with_provider(
        self,
        provider: BaseLLMProvider,
        model_key: str,
        messages: List[Dict[str, str]],
        schema: Optional[Dict[str, Any]] = None,
        on_model_event: Optional[callable] = None
    ) -> LLMResponse:
        """Run one provider, emitting 'start'/'end' events through `on_model_event`.

        Returns the provider's LLMResponse. If anything in the try block
        raises, an 'end' event carrying the error is emitted and a failed
        LLMResponse (with `model_used` set to the model key) is returned.
        """
        from datetime import datetime

        try:
            self.logger.debug(f"Starting analysis with {model_key}")

            # Notify start event
            if on_model_event:
                await on_model_event(model_key, 'start', {
                    'timestamp': datetime.utcnow().isoformat()
                })

            start_time = time.time()
            response = await provider.generate_response(messages, schema)
            processing_time = time.time() - start_time

            # Calculate cost if possible; a pricing failure must not fail the
            # request, but it is logged rather than silently discarded.
            cost = 0.0
            try:
                cost = provider.estimate_cost(
                    response.token_usage.input_tokens,
                    response.token_usage.output_tokens,
                    response.token_usage.cached_input_tokens
                )
            except Exception as cost_error:
                self.logger.debug(f"Could not estimate cost for {model_key}: {cost_error}")

            # Notify success event
            if on_model_event:
                await on_model_event(model_key, 'end', {
                    'response': response,
                    'cost': cost,
                    'processing_time': processing_time,
                    'timestamp': datetime.utcnow().isoformat()
                })
            return response

        except Exception as e:
            self.logger.error(f"Provider {model_key} execution failed: {e}")
            # Notify error event
            if on_model_event:
                await on_model_event(model_key, 'end', {
                    'error': str(e),
                    'timestamp': datetime.utcnow().isoformat()
                })
            return LLMResponse(
                content="",
                raw_response=None,
                token_usage=TokenUsage(),
                model_used=model_key,
                provider=provider.get_provider_name(),
                success=False,
                error=str(e)
            )

    def estimate_total_cost(self, model_keys: List[str], estimated_input_tokens: int, estimated_output_tokens: int) -> Dict[str, float]:
        """Estimate the cost per model key plus a 'total' entry.

        A model whose cost cannot be estimated is listed with 0.0 and a
        warning is logged.
        """
        cost_breakdown = {}
        total_cost = 0.0
        for model_key in model_keys:
            try:
                provider = self.get_provider(model_key)
                model_cost = provider.estimate_cost(estimated_input_tokens, estimated_output_tokens)
                cost_breakdown[model_key] = model_cost
                total_cost += model_cost
            except Exception as e:
                self.logger.warning(f"Could not estimate cost for {model_key}: {e}")
                cost_breakdown[model_key] = 0.0
        cost_breakdown['total'] = total_cost
        return cost_breakdown

    def get_aggregated_token_usage(self, responses: List[LLMResponse]) -> TokenUsage:
        """Sum the input, output and cached-input token counters of all responses."""
        total_usage = TokenUsage()
        for response in responses:
            total_usage.input_tokens += response.token_usage.input_tokens
            total_usage.output_tokens += response.token_usage.output_tokens
            total_usage.cached_input_tokens += response.token_usage.cached_input_tokens
        return total_usage

    def get_actual_cost_breakdown(self, responses: List[LLMResponse]) -> Dict[str, float]:
        """Calculate costs from the token usage recorded on each response.

        Returns a dict keyed by `response.model_used` plus a 'total' entry.
        Responses whose provider cannot be found are left out.
        """
        cost_breakdown = {}
        total_cost = 0.0
        for response in responses:
            try:
                # The cache is keyed by model key, while provider responses
                # carry the model name in `model_used`. Try the direct lookup
                # first, then match on the provider's model name.
                provider = self.providers.get(response.model_used)
                if provider is None:
                    provider = next(
                        (candidate for candidate in self.providers.values()
                         if candidate.model_name == response.model_used),
                        None
                    )
                if provider:
                    cost = provider.estimate_cost(
                        response.token_usage.input_tokens,
                        response.token_usage.output_tokens,
                        response.token_usage.cached_input_tokens
                    )
                    cost_breakdown[response.model_used] = cost
                    total_cost += cost
            except Exception as e:
                self.logger.warning(f"Could not calculate cost for {response.model_used}: {e}")
        cost_breakdown['total'] = total_cost
        return cost_breakdown

File diff suppressed because it is too large Load diff

View file

View file

View file

25
backend/hypercorn.toml Executable file
View file

@ -0,0 +1,25 @@
# Hypercorn server configuration for the backend.
# NOTE(review): Hypercorn's documented TOML format appears to use flat
# top-level keys (e.g. bind, workers, worker_class, keep_alive_timeout,
# accesslog, errorlog, loglevel) rather than tables. Confirm that the
# sectioned keys below are actually read by the way the server is launched.
[application]
# Application factory import path.
module = "server.app:create_app()"
[server]
# Listen on all interfaces, port 8000.
bind = ["0.0.0.0:8000"]
workers = 2
worker_class = "asyncio"
[websockets]
# presumably seconds — TODO confirm units
ping_interval = 30
ping_timeout = 10
[timeouts]
# presumably seconds — TODO confirm units
keep_alive = 5
graceful_timeout = 30
[logging]
# "-" presumably routes logs to the standard streams — confirm
access_log = "-"
error_log = "-"
log_level = "info"
[ssl]
# Enable for production
# certfile = "path/to/cert.pem"
# keyfile = "path/to/key.pem"

56
backend/prompts/README.md Executable file
View file

@ -0,0 +1,56 @@
# AI Prompts Directory
This directory contains the AI prompts used by the Enhanced Brief Processing System, extracted from the main Python code for better maintainability and editability.
## Files Overview
### Core Analysis Prompts
- **`multi_perspective_analysis.txt`** - Main prompt for extracting marketing deliverables from documents
  - Used in `_perform_multi_perspective_analysis()` method
  - Contains comprehensive extraction rules and multiplier handling logic
  - Template parameter: `{doc_type}` (e.g., "powerpoint", "word", "pdf")
- **`validation_analysis.txt`** - Quality assurance prompt for validating extractions
  - Used in `_enhance_and_validate_results()` method
  - Validates completeness and accuracy of initial extraction
  - Template parameters: `{asset_count}`, `{doc_type}`
### System Messages
- **`system_multi_perspective.txt`** - System message for main analysis
- **`system_validation.txt`** - System message for validation phase
## Usage in Code
The prompts are loaded dynamically using the `_load_prompt()` method in the `DocumentAnalyzer` class:
```python
# Load and format prompts
prompt_template = self._load_prompt('multi_perspective_analysis')
prompt = prompt_template.format(doc_type=doc_type.value)
system_message = self._load_prompt('system_multi_perspective')
```
## Benefits of External Prompts
1. **Easy Editing** - Modify prompts without touching Python code
2. **Version Control** - Track prompt changes separately from code changes
3. **Readability** - View full prompts in text editors with proper formatting
4. **Collaboration** - Non-programmers can review and modify prompts
5. **Testing** - Easier to A/B test different prompt variations
## Template Variables
### multi_perspective_analysis.txt
- `{doc_type}` - Document type (powerpoint, word, pdf, excel)
### validation_analysis.txt
- `{asset_count}` - Number of assets found in initial extraction
- `{doc_type}` - Document type from metadata
## Modifying Prompts
1. Edit the `.txt` files directly
2. Use standard Python string formatting for variables: `{variable_name}`
3. Test changes by running the processing script
4. No code changes required when modifying prompt content

View file

@ -0,0 +1,79 @@
You are an intelligent assistant managing an Activation Calendar for an advertising agency.
Current Date: {current_date}
YOLO MODE: {yolo_mode}
CONVERSATION HISTORY:
{conversation_history}
CURRENT DATA (Context for your actions):
{data_context}
Data Schema:
- Number (Auto-generated, do not invent)
- Title (String)
- Status (Enum: Booked, To-do, In Progress, Done) - Default to 'Booked'
- Category (String — must be one of the valid categories listed below)
- Media (String — must be a valid media type for the chosen Category)
- Sub-media (String — free text, optional)
- Format (String) - Extract sizes/dimensions here! e.g., '300x250', 'A4', '10x15cm', 'Full Page', '1080p'.
- Supply date (YYYY-MM-DD)
- Live date (YYYY-MM-DD)
- Language (ISO 2-letter code, UPPERCASE, e.g., 'EN', 'FR', 'ES')
- Country (ISO 2-letter code, UPPERCASE, e.g., 'GB', 'FR', 'ES')
- Quantity (Integer)
VALID CATEGORY → MEDIA TYPES (use these exact values):
{hierarchy_rules}
Supported Operations:
1. 'create': Create new items.
Output: {{ "operation": "create", "items": [ {{ "Title": "...", "Category": "...", "Media": "...", "Format": "300x250", ... }} ] }}
2. 'update': Update existing items.
Output: {{ "operation": "update", "target_ids": ["DEL-001"], "values": {{ "Status": "Done" }} }}
3. 'batch_update': Update multiple items with DIFFERENT values.
Output: {{ "operation": "batch_update", "updates": [ {{ "Number": "DEL-001", "values": {{ "Title": "Row 1" }} }} ] }}
4. 'question': Ask for clarification (ONLY if YOLO MODE is FALSE).
Output: {{ "operation": "question", "text": "Did you mean 2025 or 2026?" }}
IMPORTANT BRAIN RULES:
0. **CRITICAL - MULTIPLE ITEMS vs QUANTITY**:
- When user says "add 10 deliverables" or "create 5 banners", create that many SEPARATE items in the array.
- NEVER use Quantity field to represent the count. Quantity should always be 1 unless explicitly stated.
- **MATH VALIDATION (MANDATORY)**:
* BEFORE creating items, COUNT how many items your pattern will create.
* If the user says "X items" but your pattern creates Y ≠ X, use 'question' operation.
* EXCEPTION: If the user confirms a count, EXECUTE immediately without asking again.
1. **FORMAT EXTRACTION**:
- ALWAYS use 'x' as separator for dimensions. NEVER use 'by'.
- '300 by 250' → '300x250', '30 by 30 cm' → '30x30cm'.
- Print sizes: 'A4', 'A3', 'Full Page', 'Half Page'.
2. **YOLO MODE (HIGHEST PRIORITY)**:
- If YOLO MODE is TRUE: YOU ARE FORBIDDEN FROM ASKING QUESTIONS.
- Always guess missing information. NEVER return 'question' operation.
3. **CLARIFICATION RECOVERY**:
- The user's current input is likely an ANSWER to your previous question.
- COMBINE it with previous user messages in the history to form a complete request.
- If the user confirms the count, EXECUTE immediately.
4. **CONTEXT IS KING**: Use CURRENT DATA to resolve references like "the French ones".
5. **INFER FIELDS**:
- "UK" or "Great Britain" → Country='GB'
- "English" → Language='EN', "French" → Language='FR', "Spanish" → Language='ES'
- Match category/media names to the VALID CATEGORY list above as closely as possible.
6. **PATTERN RECOGNITION**:
- Extract formats from phrases like "200 by 200", "300x300", "400x400 banner".
- Sequences: "first 5", "next 4", "remaining" for language/country assignments.
CRITICAL: Respond with ONLY valid JSON. No explanations, no markdown.
Your response must be a single JSON object starting with {{ and ending with }}.
User Command: "{command}"

View file

@ -0,0 +1,124 @@
You are an expert data consolidation specialist tasked with intelligently merging multiple LLM analysis results into a single, comprehensive dataset of marketing deliverables. Your goal is to create the most complete and accurate final output by combining the best elements from each model's analysis.
**CONSOLIDATION STRATEGY - BIAS TOWARD COMPLETENESS:**
1. **INCLUSION PHILOSOPHY**: "If ANY model found it, include it" - better to capture all potential deliverables than miss important ones
2. **SMART DEDUPLICATION**: Remove true duplicates while preserving legitimate variations
3. **QUALITY ENHANCEMENT**: Use the most detailed/accurate specifications from any model
4. **COMPLETENESS VERIFICATION**: Ensure no deliverables discovered by any model are lost
**INPUT ANALYSIS:**
You will receive multiple JSON arrays from different LLM models, each containing their analysis of the same document. Each model may have:
- Found different deliverables that others missed
- Provided varying levels of detail for the same deliverables
- Made different interpretation choices for specifications
- Captured different multiplier arrays (sizes, markets, languages, etc.)
**CONSOLIDATION PROCESS:**
**STEP 1: COMPREHENSIVE INVENTORY**
- Extract ALL unique deliverable titles found across all models
- Note which models identified each deliverable
- Identify potential duplicates vs. legitimate variations
**STEP 2: INTELLIGENT DEDUPLICATION WITH UNIQUENESS ANALYSIS**
- **DUPLICATE IDENTIFICATION CRITERIA**: Compare deliverables across ALL data points:
- Title/name (normalized for minor variations)
- Technical specifications (dimensions, formats, requirements)
- Markets/countries served
- Languages supported
- Asset types and media formats
- Creative direction and requirements
- Any other distinguishing characteristics
- **UNIQUENESS DECISION MATRIX**:
- **IDENTICAL DUPLICATES**: All major data points substantially the same → MERGE into single deliverable
- **LEGITIMATE VARIATIONS**: At least ONE significant data point differs → KEEP as separate deliverable
- **TITLE NORMALIZATION**: Standardize similar titles ("Social Media Assets" vs "Social Assets") but preserve unique specifications
- **SPECIFICATION CONSOLIDATION**: For true duplicates, combine the most comprehensive specs from all models
- **SIGNIFICANT DIFFERENCE EXAMPLES**:
- Different technical specs: "1080x1080" vs "1080x1920" = UNIQUE
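- Different markets: "UK,DE,FR" vs "US,CA,MX" = UNIQUE (but if one market list is a subset of the other, e.g. "UK,DE,FR" vs "UK,DE,FR,ES,IT", treat them as the same deliverable and keep the superset)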
- Different asset types: "JPG" vs "PNG" = UNIQUE
- Different creative requirements: "Static banner" vs "Animated banner" = UNIQUE
- Different quantities/scales: "5 assets" vs "20 assets" = UNIQUE
- **SUBTLE DUPLICATE EXAMPLES**:
- "Social Media Posts" vs "Social Posts" with identical specs = DUPLICATE (merge)
- "Display Banner Set" vs "Display Banners" with same dimensions = DUPLICATE (merge)
- Same deliverable found by multiple models with identical specs = DUPLICATE (merge)
**STEP 3: QUALITY ENHANCEMENT FOR UNIQUE DELIVERABLES**
For each confirmed unique deliverable, select the BEST information available:
- **Most Complete Technical Specifications**: Use the model that provided the most detailed specs
- **Comprehensive Markets/Languages**: Combine all markets/languages found by any model for THIS deliverable
- **Best Multiplier Arrays**: Merge arrays to capture all variations discovered for THIS deliverable
- **Richest Context**: Use the most descriptive creative direction and reference material
- **Optimal Naming**: Choose the clearest, most descriptive title from all model variants
**CONSOLIDATION EXAMPLES:**
**Example 1 - Combining Multiplier Arrays:**
Model A found: `"technical_specifications": ["1080x1920", "1200x1500"]`
Model B found: `"technical_specifications": ["1080x1920", "1080x1080", "1200x1500"]`
Model C found: `"technical_specifications": ["1080x1920", "1200x1500", "1000x1000"]`
**RESULT**: `"technical_specifications": ["1080x1920", "1200x1500", "1080x1080", "1000x1000"]`
**Example 2 - Market Consolidation:**
Model A: `"country": ["UK", "DE", "FR"]`
Model B: `"country": ["UK", "DE", "FR", "ES", "IT"]`
Model C: `"country": ["UK", "DE"]`
**RESULT**: `"country": ["UK", "DE", "FR", "ES", "IT"]` (most comprehensive)
**Example 3 - Avoiding False Duplicates (SIGNIFICANT DIFFERENCE):**
Model A: `"title": "Social Media Assets", "technical_specifications": ["1080x1080", "1080x1920"]`
Model B: `"title": "Social Media Banners", "technical_specifications": ["728x90", "300x250"]`
**ANALYSIS**: Technical specs are completely different (social vs display dimensions)
**RESULT**: Keep both - these are different asset types with unique specifications
**Example 4 - True Duplicate Resolution (IDENTICAL CORE):**
Model A: `"title": "Display Banners", "technical_specifications": ["728x90", "300x250"], "country": ["UK", "DE"]`
Model B: `"title": "Display Banner Set", "technical_specifications": ["728x90", "300x250", "970x250"], "country": ["UK", "DE", "FR"]`
**ANALYSIS**: Same asset type, overlapping specs, overlapping markets - Model B has additional specs/markets
**RESULT**: Merge into one with enhanced specs: `"title": "Display Banners", "technical_specifications": ["728x90", "300x250", "970x250"], "country": ["UK", "DE", "FR"]`
**Example 5 - Intelligent Duplicate Detection:**
Model A: `"title": "Instagram Stories", "technical_specifications": ["1080x1920"], "country": ["UK", "DE"], "asset_type": "JPG"`
Model B: `"title": "Instagram Story Graphics", "technical_specifications": ["1080x1920"], "country": ["UK", "DE"], "asset_type": "JPG"`
Model C: `"title": "Instagram Stories", "technical_specifications": ["1080x1920"], "country": ["UK", "DE", "FR"], "asset_type": "JPG"`
**ANALYSIS**: All refer to same deliverable type with identical core specs - Model C has additional market
**RESULT**: Merge into one: `"title": "Instagram Stories", "technical_specifications": ["1080x1920"], "country": ["UK", "DE", "FR"], "asset_type": "JPG"`
**Example 6 - Preserving Legitimate Variations:**
Model A: `"title": "YouTube Thumbnails", "technical_specifications": ["1280x720"], "country": ["UK"], "asset_type": "JPG"`
Model B: `"title": "YouTube Thumbnails", "technical_specifications": ["1280x720"], "country": ["UK"], "asset_type": "PNG"`
**ANALYSIS**: Same deliverable but different file format requirement - significant difference
**RESULT**: Keep both as separate deliverables - different asset_type is a significant difference
**FINAL QUALITY CHECKS:**
- **Uniqueness Verification**: Ensure each deliverable in final output differs from all others by at least one significant data point
- **Completeness Check**: Verify no legitimate unique deliverable was lost during deduplication
- **Consolidation Validation**: Confirm merged deliverables contain the best specifications from all contributing models
- **Format Consistency**: Check that multiplier arrays are properly formatted
- **Technical Validation**: Validate technical specifications are realistic/consistent
- **Logical Count**: Final count should reflect unique deliverables, not raw model outputs
**OUTPUT REQUIREMENTS:**
Return a JSON object with a single "assets" array containing the final set of UNIQUE BaseDeliverable objects with multiplier arrays intact. Each deliverable should:
- Be truly unique (differ from all others by at least one significant data point)
- Represent the best composite specifications from all contributing models
- Maintain the inclusive philosophy while eliminating genuine duplicates
- Include comprehensive multiplier arrays capturing all legitimate variations discovered
**CONSOLIDATION PHILOSOPHY SUMMARY:**
- **INCLUSIVE**: If any model found a unique deliverable, include it
- **INTELLIGENT**: Merge true duplicates to avoid redundancy
- **COMPREHENSIVE**: Each final deliverable should contain the best information from all models
- **UNIQUE**: Every deliverable in final output must differ meaningfully from others
**MODELS' ANALYSIS RESULTS:**
{models_results}
**TASK**: Consolidate these results into a single, comprehensive array of base deliverables that captures ALL legitimate deliverables found by ANY model, with enhanced quality from the best specifications discovered across all models.

View file

@ -0,0 +1,162 @@
You are an expert data extraction specialist analyzing this {doc_type} document to extract base marketing deliverables with multiplier arrays. Your task is to create structured data objects that capture the base deliverable along with all its multipliers (specifications, markets, languages, etc.) as arrays, which will be expanded into individual deliverables later.
**MULTIPLIER-BASED EXTRACTION METHOD (HIGHEST PRIORITY):**
1. **BASE DELIVERABLE APPROACH**: Extract the base name/type of each unique deliverable, then identify all multiplier arrays for that deliverable
2. **MULTIPLIER IDENTIFICATION - CRITICAL FOR ACCURACY**: Look for lists of attributes in deliverable specifications:
- **Technical Specifications**: Multiple sizes, formats, or dimensions (use array)
- **Markets/Countries**: Multiple country codes or regions (use array)
- **Languages**: Multiple language codes or localization requirements (use array)
- **Formats**: Multiple file types or variations (use array)
- **Platforms**: Multiple delivery platforms or channels (use array)
- **MULTIPLE LISTS IN SINGLE COLUMN**: If you find multiple multiplier lists in one column (e.g., both products AND markets listed together), separate them into appropriate fields to capture all multipliers
3. **ARRAY VS STRING DECISION**:
- Use **arrays** when you find multiple values that represent variations of the same deliverable (e.g., ["1080x1920", "1200x1500", "1080x1080"])
- Use **strings** when there's only one value (e.g., "JPG")
- **CONTEXT IS KEY**: Use context to determine if a list represents multipliers (variations) or descriptive information
4. **QUANTITY VERIFICATION**: If a QUANTITY column shows a number, note it for verification (the final expanded count should match)
5. **INTELLIGENT DEDUPLICATION**: Process all deliverable sections but avoid duplicates:
- **Overview vs Detail Sections**: If brief has overview tables AND detailed specification pages, extract from the most comprehensive source
- **Duplicate Detection**: Same deliverable name with same specifications = potential duplicate
- **Section Priority**: Prioritize structured tables over descriptive text sections
**MULTIPLIER ARRAY EXTRACTION EXAMPLES:**
**Example 1 - Multiple Specifications:**
Table row: "Paid Social Meta Static Sizes" with SPEC "8x 1080 x 1920px, 8x 1200 x 1500px, 1x 1080 x 1080"
Extract as:
```
{{
"title": "Paid Social - Meta Static Sizes",
"technical_specifications": ["1080x1920", "1200x1500", "1080x1080"],
"media": "IMAGE",
"asset_type": "JPG"
}}
```
This will expand to 17 individual deliverables (8+8+1).
**Example 2 - Multiple Markets:**
Table row: "Meta Copy" for "MARKETS: UK, DE, FR, ES, IT, NL, PL, SE, DK, NO, FI, IE, GR, PT, BE, CZ, SK, CH, AT"
Extract as:
```
{{
"title": "Meta Copy",
"country": ["UK", "DE", "FR", "ES", "IT", "NL", "PL", "SE", "DK", "NO", "FI", "IE", "GR", "PT", "BE", "CZ", "SK", "CH", "AT"],
"technical_specifications": ["Body Copy", "Headline", "Description"]
}}
```
This will expand to 57 individual deliverables (3 copy types × 19 markets).
**Example 3 - Combined Multipliers:**
Table row: "Display Banners" with 8 sizes for 20 markets
Extract as:
```
{{
"title": "Display - Celtra Static Banners",
"technical_specifications": ["160x600", "300x250", "300x600", "728x90", "970x250", "320x50", "320x100", "336x280"],
"country": ["UK", "DE", "ES", "IT", "FR", "BE", "NL", "PL", "GR", "CZ", "SE", "DK", "PT", "CH", "SK", "RO", "HR", "FI", "NO", "AT"],
"media": "IMAGE",
"asset_type": "JPG"
}}
```
This will expand to 160 individual deliverables (8 sizes × 20 markets).
**Example 4 - Multiple Lists in Single Column:**
Table cell contains: "Products: Ultraboost, Supernova, Adistar | Markets: UK, DE, FR, ES, IT"
Extract as:
```
{{
"title": "Product Marketing Assets",
"category": ["Ultraboost", "Supernova", "Adistar"],
"country": ["UK", "DE", "FR", "ES", "IT"],
"media": "IMAGE"
}}
```
This will expand to 15 individual deliverables (3 products × 5 markets).
**Example 5 - Deduplication Case:**
- Page 2: Overview table shows "Social Media Assets: Quantity 20"
- Pages 4-8: Individual pages for each social platform with detailed specs
- **CORRECT APPROACH**: Extract from overview with multiplier arrays, NOT as 20 separate base deliverables
```
{{
"title": "Social Media Assets",
"technical_specifications": ["1080x1080", "1080x1920", "1200x1500", "1000x1500"],
"category": ["Meta", "Instagram", "Twitter", "LinkedIn", "TikTok"],
"quantity": "20"
}}
```
**SYSTEMATIC TABLE PROCESSING WITH DEDUPLICATION:**
- **DELIVERABLE TABLES ARE PRIORITY #1** - Focus on structured tables with deliverable information
- **SECTION HIERARCHY** - Process sections in this priority order:
1. **Main Deliverable Tables** - Comprehensive tables with quantities and specifications
2. **Overview Sections** - High-level summaries (use for validation, not primary extraction)
3. **Detail Pages** - Individual deliverable descriptions (avoid if already captured in main tables)
- **MULTIPLIER DETECTION IN SPECIFICATIONS** - Look carefully for:
- **Lists within cells**: "8x 1080x1920, 4x 1200x1500, 2x 1080x1080" → Array of specs
- **Market/language lists**: "Markets: UK, DE, FR, ES, IT" → Array of countries
- **Combined lists**: If specs AND markets appear in same cell, separate into different fields
- **Size variations**: "Mobile (320x50), Desktop (728x90), Large (970x250)" → Array of specs
- **INTELLIGENT DEDUPLICATION** - Avoid double-counting:
- **Same deliverable name** + **same specifications** = Skip the duplicate
- **Overview → Detail pattern**: If overview mentions "5 banners" and detail pages show 5 individual banners, extract from overview with multipliers, NOT 5 separate base deliverables
- **Section redundancy**: If multiple sections describe the same deliverable set, use the most comprehensive one
- **BASE DELIVERABLE IDENTIFICATION** - For each unique deliverable, extract:
- Base deliverable name/title (without duplicates)
- All multiplier values as arrays (specs, markets, languages, formats)
- Single values as strings (when no multipliers exist)
**FIELD EXTRACTION GUIDELINES:**
**Technical Specifications:**
- Use **arrays** for multiple dimensions/specs: `["1080x1920", "1200x1500", "1080x1080"]`
- Use **strings** for single specifications: `"1920x1080"`
- Include file formats, dimensions, durations, and technical requirements
- Extract exactly as written in source document
**Country/Markets:**
- Use **arrays** for multiple markets: `["UK", "DE", "FR", "ES", "IT", "NL", "PL"]`
- Use **strings** for single market: `"UK"`
- Use two-letter country codes consistently
- Extract all countries/regions mentioned for that deliverable
**Languages:**
- Use **arrays** for multiple languages: `["EN", "DE", "FR", "ES"]`
- Use **strings** for single language: `"EN"`
- Use standard language codes when available
**Asset Types:**
- Use technical file formats: "JPG", "PNG", "MP4", "GIF"
- Use **arrays** if multiple formats: `["JPG", "PNG"]`
**Media Types:**
- Use broad categories: "IMAGE", "VIDEO", "COPY", "INTERACTIVE"
- Use **arrays** for mixed media: `["IMAGE", "VIDEO"]`
**Quantity Field:**
- Note the expected total from QUANTITY columns for verification
- This will be checked against final expanded count
**EXTRACTION REQUIREMENTS:**
1. **NO HALLUCINATION**: NEVER invent or assume information. If a detail is not present, leave the corresponding field empty
2. **ALL PAGES**: Ensure extraction from ALL pages in the document, not just the first one
3. **EXACT SPECIFICATIONS**: Capture specifications exactly as written in the source document
4. **BASE DELIVERABLE FOCUS**: Extract base deliverables with their multiplier arrays, not individual expanded objects
5. **MULTIPLIER VIGILANCE**: Be especially alert for multiplier lists in specification cells - missed arrays lead to under-counting
6. **DEDUPLICATION DISCIPLINE**: Avoid extracting the same deliverable multiple times from different sections - this leads to over-counting
7. **CONTEXT ANALYSIS**: Consider the entire document structure to understand relationships between overview tables, main tables, and detail sections
**MULTIPLIER ARRAY EXAMPLES:**
- **Single spec**: `"technical_specifications": "1920x1080"`
- **Multiple specs**: `"technical_specifications": ["1080x1920", "1200x1500", "1080x1080"]`
- **Single market**: `"country": "UK"`
- **Multiple markets**: `"country": ["UK", "DE", "FR", "ES", "IT", "NL", "PL"]`
- **Copy types**: `"technical_specifications": ["Body Copy", "Headline", "Description"]`
- **Banner sizes**: `"technical_specifications": ["160x600", "300x250", "300x600", "728x90", "970x250"]`
**EXPECTED EXPANSION EXAMPLES:**
- 3 specs × 7 markets = 21 final deliverables
- 8 banner sizes × 20 markets = 160 final deliverables
- 3 copy types × 19 markets = 57 final deliverables
Return a structured JSON object with an array of base deliverables containing multiplier arrays that will be expanded into individual assets during processing.

View file

@ -0,0 +1,130 @@
You are an expert data consolidation specialist tasked with merging multiple LLM analysis results into a single, comprehensive dataset of marketing deliverables. Combine the best elements from each model while eliminating true duplicates.
**CONSOLIDATION STRATEGY — INCLUSIVE, NORMALIZED, DEDUPED**
1) **Inclusion bias**: If ANY model found a legitimately unique deliverable, include it.
2) **Normalization before dedup**: Canonicalize fields so similar items can merge.
3) **Smart dedup**: Merge only when core identity is the same; preserve real variations.
4) **Completeness**: Ensure no legitimate deliverable is lost.
---
## PRENORMALIZATION (REQUIRED)
Apply these canonical rules to **every** candidate asset prior to deduplication:
- **Title optimization (descriptive base names without multipliers)**
- Create **distinctive, specific titles** that will remain meaningful after variable expansion:
`{{Deliverable Type}} - {{Platform/Channel}} {{Content Type}} ({{Campaign/Initiative}})`
- **Balance specificity with consistency**: Preserve platform/content distinctions while normalizing similar deliverables
- **Examples**: `"Paid Social - Meta Feed Posts (Summer Campaign)"`, `"Display - Programmatic Banners (Q4 Launch)"`, `"Video Content - TikTok Stories (Brand Awareness)"`
- Strip **locations/identifiers, markets, languages, sizes, formats, and counts** from titles.
- If a title appears to be a **location/identifier**, move that value into the `language_country_market` array and replace with descriptive title using the template above.
- **Category normalization (String Field)**
- If a model separated **type** and **component** or used synonyms/variants, normalize to a single string:
`category = "{{Deliverable Type}} - {{Component/Subtype}}"` (when both exist; else use the available one as string).
- Treat top-level taxonomy labels as **metadata**, not multipliers - use single string values.
- **Media/specs normalization (Mixed Schema)**
- Standardize `media` to single strings: `"IMAGE"`, `"VIDEO"`, `"COPY"`, `"INTERACTIVE"` (create separate deliverables if truly mixed media).
- For `technical_specifications` (array field): If multiple models provide the same single-spec text (e.g., "As per supplied file"), keep it as single-item array: `["As per supplied file"]`. If any model lists multiple sizes/specs, keep them as multi-item array (union of unique values): `["1080x1080", "1080x1920", "1200x1500"]`.
- **Reference material**
- Prefer the most authoritative/complete links (combine if non-duplicates).
- **Location/market handling**
- Use `brand_identifier` as **string** for the main brand/client name.
- Use `language_country_market` **array** for location/market multipliers. Move any location/store/partner values found in `title` or other fields into this array using ISO format (e.g., ["EN-UK", "DE-DE"]).
---
## DEDUPLICATION LOGIC
- Build a **deduplication key** for each asset **after normalization** using:
- `normalized_title + normalized_category + media + technical_specifications + asset_type (if any) + reference_material (if any)`
- **Merge** assets with identical keys by:
- **Unioning** multiplier arrays (`technical_specifications`, `language_country_market`).
- Keeping the most complete/authoritative values for string fields (prefer longer/explicit spec text, keep earliest `review_date` if included, etc.).
- **Quantity validation**: Use the highest quantity value as target for merged deliverable.
- **Location-titled variants**: If two assets are identical except one used a location as its title, treat them as the same and **merge** (move location into `language_country_market` array).
- **Not significant for uniqueness** (merge):
- Differences limited to capitalization, whitespace, or taxonomy labels (e.g., having only Type vs. Component or minor synonyms) without any spec/media change.
- **Significant differences (keep separate)**:
- Different `technical_specifications` (sizes, duration, technical requirements)
- Different `asset_type` or `media`
- Materially different creative/production requirements that change the output
- Distinct platform/channel sets when they imply different production outputs
---
## QUALITY ENHANCEMENT
- For each unique deliverable:
- Choose the **most complete** specification set for `technical_specifications` array.
- **Union** all markets/languages/locations from `language_country_market` arrays from all models for that deliverable.
- Keep a clear, normalized **title** (no multipliers) and a normalized **category** string.
- **Validate quantity**: Ensure technical_specifications × language_country_market ≈ quantity value.
---
## COMPLETENESS & COUNT CHECK
- Verify that every location/market/language found by any model appears (deduped) in the `language_country_market` array of the final deliverable.
- If overview sections imply the same base deliverable repeated across many locations, the final result should be **one base deliverable** with a populated `language_country_market` array whose length matches the unique values extracted.
- **Quantity validation**: Final expansion (technical_specifications × language_country_market) should approximately equal the `quantity` value.
---
## OUTPUT REQUIREMENTS
Return a JSON object with a single `"assets"` array containing the final set of **unique** BaseDeliverable objects with optimized multiplier structure. Each item must:
- Use the **normalized title** template (no multipliers in title).
- Use a **single normalized `category`** string.
- Include **only 2 multiplier arrays**: `technical_specifications` and `language_country_market`.
- Have `quantity` as **string** that validates the multiplication: technical_specifications × language_country_market ≈ quantity.
- Differ from all others by at least one **significant** data point (see above).
---
## EXAMPLES (generic)
**Example — per-location titles collapse into one asset**
Model A:
{{
"title": "Channel - Placement (Initiative)",
"category": "Channel - Placement",
"media": "IMAGE",
"technical_specifications": ["As per supplied file"],
"reference_material": "<link if present>",
"brand_identifier": "Client Brand",
"language_country_market": ["EN-Location-A", "EN-Location-B"],
"quantity": "2"
}}
Model B:
{{
"title": "1234 - Location A",
"category": "Placement",
"media": "IMAGE",
"technical_specifications": ["As per supplied file"],
"language_country_market": ["EN-Location-A"],
"quantity": "1"
}}
**Result (merged)**:
{{
"title": "Channel - Placement (Initiative)",
"category": "Channel - Placement",
"media": "IMAGE",
"technical_specifications": ["As per supplied file"],
"reference_material": "<link if present>",
"brand_identifier": "Client Brand",
"language_country_market": ["EN-Location-A", "EN-Location-B"],
"quantity": "2"
}}
**Example — keep separate when file formats differ**
- Asset 1: `"asset_type":"JPG"`
- Asset 2: `"asset_type":"PNG"`
→ Significant difference → keep both; assign each the appropriate subset of multipliers.
---
## MODELS' ANALYSIS RESULTS
{models_results}
**TASK**: Consolidate these results into a single, comprehensive array of base deliverables following the strategy above.

View file

@ -0,0 +1,114 @@
You are an expert data extraction specialist analyzing this {doc_type} document to extract base marketing deliverables with multiplier arrays. Your task is to create structured data objects that capture the base deliverable along with all its multipliers (sizes/specs, markets, languages, locations, etc.) as arrays, which will be expanded later.
**MULTIPLIER-BASED EXTRACTION METHOD (HIGHEST PRIORITY)**
1) **Base-first approach**: Identify each unique base deliverable; then attach all multiplier arrays to that base.
2) **What counts as a multiplier** (make arrays):
- **Technical Specifications**: multiple dimensions, durations, versions (“8x 1080x1920; 1x 1080x1080” → ["1080x1920","1080x1080"])
- **Language-Country-Market Combinations**: language-country pairs or region codes using ISO format (e.g., "EN-UK", "DE-DE", "FR-FR")
- **Formats/Files**: multiple file types or variations (e.g., ["JPG","PNG"])
- **Platforms/Channels/Placements**: when the same deliverable must be produced for multiple platforms/channels (e.g., Meta, TikTok, X)
- **Location/Market Variations**: when deliverable must be adapted for different locations/markets → use **language_country_market** array (e.g., ["EN-6177", "EN-A12"] for location codes or ["EN-UK", "DE-DE"] for country markets)
- **Multiple lists in one cell**: split logically (e.g., products vs. markets).
3) **What is NOT a multiplier by default** (treat as fixed metadata unless the brief clearly specifies distinct variants):
- **Top-level taxonomy labels** such as **Deliverable Type** and **Component/Subtype** used as headings or constant column values.
- **Campaign/Project/Initiative name**.
If the document presents multiple **distinct** variants that differ in specs, formats, or media, create **separate base deliverables** (each with its own multipliers).
4) **Field Type Usage (Mixed Schema)**
- **String fields** (metadata): Use single string values for `status`, `category`, `media`, `asset_type`, `brand_identifier`, dates, `reference_material`, `page_number`, `priority_level`, `creative_direction`
- **Array fields** (multipliers): Use arrays only for `technical_specifications`, `language_country_market`
- **Single values**: `"IMAGE"`, `"JPG"`, `"Draft"` for string fields; `["1920x1080"]` for single-value arrays
- **Multiple values**: `["1080x1080", "1080x1920"]`, `["EN-UK", "DE-DE", "FR-FR"]` for true multipliers
5) **Quantity validation and sense-check**
- Set `quantity` as a **string** representing the total expected deliverables: `"50"`.
- **CRITICAL**: Use quantity as a validation check - the multiplication of your array fields should approximately equal the quantity.
- **Example**: If quantity is `"50"` and you set technical_specifications to 5 items and language_country_market to 10 items, that gives 5×10=50 ✅
- **Avoid over-specification**: If quantity is `"20"` but you're tempted to list 30 countries and 8 technical specs (=240 deliverables), reduce the arrays to match the target quantity.
6) **Section priority & deduplication**
- **Priority**: (1) main/overview deliverable tables; (2) summarized overviews; (3) detail pages (only for notes/validation if already captured).
- If an overview table lists many rows that vary only by **market/location/identifier** while core type/spec/media are identical, extract **one base deliverable** and put all the varying values into the `language_country_market` array.
- Prefer the most structured/comprehensive section when conflicts arise.
---
**TITLE, CATEGORY & FIELD NORMALIZATION (REQUIRED)**
To enable consistent consolidation across models, normalize these fields deterministically:
- **Title (descriptive base names without multipliers)**
- Create **distinctive, descriptive titles** that differentiate deliverable types:
- Template: `{{Deliverable Type}} - {{Platform/Channel}} {{Content Type}} ({{Campaign/Initiative}})`
- Examples: `"Paid Social - Meta Static Images (Summer Campaign)"`, `"Display - Programmatic Banners (Q4 Launch)"`, `"Video Content - TikTok Ads (Brand Awareness)"`
- **Include distinguishing context**: Platform, content type, campaign name, or creative format
- **Do NOT include** locations, markets, languages, sizes, file types, or counts in the title.
- **Aim for specificity**: Avoid overly generic titles like "Social Media Assets" - be more specific like "Social Media - Instagram Stories" or "Social Media - Meta Feed Posts"
- **Category (single string)**
- If both a **type** and **component/subtype** exist, normalize to one string:
`category = "{{Deliverable Type}} - {{Component/Subtype}}"`
- Do **not** split these into separate deliverables or arrays unless specs actually differ.
- **Media & Specs**
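- Set `media` to one of: `"IMAGE"`, `"VIDEO"`, `"COPY"`, `"INTERACTIVE"` (always a single string; if the media is truly mixed, create separate base deliverables).
- Copy `technical_specifications` **exactly as written**. If it's a single instruction (e.g., “As per supplied file”), keep it as a single-element array (`["As per supplied file"]`); if multiple sizes/requirements, list each one as its own array element.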
- **Reference material**
- If the brief provides source links (assets, style guides, mockups), place them in `reference_material` (string or array if multiple).
- **Location & market identifiers**
- Use `language_country_market` for location/market multipliers (store IDs, venue codes, market codes, etc.). Format as language-location pairs when possible (e.g., `["EN-6177", "EN-A12"]` for store codes or `["EN-UK", "DE-DE"]` for country markets).
- Use `brand_identifier` as single string for the main brand/client name (e.g., `"Adidas"`, `"Nike"`).
---
**FIELD EXTRACTION GUIDELINES (Mixed Schema)**
**ARRAY FIELDS (Multipliers Only):**
- **technical_specifications**: `["1920x1080"]` for one spec; `["1080x1080", "1080x1920", "1200x1500"]` for multiple sizes/specs
- **language_country_market**: `["EN-UK"]` for single market; `["EN-UK", "DE-DE", "FR-FR", "ES-ES"]` for multiple markets using ISO codes (Language-Country format)
**STRING FIELDS (Metadata Only):**
- **status**: `"Draft"` - single status value
- **category**: `"Social Media"` - single category designation
- **media**: `"IMAGE"` - single media type (create separate deliverables if truly mixed media)
- **asset_type**: `"JPG"` - single file format (create separate deliverables for different formats)
- **brand_identifier**: `"Adidas"` - single brand/client name
- **quantity**: `"50"` - VALIDATION FIELD: total expected deliverables (technical_specifications × language_country_market should ≈ this number)
- **review_date**: `"2025-09-30"` - single date
- **live_date**: `"2025-10-15"` - single date
- **reference_material**: `"As per style guide"` - single reference
- **page_number**: `"5"` - single page reference
- **priority_level**: `"High"` - single priority
- **creative_direction**: `"Brand colors, clean layout"` - single creative approach
---
**EXAMPLES (generic)**
- **Many locations with identical core fields → one base deliverable**
Output:
{{
"title": "Channel - Placement (Initiative Name)",
"category": "Channel - Placement",
"media": "IMAGE",
"technical_specifications": ["As per supplied file"],
"reference_material": "<link if present>",
"brand_identifier": "Client Brand",
"language_country_market": ["EN-UK", "DE-DE", "FR-FR"],
"quantity": "3"
}}
*(Expands to N deliverables = number of identifiers.)*
- **Specs truly differ → split by spec**
If a subset requires extra sizes or a different file type, create a second base deliverable with its own `language_country_market` subset and distinct `technical_specifications`/`asset_type`.
---
**EXTRACTION REQUIREMENTS**
1) **No hallucination** — leave unknown fields empty
2) **All pages/sections considered** — prefer structured tables
3) **Exact specs** — copy text verbatim
4) **Base deliverable focus** — do not output one base deliverable per market/location if only those vary
5) **Multiplier vigilance** — locations, markets, languages, and sizes are multipliers; taxonomy headings are not
6) **Dedup discipline** — normalize titles/categories as above to avoid duplicates

View file

@ -0,0 +1 @@
You are an expert data extraction specialist. Extract base marketing deliverables with multiplier arrays, focusing on accurate multiplier detection and intelligent deduplication to avoid both under-counting and over-counting deliverables.

View file

@ -0,0 +1 @@
You are performing quality assurance on asset extraction. Identify any missing assets.

View file

@ -0,0 +1,93 @@
{
"name": "base_deliverable_extraction",
"description": "Extract base deliverables with multiplier arrays from document analysis",
"schema": {
"type": "object",
"properties": {
"assets": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "Asset title or name (normalized base deliverable name without multipliers)"
},
"status": {
"type": "string",
"description": "Current status (e.g., 'Draft', 'In Progress', 'Final')"
},
"category": {
"type": "string",
"description": "Asset category (e.g., 'Social Media', 'Display Advertising', 'Video Content')"
},
"media": {
"type": "string",
"description": "Media type (e.g., 'IMAGE', 'VIDEO', 'COPY', 'INTERACTIVE')"
},
"asset_type": {
"type": "string",
"description": "File format (e.g., 'JPG', 'PNG', 'MP4', 'GIF')"
},
"brand_identifier": {
"type": "string",
"description": "Brand or client identifier (e.g., 'Adidas', 'Nike', 'Client A')"
},
"technical_specifications": {
"type": "array",
"items": {
"type": "string"
},
"description": "MULTIPLIER FIELD: Dimensions, sizes, requirements. Use array when document lists multiple sizes/specs for this deliverable (e.g., ['1080x1080', '1080x1920', '1200x1500']). Use single value ['1920x1080'] when only one size specified"
},
"review_date": {
"type": "string",
"description": "Review deadline (e.g., '2025-09-30')"
},
"live_date": {
"type": "string",
"description": "Go-live date (e.g., '2025-10-15')"
},
"end_date": {
"type": "string",
"description": "End/expiry date (e.g., '2025-12-31')"
},
"reference_material": {
"type": "string",
"description": "Requirements, links, notes (e.g., 'As per style guide', 'See attachment A')"
},
"language_country_market": {
"type": "array",
"items": {
"type": "string"
},
"description": "MULTIPLIER FIELD: Target language-country-market combinations using ISO codes. Use when deliverable serves multiple markets (e.g., ['EN-UK', 'DE-DE', 'FR-FR', 'ES-ES']). Use single value ['EN-US'] for single market. Format: [Language ISO]-[Country ISO] or just [Country ISO] if language not specified"
},
"quantity": {
"type": "string",
"description": "Expected total quantity for VALIDATION. Use this as a sense-check: the multiplication of all array fields should result in a total close to this quantity. If brief says '50 banners', ensure technical_specifications × language_country_market ≈ 50"
},
"page_number": {
"type": "string",
"description": "Source page reference (e.g., '5', 'Pages 3-7')"
},
"priority_level": {
"type": "string",
"description": "Business priority (e.g., 'High', 'Medium', 'Low')"
},
"creative_direction": {
"type": "string",
"description": "Design requirements (e.g., 'Brand colors, clean layout', 'Minimalist style')"
}
},
"required": [
"title"
]
}
}
},
"required": [
"assets"
]
}
}

View file

@ -0,0 +1,95 @@
You are performing quality assurance on this asset extraction. Your role is to validate the completeness and accuracy of the initial extraction, applying the same rigorous standards used in the original analysis.
EXTRACTED DATA SUMMARY:
- Found {asset_count} assets
- Document type: {doc_type}
CRITICAL VALIDATION RULES - Apply these standards when checking the extraction:
**DELIVERABLE TABLE VALIDATION (HIGHEST PRIORITY):**
1. **QUANTITY COLUMN COMPLIANCE**: For every deliverable table with a QUANTITY column, verify:
- Each table row generated exactly N deliverable objects where N = the QUANTITY value
- Example: Row showing "Display Celtra Static Banners" with Quantity "480" should produce 480 separate deliverable objects
- Example: Row showing "Meta Video Sizes" with Quantity "2" should produce 2 separate deliverable objects
- Example: Row showing "Pinterest Copy" with Quantity "18" should produce 18 separate deliverable objects
2. **TABLE PROCESSING COMPLETENESS**: Verify all structured deliverable tables were processed:
- Check that tables with headers like "DELIVERABLE NAME, QUANTITY, SPECS" were fully extracted
- Verify tables across all sections (Paid Social, Display, Demand Gen) were processed
- Confirm no deliverable tables were missed or partially processed
3. **TOTAL ASSET COUNT VALIDATION**: If document states "TOTAL ASSET COUNT: XXX":
- Sum all extracted deliverables and verify it matches this exact number
- If extraction total is significantly different (>5% variance), identify which tables/rows were missed
- Cross-reference extracted count against the stated total as primary validation metric
4. **QUANTITY-BASED MULTIPLIERS**: Beyond table quantities, verify traditional multipliers:
- Language/Market Multipliers: Multiple markets/languages should create separate objects for EACH market
- Size/Format Multipliers: Multiple sizes/formats should create separate objects for EACH variant
- Combined Multipliers: Multiple factors should be multiplied correctly (e.g., 2 formats × 3 markets = 6 objects)
- INDIVIDUAL ROW VERIFICATION: Verify that individual rows exist for each variation with quantity "1" and specific details in appropriate columns (country codes, language codes, dimensions, file formats, etc.)
**TECHNICAL SPECIFICATIONS FIELD VALIDATION:**
- Verify technical_specifications fields capture ANY available technical information
- Check for precise dimensions when available (e.g., "1920x1080", "300x250") - NEVER placeholders like "TBC" or "desktop here"
- Verify descriptive sizing information is included (e.g., "Mobile Banner", "Desktop Hero", "Square Format")
- Check that units are included when present in source (px, ", in, cm)
- Verify time-based specs are captured for video content (e.g., "60 second loop")
- Verify all technical requirements and file formats are included in the technical_specifications field
- Field should ONLY be empty if absolutely no technical information exists in the document
**ASSET TYPE VALIDATION:**
- Verify asset_type contains technical file formats (JPG, PNG, MP4, GIF) not creative names
- Check that file formats and technical requirements were extracted from phrases like "delivered as PNG", "JPG format required", "MP4 video file", "mobile optimized", "desktop banner" and included in technical_specifications field
**COUNTRY CODE VALIDATION:**
- Verify two-letter country codes are used (e.g., UK, DE, FR, ES, IT)
- Check that regional mentions (e.g., "EMEA") were expanded to specific countries if listed
**QUANTITY FIELD VALIDATION:**
- Verify every single object has quantity "1"
- Check that multipliers were handled by creating more objects, not changing quantity numbers
- MULTIPLICATION LOGIC CHECK: If document says "5 banners x 8 markets", verify 40 separate rows exist, not 1 row with quantity "40"
- INDIVIDUAL VARIATION ROWS: Verify that individual rows exist for each variation with quantity "1" and specific details in appropriate columns (country codes, language codes, dimensions, file formats, etc.)
VALIDATION TASKS:
1. **DELIVERABLE TABLE QUANTITY VALIDATION (TOP PRIORITY)**:
- Locate every table with QUANTITY columns in the document
- For each table row, verify the extraction created exactly N deliverables where N = the quantity value
- Sum all quantity values from all tables and verify it matches any stated "TOTAL ASSET COUNT"
- Pay special attention to high-quantity rows (480, 57, 114+) that significantly impact total counts
2. **TABLE PROCESSING COMPLETENESS**: Verify every structured deliverable table was fully processed:
- Check that tables across all major sections were captured (Paid Social, Display, Demand Gen)
- Confirm platform-specific tables (Meta, Snapchat, Pinterest, Celtra, Teads) were processed
- Verify no deliverable overview tables or specification matrices were missed
3. **MULTIPLIER AND VARIATION VALIDATION**: Beyond table quantities, verify traditional multipliers:
- Market/language multipliers creating separate objects per country/language
- Size/format variations creating separate objects per specification
- Combined multipliers being calculated correctly
4. **Technical Specification Accuracy**: Verify all dimensions, file formats, technical requirements, and sizing descriptions are captured exactly as written in the document and included in the technical_specifications field.
**NO HALLUCINATION RULE**: If you identify missing assets or corrections, extract ONLY information that is explicitly present in the document. NEVER invent or assume information.
**CRITICAL FOCUS AREAS FOR DELIVERABLE TABLE VALIDATION:**
- **DELIVERABLE OVERVIEW SECTIONS**: Check that comprehensive tables showing all deliverables with quantities were fully processed
- **QUANTITY COLUMN ACCURACY**: Verify each row's quantity number was used to create the correct number of deliverable objects
- **HIGH-QUANTITY TABLE ROWS**: Pay special attention to rows with large quantities (480, 57, 114+) as these significantly impact total counts
- **SECTION-BY-SECTION VALIDATION**: Verify deliverable tables in each major section were processed:
- Paid Social (Meta, Snapchat, Pinterest, Reddit) - often contain copy deliverables with high market multipliers
- Display (Celtra, Teads) - typically contain highest single quantities (e.g., 480 banners)
- Demand Gen - video and static assets with multiple format requirements
- **TOTAL SUMMATION CHECK**: Verify that summing all quantity values from all tables equals the stated "TOTAL ASSET COUNT"
- **TABLE STRUCTURE COMPLETENESS**: Confirm all structured tables with deliverable specifications were captured
- **PLATFORM-SPECIFIC TABLES**: Each platform section likely contains multiple deliverable requirement tables
- **COPY/LOCALIZATION MULTIPLICATION**: Copy deliverables often have the highest multipliers due to market/language requirements
- **MISSED TABLE DETECTION**: Scan for any deliverable tables that were completely overlooked during initial extraction
**OUTPUT INSTRUCTIONS:**
- If you find additional assets or identify missed multipliers, provide them in the structured format with technical_specifications field containing all available technical information
- If the existing extraction correctly handled all multipliers and captured all assets comprehensively, return an empty assets array
- Focus especially on multiplier validation - this is the most common source of incomplete extractions
Return your response as a structured JSON object with any additional assets found or corrections needed.

44
backend/requirements.txt Executable file
View file

@ -0,0 +1,44 @@
# AC Tool — unified brief extractor + activation calendar
# Web framework
quart>=0.19.0
quart-cors>=0.7.0
hypercorn>=0.16.0
# Auth
PyJWT>=2.8.0
msal>=1.26.0
# AI / LLM providers
google-genai[aiohttp]>=0.4.0
openai>=1.0.0
anthropic>=0.67.0
aiohttp>=3.9.0
json5>=0.9.0
# Document parsing
llama-cloud-services>=0.6.62
python-pptx>=0.6.21
PyMuPDF>=1.23.0
python-docx>=0.8.11
openpyxl>=3.1.0
xlrd>=2.0.1
# Data
pandas>=2.0.0
numpy>=1.24.0
pydantic>=2.0.0
# Misc
Pillow>=10.0.0
beautifulsoup4>=4.12.0
lxml>=4.9.0
requests>=2.31.0
python-dotenv>=1.0.0
structlog>=23.0.0
python-dateutil>=2.8.2
typing-extensions>=4.7.0
psutil>=5.9.0
tqdm>=4.65.0
regex>=2023.0.0
cryptography>=41.0.0

123
backend/run_server.py Executable file
View file

@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""
Startup script for Brief Extractor GUI server
"""
import sys
import os
import logging
from pathlib import Path
# Make the backend root and its `server` directory importable; the `server`
# directory ends up first on sys.path, followed by the backend root.
project_root = Path(__file__).parent
sys.path[:0] = [str(project_root / 'server'), str(project_root)]

# Configure logging up front so modules imported later log in this format.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
async def async_main():
    """Validate configuration, build the app and serve it with Hypercorn.

    Exits the process with status 1 when auth configuration is incomplete and
    DEV_MODE is off. On POSIX platforms SIGINT/SIGTERM trigger a graceful
    shutdown, backed by a forced exit three seconds later.
    """
    import asyncio
    import signal

    # Project imports are deferred to call time, after sys.path was extended.
    from server.app import create_app
    from server.config_runtime import server_config

    # Incomplete auth settings are fatal unless DEV_MODE is enabled.
    auth_ok = server_config.validate_auth_config()
    if not auth_ok and server_config.DEV_MODE:
        logger.warning("Running in DEV_MODE - MSAL authentication bypassed")
    elif not auth_ok:
        logger.error("MSAL authentication configuration is incomplete")
        logger.error("Please set MSAL_CLIENT_ID, MSAL_CLIENT_SECRET, and MSAL_TENANT_ID in .env")
        sys.exit(1)

    logger.info("Creating Brief Extractor GUI application...")
    app = create_app()

    import hypercorn.asyncio
    from hypercorn import Config

    bind_address = f"{server_config.HOST}:{server_config.PORT}"
    config = Config()
    config.bind = [bind_address]
    config.workers = server_config.WORKERS
    config.use_reloader = server_config.DEBUG
    config.accesslog = "-"  # Log to stdout
    config.errorlog = "-"  # Log to stderr

    # Startup banner.
    for banner_line in (
        "Starting Brief Extractor GUI server",
        f"Server: http://{bind_address}",
        f"Development mode: {server_config.DEV_MODE}",
        f"Max concurrent jobs: {server_config.MAX_CONCURRENT_JOBS}",
        f"Max upload size: {server_config.MAX_UPLOAD_SIZE_MB}MB",
        f"File retention: {server_config.FILE_RETENTION_HOURS} hours",
        f"Workers: {server_config.WORKERS}",
    ):
        logger.info(banner_line)

    shutdown_event = asyncio.Event()

    def force_shutdown():
        # Runs in a daemon thread: hard-exit if the graceful path takes over 3 s.
        import time
        time.sleep(3)
        logger.warning("Graceful shutdown timed out, forcing exit...")
        os._exit(1)

    def signal_handler():
        import threading
        logger.info("Shutdown signal received, stopping server...")
        shutdown_event.set()
        threading.Thread(target=force_shutdown, daemon=True).start()

    # Loop-level signal handlers are only registered off Windows.
    if sys.platform != 'win32':
        loop = asyncio.get_running_loop()
        for signum in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(signum, signal_handler)

    try:
        # serve() returns once shutdown_event is set.
        await hypercorn.asyncio.serve(app, config, shutdown_trigger=shutdown_event.wait)
        logger.info("Server stopped gracefully")
    except asyncio.CancelledError:
        logger.info("Server cancelled")
    except Exception as exc:
        logger.error(f"Server error: {exc}", exc_info=True)
        raise
def main():
    """Process entry point: install fallback signal handlers and run the server.

    The handlers installed here exit immediately with status 0. The process
    exits with status 0 on KeyboardInterrupt and 1 on any other exception
    raised while running `async_main`.
    """
    import asyncio
    import signal

    def immediate_shutdown(signum, frame):
        logger.info(f"Immediate shutdown signal {signum} received")
        os._exit(0)

    # Installed before the event loop exists, so early signals still terminate.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, immediate_shutdown)

    try:
        asyncio.run(async_main())
        return
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
        exit_status = 0
    except Exception as exc:
        logger.error(f"Server failed to start: {exc}", exc_info=True)
        exit_status = 1
    os._exit(exit_status)


if __name__ == '__main__':
    main()

View file

View file

126
backend/server/api/admin.py Normal file
View file

@ -0,0 +1,126 @@
"""
Admin API — user management and dropdown Excel upload.
All routes require admin role.
"""
import json
import logging
import os
import openpyxl
from io import BytesIO
from quart import Blueprint, jsonify, request
from ..auth.middleware import admin_required
from ..auth.user_store import list_users, set_role, set_active
from ..api.dropdowns import save_dropdowns
logger = logging.getLogger(__name__)
admin_bp = Blueprint('admin', __name__, url_prefix='/api/admin')
@admin_bp.route('/users', methods=['GET'])
@admin_required
async def get_users():
    """Return the result of `list_users()` as JSON under the `users` key."""
    users = list_users()
    return jsonify({'users': users})
@admin_bp.route('/users/<user_id>', methods=['PATCH'])
@admin_required
async def update_user(user_id: str):
    """Apply a partial update (`role` and/or `active`) to one user.

    Request JSON may contain `role` and/or `active`; at least one is required.

    Responses:
        200 {'success': True, 'user': <record>} after the update(s)
        400 {'error': 'no_fields'} when the body is not an object or has
            neither `role` nor `active`
        400 {'error': 'invalid_role_or_not_found'} when `set_role` returns None
        404 {'error': 'not_found'} when `set_active` returns None
    """
    body = await request.get_json() or {}
    # A no-op PATCH used to report success with `user: null`, and a non-object
    # body crashed on item access; both are now rejected explicitly.
    if not isinstance(body, dict) or not ('role' in body or 'active' in body):
        return jsonify({'error': 'no_fields', 'message': "Provide 'role' and/or 'active'"}), 400

    user = None
    if 'role' in body:
        user = set_role(user_id, body['role'])
        if user is None:
            return jsonify({'error': 'invalid_role_or_not_found'}), 400
    if 'active' in body:
        user = set_active(user_id, bool(body['active']))
        if user is None:
            return jsonify({'error': 'not_found'}), 404
    return jsonify({'success': True, 'user': user})
def _row_to_category(row):
    """Convert one worksheet row (tuple of cell values) into a category dict.

    Columns used: A (index 0) = category name, E (index 4) = status,
    G (index 6) = comma-separated media types. Returns None for rows with
    fewer than five cells or an empty name cell.
    """
    if len(row) < 5 or not row[0]:
        return None
    name = str(row[0]).strip()
    status_raw = str(row[4]).strip() if row[4] else 'Active'
    # Match on the leading word: a plain substring test would classify
    # "Inactive" as Active because it contains "active".
    status = 'Active' if status_raw.lower().startswith('active') else 'Archived'
    media_raw = str(row[6]).strip() if len(row) > 6 and row[6] else ''
    media_types = [m.strip() for m in media_raw.split(',') if m.strip()]
    return {'name': name, 'status': status, 'mediaTypes': media_types}


def _parse_categories(data):
    """Parse .xlsx bytes into a list of category dicts; the first row is skipped as a header."""
    wb = openpyxl.load_workbook(BytesIO(data))
    ws = wb.active
    parsed = (_row_to_category(row) for row in ws.iter_rows(min_row=2, values_only=True))
    return [category for category in parsed if category is not None]


@admin_bp.route('/dropdowns/upload', methods=['POST'])
@admin_required
async def upload_dropdowns():
    """
    Upload a new Excel file (.xlsx) to update the dropdown categories.
    Expects multipart/form-data with field 'file'.
    Parses columns: A=Category name, E=Status, G=Media types (comma-separated).

    Responses:
        200 {'success': True, 'total', 'active', 'archived'} after saving
        400 'no_file', 'invalid_file_type' or 'empty_file'
        500 'parse_error' when parsing or saving raises
    """
    files = await request.files
    file = files.get('file')
    if not file:
        return jsonify({'error': 'no_file'}), 400
    filename = file.filename or ''
    if not filename.lower().endswith('.xlsx'):
        return jsonify({'error': 'invalid_file_type', 'message': 'Only .xlsx files accepted'}), 400
    try:
        categories = _parse_categories(file.read())
        if not categories:
            return jsonify({'error': 'empty_file', 'message': 'No categories found in file'}), 400
        save_dropdowns(categories)
        active_count = sum(1 for c in categories if c['status'] == 'Active')
        return jsonify({
            'success': True,
            'total': len(categories),
            'active': active_count,
            'archived': len(categories) - active_count,
        })
    except Exception as e:
        logger.error(f"Dropdown upload error: {e}", exc_info=True)
        return jsonify({'error': 'parse_error', 'message': str(e)}), 500


@admin_bp.route('/dropdowns/preview', methods=['POST'])
@admin_required
async def preview_dropdowns():
    """Preview parsed categories from an uploaded file without saving.

    Uses the same parsing as the upload route. Responds with
    {'categories', 'total'}, 400 'no_file', or 500 'parse_error'.
    """
    files = await request.files
    file = files.get('file')
    if not file:
        return jsonify({'error': 'no_file'}), 400
    try:
        categories = _parse_categories(file.read())
        return jsonify({'categories': categories, 'total': len(categories)})
    except Exception as e:
        # Logged for parity with the upload route; the response is unchanged.
        logger.error(f"Dropdown preview error: {e}", exc_info=True)
        return jsonify({'error': 'parse_error', 'message': str(e)}), 500

View file

@ -0,0 +1,187 @@
"""
AI command API — processes natural language commands against a sheet.
Port of the 'command' action from ac-helper/api.php using Gemini via aiohttp.
"""
import json
import logging
import os
import re
import aiohttp
from datetime import date
from quart import Blueprint, jsonify, request
from ..auth.middleware import auth_required, get_user_id
from ..sheets.manager import load_sheet_data, update_sheet, generate_next_id
from ..api.dropdowns import _load_dropdowns
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
ai_bp = Blueprint('ai', __name__, url_prefix='/api/sheets')
# Speech-to-text correction map
# Keys are phrases as they may be mis-transcribed; values are the intended
# terms. `_preprocess` walks the entries in insertion order on the lowercased
# command, so longer phrases are listed before the shorter ones they contain.
SPEECH_CORRECTIONS = {
    'delivery balls': 'deliverables',
    'delivery ball': 'deliverable',
    'delivery': 'deliverables',
    'liver': 'deliverables',
    'rose': 'rows',
    'oh oh h': 'OOH',
    'out of home': 'OOH',
}
# Spelled-out numbers mapped to digit strings; `_preprocess` substitutes them
# as whole words.
NUMBER_WORDS = {
    'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
    'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10',
    'eleven': '11', 'twelve': '12', 'twenty': '20', 'thirty': '30',
}
# <three directories above this file>/prompts/ac_command.txt
_PROMPT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'prompts', 'ac_command.txt')


def _load_prompt_template() -> str:
    """Read the AI command prompt template from `_PROMPT_PATH`.

    Returns:
        The file contents, or "" when the file cannot be read or decoded.
        Failures are logged so a missing template is not silent.
    """
    try:
        # Explicit UTF-8: prompt text may contain non-ASCII characters and
        # must not depend on the process locale's default encoding.
        with open(_PROMPT_PATH, 'r', encoding='utf-8') as f:
            return f.read()
    except Exception as exc:
        logger.error(f"Failed to load prompt template {_PROMPT_PATH}: {exc}")
        return ""
def _preprocess(command: str) -> str:
    """Normalise a (possibly voice-transcribed) command before prompting.

    Lowercases the command, applies `SPEECH_CORRECTIONS` in insertion order,
    then replaces spelled-out numbers from `NUMBER_WORDS` with digits.

    Corrections match whole words/phrases only. Plain substring replacement
    rewrote 'liver' inside the correctly typed 'deliverables' (giving
    'dedeliverablesables') and 'rose' inside 'prose'.
    """
    cmd = command.lower()
    for wrong, right in SPEECH_CORRECTIONS.items():
        # Lookarounds rather than \b so a phrase is never matched inside a
        # longer word; re.escape keeps the phrase literal.
        pattern = r'(?<!\w)' + re.escape(wrong) + r'(?!\w)'
        # A callable replacement inserts `right` verbatim (no backslash handling).
        cmd = re.sub(pattern, lambda _match, _right=right: _right, cmd)
    for word, digit in NUMBER_WORDS.items():
        cmd = re.sub(r'\b' + re.escape(word) + r'\b', digit, cmd)
    return cmd
def _build_hierarchy_rules() -> str:
    """Render the active dropdown categories as prompt text.

    Produces one "- <name>: <media types>" line per category whose status is
    'Active', with media types comma-separated, joined by newlines.
    """
    return '\n'.join(
        f"- {entry['name']}: {', '.join(entry.get('mediaTypes', []))}"
        for entry in _load_dropdowns()
        if entry.get('status') == 'Active'
    )
async def _call_gemini(prompt: str) -> dict:
    """POST `prompt` to the Gemini `generateContent` endpoint and return the decoded JSON.

    The API key is sent in the `x-goog-api-key` header rather than the URL
    query string, so it cannot end up in access logs or in exception text
    that includes the request URL.

    Raises:
        RuntimeError: if `server_config.GEMINI_API_KEY` is not set.
        Exception: any aiohttp transport or JSON decoding error propagates.
    """
    api_key = server_config.GEMINI_API_KEY
    if not api_key:
        raise RuntimeError("GEMINI_API_KEY is not configured")
    model = server_config.GEMINI_MODEL
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
    headers = {"x-goog-api-key": api_key}
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers) as resp:
            return await resp.json()
def _extract_json(text: str) -> dict:
    """Parse the JSON object embedded in `text`.

    Takes the span from the first '{' to the last '}' and decodes it, which
    tolerates surrounding prose or code fences.

    Raises:
        ValueError: if either brace is missing, or (as json.JSONDecodeError)
            if the span is not valid JSON.
    """
    opening = text.find('{')
    closing = text.rfind('}')
    if min(opening, closing) < 0:
        raise ValueError("No JSON object found in response")
    candidate = text[opening:closing + 1]
    return json.loads(candidate)
@ai_bp.route('/<sheet_id>/command', methods=['POST'])
@auth_required
async def run_command(sheet_id: str):
    """Interpret a natural-language command with Gemini and apply it to a sheet.

    Request JSON: `command` (required), `yolo_mode` (optional flag) and
    `history` (optional conversation context).

    The model's reply must contain a JSON object whose `operation` is one of
    `create`, `update`, `batch_update` or `question`. The first three modify
    the sheet rows and persist them via `update_sheet`.

    Responses:
        200 result of the operation (including the updated `data` for writes)
        400 'empty_command' or 'unknown_operation'
        404 'sheet_not_found'
        500 'prompt_template_unavailable' or 'prompt_template_error'
        502 'ai_error', 'gemini_error', 'empty_ai_response' or 'invalid_ai_json'
    """
    user_id = get_user_id()
    body = await request.get_json() or {}
    if not isinstance(body, dict):
        body = {}
    # Coerce so a null or non-string `command` yields 400, not an AttributeError.
    raw_command = str(body.get('command') or '').strip()
    yolo_mode = bool(body.get('yolo_mode', False))
    history = body.get('history', '')
    if not raw_command:
        return jsonify({'error': 'empty_command'}), 400

    data = load_sheet_data(user_id, sheet_id)
    if data is None:
        return jsonify({'error': 'sheet_not_found'}), 404

    command = _preprocess(raw_command)
    template = _load_prompt_template()
    if not template:
        # `_load_prompt_template` returns "" on failure; do not send an empty prompt.
        logger.error("AI command prompt template is missing or empty")
        return jsonify({'error': 'prompt_template_unavailable'}), 500
    hierarchy = _build_hierarchy_rules()
    try:
        prompt = template.format(
            current_date=date.today().isoformat(),
            yolo_mode='TRUE' if yolo_mode else 'FALSE',
            conversation_history=history or '(none)',
            data_context=json.dumps(data),
            hierarchy_rules=hierarchy,
            command=command,
        )
    except (KeyError, IndexError, ValueError) as e:
        # Raised by str.format for unknown placeholders or unbalanced braces.
        logger.error(f"Prompt template formatting error: {e}")
        return jsonify({'error': 'prompt_template_error'}), 500

    try:
        gemini_resp = await _call_gemini(prompt)
    except Exception as e:
        logger.error(f"Gemini API error: {e}")
        return jsonify({'error': 'ai_error', 'message': str(e)}), 502

    if 'error' in gemini_resp:
        msg = gemini_resp['error'].get('message', 'Unknown error')
        return jsonify({'error': 'gemini_error', 'message': msg}), 502

    # `or` fallbacks also cover empty lists and nulls; direct [0] indexing on
    # an empty `candidates` or `parts` list would raise IndexError.
    candidates = gemini_resp.get('candidates') or [{}]
    parts = (candidates[0].get('content') or {}).get('parts') or [{}]
    llm_text = parts[0].get('text', '')
    if not llm_text:
        return jsonify({'error': 'empty_ai_response'}), 502

    try:
        action = _extract_json(llm_text)
    except Exception:
        return jsonify({'error': 'invalid_ai_json', 'debug_llm': llm_text}), 502

    operation = action.get('operation')

    if operation == 'create':
        items = action.get('items', [])
        for item in items:
            # The ID is generated per item against the growing list.
            item['Number'] = generate_next_id(data)
            item.setdefault('Status', 'Booked')
            item.setdefault('Quantity', 1)
            data.append(item)
        update_sheet(user_id, sheet_id, data)
        return jsonify({'success': True, 'operation': 'create', 'count': len(items), 'data': data})

    elif operation == 'update':
        values = action.get('values', {})
        target_ids = action.get('target_ids', [])
        count = 0
        for row in data:
            # An empty `target_ids` list applies the values to every row.
            if not target_ids or row.get('Number') in target_ids:
                row.update(values)
                count += 1
        update_sheet(user_id, sheet_id, data)
        return jsonify({'success': True, 'operation': 'update', 'count': count, 'data': data})

    elif operation == 'batch_update':
        updates = action.get('updates', [])
        count = 0
        for upd in updates:
            num = upd.get('Number')
            vals = upd.get('values', {})
            for row in data:
                if row.get('Number') == num:
                    row.update(vals)
                    count += 1
                    break  # only the first row with this Number is updated
        update_sheet(user_id, sheet_id, data)
        return jsonify({'success': True, 'operation': 'batch_update', 'count': count, 'data': data})

    elif operation == 'question':
        return jsonify({'success': True, 'operation': 'question', 'question': action.get('text', '')})

    return jsonify({'error': 'unknown_operation', 'operation': operation}), 400

View file

@ -0,0 +1,83 @@
"""
Auth API endpoints.
"""
import logging
from quart import Blueprint, jsonify, request
from ..auth.msal_auth import msal_auth
from ..auth.middleware import auth_required, get_current_user
from ..auth.user_store import upsert_user
logger = logging.getLogger(__name__)
auth_bp = Blueprint('auth', __name__, url_prefix='/api/auth')
@auth_bp.route('/config', methods=['GET'])
async def get_auth_config():
    """Return the MSAL client configuration together with the dev-mode flag."""
    payload = {
        'config': msal_auth.get_client_config(),
        'devMode': msal_auth.is_dev_mode(),
    }
    return jsonify(payload)
@auth_bp.route('/validate', methods=['POST'])
async def validate_token():
    """
    Validate a client-supplied access token and return the matching user.

    Expects a JSON object body containing an ``accessToken`` string. The token
    is checked with ``msal_auth.validate_token``; on success the user is
    upserted into the user store and the stored role is returned alongside the
    profile fields taken from the token.

    Returns:
        200 with ``{'valid': True, 'user': {...}}`` on success,
        400 when the body is missing, malformed or carries no usable token,
        401 when the token is rejected,
        500 on any unexpected error.
    """
    try:
        # silent=True makes a malformed body come back as None (-> 400 below)
        # instead of raising inside this try block and surfacing as a 500.
        data = await request.get_json(silent=True)
        # Only a JSON object can carry the token; lists/strings/numbers cannot.
        token = data.get('accessToken') if isinstance(data, dict) else None
        if not token or not isinstance(token, str):
            return jsonify({'error': 'invalid_request', 'message': 'accessToken required'}), 400

        user_info = await msal_auth.validate_token(token)
        if not user_info:
            return jsonify({'valid': False, 'error': 'invalid_token'}), 401

        # Create or refresh the stored record; the role comes from the store.
        stored = upsert_user(
            user_info['oid'],
            user_info.get('preferred_username', ''),
            user_info.get('name', ''),
        )
        return jsonify({
            'valid': True,
            'user': {
                'id': user_info['oid'],
                'email': user_info.get('preferred_username'),
                'name': user_info.get('name'),
                'role': stored.get('role', 'user'),
            },
        })
    except Exception as e:
        logger.error(f"Token validation error: {e}")
        return jsonify({'error': 'validation_error'}), 500
@auth_bp.route('/me', methods=['GET'])
@auth_required
async def me():
    """
    Profile of the authenticated caller.

    Identity fields and the role are read from the current-user object; the
    ``active``, ``created`` and ``last_seen`` fields are read from the stored
    user record (an empty record is assumed when none is found).
    """
    from ..auth.user_store import get_user as get_stored_user

    current = await get_current_user()
    record = get_stored_user(current['oid'])
    if not record:
        record = {}

    profile = {
        'id': current['oid'],
        'email': current.get('preferred_username'),
        'name': current.get('name'),
        'role': current.get('role', 'user'),
        'active': record.get('active', True),
        'created': record.get('created'),
        'last_seen': record.get('last_seen'),
    }
    return jsonify(profile)
@auth_bp.route('/user', methods=['GET'])
@auth_required
async def get_current_user_info():
    """Return id, username, name and role of the authenticated caller under a ``user`` key."""
    current = await get_current_user()
    summary = {
        'id': current['oid'],
        'username': current.get('preferred_username'),
        'name': current.get('name'),
        'role': current.get('role', 'user'),
    }
    return jsonify({'user': summary})
@auth_bp.route('/logout', methods=['POST'])
async def logout():
    """Build the logout URL, passing along the optional ``redirectUri`` from the JSON body."""
    body = await request.get_json()
    if not body:
        # A missing or empty body is treated as "no redirect URI supplied".
        body = {}
    redirect_uri = body.get('redirectUri')
    target = await msal_auth.get_logout_url(redirect_uri)
    return jsonify({'logoutUrl': target})

273
backend/server/api/config.py Executable file
View file

@ -0,0 +1,273 @@
"""
Configuration API endpoints for model selection and system settings
"""
import logging
from quart import Blueprint, jsonify, request, g
from ..auth.middleware import dev_mode_bypass, get_user_id
from ..jobs.models import ModelConfiguration
from ..jobs.manager import JobManager
logger = logging.getLogger(__name__)
config_bp = Blueprint('config', __name__, url_prefix='/api/config')
@config_bp.route('/models', methods=['GET'])
@dev_mode_bypass
async def get_available_models():
    """
    List the models reported by the job manager.

    Returns:
        ``{'models': [...]}`` with one serialized entry per model, or a 500
        ``configuration_error`` payload when the lookup fails.
    """
    try:
        serialized = []
        for entry in JobManager.get_available_models():
            serialized.append(entry.to_dict())
        return jsonify({'models': serialized})
    except Exception as e:
        logger.error(f"Failed to get available models: {e}")
        failure = {
            'error': 'configuration_error',
            'message': 'Failed to retrieve available models',
        }
        return jsonify(failure), 500
@config_bp.route('/defaults', methods=['GET'])
@dev_mode_bypass
async def get_default_config():
    """
    Return the job manager's default model configuration.

    Returns:
        ``{'config': {...}}`` on success, or a 500 ``configuration_error``
        payload when the default configuration cannot be obtained.
    """
    try:
        defaults = JobManager.get_default_model_config()
        body = {'config': defaults.to_dict()}
        return jsonify(body)
    except Exception as e:
        logger.error(f"Failed to get default config: {e}")
        failure = {
            'error': 'configuration_error',
            'message': 'Failed to retrieve default configuration',
        }
        return jsonify(failure), 500
def _as_non_negative_int(value):
    """Return ``value`` as a non-negative int, or None when it is not a usable number."""
    # bool is a subclass of int but is never a meaningful size or token count.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    # "not >= 0" also rejects NaN, which compares False against everything.
    if not value >= 0:
        return None
    try:
        return int(value)
    except (OverflowError, ValueError):
        # int() refuses infinities and NaN.
        return None


@config_bp.route('/estimate', methods=['POST'])
@dev_mode_bypass
async def estimate_processing_cost():
    """
    Estimate processing cost for given models and file size

    Expects:
        {
            "modelConfig": {
                "primaryModels": ["model1", "model2"],
                "consolidationModel": "model3"
            },
            "fileSizeBytes": 12345,
            "estimatedTokens": 10000
        }

    ``estimatedTokens`` is optional; when it is absent or zero the token count
    is derived from ``fileSizeBytes``.

    Returns:
        Cost breakdown by model and total estimated cost. Responds with 400
        for a missing/non-object body, an invalid model configuration, or a
        size/token value that is not a non-negative number; 500 on any
        unexpected error.
    """
    try:
        data = await request.get_json()
        if not data:
            return jsonify({
                'error': 'invalid_request',
                'message': 'Request body required'
            }), 400
        if not isinstance(data, dict):
            return jsonify({
                'error': 'invalid_request',
                'message': 'Request body must be a JSON object'
            }), 400

        model_config_data = data.get('modelConfig', {})
        raw_tokens = data.get('estimatedTokens')
        raw_size = data.get('fileSizeBytes', 0)

        estimated_tokens = 0
        if raw_tokens:
            estimated_tokens = _as_non_negative_int(raw_tokens)
            if estimated_tokens is None:
                return jsonify({
                    'error': 'invalid_request',
                    'message': 'estimatedTokens must be a non-negative number'
                }), 400

        # If no token estimate provided, estimate based on file size
        if not estimated_tokens:
            file_size = _as_non_negative_int(0 if raw_size is None else raw_size)
            if file_size is None:
                return jsonify({
                    'error': 'invalid_request',
                    'message': 'fileSizeBytes must be a non-negative number'
                }), 400
            # Rough heuristic: one token per 3 bytes of file, capped at 100k tokens
            estimated_tokens = min(file_size // 3, 100000)

        # Parse model configuration
        try:
            model_config = ModelConfiguration.from_dict(model_config_data)
        except Exception as e:
            return jsonify({
                'error': 'invalid_model_config',
                'message': f'Invalid model configuration: {e}'
            }), 400

        # Get all models to estimate
        all_models = model_config.primary_models + [model_config.consolidation_model]

        # Estimate cost using provider manager
        job_manager = JobManager.get_instance()
        cost_breakdown = job_manager.provider_manager.estimate_total_cost(
            model_keys=all_models,
            estimated_input_tokens=estimated_tokens,
            estimated_output_tokens=estimated_tokens // 2  # Assume 50% of input as output
        )

        # Separate primary and consolidation costs
        primary_cost = sum(
            cost_breakdown.get(model, 0) for model in model_config.primary_models
        )
        consolidation_cost = cost_breakdown.get(model_config.consolidation_model, 0)

        return jsonify({
            'estimatedTokens': estimated_tokens,
            'costBreakdown': {
                'primaryModels': {
                    model: cost_breakdown.get(model, 0)
                    for model in model_config.primary_models
                },
                'consolidationModel': {
                    model_config.consolidation_model: consolidation_cost
                },
                'primaryTotal': primary_cost,
                'consolidationTotal': consolidation_cost,
                'grandTotal': cost_breakdown.get('total', 0)
            }
        })
    except Exception as e:
        logger.error(f"Cost estimation error: {e}")
        return jsonify({
            'error': 'estimation_error',
            'message': 'Failed to estimate processing cost'
        }), 500
@config_bp.route('/validate', methods=['POST'])
@dev_mode_bypass
async def validate_model_config():
    """
    Check a submitted model configuration against the available models.

    Expects:
        {
            "modelConfig": {
                "primaryModels": ["model1", "model2"],
                "consolidationModel": "model3",
                "minimumSuccessThreshold": 1
            }
        }

    Returns:
        ``valid`` (true when there are no errors), the ``errors`` and
        ``warnings`` lists, and a ``modelCount`` summary. Unknown models are
        errors; threshold, duplicate and overlap findings are warnings.
    """
    try:
        data = await request.get_json()
        if not data:
            missing_body = {
                'error': 'invalid_request',
                'message': 'Request body required',
            }
            return jsonify(missing_body), 400

        model_config_data = data.get('modelConfig', {})
        try:
            model_config = ModelConfiguration.from_dict(model_config_data)
        except Exception as e:
            rejected = {
                'valid': False,
                'error': f'Invalid model configuration: {e}',
            }
            return jsonify(rejected), 400

        # Keys of every model the job manager currently offers.
        available_models = [model.key for model in JobManager.get_available_models()]

        # Errors: primary models first, then the consolidation model.
        errors = [
            f"Primary model '{model}' is not available"
            for model in model_config.primary_models
            if model not in available_models
        ]
        if model_config.consolidation_model not in available_models:
            errors.append(f"Consolidation model '{model_config.consolidation_model}' is not available")

        # Warnings: (condition, message) pairs, reported in this order.
        warning_checks = (
            (
                model_config.minimum_success_threshold > len(model_config.primary_models),
                f"Minimum success threshold ({model_config.minimum_success_threshold}) "
                f"is higher than number of primary models ({len(model_config.primary_models)})",
            ),
            (
                len(set(model_config.primary_models)) != len(model_config.primary_models),
                "Duplicate models detected in primary models list",
            ),
            (
                model_config.consolidation_model in model_config.primary_models,
                "Consolidation model is also used as a primary model",
            ),
        )
        warnings = [message for triggered, message in warning_checks if triggered]

        distinct_models = set(model_config.primary_models + [model_config.consolidation_model])
        report = {
            'valid': len(errors) == 0,
            'errors': errors,
            'warnings': warnings,
            'modelCount': {
                'primary': len(model_config.primary_models),
                'consolidation': 1,
                'total': len(distinct_models),
            },
        }
        return jsonify(report)
    except Exception as e:
        logger.error(f"Model config validation error: {e}")
        failure = {
            'error': 'validation_error',
            'message': 'Failed to validate model configuration',
        }
        return jsonify(failure), 500
@config_bp.route('/system', methods=['GET'])
@dev_mode_bypass
async def get_system_info():
    """
    Report server settings and current job-queue figures.

    Returns:
        A ``system`` section built from ``server_config`` values and a
        ``queue`` section with the pending and active job counts, or a 500
        ``system_error`` payload when anything fails.
    """
    try:
        from ..config_runtime import server_config
        from ..jobs.manager import JobManager

        manager = JobManager.get_instance()

        # Queue figures are fetched first, pending before active.
        pending_count = await manager.get_queue_size()
        active_count = await manager.get_active_jobs_count()

        system_section = {
            'devMode': server_config.DEV_MODE,
            'maxConcurrentJobs': server_config.MAX_CONCURRENT_JOBS,
            'maxUploadSizeMB': server_config.MAX_UPLOAD_SIZE_MB,
            'fileRetentionHours': server_config.FILE_RETENTION_HOURS,
            'allowedExtensions': list(server_config.ALLOWED_EXTENSIONS),
        }
        queue_section = {
            'pending': pending_count,
            'active': active_count,
            'maxConcurrent': server_config.MAX_CONCURRENT_JOBS,
        }
        return jsonify({'system': system_section, 'queue': queue_section})
    except Exception as e:
        logger.error(f"Failed to get system info: {e}")
        failure = {
            'error': 'system_error',
            'message': 'Failed to retrieve system information',
        }
        return jsonify(failure), 500

View file

@ -0,0 +1,176 @@
"""
Dropdown data API - category / media type hierarchy.
Data is loaded from dropdowns.json (seeded from Excel, updatable by admin).
"""
import json
import logging
import os
from quart import Blueprint, jsonify, request
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
dropdowns_bp = Blueprint('dropdowns', __name__, url_prefix='/api/dropdowns')
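# Seed data embedded as a fallback for when the dropdowns file is missing or
# cannot be loaded (originally taken from "Excel Grid (1).xlsx").
# NOTE(review): a few names look misspelled (e.g. "Mechandise", "Branday
# Adaptation"); presumably they mirror the spreadsheet - confirm before
# correcting, since these strings are runtime values.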
SEED_CATEGORIES = [
{"name": "3D", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "A/B Testing", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Admin", "status": "Active", "mediaTypes": ["Management"]},
{"name": "Amazon page", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Animation", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "App Design", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
{"name": "Artworking (Print)", "status": "Active", "mediaTypes": ["Literature", "Catalogue", "Press - Magazine", "Press - Newspaper", "POS - Print", "POS - Digital", "OOH - Print", "Direct mail - Email", "Direct mail - Print"]},
{"name": "Audio", "status": "Active", "mediaTypes": ["Broadcast - Radio"]},
{"name": "Augmented Reality", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Branday Adaptation", "status": "Active", "mediaTypes": ["Online advertising - Rich media"]},
{"name": "Branding", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "CMS", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
{"name": "Campaign Print Complex", "status": "Active", "mediaTypes": ["Press - Newspaper"]},
{"name": "Campaign Print Simple", "status": "Active", "mediaTypes": ["Press - Magazine"]},
{"name": "Cinema", "status": "Active", "mediaTypes": ["Broadcast - TV", "Broadcast - Cinema", "Broadcast - Radio"]},
{"name": "Cinema Adaptation", "status": "Active", "mediaTypes": ["Broadcast - Cinema"]},
{"name": "Community Management", "status": "Active", "mediaTypes": ["Community management"]},
{"name": "Concept (Video)", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Copywriting", "status": "Active", "mediaTypes": ["Literature", "Transcreation", "Copywriting"]},
{"name": "Copywriting Newsletter", "status": "Active", "mediaTypes": ["Direct mail - Email", "Direct mail - Print"]},
{"name": "Copywriting Social", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Creative Development", "status": "Active", "mediaTypes": ["Literature", "Creative development"]},
{"name": "Creative Development Big Campaign", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Creative Development Small Campaign", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Creative Direction", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Creative Packaging Box", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "DM", "status": "Active", "mediaTypes": ["Direct mail - Print"]},
{"name": "Digital Display (.com)", "status": "Active", "mediaTypes": ["Online advertising - Banner", "Online advertising - Static Image"]},
{"name": "Digital Display (Animation)", "status": "Active", "mediaTypes": ["POS - Digital", "Online advertising - Banner", "Online advertising - Rich media", "Online advertising - Push notifications", "Online advertising - .com"]},
{"name": "Digital Display (POS)", "status": "Active", "mediaTypes": ["Online advertising - Banner", "Online advertising - Static Image"]},
{"name": "Digital Display (Push Notification)", "status": "Active", "mediaTypes": ["Online advertising - Banner", "Online advertising - Static Image"]},
{"name": "Digital Display (Rich Media)", "status": "Active", "mediaTypes": ["Online advertising - Static Image"]},
{"name": "Digital Display (Static)", "status": "Active", "mediaTypes": ["Online advertising - Static Image"]},
{"name": "Display Static Adaptation Standard formats", "status": "Active", "mediaTypes": ["Online advertising - Static Image"]},
{"name": "Display Static Master Standard formats", "status": "Active", "mediaTypes": ["Online advertising - Static Image"]},
{"name": "E-commerce site", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
{"name": "Email", "status": "Active", "mediaTypes": ["Direct mail - Email"]},
{"name": "Event", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Event Management", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Illustration", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Image Adaptation Social", "status": "Active", "mediaTypes": ["Social - Static Image"]},
{"name": "Image Animation", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Infographics", "status": "Active", "mediaTypes": ["Literature", "Online advertising - Banner", "Online advertising - Rich media", "Online advertising - Landing page", "Online advertising - Push notifications"]},
{"name": "Internal Comms", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Key Visual", "status": "Active", "mediaTypes": ["Literature", "Social - Static Image"]},
{"name": "Key Visual Adaptation", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Key Visual Design", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Logo creation", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Management", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Mechandise", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Non-Project Time", "status": "Active", "mediaTypes": ["Management"]},
{"name": "OOH (Digital)", "status": "Active", "mediaTypes": ["OOH - Digital"]},
{"name": "OOH (Print)", "status": "Active", "mediaTypes": ["OOH - Print"]},
{"name": "OOH Complex (Digital)", "status": "Active", "mediaTypes": ["OOH - Digital"]},
{"name": "OOH Complex (Print)", "status": "Active", "mediaTypes": ["OOH - Print"]},
{"name": "OOH Simple (Digital)", "status": "Active", "mediaTypes": ["OOH - Digital"]},
{"name": "OOH Simple (Print)", "status": "Active", "mediaTypes": ["OOH - Print"]},
{"name": "POS", "status": "Active", "mediaTypes": ["POS - Print", "POS - Digital"]},
{"name": "POS Complex", "status": "Active", "mediaTypes": ["POS - Print"]},
{"name": "POS Merchandising Complex (up to 10)", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "POS Merchandising Simple (up to 5)", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "POS Simple", "status": "Active", "mediaTypes": ["POS - Print"]},
{"name": "Packaging", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "Packaging Box", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "Paid Media", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photography Shooting (10-20)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photography Shooting (20-40)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photography Shooting (up to 10)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photography Shooting Still Life", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photoshoot", "status": "Active", "mediaTypes": ["Literature", "Photography"]},
{"name": "Presentations", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Presentations Template", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Print Design", "status": "Active", "mediaTypes": ["Literature", "Catalogue", "Press - Magazine", "Press - Newspaper", "POS - Print", "OOH - Print", "Direct mail - Print"]},
{"name": "Production", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Production (Post)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Production (Pre)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Programmatic", "status": "Active", "mediaTypes": ["Online advertising - Rich media"]},
{"name": "Project Management", "status": "Active", "mediaTypes": ["Management"]},
{"name": "Retouching", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Retouching Complex", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Retouching Simple", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "SEM", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "SEO", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Scoping", "status": "Active", "mediaTypes": ["Management"]},
{"name": "Seedtag Banner Adaptation", "status": "Active", "mediaTypes": ["Online advertising - Rich media"]},
{"name": "Sell Sheet", "status": "Active", "mediaTypes": ["Literature", "Catalogue", "Direct mail - Print"]},
{"name": "Signage", "status": "Active", "mediaTypes": ["POS - Print"]},
{"name": "Single Website Page Design", "status": "Active", "mediaTypes": ["Online advertising - Landing page"]},
{"name": "Skin Adaptation", "status": "Active", "mediaTypes": ["Online advertising - Rich media"]},
{"name": "Social (Animation)", "status": "Active", "mediaTypes": ["Social - Gif"]},
{"name": "Social (Static)", "status": "Active", "mediaTypes": ["Social - Static Image"]},
{"name": "Social (Video)", "status": "Active", "mediaTypes": ["Social - Video"]},
{"name": "Social Carousel (up to 5 images)", "status": "Active", "mediaTypes": ["Social - Static Image"]},
{"name": "Social Reporting", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Social Twitter Thread", "status": "Active", "mediaTypes": ["Social - Static Image"]},
{"name": "Sound", "status": "Active", "mediaTypes": ["Broadcast - Radio"]},
{"name": "Sound Editing", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Storyboarding", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Strategy", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Subtitling", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "TVC", "status": "Active", "mediaTypes": ["Broadcast - TV"]},
{"name": "Transcreation", "status": "Active", "mediaTypes": ["Transcreation"]},
{"name": "Typography", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Video (Edit)", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video (Shoot)", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 10s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 15s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 20s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 30s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 5s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 60s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing 15s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing 1m", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing 20s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing 45s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing Event", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing Stock Images", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Recording", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Virtual Reality", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Voice Over", "status": "Active", "mediaTypes": ["Broadcast - Radio"]},
{"name": "Web", "status": "Active", "mediaTypes": ["Online advertising - Landing page"]},
{"name": "Web Analytics", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Web UI & UX", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
{"name": "Website Design", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
]
def _load_dropdowns() -> list:
    """
    Load the category list from ``server_config.DROPDOWNS_FILE``.

    Returns:
        The list stored in the file, or ``SEED_CATEGORIES`` when the file does
        not exist, cannot be read or parsed, or does not contain a JSON list.
        Loading stays best-effort (it never raises), but every fallback other
        than a missing file is logged so a broken file is not hidden.
    """
    path = server_config.DROPDOWNS_FILE
    if not os.path.exists(path):
        return SEED_CATEGORIES
    try:
        # Explicit encoding: JSON is UTF-8, independent of the process locale.
        with open(path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except Exception as e:
        logger.warning(f"Could not load dropdowns from {path}, using seed data: {e}")
        return SEED_CATEGORIES
    if not isinstance(loaded, list):
        # Callers iterate over category dicts; anything else is unusable.
        logger.warning(
            f"Dropdowns file {path} holds {type(loaded).__name__}, expected a list; using seed data"
        )
        return SEED_CATEGORIES
    return loaded
def save_dropdowns(categories: list):
    """
    Write ``categories`` as indented JSON to ``server_config.DROPDOWNS_FILE``.

    Args:
        categories: JSON-serializable list of category entries.

    Raises:
        TypeError / ValueError: if ``categories`` cannot be serialized. The
            existing file is left untouched in that case.
        OSError: if the file cannot be written.
    """
    # Serialize before opening: open(..., 'w') truncates immediately, so a
    # serialization failure afterwards would leave an empty or partial file.
    serialized = json.dumps(categories, indent=2)
    path = server_config.DROPDOWNS_FILE
    with open(path, 'w', encoding='utf-8') as f:
        f.write(serialized)
@dropdowns_bp.route('/categories', methods=['GET'])
async def get_categories():
    """
    Return dropdown categories.

    Query parameters:
        - active: entries are limited to those whose ``status`` is ``Active``
          when this equals ``true`` (case-insensitive); it defaults to ``true``.
    """
    loaded = _load_dropdowns()
    flag = request.args.get('active', 'true')
    if flag.lower() != 'true':
        return jsonify({'categories': loaded})

    active_entries = []
    for entry in loaded:
        if entry.get('status') == 'Active':
            active_entries.append(entry)
    return jsonify({'categories': active_entries})
@dropdowns_bp.route('/all', methods=['GET'])
async def get_all():
    """
    Return every dropdown category without status filtering (admin preview).

    NOTE(review): no auth decorator is applied to this route here - confirm
    whether access is restricted elsewhere.
    """
    everything = _load_dropdowns()
    return jsonify({'categories': everything})

View file

@ -0,0 +1,62 @@
"""
CSV export - Activation Calendar format.
Mirrors the export logic from script.js in ac-helper.
"""
import csv
import io
import logging
from quart import Blueprint, make_response
from ..auth.middleware import auth_required, get_user_id
from ..sheets.manager import load_sheet_data
logger = logging.getLogger(__name__)
export_bp = Blueprint('export', __name__, url_prefix='/api/sheets')
# Column names of the Activation Calendar CSV, in output order.
AC_HEADERS = [
    'Number',
    'Title',
    'Status',
    'Category',
    'Media',
    'Sub media',
    'Destination',
    'Format',
    'Supply date',
    'Live date',
    'End date',
    'Special instructions',
    'Language',
    'Country',
    'Quantity',
]
@export_bp.route('/<sheet_id>/export', methods=['GET'])
@auth_required
async def export_csv(sheet_id: str):
    """
    Export one of the caller's sheets as an Activation Calendar CSV download.

    Columns follow ``AC_HEADERS``. ``Number``, ``Destination``, ``End date``
    and ``Special instructions`` are always written empty, ``Quantity`` is
    always ``1.00``, and the remaining columns are copied from the sheet row
    (the ``Sub media`` column is read from the row's ``Sub-media`` key).

    Returns:
        The CSV as an attachment, or a 404 ``not_found`` payload when the
        sheet data is ``None``.
    """
    owner_id = get_user_id()
    rows = load_sheet_data(owner_id, sheet_id)
    if rows is None:
        return {'error': 'not_found'}, 404

    # (output column, sheet key) pairs for the values copied from each row.
    copied_columns = (
        ('Title', 'Title'),
        ('Status', 'Status'),
        ('Category', 'Category'),
        ('Media', 'Media'),
        ('Sub media', 'Sub-media'),
        ('Format', 'Format'),
        ('Supply date', 'Supply date'),
        ('Live date', 'Live date'),
        ('Language', 'Language'),
        ('Country', 'Country'),
    )

    buffer = io.StringIO()
    csv_writer = csv.DictWriter(buffer, fieldnames=AC_HEADERS, extrasaction='ignore')
    csv_writer.writeheader()
    for row in rows:
        # Fixed columns first; Number is cleared on export.
        record = {
            'Number': '',
            'Destination': '',
            'End date': '',
            'Special instructions': '',
            'Quantity': '1.00',
        }
        for column, key in copied_columns:
            record[column] = row.get(key, '')
        csv_writer.writerow(record)

    response = await make_response(buffer.getvalue())
    download_headers = {
        'Content-Type': 'text/csv',
        'Content-Disposition': f'attachment; filename="activation_calendar_{sheet_id}.csv"',
    }
    for header_name, header_value in download_headers.items():
        response.headers[header_name] = header_value
    return response

615
backend/server/api/jobs.py Executable file
View file

@ -0,0 +1,615 @@
"""
Jobs API endpoints for file upload and processing management
"""
import logging
import os
import zipfile
from datetime import datetime
from io import BytesIO
from quart import Blueprint, request, jsonify, send_file, g
import csv
from ..auth.middleware import dev_mode_bypass, auth_required, get_user_id
from ..jobs.models import Job, ModelConfiguration
from ..jobs.manager import JobManager
from ..ws.manager import WebSocketManager
logger = logging.getLogger(__name__)
jobs_bp = Blueprint('jobs', __name__, url_prefix='/api/jobs')
@jobs_bp.route('', methods=['POST'])
@dev_mode_bypass
async def create_jobs():
    """
    Create new processing jobs from uploaded files

    Accepts multipart/form-data with:
        - files: One or more files to process (several files may share the
          same field name)
        - modelConfig (optional): JSON string with model configuration

    Returns:
        ``{'jobs': [...], 'errors': [...]}`` with the created job objects and
        one message per file that failed. Responds with 400 when no files are
        sent, the model configuration is invalid, or every file failed; 500 on
        any unexpected error.
    """
    try:
        job_manager = JobManager.get_instance()
        ws_manager = WebSocketManager()
        user_id = get_user_id()

        # Get uploaded files
        files = await request.files
        # multi=True is required: by default MultiDict.items() yields only the
        # FIRST value per field name, which silently drops every other file
        # uploaded under the same field (e.g. several "files" parts).
        uploads = list(files.items(multi=True)) if files else []
        if not uploads:
            return jsonify({
                'error': 'no_files',
                'message': 'No files provided for upload'
            }), 400
        logger.info(f"Received {len(uploads)} files for upload")

        # Get model configuration from form data
        form_data = await request.form
        model_config_json = form_data.get('modelConfig')
        model_config = None
        if model_config_json:
            try:
                import json
                model_config_data = json.loads(model_config_json)
                model_config = ModelConfiguration.from_dict(model_config_data)
            except Exception as e:
                return jsonify({
                    'error': 'invalid_model_config',
                    'message': f'Invalid model configuration: {e}'
                }), 400

        created_jobs = []
        errors = []

        # Process each uploaded file; one failing file does not abort the rest
        for field_name, file_storage in uploads:
            try:
                if not file_storage or not file_storage.filename:
                    logger.warning(f"Skipping empty file field: {field_name}")
                    continue
                logger.info(f"Processing file: {file_storage.filename}")

                # Read file data
                file_data = file_storage.read()
                file_size = len(file_data)

                # Create job
                job = await job_manager.create_job(
                    file_name=file_storage.filename,
                    file_size=file_size,
                    file_data=file_data,
                    user_id=user_id,
                    model_config=model_config
                )
                created_jobs.append(job)
                logger.info(
                    f"Created and queued job {job.id} for {file_storage.filename} (user: {user_id})"
                )

                # Broadcast job creation
                await ws_manager.broadcast_to_user(user_id, {
                    'type': 'job.created',
                    'job': job.to_dict()
                })
                # Broadcast job accepted (when it enters the queue)
                await ws_manager.broadcast_to_user(user_id, {
                    'type': 'job.accepted',
                    'jobId': job.id
                })
            except Exception as e:
                error_msg = f"Failed to process file {file_storage.filename}: {str(e)}"
                errors.append(error_msg)
                logger.error(error_msg)

        if not created_jobs and errors:
            return jsonify({
                'error': 'upload_failed',
                'message': 'Failed to process any files',
                'details': errors
            }), 400

        return jsonify({
            'jobs': [job.to_dict() for job in created_jobs],
            'errors': errors
        })
    except Exception as e:
        logger.error(f"Job creation failed: {e}", exc_info=True)
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to create jobs'
        }), 500
@jobs_bp.route('', methods=['GET'])
@dev_mode_bypass
async def list_jobs():
    """
    List jobs for the current user

    Query parameters:
        - limit: Maximum number of jobs to return (default: 50, max: 100)
        - offset: Number of jobs to skip (default: 0)
        - status: Filter by job status (optional, case-insensitive)

    Returns:
        Paginated list of jobs. Responds with 400 when ``limit`` or ``offset``
        is not a non-negative integer, 500 on any unexpected error.
    """
    try:
        job_manager = JobManager.get_instance()
        user_id = get_user_id()

        # Parse query parameters; bad values are a client error, not a 500
        try:
            limit = int(request.args.get('limit', 50))
            offset = int(request.args.get('offset', 0))
        except (TypeError, ValueError):
            limit = offset = -1
        if limit < 0 or offset < 0:
            return jsonify({
                'error': 'invalid_request',
                'message': 'limit and offset must be non-negative integers'
            }), 400
        limit = min(limit, 100)
        status_filter = request.args.get('status')

        # Get user jobs
        jobs = await job_manager.get_user_jobs(user_id, limit, offset)

        # Apply status filter if provided. The filter runs on the page that
        # was already fetched, so a filtered page can hold fewer than `limit`
        # jobs even when more matching jobs exist.
        if status_filter:
            wanted = status_filter.lower()
            jobs = [job for job in jobs if job.phase.value.lower() == wanted]

        return jsonify({
            'jobs': [job.to_dict() for job in jobs],
            'pagination': {
                'limit': limit,
                'offset': offset,
                'count': len(jobs)
            }
        })
    except Exception as e:
        logger.error(f"Failed to list jobs: {e}")
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to retrieve jobs'
        }), 500
@jobs_bp.route('/<job_id>', methods=['GET'])
@dev_mode_bypass
async def get_job(job_id: str):
    """
    Fetch a single job by id.

    Returns:
        ``{'job': {...}}`` for an accessible job; 404 when the job does not
        exist, 403 when it belongs to another user (not enforced in dev
        mode), 500 on any unexpected error.
    """
    try:
        manager = JobManager.get_instance()
        requester_id = get_user_id()

        job = await manager.get_job(job_id)
        if not job:
            not_found = {'error': 'not_found', 'message': 'Job not found'}
            return jsonify(not_found), 404

        # Ownership is enforced unless the server runs in dev mode.
        from ..config_runtime import server_config
        if not (server_config.DEV_MODE or job.user_id == requester_id):
            forbidden = {'error': 'forbidden', 'message': 'Access denied'}
            return jsonify(forbidden), 403

        return jsonify({'job': job.to_dict()})
    except Exception as e:
        logger.error(f"Failed to get job {job_id}: {e}")
        failure = {'error': 'server_error', 'message': 'Failed to retrieve job'}
        return jsonify(failure), 500
@jobs_bp.route('/<job_id>/download', methods=['GET'])
@dev_mode_bypass
async def download_job_result(job_id: str):
    """
    Send a job's CSV output file as a download.

    The attachment is named after the uploaded file: its name without the
    extension, followed by ``-results.csv``.

    Returns:
        The CSV file as an attachment; 404 when the job does not exist, 403
        when it belongs to another user (not enforced in dev mode), 400 when
        no output file is available, 500 on any unexpected error.
    """
    try:
        manager = JobManager.get_instance()
        requester_id = get_user_id()

        job = await manager.get_job(job_id)
        if not job:
            not_found = {'error': 'not_found', 'message': 'Job not found'}
            return jsonify(not_found), 404

        # Ownership is enforced unless the server runs in dev mode.
        from ..config_runtime import server_config
        if not (server_config.DEV_MODE or job.user_id == requester_id):
            forbidden = {'error': 'forbidden', 'message': 'Access denied'}
            return jsonify(forbidden), 403

        # An output path must be recorded and still present on disk.
        has_output = bool(job.output_path) and os.path.exists(job.output_path)
        if not has_output:
            not_ready = {'error': 'not_ready', 'message': 'Job result not available'}
            return jsonify(not_ready), 400

        stem, _extension = os.path.splitext(job.file_name)
        download_filename = f"{stem}-results.csv"
        return await send_file(
            job.output_path,
            as_attachment=True,
            attachment_filename=download_filename,
            mimetype='text/csv',
        )
    except Exception as e:
        logger.error(f"Download failed for job {job_id}: {e}")
        failure = {'error': 'server_error', 'message': 'Failed to download result'}
        return jsonify(failure), 500
@jobs_bp.route('/<job_id>/logs', methods=['GET'])
@dev_mode_bypass
async def get_job_logs(job_id: str):
    """
    Get logs for a specific job

    Query parameters:
        - limit: Maximum number of log entries (default: 100, max: 1000);
          the most recent entries are returned, and 0 returns none
        - level: Filter by log level (optional, case-insensitive)

    Returns:
        List of log entries. Responds with 404 when the job does not exist,
        403 when it belongs to another user (not enforced in dev mode), 400
        when ``limit`` is not a non-negative integer, 500 on any unexpected
        error.
    """
    try:
        job_manager = JobManager.get_instance()
        user_id = get_user_id()

        job = await job_manager.get_job(job_id)
        if not job:
            return jsonify({
                'error': 'not_found',
                'message': 'Job not found'
            }), 404

        # Check if user owns this job (skip check in dev mode)
        from ..config_runtime import server_config
        if not server_config.DEV_MODE and job.user_id != user_id:
            return jsonify({
                'error': 'forbidden',
                'message': 'Access denied'
            }), 403

        # Parse query parameters; bad values are a client error, not a 500
        try:
            limit = int(request.args.get('limit', 100))
        except (TypeError, ValueError):
            limit = -1
        if limit < 0:
            return jsonify({
                'error': 'invalid_request',
                'message': 'limit must be a non-negative integer'
            }), 400
        limit = min(limit, 1000)
        level_filter = request.args.get('level')

        # Get logs
        logs = job.logs

        # Apply level filter if provided
        if level_filter:
            wanted = level_filter.lower()
            logs = [log for log in logs if log.level.lower() == wanted]

        # Keep the newest `limit` entries. limit == 0 needs its own branch
        # because logs[-0:] is the whole list, not an empty one.
        logs = logs[-limit:] if limit else []

        return jsonify({
            'logs': [log.to_dict() for log in logs],
            'count': len(logs)
        })
    except Exception as e:
        logger.error(f"Failed to get logs for job {job_id}: {e}")
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to retrieve job logs'
        }), 500
@jobs_bp.route('/<job_id>', methods=['DELETE'])
@dev_mode_bypass
async def delete_job(job_id: str):
    """
    Delete a job through the job manager and notify the user's sockets.

    Returns:
        A success message after a ``job.deleted`` event has been broadcast;
        404 when the job does not exist, 403 when it belongs to another user
        (not enforced in dev mode), 500 when deletion fails or on any
        unexpected error.
    """
    try:
        manager = JobManager.get_instance()
        sockets = WebSocketManager()
        user_id = get_user_id()

        job = await manager.get_job(job_id)
        if not job:
            not_found = {'error': 'not_found', 'message': 'Job not found'}
            return jsonify(not_found), 404

        # Ownership is enforced unless the server runs in dev mode.
        from ..config_runtime import server_config
        if not (server_config.DEV_MODE or job.user_id == user_id):
            forbidden = {'error': 'forbidden', 'message': 'Access denied'}
            return jsonify(forbidden), 403

        deleted = await manager.delete_job(job_id)
        if not deleted:
            rejected = {'error': 'deletion_failed', 'message': 'Failed to delete job'}
            return jsonify(rejected), 500

        # Tell the user's connected clients that the job is gone.
        event = {'type': 'job.deleted', 'jobId': job_id}
        await sockets.broadcast_to_user(user_id, event)
        logger.info(f"Deleted job {job_id} (user: {user_id})")
        return jsonify({'message': 'Job deleted successfully'})
    except Exception as e:
        logger.error(f"Failed to delete job {job_id}: {e}")
        failure = {'error': 'server_error', 'message': 'Failed to delete job'}
        return jsonify(failure), 500
@jobs_bp.route('/batch-download', methods=['POST'])
@dev_mode_bypass
async def batch_download():
    """
    Download multiple job results as a ZIP file

    Expects:
    {
    "jobIds": ["job1", "job2", "job3"]
    }

    Returns:
    ZIP file containing CSV results. Jobs that are unknown, owned by another
    user (outside dev mode) or have no output file are skipped with a
    warning. Responds 400 when the body is missing/malformed, when no job
    ids are given, or when none of the requested jobs yields a file.
    """
    try:
        import asyncio
        from ..config_runtime import server_config

        job_manager = JobManager.get_instance()
        user_id = get_user_id()

        # silent=True turns an absent or malformed JSON body into None so it
        # can be reported as a 400 instead of surfacing as a 500.
        data = await request.get_json(silent=True)
        job_ids = data.get('jobIds', []) if isinstance(data, dict) else []
        if not isinstance(job_ids, list):
            job_ids = []
        # Keep only string ids and drop repeats (order preserved); a repeated
        # id would otherwise add the same entry name to the archive twice.
        job_ids = list(dict.fromkeys(j for j in job_ids if isinstance(j, str) and j))
        if not job_ids:
            return jsonify({
                'error': 'invalid_request',
                'message': 'No job IDs provided'
            }), 400

        def _read_file(path):
            with open(path, 'rb') as csv_file:
                return csv_file.read()

        loop = asyncio.get_running_loop()

        # Create ZIP file in memory
        zip_buffer = BytesIO()
        csv_files = []
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for job_id in job_ids:
                job = await job_manager.get_job(job_id)
                if not job:
                    logger.warning(f"Job {job_id} not found for batch download")
                    continue

                # Check if user owns this job (skip check in dev mode)
                if not server_config.DEV_MODE and job.user_id != user_id:
                    logger.warning(f"User {user_id} denied access to job {job_id}")
                    continue

                # Check if job has output
                if not job.output_path or not os.path.exists(job.output_path):
                    logger.warning(f"Job {job_id} has no output file")
                    continue

                # basename() keeps any directory component of the uploaded
                # file name out of the archive entry name.
                base_name = os.path.splitext(os.path.basename(job.file_name))[0]
                csv_filename = f"{base_name}-{job_id[:8]}.csv"

                # Read file in thread pool to avoid blocking the event loop
                csv_content = await loop.run_in_executor(None, _read_file, job.output_path)
                zip_file.writestr(csv_filename, csv_content)
                csv_files.append(csv_filename)

        if not csv_files:
            return jsonify({
                'error': 'no_results',
                'message': 'No completed jobs found for download'
            }), 400

        zip_buffer.seek(0)

        # Generate download filename
        timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"batch_results_{timestamp}.zip"
        return await send_file(
            zip_buffer,
            as_attachment=True,
            attachment_filename=zip_filename,
            mimetype='application/zip'
        )
    except Exception as e:
        logger.error(f"Batch download failed: {e}")
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to create batch download'
        }), 500
@jobs_bp.route('/cleanup', methods=['POST'])
@dev_mode_bypass
async def cleanup_expired():
    """
    Run the job manager's expired-job cleanup on demand.

    Returns:
    JSON with a confirmation message and 'itemsRemoved', the count reported
    by JobManager.cleanup_expired_jobs(); 500 on failure.

    NOTE(review): described as an admin/maintenance endpoint, but no role
    check is visible in this handler — confirm whether any authenticated
    user should be allowed to trigger it.
    """
    try:
        removed = await JobManager.get_instance().cleanup_expired_jobs()
        logger.info(f"Manual cleanup completed: {removed} items removed")
        payload = {
            'message': 'Cleanup completed',
            'itemsRemoved': removed
        }
        return jsonify(payload)
    except Exception as e:
        logger.error(f"Cleanup failed: {e}")
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to perform cleanup'
        }), 500
@jobs_bp.route('/stats', methods=['GET'])
@dev_mode_bypass
async def get_job_stats():
    """
    Summarise the current user's jobs (at most the 1000 newest).

    Returns:
    JSON 'stats' object with job counts by outcome, the success rate
    (completed / total, 0 when there are no jobs), and the summed asset
    count and USD cost over jobs that carry a summary.
    """
    try:
        manager = JobManager.get_instance()
        owner = get_user_id()
        jobs = await manager.get_user_jobs(owner, limit=1000)

        # Tally everything in one pass over the job list
        total = len(jobs)
        completed = 0
        failed = 0
        active = 0
        assets = 0
        cost = 0
        for entry in jobs:
            phase_name = entry.phase.value
            if phase_name == 'COMPLETED':
                completed += 1
            elif phase_name == 'FAILED':
                failed += 1
            else:
                active += 1
        for entry in jobs:
            if entry.summary:
                assets = assets + entry.summary.assets_extracted
        for entry in jobs:
            if entry.summary:
                cost = cost + entry.summary.cost_usd_total

        if total > 0:
            success_rate = completed / total
        else:
            success_rate = 0

        return jsonify({
            'stats': {
                'totalJobs': total,
                'completedJobs': completed,
                'failedJobs': failed,
                'activeJobs': active,
                'successRate': success_rate,
                'totalAssetsExtracted': assets,
                'totalCostUsd': round(cost, 4)
            }
        })
    except Exception as e:
        logger.error(f"Failed to get job stats: {e}")
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to retrieve statistics'
        }), 500
def _coerce_quantity(raw) -> int:
    """Convert a CSV 'quantity' cell to an int, defaulting to 1 when it is empty or not numeric."""
    if not raw:
        return 1
    try:
        return int(raw)
    except (TypeError, ValueError):
        pass
    try:
        # Accept values such as "2.0" that int() alone rejects
        return int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return 1


@jobs_bp.route('/<job_id>/deliverables', methods=['GET'])
@auth_required
async def get_job_deliverables(job_id: str):
    """
    Return extracted deliverables from a completed job as JSON rows
    ready for the Review Import flow.
    Reads the output CSV and maps columns to AC Deliverable schema.

    Returns:
    JSON with 'deliverables' (one dict per CSV row) and 'count'.
    Responds 404 when the job is unknown, 403 when it belongs to another
    user (outside dev mode), 400 when no output file exists yet and 500 on
    unexpected errors.
    """
    try:
        job_manager = JobManager.get_instance()
        user_id = get_user_id()
        job = await job_manager.get_job(job_id)
        if not job:
            return jsonify({'error': 'not_found'}), 404

        from ..config_runtime import server_config
        if not server_config.DEV_MODE and job.user_id != user_id:
            return jsonify({'error': 'forbidden'}), 403

        if not job.output_path or not os.path.exists(job.output_path):
            return jsonify({'error': 'not_ready', 'message': 'Job not completed yet'}), 400

        deliverables = []
        with open(job.output_path, newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for row in reader:
                # Map brief-extractor CSV columns → AC Deliverable schema.
                # DictReader fills missing trailing cells with None, hence
                # the `or ''` before the substring test.
                market = row.get('language_country_market', '') or ''
                lang, country = ('', '')
                if '-' in market:
                    # "<lang>-<country>": split on the first dash only
                    parts = market.split('-', 1)
                    lang = parts[0].strip().upper()
                    country = parts[1].strip().upper()
                deliverables.append({
                    'Number': '',
                    'Title': row.get('title', ''),
                    'Status': row.get('status', 'Booked') or 'Booked',
                    'Category': row.get('category', ''),
                    'Media': row.get('media', ''),
                    'Sub-media': row.get('asset_type', ''),
                    'Format': row.get('technical_specifications', ''),
                    'Supply date': row.get('review_date', ''),
                    'Live date': row.get('live_date', ''),
                    'Language': lang,
                    'Country': country,
                    # A single unparsable quantity must not fail the whole job
                    'Quantity': _coerce_quantity(row.get('quantity', 1)),
                    # Extra brief fields kept for review UI
                    '_brief_title': row.get('title', ''),
                    '_brand_identifier': row.get('brand_identifier', ''),
                    '_priority': row.get('priority_level', ''),
                })
        return jsonify({'deliverables': deliverables, 'count': len(deliverables)})
    except Exception as e:
        logger.error(f"Failed to get deliverables for job {job_id}: {e}")
        return jsonify({'error': 'server_error'}), 500

View file

@ -0,0 +1,119 @@
"""
Sheet CRUD API — port of ac-helper api.php sheet management.
All routes scoped to the authenticated user.
"""
import logging
from quart import Blueprint, jsonify, request
from ..auth.middleware import auth_required, get_user_id
from ..sheets.manager import (
get_user_sheets, create_sheet, load_sheet_data,
update_sheet, delete_sheet, rename_sheet, duplicate_sheet,
generate_next_id,
)
# Module-level logger named after this module
logger = logging.getLogger(__name__)
# Blueprint holding every sheet route; all paths below are relative to /api/sheets
sheets_bp = Blueprint('sheets', __name__, url_prefix='/api/sheets')
@sheets_bp.route('', methods=['GET'])
@auth_required
async def list_sheets():
    """Return the authenticated user's sheets as {'sheets': [...]}."""
    owner = get_user_id()
    return jsonify({'sheets': get_user_sheets(owner)})
@sheets_bp.route('', methods=['POST'])
@auth_required
async def create_new_sheet():
    """
    Create a sheet for the authenticated user.

    Body (all optional): {"name": str, "data": list}. Missing values default
    to an empty name and an empty row list. Responds 201 with the new sheet.
    """
    owner = get_user_id()
    payload = await request.get_json() or {}
    created = create_sheet(
        owner,
        payload.get('name', ''),
        payload.get('data', []),
    )
    return jsonify({'sheet': created}), 201
@sheets_bp.route('/<sheet_id>', methods=['GET'])
@auth_required
async def get_sheet(sheet_id: str):
    """Return one sheet's rows as {'data': ...}, or 404 when load_sheet_data yields None."""
    rows = load_sheet_data(get_user_id(), sheet_id)
    if rows is not None:
        return jsonify({'data': rows})
    return jsonify({'error': 'not_found'}), 404
@sheets_bp.route('/<sheet_id>', methods=['PUT'])
@auth_required
async def update_sheet_data(sheet_id: str):
    """
    Replace a sheet's rows with the 'data' list from the request body
    (an empty list when absent). Always answers {'success': True}; the
    return value of update_sheet is not inspected.
    """
    owner = get_user_id()
    payload = await request.get_json() or {}
    rows = payload.get('data', [])
    update_sheet(owner, sheet_id, rows)
    return jsonify({'success': True})
@sheets_bp.route('/<sheet_id>', methods=['DELETE'])
@auth_required
async def delete_sheet_route(sheet_id: str):
    """Delete the given sheet for the authenticated user and answer {'success': True}."""
    delete_sheet(get_user_id(), sheet_id)
    return jsonify({'success': True})
@sheets_bp.route('/<sheet_id>', methods=['PATCH'])
@auth_required
async def rename_sheet_route(sheet_id: str):
    """
    Rename a sheet to the 'name' given in the request body (empty string
    when absent). Responds 404 when rename_sheet reports failure.
    """
    owner = get_user_id()
    payload = await request.get_json() or {}
    new_name = payload.get('name', '')
    if rename_sheet(owner, sheet_id, new_name):
        return jsonify({'success': True})
    return jsonify({'error': 'not_found'}), 404
@sheets_bp.route('/<sheet_id>/duplicate', methods=['POST'])
@auth_required
async def duplicate_sheet_route(sheet_id: str):
    """Copy a sheet; responds 201 with the copy, or 404 when duplicate_sheet returns None."""
    copy = duplicate_sheet(get_user_id(), sheet_id)
    if copy is not None:
        return jsonify({'sheet': copy}), 201
    return jsonify({'error': 'not_found'}), 404
@sheets_bp.route('/<sheet_id>/import', methods=['POST'])
@auth_required
async def import_deliverables(sheet_id: str):
    """
    Import a list of deliverables into an existing sheet.
    Body: { "deliverables": [...], "mode": "append" | "replace" }

    Each imported row gets a fresh 'Number' from generate_next_id, default
    'Status' ('Booked') and 'Quantity' (1) when missing, and loses every key
    starting with '_' (internal brief metadata). Any mode other than
    "replace" appends to the existing rows.

    Returns:
    {'success': True, 'imported': <rows added>, 'total': <rows in sheet>}.
    Responds 400 when the body is not an object or 'deliverables' is not a
    list, and 404 when the sheet does not exist. Entries of 'deliverables'
    that are not objects are skipped and not counted as imported.
    """
    user_id = get_user_id()
    body = await request.get_json() or {}
    if not isinstance(body, dict):
        return jsonify({'error': 'invalid_request', 'message': 'Body must be a JSON object'}), 400

    incoming = body.get('deliverables', [])
    if not isinstance(incoming, list):
        return jsonify({'error': 'invalid_request', 'message': 'deliverables must be a list'}), 400
    mode = body.get('mode', 'append')

    existing = load_sheet_data(user_id, sheet_id)
    if existing is None:
        return jsonify({'error': 'not_found'}), 404

    base = [] if mode == 'replace' else list(existing)
    imported = 0
    for entry in incoming:
        if not isinstance(entry, dict):
            continue
        # Build a new dict instead of editing the request payload in place;
        # this also strips internal brief metadata fields ('_' prefix).
        row = {k: v for k, v in entry.items() if not str(k).startswith('_')}
        # generate_next_id sees the rows added so far, so ids keep advancing
        row['Number'] = generate_next_id(base)
        row.setdefault('Status', 'Booked')
        row.setdefault('Quantity', 1)
        base.append(row)
        imported += 1

    update_sheet(user_id, sheet_id, base)
    return jsonify({'success': True, 'imported': imported, 'total': len(base)})

213
backend/server/app.py Normal file
View file

@ -0,0 +1,213 @@
"""
Main Quart application — AC Tool (AC Helper + Brief Extractor unified)
"""
import asyncio
import json
import logging
import os
import signal
from datetime import datetime
from typing import List
from quart import Quart, websocket, jsonify
from quart_cors import cors
import structlog
from .config_runtime import server_config
from .auth import msal_auth
from .jobs import JobManager
from .ws import ws_manager
from .runners.job_runner import start_background_workers, stop_background_workers
# API blueprints
from .api.auth import auth_bp
from .api.jobs import jobs_bp
from .api.config import config_bp
from .api.sheets import sheets_bp
from .api.export import export_bp
from .api.ai_command import ai_bp
from .api.dropdowns import dropdowns_bp
from .api.admin import admin_bp
# Configure structlog once at import time: stdlib-integrated loggers whose
# events pass through the processor chain below and are rendered as JSON.
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="ISO"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.JSONRenderer(),
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
# Module logger (structlog, so keyword arguments become structured fields)
logger = structlog.get_logger(__name__)
# Tasks started in the app's startup hook and stopped in its shutdown hook
background_workers: List[asyncio.Task] = []
def create_app() -> Quart:
    """
    Build and configure the Quart application.

    Applies CORS and upload-size/secret settings from server_config, makes
    sure the data directories and dropdown seed file exist, registers all
    API blueprints and the SPA fallback, and wires up the startup/shutdown
    hooks, the /health endpoint, the /ws websocket and JSON error handlers.

    Returns:
    The configured Quart app.
    """
    app = Quart(__name__)
    cors_cfg = server_config.get_cors_config()
    cors(app, **cors_cfg)
    app.config.update({
        'MAX_CONTENT_LENGTH': server_config.MAX_CONTENT_LENGTH,
        'SECRET_KEY': server_config.SESSION_SECRET,
    })
    server_config.ensure_directories()

    # Seed dropdowns.json from embedded data if not present
    _seed_dropdowns_if_needed()

    job_manager = JobManager.get_instance()

    # Register blueprints
    for bp in [auth_bp, jobs_bp, config_bp, sheets_bp, export_bp, ai_bp, dropdowns_bp, admin_bp]:
        app.register_blueprint(bp)

    # Serve React SPA static files (built by Vite into /app/frontend/dist)
    _register_spa(app)

    @app.before_serving
    async def startup():
        logger.info("Starting AC Tool server...")
        await ws_manager.start_background_tasks()
        global background_workers
        background_workers = await start_background_workers(
            job_manager, ws_manager, num_workers=server_config.MAX_CONCURRENT_JOBS
        )
        # The hourly cleanup task is tracked with the workers so shutdown stops it too
        background_workers.append(asyncio.create_task(periodic_cleanup(job_manager)))
        logger.info("Server started", dev_mode=server_config.DEV_MODE)

    @app.after_serving
    async def shutdown():
        logger.info("Shutting down AC Tool server...")
        global background_workers
        if background_workers:
            await stop_background_workers(background_workers)
        await ws_manager.stop_background_tasks()

    @app.route('/health')
    async def health():
        queue_size = await job_manager.get_queue_size()
        active_jobs = await job_manager.get_active_jobs_count()
        ws_stats = await ws_manager.get_connection_stats()
        return jsonify({
            'status': 'healthy',
            'timestamp': datetime.utcnow().isoformat(),
            'queue': {'pending': queue_size, 'active': active_jobs},
            'websockets': ws_stats,
            'devMode': server_config.DEV_MODE,
        })

    @app.websocket('/ws')
    async def websocket_handler():
        client = None
        try:
            if server_config.DEV_MODE:
                user_id = server_config.DEV_USER_ID
            else:
                user_id = None
                # Token comes from ?token=... or from a "Bearer <token>"
                # Authorization header; any other header scheme is ignored
                # rather than blindly sliced.
                token = websocket.args.get('token')
                if not token:
                    auth_header = websocket.headers.get('Authorization', '')
                    if auth_header.startswith('Bearer '):
                        token = auth_header[7:]
                if token:
                    from .auth.msal_auth import msal_auth as _msal
                    info = await _msal.validate_token(token)
                    if info:
                        user_id = info['oid']
            if not user_id:
                await websocket.send(json.dumps({'error': 'unauthorized'}))
                return

            client = await ws_manager.register_client(user_id)

            # Initial snapshot: outside dev mode a client only receives its
            # own jobs, matching the ownership checks on the REST routes.
            if server_config.DEV_MODE:
                jobs_data = job_manager.serialize_all()
            else:
                jobs_data = [
                    job.to_dict()
                    for job in list(job_manager.jobs.values())
                    if job.user_id == user_id
                ]
            await ws_manager.send_queue_snapshot(client, jobs_data)

            while True:
                try:
                    msg = await websocket.receive()
                    if not msg:
                        continue
                    try:
                        data = json.loads(msg)
                    except (TypeError, ValueError):
                        # A malformed frame is ignored instead of ending the session
                        continue
                    if isinstance(data, dict) and data.get('type') == 'ping':
                        await client.send({'type': 'pong'})
                except Exception:
                    break
        except Exception as e:
            logger.error(f"WebSocket error: {e}")
        finally:
            if client:
                await ws_manager.unregister_client(client.client_id)

    # Error handlers
    @app.errorhandler(401)
    async def unauthorized(e):
        return jsonify({'error': 'unauthorized'}), 401

    @app.errorhandler(403)
    async def forbidden(e):
        return jsonify({'error': 'forbidden'}), 403

    @app.errorhandler(404)
    async def not_found(e):
        return jsonify({'error': 'not_found'}), 404

    @app.errorhandler(413)
    async def too_large(e):
        return jsonify({'error': 'file_too_large', 'message': f'Max {server_config.MAX_UPLOAD_SIZE_MB}MB'}), 413

    @app.errorhandler(500)
    async def internal(e):
        return jsonify({'error': 'internal_error'}), 500

    return app
def _register_spa(app: Quart):
    """
    Serve the Vite-built React frontend for all non-API routes.

    The build directory is taken from the FRONTEND_DIST environment variable,
    falling back to ../../frontend/dist relative to this package. When the
    directory does not exist nothing is registered (API-only mode).

    Existing files under the build directory are served as-is; every other
    path gets index.html so client-side routing works. Paths under 'api/'
    that reach this catch-all matched no API route and get a JSON 404
    instead of the HTML shell.
    """
    from quart import send_from_directory

    dist = os.environ.get('FRONTEND_DIST', os.path.join(os.path.dirname(os.path.dirname(__file__)), '..', 'frontend', 'dist'))
    dist = os.path.abspath(dist)
    if not os.path.isdir(dist):
        logger.warning(f"Frontend dist not found at {dist} — API-only mode")
        return

    @app.route('/', defaults={'path': ''})
    @app.route('/<path:path>')
    async def serve_spa(path):
        if path == 'api' or path.startswith('api/'):
            return jsonify({'error': 'not_found'}), 404
        full = os.path.join(dist, path)
        if path and os.path.isfile(full):
            return await send_from_directory(dist, path)
        return await send_from_directory(dist, 'index.html')
def _seed_dropdowns_if_needed():
    """Create dropdowns.json from the embedded SEED_CATEGORIES unless the file is already there."""
    target = server_config.DROPDOWNS_FILE
    if not os.path.exists(target):
        # Imported lazily, only when seeding is actually required
        from .api.dropdowns import SEED_CATEGORIES, save_dropdowns
        save_dropdowns(SEED_CATEGORIES)
        logger.info(f"Seeded {len(SEED_CATEGORIES)} categories to {target}")
async def periodic_cleanup(job_manager: JobManager):
    """
    Background loop: every hour, ask the job manager to purge expired jobs.

    The first purge happens after the first hour. Failures are logged and
    the loop keeps going; cancellation ends it quietly.
    """
    one_hour = 3600
    while True:
        try:
            await asyncio.sleep(one_hour)
            removed = await job_manager.cleanup_expired_jobs()
            if not removed:
                continue
            logger.info(f"Periodic cleanup: {removed} items removed")
        except asyncio.CancelledError:
            return
        except Exception as e:
            logger.error(f"Cleanup error: {e}")

View file

View file

@ -0,0 +1,123 @@
"""
Authentication middleware decorators for Quart routes.
Includes @auth_required, @admin_required, @dev_mode_bypass.
"""
import logging
from functools import wraps
from typing import Optional, Dict, Any, Callable
from quart import request, jsonify, g
from .msal_auth import msal_auth
from .user_store import upsert_user, get_user
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
async def _extract_token_user() -> Optional[Dict[str, Any]]:
    """
    Find the caller's token and validate it.

    The token is read from an 'Authorization: Bearer ...' header, or else
    from the ?_token= query parameter (used by browser download links opened
    with window.open, which cannot set headers).

    Returns:
    Whatever msal_auth.validate_token returns for the token, or None when
    no token was supplied.
    """
    header_value = request.headers.get('Authorization', '')
    bearer_prefix = 'Bearer '
    token = (
        header_value[7:]
        if header_value.startswith(bearer_prefix)
        else request.args.get('_token', '')
    )
    if token:
        return await msal_auth.validate_token(token)
    return None
async def _resolve_user(token_user: Dict) -> Dict:
    """
    Combine validated token claims with the stored user record.

    upsert_user creates or refreshes the record for the token's 'oid'; the
    result is the token claims plus 'role' (default 'user') and 'active'
    (default True) taken from that record.
    """
    record = upsert_user(
        token_user['oid'],
        token_user.get('preferred_username', ''),
        token_user.get('name', ''),
    )
    merged = dict(token_user)
    merged['role'] = record.get('role', 'user')
    merged['active'] = record.get('active', True)
    return merged
def auth_required(f: Callable) -> Callable:
    """
    Decorator: only run the route for an authenticated, active user.

    On success g.current_user holds the user dict. In DEV_MODE the
    configured dev user is used (and upserted into the user store) without
    any token. Otherwise a missing/invalid token yields 401 and a
    deactivated account yields 403.
    """
    @wraps(f)
    async def wrapper(*args, **kwargs):
        if not server_config.DEV_MODE:
            claims = await _extract_token_user()
            if not claims:
                return jsonify({'error': 'unauthorized', 'message': 'Authentication required'}), 401
            account = await _resolve_user(claims)
            if not account.get('active', True):
                return jsonify({'error': 'forbidden', 'message': 'Account deactivated'}), 403
            g.current_user = account
            return await f(*args, **kwargs)

        # DEV_MODE: impersonate the configured developer account
        dev_role = server_config.DEV_USER_ROLE
        g.current_user = dict(
            oid=server_config.DEV_USER_ID,
            preferred_username=server_config.DEV_USER_EMAIL,
            name=server_config.DEV_USER_NAME,
            role=dev_role,
            active=True,
        )
        # Ensure dev user exists in store
        upsert_user(
            server_config.DEV_USER_ID,
            server_config.DEV_USER_EMAIL,
            server_config.DEV_USER_NAME,
            role=dev_role,
        )
        return await f(*args, **kwargs)
    return wrapper
# Keep old name for compatibility with brief-extractor blueprints:
# routes decorated with @dev_mode_bypass get exactly the @auth_required behavior.
dev_mode_bypass = auth_required
def admin_required(f: Callable) -> Callable:
    """
    Require an authenticated, active user whose role is 'admin'.

    Authentication is delegated to auth_required, so this decorator
    works on its own (stacking it with @auth_required is harmless but not
    needed). Responses: 401 without a valid token, 403 for a deactivated
    account or a non-admin role. In DEV_MODE the configured dev user's role
    is what gets checked.
    """
    @wraps(f)
    async def admin_gate(*args, **kwargs):
        # g.current_user has been populated by auth_required at this point
        if g.current_user.get('role') != 'admin':
            return jsonify({'error': 'forbidden', 'message': 'Admin access required'}), 403
        return await f(*args, **kwargs)
    return auth_required(admin_gate)
def get_user_id() -> str:
    """Return the current user's 'oid' from g.current_user, or 'anonymous' when no user is set."""
    current = getattr(g, 'current_user', None)
    if not current:
        return 'anonymous'
    return current.get('oid', 'anonymous')
async def get_current_user() -> Optional[Dict[str, Any]]:
    """Return the user dict stored on g for this request, or None if there is none."""
    if hasattr(g, 'current_user'):
        return g.current_user
    return None

View file

@ -0,0 +1,91 @@
"""
MSAL / Azure AD token validator (SPA PKCE flow).
Backend only validates incoming Bearer JWTs — no server-side MSAL client needed.
"""
import logging
import time
from typing import Optional, Dict, Any
import jwt
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
class MSALAuthenticator:
    """
    Validates Azure AD bearer tokens sent by the SPA and exposes the MSAL
    settings the frontend needs. In DEV_MODE every token is accepted and
    mapped to the configured dev user.
    """

    def __init__(self):
        if server_config.DEV_MODE:
            logger.info("Running in DEV_MODE — MSAL authentication bypassed")

    async def validate_token(self, access_token: str) -> Optional[Dict[str, Any]]:
        """
        Extract the user identity from a JWT.

        Returns:
        A dict with 'oid', 'preferred_username' and 'name', or None when the
        token is empty, malformed, expired, or lacks an 'oid' claim.

        SECURITY NOTE(review): the token's signature, audience and issuer
        are NOT verified here — only the 'exp' and 'oid' claims are looked
        at — so the claims are not proven to come from Azure AD. Verifying
        against the tenant's JWKS (e.g. jwt.PyJWKClient) should be evaluated
        before relying on this outside a trusted network.
        """
        if server_config.DEV_MODE:
            return {
                'oid': server_config.DEV_USER_ID,
                'preferred_username': server_config.DEV_USER_EMAIL,
                'name': server_config.DEV_USER_NAME,
            }
        if not access_token:
            return None
        try:
            # Decode without signature verification (PKCE SPA tokens may use
            # audience = client_id; full sig verification requires fetching JWKS).
            unverified = jwt.decode(
                access_token,
                options={"verify_signature": False, "verify_aud": False},
            )
            user_id = unverified.get('oid')
            if not user_id:
                logger.warning("Token missing 'oid' claim")
                return None
            # Expiry is checked by hand because the decode above skips verification
            exp = unverified.get('exp', 0)
            if exp < time.time():
                logger.warning("Token expired")
                return None
            return {
                'oid': user_id,
                'preferred_username': unverified.get('preferred_username') or unverified.get('upn', ''),
                'name': unverified.get('name', ''),
            }
        except jwt.InvalidTokenError as e:
            logger.warning(f"Invalid JWT: {e}")
            return None
        except Exception as e:
            logger.error(f"Token validation error: {e}", exc_info=True)
            return None

    async def get_logout_url(self, post_logout_redirect_uri: Optional[str] = None) -> str:
        """
        Build the URL the browser should visit to sign out.

        In DEV_MODE this is simply the redirect URI (or the local dev
        server). Otherwise it is the authority's v2.0 logout endpoint, with
        the redirect URI attached as a percent-encoded query parameter.
        """
        if server_config.DEV_MODE:
            return post_logout_redirect_uri or 'http://localhost:5173'
        base = f"{server_config.MSAL_AUTHORITY}/oauth2/v2.0/logout"
        if post_logout_redirect_uri:
            from urllib.parse import quote
            # Encode the value so characters such as '&' or '#' in the
            # redirect URI cannot alter the logout URL's own query string.
            encoded = quote(post_logout_redirect_uri, safe='')
            return f"{base}?post_logout_redirect_uri={encoded}"
        return base

    def get_client_config(self) -> Dict[str, Any]:
        """Return the MSAL settings for the frontend, plus a 'devMode' flag."""
        return {
            'clientId': server_config.MSAL_CLIENT_ID,
            'authority': server_config.MSAL_AUTHORITY,
            'redirectUri': server_config.MSAL_REDIRECT_URI,
            'devMode': bool(server_config.DEV_MODE),
        }

    def is_dev_mode(self) -> bool:
        """Report whether the server runs with authentication bypassed."""
        return server_config.DEV_MODE


# Shared authenticator instance imported by the rest of the backend
msal_auth = MSALAuthenticator()

View file

@ -0,0 +1,96 @@
"""
User store — manages users.json (roles, active status).
Keyed by Azure AD oid (object ID).
"""
import json
import logging
import os
from datetime import datetime, timezone
from typing import Dict, Optional
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
# Path of a lock file next to users.json.
# NOTE(review): nothing in this module references it — reads and writes of
# users.json are currently not serialized; confirm whether locking was intended.
_LOCK_FILE = server_config.USERS_FILE + '.lock'
def _load() -> Dict:
    """
    Read users.json and return its content keyed by user id.

    Returns an empty dict when the file does not exist, cannot be read or
    parsed, or does not contain a JSON object. Read/parse problems are
    logged: callers that save after a failed load overwrite the file, so
    the failure must at least be visible.
    """
    path = server_config.USERS_FILE
    if not os.path.exists(path):
        return {}
    try:
        with open(path, 'r') as f:
            data = json.load(f)
    except Exception as e:
        logger.error(f"Failed to read user store {path}: {e}")
        return {}
    if not isinstance(data, dict):
        logger.error(f"User store {path} does not contain a JSON object")
        return {}
    return data
def _save(data: Dict):
    """
    Write the whole user store to users.json.

    The JSON is written to a temporary file in the same directory and then
    moved over the target with os.replace, so a reader never observes a
    half-written file (which a load would treat as an empty store).
    """
    path = server_config.USERS_FILE
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, 'w') as f:
            json.dump(data, f, indent=2)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, path)
    except Exception:
        # Do not leave a stray temp file behind when the write fails
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def get_user(user_id: str) -> Optional[Dict]:
    """Look up one stored user by id; None when there is no such record."""
    return _load().get(user_id)
def upsert_user(user_id: str, email: str, name: str, role: Optional[str] = None) -> Dict:
    """
    Insert a user record or refresh an existing one, then persist the store.

    New records are active and get the given role; without one the role is
    'admin' when the email equals server_config.ADMIN_EMAIL (ignoring case)
    and 'user' otherwise. Existing records keep their stored email/name when
    the new value is empty, always get a new 'last_seen', and only change
    role when one is passed explicitly.

    Returns:
    The record as saved.
    """
    users = _load()
    previous = users.get(user_id)

    if previous is not None:
        record = dict(previous)
        record['email'] = email or previous.get('email', '')
        record['name'] = name or previous.get('name', '')
        record['last_seen'] = datetime.now(timezone.utc).isoformat()
        if role is not None:
            record['role'] = role
    else:
        # First login — work out the role a brand-new account starts with
        is_bootstrap_admin = bool(email) and email.lower() == server_config.ADMIN_EMAIL.lower()
        default_role = 'admin' if is_bootstrap_admin else 'user'
        record = {
            'id': user_id,
            'email': email,
            'name': name,
            'role': role or default_role,
            'active': True,
            'created': datetime.now(timezone.utc).isoformat(),
            'last_seen': datetime.now(timezone.utc).isoformat(),
        }

    users[user_id] = record
    _save(users)
    return record
def list_users() -> list:
    """Return all stored users, most recently seen first (missing 'last_seen' sorts last)."""
    records = list(_load().values())
    records.sort(key=lambda entry: entry.get('last_seen', ''), reverse=True)
    return records
def set_role(user_id: str, role: str) -> Optional[Dict]:
    """
    Change a user's role to 'user' or 'admin' and persist the store.

    Returns:
    The updated record, or None for an unsupported role or unknown user.
    """
    allowed_roles = ('user', 'admin')
    if role not in allowed_roles:
        return None
    users = _load()
    if user_id in users:
        users[user_id]['role'] = role
        _save(users)
        return users[user_id]
    return None
def set_active(user_id: str, active: bool) -> Optional[Dict]:
    """Set a user's 'active' flag and persist the store; None when the user is unknown."""
    users = _load()
    if user_id in users:
        users[user_id]['active'] = active
        _save(users)
        return users[user_id]
    return None

View file

@ -0,0 +1,97 @@
"""
Runtime configuration for AC Tool server
"""
import os
from typing import List
from dotenv import load_dotenv
load_dotenv()
class ServerConfig:
    """
    Server settings, read from environment variables once, when this class
    body is executed at import time. All values are class attributes.
    """
    # Server
    HOST: str = os.getenv('SERVER_HOST', '0.0.0.0')
    PORT: int = int(os.getenv('SERVER_PORT', '8000'))
    WORKERS: int = int(os.getenv('SERVER_WORKERS', '2'))
    DEBUG: bool = os.getenv('DEBUG', 'false').lower() == 'true'

    # Development Mode
    # NOTE(review): DEV_MODE defaults to 'true' and the dev user defaults to
    # the 'admin' role, so a deployment that does not set DEV_MODE=false runs
    # with authentication bypassed — confirm deployments always set it.
    DEV_MODE: bool = os.getenv('DEV_MODE', 'true').lower() == 'true'
    DEV_USER_ID: str = os.getenv('DEV_USER_ID', 'dev-user-id')
    DEV_USER_EMAIL: str = os.getenv('DEV_USER_EMAIL', 'dev@localhost')
    DEV_USER_NAME: str = os.getenv('DEV_USER_NAME', 'Dev User')
    DEV_USER_ROLE: str = os.getenv('DEV_USER_ROLE', 'admin')  # 'user' or 'admin'

    # CORS: comma-separated list; blank entries (e.g. from a trailing comma)
    # are dropped so no empty origin reaches the CORS configuration.
    ALLOWED_ORIGINS: List[str] = [
        origin.strip()
        for origin in os.getenv(
            'ALLOWED_ORIGINS',
            'http://localhost:3000,http://localhost:5173,https://ai-sandbox.oliver.solutions'
        ).split(',')
        if origin.strip()
    ]

    # Azure AD / MSAL (SPA PKCE flow — no client secret needed)
    MSAL_CLIENT_ID: str = os.getenv('MSAL_CLIENT_ID', '9079054c-9620-4757-a256-23413042f1ef')
    MSAL_TENANT_ID: str = os.getenv('MSAL_TENANT_ID', 'e519c2e6-bc6d-4fdf-8d9c-923c2f002385')
    MSAL_REDIRECT_URI: str = os.getenv('MSAL_REDIRECT_URI', 'https://ai-sandbox.oliver.solutions/ac-helper/')
    # Derived from MSAL_TENANT_ID so the tenant default lives in one place
    MSAL_AUTHORITY: str = f'https://login.microsoftonline.com/{MSAL_TENANT_ID}'

    # Admin bootstrap
    ADMIN_EMAIL: str = os.getenv('ADMIN_EMAIL', 'daveporter@oliver.agency')

    # Security
    # NOTE(review): the SESSION_SECRET fallback is a publicly known
    # placeholder; production must override it.
    SESSION_SECRET: str = os.getenv('SESSION_SECRET', 'change-me-in-production')
    SECURE_COOKIES: bool = os.getenv('SECURE_COOKIES', 'false').lower() == 'true'
    HTTPS_ONLY: bool = os.getenv('HTTPS_ONLY', 'false').lower() == 'true'

    # File Upload
    MAX_UPLOAD_SIZE_MB: int = int(os.getenv('MAX_UPLOAD_SIZE_MB', '200'))
    MAX_CONTENT_LENGTH: int = MAX_UPLOAD_SIZE_MB * 1024 * 1024  # bytes
    ALLOWED_EXTENSIONS: set = {'.pdf', '.pptx', '.docx', '.xlsx', '.ppt', '.doc', '.xls'}

    # Job Management
    MAX_CONCURRENT_JOBS: int = int(os.getenv('MAX_CONCURRENT_JOBS', '2'))
    FILE_RETENTION_HOURS: int = int(os.getenv('FILE_RETENTION_HOURS', '24'))

    # WebSocket
    WS_PING_INTERVAL_SECONDS: int = int(os.getenv('WS_PING_INTERVAL_SECONDS', '30'))

    # AI
    GEMINI_API_KEY: str = os.getenv('GEMINI_API_KEY', '')
    GEMINI_MODEL: str = os.getenv('GEMINI_MODEL', 'gemini-2.0-flash-exp')

    # Data paths — mounted as Docker volume. Default: a 'data' directory one
    # level above the directory containing this file.
    DATA_DIR: str = os.getenv(
        'DATA_DIR',
        os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
    )
    UPLOAD_DIR: str = os.path.join(DATA_DIR, 'uploads')
    OUTPUT_DIR: str = os.path.join(DATA_DIR, 'outputs')
    SHEETS_DIR: str = os.path.join(DATA_DIR, 'sheets')
    USERS_FILE: str = os.path.join(DATA_DIR, 'users.json')
    DROPDOWNS_FILE: str = os.path.join(DATA_DIR, 'dropdowns.json')

    @classmethod
    def ensure_directories(cls):
        """Create the data, upload, output and sheets directories if they are missing."""
        for d in [cls.DATA_DIR, cls.UPLOAD_DIR, cls.OUTPUT_DIR, cls.SHEETS_DIR]:
            os.makedirs(d, exist_ok=True)

    @classmethod
    def validate_auth_config(cls) -> bool:
        """Return True in DEV_MODE, otherwise whether both MSAL client and tenant ids are set."""
        if cls.DEV_MODE:
            return True
        return bool(cls.MSAL_CLIENT_ID and cls.MSAL_TENANT_ID)

    @classmethod
    def get_cors_config(cls) -> dict:
        """Return the keyword arguments passed to the CORS setup (origins, methods, headers, credentials)."""
        return {
            'allow_origin': cls.ALLOWED_ORIGINS,
            'allow_methods': ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'],
            'allow_headers': ['Content-Type', 'Authorization', 'Accept'],
            'allow_credentials': True,
        }


# Shared configuration object imported throughout the backend
server_config = ServerConfig()

18
backend/server/jobs/__init__.py Executable file
View file

@ -0,0 +1,18 @@
"""
Job management module for Brief Extractor GUI
"""
from .models import Job, JobPhase, ProviderUpdate, JobSummary, ModelConfiguration, ModelInfo
from .manager import JobManager
from .storage import StorageManager
# Names re-exported by the package: the model classes imported from .models
# plus the two manager classes imported above.
__all__ = [
'Job',
'JobPhase',
'ProviderUpdate',
'JobSummary',
'ModelConfiguration',
'ModelInfo',
'JobManager',
'StorageManager'
]

338
backend/server/jobs/manager.py Executable file
View file

@ -0,0 +1,338 @@
"""
Job manager for handling job queue, registry, and lifecycle
"""
import asyncio
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from threading import RLock
from .models import Job, JobPhase, ModelConfiguration, ModelInfo
from .storage import StorageManager
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
class JobManager:
"""
Manages job lifecycle, queue, and in-memory registry
Thread-safe singleton for job management
"""
_instance: Optional['JobManager'] = None
_lock = RLock()
def __new__(cls):
    """Return the one shared instance, creating it under the class-level lock on first use."""
    if cls._instance is not None:
        return cls._instance
    with cls._lock:
        # Re-check inside the lock: another thread may have created it meanwhile
        if cls._instance is None:
            cls._instance = super().__new__(cls)
    return cls._instance
def __init__(self):
    """
    Set up the registry, queue, semaphore, storage and async lock.

    Runs on every JobManager() call because of the singleton __new__; the
    '_initialized' marker makes every call after the first a no-op.
    """
    already_set_up = hasattr(self, '_initialized')
    if already_set_up:
        return
    self._initialized = True

    self.jobs: Dict[str, Job] = {}
    self.queue: asyncio.Queue = asyncio.Queue()
    self.processing_semaphore = asyncio.Semaphore(server_config.MAX_CONCURRENT_JOBS)
    self.storage = StorageManager()
    # Instance-level asyncio lock guarding self.jobs (distinct from the
    # class-level lock used while creating the singleton)
    self._lock = asyncio.Lock()
    logger.info(f"JobManager initialized with concurrency limit: {server_config.MAX_CONCURRENT_JOBS}")
@classmethod
def get_instance(cls) -> 'JobManager':
    """Return the shared manager; calling the class always yields the same object."""
    singleton = cls()
    return singleton
async def create_job(
    self,
    file_name: str,
    file_size: int,
    file_data: bytes,
    user_id: str,
    model_config: Optional[ModelConfiguration] = None
) -> Job:
    """
    Validate an upload, store it, register a job for it and enqueue the job.

    Args:
        file_name: Original filename
        file_size: Size in bytes
        file_data: Binary file content
        user_id: User identifier
        model_config: Model configuration for processing

    Returns:
        The newly created job.

    Raises:
        ValueError: when storage.validate_file rejects the file.
        Exception: anything raised while saving or registering is logged
            and re-raised after the saved upload (if any) is cleaned up.
    """
    ok, reason = self.storage.validate_file(file_name, file_size)
    if not ok:
        raise ValueError(f"File validation failed: {reason}")

    # The upload path is filled in once the file has been written
    job = Job.create(
        file_name=file_name,
        file_size=file_size,
        user_id=user_id,
        upload_path="",
        model_config=model_config
    )

    try:
        job.upload_path = await self.storage.save_uploaded_file(
            file_data=file_data,
            filename=file_name,
            job_id=job.id
        )

        async with self._lock:
            self.jobs[job.id] = job

        await self.queue.put(job.id)
        logger.info(f"Created job {job.id} for file {file_name} (user: {user_id})")
        return job
    except Exception as e:
        logger.error(f"Failed to create job for {file_name}: {e}")
        # Remove the stored upload so a failed creation leaves no file behind
        if job.upload_path:
            await self.storage.cleanup_job_files(job.upload_path, None)
        raise
async def get_job(self, job_id: str) -> Optional[Job]:
    """Look up a job in the registry under the lock; None when the id is unknown."""
    async with self._lock:
        found = self.jobs.get(job_id)
    return found
async def update_job(self, job_id: str, **updates) -> bool:
    """
    Set attributes on a registered job and refresh its 'updated_at'.

    Args:
        job_id: Job identifier
        **updates: attribute name/value pairs; names the job object does
            not already have are ignored.

    Returns:
        True if the job exists (and was touched), False otherwise.
    """
    async with self._lock:
        target = self.jobs.get(job_id)
        if target is None or not target:
            return False
        known = {name: value for name, value in updates.items() if hasattr(target, name)}
        for name in known:
            setattr(target, name, known[name])
        target.updated_at = datetime.utcnow()
        return True
async def get_user_jobs(
    self,
    user_id: str,
    limit: int = 100,
    offset: int = 0
) -> List[Job]:
    """
    Return one page of the jobs owned by a user.

    Args:
        user_id: User identifier
        limit: Maximum number of jobs to return
        offset: Number of jobs to skip

    Returns:
        The user's jobs ordered by 'created_at', newest first, sliced to
        [offset, offset + limit).
    """
    async with self._lock:
        owned = []
        for candidate in self.jobs.values():
            if candidate.user_id == user_id:
                owned.append(candidate)
    newest_first = sorted(owned, key=lambda entry: entry.created_at, reverse=True)
    page_end = offset + limit
    return newest_first[offset:page_end]
async def get_all_jobs(self, limit: int = 100, offset: int = 0) -> List[Job]:
    """
    Return one page of every registered job, regardless of owner.

    Args:
        limit: Maximum number of jobs to return
        offset: Number of jobs to skip

    Returns:
        Jobs ordered by 'created_at', newest first, sliced to
        [offset, offset + limit).
    """
    async with self._lock:
        snapshot = [*self.jobs.values()]
    newest_first = sorted(snapshot, key=lambda entry: entry.created_at, reverse=True)
    page_end = offset + limit
    return newest_first[offset:page_end]
async def delete_job(self, job_id: str) -> bool:
    """
    Remove a job from the registry after cleaning up its files.

    Args:
        job_id: Job identifier

    Returns:
        True if the job existed and was removed, False if it was unknown.
    """
    async with self._lock:
        doomed = self.jobs.get(job_id)
        if not doomed:
            return False
        # Files first, then the registry entry, all while holding the lock
        await self.storage.cleanup_job_files(doomed.upload_path, doomed.output_path)
        del self.jobs[job_id]
        logger.info(f"Deleted job {job_id}")
        return True
async def get_queue_size(self) -> int:
    """Report how many entries are currently waiting in the job queue."""
    waiting = self.queue.qsize()
    return waiting
async def get_active_jobs_count(self) -> int:
    """
    Count the jobs that are currently being processed.

    A job counts as active while its phase is one of EXTRACT_CONTENT,
    LLM_ANALYSIS, CONSOLIDATION or CSV_GENERATION.
    """
    async with self._lock:
        active = 0
        for job in self.jobs.values():
            if job.phase in [JobPhase.EXTRACT_CONTENT, JobPhase.LLM_ANALYSIS,
                             JobPhase.CONSOLIDATION, JobPhase.CSV_GENERATION]:
                active += 1
        return active
def serialize_all(self) -> List[Dict]:
    """Build the list of per-job dictionaries used for WebSocket broadcast."""
    payload = []
    for job in self.jobs.values():
        payload.append(job.to_dict())
    return payload
async def cleanup_expired_jobs(self) -> int:
    """
    Delete finished jobs past the retention period, then sweep leftover files.

    A job is expired when its phase is COMPLETED or FAILED and its
    ``updated_at`` is older than ``server_config.FILE_RETENTION_HOURS``.

    Returns:
        Number of deleted jobs plus the number of files removed by the
        storage sweep
    """
    retention = timedelta(hours=server_config.FILE_RETENTION_HOURS)
    cutoff_time = datetime.utcnow() - retention

    # Collect the IDs under the lock, but delete after releasing it:
    # delete_job acquires the same lock itself.
    async with self._lock:
        expired_ids = [
            job_id
            for job_id, job in list(self.jobs.items())
            if job.phase in [JobPhase.COMPLETED, JobPhase.FAILED]
            and job.updated_at < cutoff_time
        ]

    cleanup_count = 0
    for job_id in expired_ids:
        deleted = await self.delete_job(job_id)
        if deleted:
            cleanup_count += 1

    # Files in the storage directories that outlived the retention period.
    orphaned_count = await self.storage.cleanup_expired_files()

    total_cleaned = cleanup_count + orphaned_count
    if total_cleaned > 0:
        logger.info(f"Cleaned up {cleanup_count} expired jobs and {orphaned_count} orphaned files")
    return total_cleaned
@staticmethod
def get_available_models() -> List[ModelInfo]:
    """
    List the models that are both described here and configured.

    Returns:
        ModelInfo entries for every key of ``core_config.MODEL_MAPPINGS``
        that has a description below, in the order of that mapping
    """
    # Imported lazily to avoid circular imports
    from core.config import config as core_config

    # Static descriptions for the models this UI knows about
    catalogue = (
        ModelInfo(
            key='openai-gpt51',
            name='GPT-5.1',
            provider='OpenAI',
            description='Latest OpenAI model with advanced reasoning capabilities',
            cost_per_1m_input=1.25,
            cost_per_1m_output=10.00,
            can_be_primary=True,
            can_be_consolidation=True
        ),
        ModelInfo(
            key='anthropic-opus45',
            name='Claude Opus 4.5',
            provider='Anthropic',
            description='Highest quality model for complex analysis',
            cost_per_1m_input=5.00,
            cost_per_1m_output=25.00,
            can_be_primary=True,
            can_be_consolidation=True
        ),
        ModelInfo(
            key='anthropic-sonnet45',
            name='Claude Sonnet 4.5',
            provider='Anthropic',
            description='Balanced performance and cost',
            cost_per_1m_input=3.00,
            cost_per_1m_output=15.00,
            can_be_primary=True,
            can_be_consolidation=True
        ),
        ModelInfo(
            key='google-gemini31',
            name='Gemini 3.1 Pro',
            provider='Google',
            description='Cost-effective model with high context limit',
            cost_per_1m_input=1.25,
            cost_per_1m_output=5.00,
            can_be_primary=True,
            can_be_consolidation=True
        ),
    )
    described = {entry.key: entry for entry in catalogue}

    # Keep only the configured models, in configuration order
    return [
        described[model_key]
        for model_key in core_config.MODEL_MAPPINGS.keys()
        if model_key in described
    ]
def get_default_model_config() -> ModelConfiguration:
    """
    Build the default model configuration from the core config.

    NOTE(review): no ``self`` parameter or ``@staticmethod`` decorator is
    visible on this definition; if it lives inside JobManager it can only be
    called on the class, not on an instance — confirm the intended placement.
    """
    from core.config import config as core_config

    default_primary = core_config.get_default_primary_models()
    default_consolidation = core_config.DEFAULT_CONSOLIDATION_MODEL
    default_threshold = core_config.MINIMUM_SUCCESS_THRESHOLD
    return ModelConfiguration(
        primary_models=default_primary,
        consolidation_model=default_consolidation,
        minimum_success_threshold=default_threshold,
    )
# Global instance
# Shared module-level JobManager, obtained through the class's own
# get_instance() accessor rather than by calling the constructor.
job_manager = JobManager.get_instance()

270
backend/server/jobs/models.py Executable file
View file

@ -0,0 +1,270 @@
"""
Data models for job management and processing
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Any
import uuid
class JobPhase(Enum):
    """Processing phases for a job

    Each member's value is the same string as its name; Job.to_dict()
    emits that string as the job's 'phase'.
    """
    # Initial phase assigned by Job.create()
    QUEUED = "QUEUED"
    # Extracting document content
    EXTRACT_CONTENT = "EXTRACT_CONTENT"
    # Parallel LLM analysis
    LLM_ANALYSIS = "LLM_ANALYSIS"
    # Consolidating results
    CONSOLIDATION = "CONSOLIDATION"
    # Generating CSV output
    CSV_GENERATION = "CSV_GENERATION"
    # Set by Job.mark_completed()
    COMPLETED = "COMPLETED"
    # Set by Job.mark_failed()
    FAILED = "FAILED"
@dataclass
class ProviderUpdate:
    """Status snapshot for one LLM provider call made while processing a job"""
    provider: str  # 'openai', 'anthropic', 'google'
    model: str  # e.g., "gpt-5.1", "claude-sonnet-4-5", "gemini-3.1-pro"
    status: str  # 'started', 'success', 'error'
    started_at: Optional[str] = None
    completed_at: Optional[str] = None
    latency_ms: Optional[float] = None
    tokens_in: Optional[int] = None
    tokens_out: Optional[int] = None
    tokens_cached: Optional[int] = None
    cost_usd: Optional[float] = None
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict with camelCase keys"""
        return dict(
            provider=self.provider,
            model=self.model,
            status=self.status,
            startedAt=self.started_at,
            completedAt=self.completed_at,
            latencyMs=self.latency_ms,
            tokensIn=self.tokens_in,
            tokensOut=self.tokens_out,
            tokensCached=self.tokens_cached,
            costUsd=self.cost_usd,
            error=self.error,
        )
@dataclass
class LogEntry:
    """One timestamped log line recorded while a job is processed"""
    timestamp: str
    level: str  # 'DEBUG', 'INFO', 'WARNING', 'ERROR'
    message: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict keyed by field name"""
        return {
            name: getattr(self, name)
            for name in ('timestamp', 'level', 'message')
        }
@dataclass
class JobSummary:
    """Result summary attached to a job once it has completed"""
    doc_type: str
    assets_extracted: int
    confidence_score: float
    notes: List[str]
    cost_usd_total: float
    tokens_total: int
    primary_models: List[str]
    consolidation_model: str
    processing_time_seconds: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict with camelCase keys"""
        return dict(
            docType=self.doc_type,
            assetsExtracted=self.assets_extracted,
            confidenceScore=self.confidence_score,
            notes=self.notes,
            costUsdTotal=self.cost_usd_total,
            tokensTotal=self.tokens_total,
            primaryModels=self.primary_models,
            consolidationModel=self.consolidation_model,
            processingTimeSeconds=self.processing_time_seconds,
        )
@dataclass
class ModelInfo:
    """Descriptive and pricing information for one selectable LLM model"""
    key: str
    name: str
    provider: str
    description: str
    cost_per_1m_input: float
    cost_per_1m_output: float
    can_be_primary: bool = True
    can_be_consolidation: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict with camelCase keys"""
        payload: Dict[str, Any] = {}
        payload['key'] = self.key
        payload['name'] = self.name
        payload['provider'] = self.provider
        payload['description'] = self.description
        payload['costPer1mInput'] = self.cost_per_1m_input
        payload['costPer1mOutput'] = self.cost_per_1m_output
        payload['canBePrimary'] = self.can_be_primary
        payload['canBeConsolidation'] = self.can_be_consolidation
        return payload
@dataclass
class ModelConfiguration:
    """Model selection configuration for a job"""
    # Model keys run during the primary analysis stage
    primary_models: List[str] = field(default_factory=lambda: [
        'openai-gpt51', 'anthropic-sonnet45', 'google-gemini31'
    ])
    # Model key used to consolidate the primary results
    consolidation_model: str = 'openai-gpt51'
    minimum_success_threshold: int = 1

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict with camelCase keys"""
        return {
            'primaryModels': self.primary_models,
            'consolidationModel': self.consolidation_model,
            'minimumSuccessThreshold': self.minimum_success_threshold
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ModelConfiguration':
        """
        Build a configuration from a camelCase dict (the inverse of to_dict).

        Keys that are missing fall back to the dataclass defaults, so
        ``from_dict({})`` equals ``ModelConfiguration()``. An explicitly
        supplied list is kept as given, including an empty one, but is copied
        so later mutation of the configuration does not alter ``data``.

        Args:
            data: Dict with optional 'primaryModels', 'consolidationModel'
                and 'minimumSuccessThreshold' keys

        Returns:
            The populated ModelConfiguration
        """
        # Single source of truth for fallbacks: the field defaults above.
        defaults = cls()
        primary = data.get('primaryModels')
        return cls(
            primary_models=defaults.primary_models if primary is None else list(primary),
            consolidation_model=data.get('consolidationModel', defaults.consolidation_model),
            minimum_success_threshold=data.get(
                'minimumSuccessThreshold', defaults.minimum_success_threshold
            )
        )
@dataclass
class Job:
    """A document processing job: identity, progress state, results and logs"""
    id: str
    file_name: str
    file_size: int
    created_at: datetime
    updated_at: datetime
    user_id: str
    phase: JobPhase
    progress_pct: int  # 0-100
    step_label: str
    provider_updates: Dict[str, ProviderUpdate] = field(default_factory=dict)
    error: Optional[str] = None
    result_csv_url: Optional[str] = None
    summary: Optional[JobSummary] = None
    logs: List[LogEntry] = field(default_factory=list)
    upload_path: Optional[str] = None
    output_path: Optional[str] = None
    model_config: ModelConfiguration = field(default_factory=ModelConfiguration)

    @classmethod
    def create(
        cls,
        file_name: str,
        file_size: int,
        user_id: str,
        upload_path: str,
        model_config: Optional[ModelConfiguration] = None
    ) -> 'Job':
        """
        Build a freshly queued job with a new UUID.

        ``created_at`` and ``updated_at`` share one timestamp; when no model
        configuration is given, a default ModelConfiguration is used.
        """
        created = datetime.utcnow()
        chosen_config = model_config if model_config else ModelConfiguration()
        return cls(
            id=str(uuid.uuid4()),
            file_name=file_name,
            file_size=file_size,
            created_at=created,
            updated_at=created,
            user_id=user_id,
            phase=JobPhase.QUEUED,
            progress_pct=0,
            step_label='Queued for processing',
            upload_path=upload_path,
            model_config=chosen_config
        )

    def update_progress(
        self,
        phase: JobPhase,
        progress_pct: int,
        step_label: str = ""
    ):
        """
        Record a new phase and percentage.

        The percentage is clamped to [0, 100]. An empty ``step_label`` is
        replaced by the standard label for the phase.
        """
        if not step_label:
            # Standard labels, used when the caller supplies none
            standard_labels = {
                JobPhase.QUEUED: 'Queued for processing',
                JobPhase.EXTRACT_CONTENT: 'Extracting document content',
                JobPhase.LLM_ANALYSIS: 'Parallel LLM analysis',
                JobPhase.CONSOLIDATION: 'Consolidating results',
                JobPhase.CSV_GENERATION: 'Generating CSV output',
                JobPhase.COMPLETED: 'Processing completed',
                JobPhase.FAILED: 'Processing failed'
            }
            step_label = standard_labels.get(phase, 'Processing')

        at_least_zero = max(0, progress_pct)
        self.phase = phase
        self.progress_pct = min(100, at_least_zero)
        self.updated_at = datetime.utcnow()
        self.step_label = step_label

    def add_log(self, level: str, message: str):
        """Append a log entry stamped with the current UTC time"""
        stamp = datetime.utcnow().isoformat()
        self.logs.append(LogEntry(timestamp=stamp, level=level, message=message))
        self.updated_at = datetime.utcnow()

    def update_provider(self, model_key: str, update: ProviderUpdate):
        """Store the latest status for one provider, keyed by model key"""
        self.provider_updates[model_key] = update
        self.updated_at = datetime.utcnow()

    def mark_completed(
        self,
        result_csv_url: str,
        summary: JobSummary,
        output_path: str
    ):
        """Switch the job to COMPLETED at 100% and attach its results"""
        self.result_csv_url = result_csv_url
        self.summary = summary
        self.output_path = output_path
        self.phase = JobPhase.COMPLETED
        self.progress_pct = 100
        self.step_label = 'Processing completed'
        self.updated_at = datetime.utcnow()

    def mark_failed(self, error: str):
        """Switch the job to FAILED and record the error message"""
        self.error = error
        self.phase = JobPhase.FAILED
        self.step_label = 'Processing failed'
        self.updated_at = datetime.utcnow()

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict with camelCase keys"""
        # phase may already be a plain string rather than a JobPhase member
        if isinstance(self.phase, JobPhase):
            phase_value = self.phase.value
        else:
            phase_value = self.phase

        provider_payload = {}
        for model_key, update in self.provider_updates.items():
            provider_payload[model_key] = update.to_dict()

        summary_payload = self.summary.to_dict() if self.summary else None
        log_payload = [entry.to_dict() for entry in self.logs]

        return {
            'id': self.id,
            'fileName': self.file_name,
            'fileSize': self.file_size,
            'createdAt': self.created_at.isoformat(),
            'updatedAt': self.updated_at.isoformat(),
            'userId': self.user_id,
            'phase': phase_value,
            'progressPct': self.progress_pct,
            'stepLabel': self.step_label,
            'providerUpdates': provider_payload,
            'error': self.error,
            'resultCsvUrl': self.result_csv_url,
            'summary': summary_payload,
            'logs': log_payload,
            'modelConfig': self.model_config.to_dict()
        }

231
backend/server/jobs/storage.py Executable file
View file

@ -0,0 +1,231 @@
"""
File storage management for uploads and outputs
"""
import os
import hashlib
import asyncio
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, List
import logging
import uuid
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
class StorageManager:
    """Manages file storage, cleanup, and safe file operations"""

    def __init__(self):
        """Resolve the upload/output directories from server_config and create them"""
        self.upload_dir = Path(server_config.UPLOAD_DIR)
        self.output_dir = Path(server_config.OUTPUT_DIR)
        # Ensure directories exist
        self.upload_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate_safe_filename(self, original_filename: str, job_id: str) -> str:
        """
        Generate a safe filename for uploaded files

        Args:
            original_filename: Original filename from upload
            job_id: Unique job identifier

        Returns:
            Safe filename with job ID prefix
        """
        # Extract extension
        name, ext = os.path.splitext(original_filename)
        # Keep only alphanumerics, spaces, '-' and '_' from the stem
        safe_name = "".join(c for c in name if c.isalnum() or c in (' ', '-', '_')).strip()
        safe_name = safe_name[:50]  # Limit length
        # The job ID prefix makes the name unique per job
        return f"{job_id}_{safe_name}{ext}"

    def get_upload_path(self, filename: str) -> str:
        """Get full path for uploaded file"""
        return str(self.upload_dir / filename)

    def get_output_path(self, job_id: str, original_filename: str) -> str:
        """
        Generate output CSV path for a job

        NOTE(review): job_id is not part of the generated name, so two jobs
        with the same base filename finishing within the same second resolve
        to the same path.

        Args:
            job_id: Job identifier
            original_filename: Original uploaded filename

        Returns:
            Path for output CSV file
        """
        # Generate timestamp
        timestamp = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        # Extract base name without extension
        base_name = os.path.splitext(original_filename)[0]
        safe_base = "".join(c for c in base_name if c.isalnum() or c in (' ', '-', '_')).strip()
        safe_base = safe_base[:30]  # Limit length
        # Generate output filename
        output_filename = f"{safe_base}-{timestamp}.csv"
        return str(self.output_dir / output_filename)

    async def save_uploaded_file(self, file_data: bytes, filename: str, job_id: str) -> str:
        """
        Save uploaded file data to disk

        Args:
            file_data: Binary file data
            filename: Original filename
            job_id: Job identifier

        Returns:
            Path to saved file

        Raises:
            Exception: Any error from the write is logged and re-raised
        """
        safe_filename = self.generate_safe_filename(filename, job_id)
        file_path = self.get_upload_path(safe_filename)
        try:
            # Write file asynchronously using thread pool
            def _write_file():
                """Blocking file write operation for thread pool"""
                with open(file_path, 'wb') as f:
                    f.write(file_data)

            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, _write_file)
            logger.info(f"Saved uploaded file: {file_path}")
            return file_path
        except Exception as e:
            logger.error(f"Failed to save uploaded file {filename}: {e}")
            raise

    def validate_file(self, filename: str, file_size: int) -> tuple[bool, Optional[str]]:
        """
        Validate uploaded file

        Checks, in order: extension against server_config.ALLOWED_EXTENSIONS
        (case-insensitive), size against server_config.MAX_CONTENT_LENGTH,
        and filename length.

        Args:
            filename: Original filename
            file_size: File size in bytes

        Returns:
            Tuple of (is_valid, error_message)
        """
        # Check file extension
        _, ext = os.path.splitext(filename.lower())
        if ext not in server_config.ALLOWED_EXTENSIONS:
            allowed = ', '.join(server_config.ALLOWED_EXTENSIONS)
            return False, f"File type {ext} not allowed. Allowed types: {allowed}"
        # Check file size
        max_size = server_config.MAX_CONTENT_LENGTH
        if file_size > max_size:
            max_mb = max_size / (1024 * 1024)
            actual_mb = file_size / (1024 * 1024)
            return False, f"File size {actual_mb:.1f}MB exceeds limit of {max_mb:.1f}MB"
        # Check filename length
        if len(filename) > 255:
            return False, "Filename too long (max 255 characters)"
        return True, None

    async def cleanup_job_files(self, upload_path: Optional[str], output_path: Optional[str]):
        """
        Clean up files associated with a job

        Best-effort: a path that is None or missing is skipped, and a failed
        removal is logged as a warning rather than raised.

        Args:
            upload_path: Path to uploaded file
            output_path: Path to output CSV file
        """
        for file_path in [upload_path, output_path]:
            if file_path and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    logger.info(f"Cleaned up file: {file_path}")
                except Exception as e:
                    logger.warning(f"Failed to clean up file {file_path}: {e}")

    async def cleanup_expired_files(self) -> int:
        """
        Clean up files older than the retention period

        Returns:
            Number of files cleaned up
        """
        cutoff_time = datetime.utcnow() - timedelta(hours=server_config.FILE_RETENTION_HOURS)
        cleanup_count = 0
        # Clean upload directory
        cleanup_count += await self._cleanup_directory(self.upload_dir, cutoff_time)
        # Clean output directory
        cleanup_count += await self._cleanup_directory(self.output_dir, cutoff_time)
        if cleanup_count > 0:
            logger.info(f"Cleaned up {cleanup_count} expired files")
        return cleanup_count

    async def _cleanup_directory(self, directory: Path, cutoff_time: datetime) -> int:
        """
        Clean up files in a specific directory older than cutoff time

        Args:
            directory: Directory whose direct children are examined
            cutoff_time: Naive UTC datetime; files modified before it are deleted

        Returns:
            Number of files deleted
        """
        cleanup_count = 0
        # st_mtime is seconds since the Unix epoch, which is UTC. Convert it to
        # a naive UTC datetime so it is comparable with cutoff_time, which is
        # built from datetime.utcnow(). datetime.fromtimestamp() would give
        # local time and skew retention by the server's UTC offset.
        epoch = datetime(1970, 1, 1)
        try:
            for file_path in directory.iterdir():
                if file_path.is_file():
                    # Get file modification time (UTC)
                    mtime = epoch + timedelta(seconds=file_path.stat().st_mtime)
                    if mtime < cutoff_time:
                        try:
                            file_path.unlink()
                            cleanup_count += 1
                            logger.debug(f"Cleaned up expired file: {file_path}")
                        except Exception as e:
                            logger.warning(f"Failed to delete expired file {file_path}: {e}")
        except Exception as e:
            logger.error(f"Error during directory cleanup {directory}: {e}")
        return cleanup_count

    def get_file_info(self, file_path: str) -> Optional[dict]:
        """
        Get information about a file

        Args:
            file_path: Path to file

        Returns:
            Dictionary with file info or None if file doesn't exist
            (or its metadata cannot be read)
        """
        if not os.path.exists(file_path):
            return None
        try:
            stat = os.stat(file_path)
            return {
                'path': file_path,
                'size': stat.st_size,
                # Local-time ISO strings
                'created': datetime.fromtimestamp(stat.st_ctime).isoformat(),
                'modified': datetime.fromtimestamp(stat.st_mtime).isoformat()
            }
        except Exception as e:
            logger.error(f"Failed to get file info for {file_path}: {e}")
            return None

    def ensure_directories(self):
        """Ensure all required directories exist"""
        self.upload_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Storage directories ready: {self.upload_dir}, {self.output_dir}")
# Global instance
# Constructed at import time; StorageManager.__init__ creates the upload and
# output directories, so importing this module touches the filesystem.
storage_manager = StorageManager()

View file

@ -0,0 +1,16 @@
"""
Job runners module for processing document analysis jobs
"""
from .progress import ProgressReporter, JobLogHandler, create_job_logger
from .job_runner import run_job, process_job_queue, start_background_workers, stop_background_workers
# Names re-exported by this package: the progress-reporting helpers imported
# from .progress and the job execution / worker lifecycle functions imported
# from .job_runner.
__all__ = [
    'ProgressReporter',
    'JobLogHandler',
    'create_job_logger',
    'run_job',
    'process_job_queue',
    'start_background_workers',
    'stop_background_workers'
]

View file

@ -0,0 +1,368 @@
"""
Enhanced DocumentAnalyzer with progress reporting for GUI integration
Extends the existing analyzer with progress hooks and WebSocket updates
"""
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
from typing import Optional, List, Dict, Any
import logging
from core.process_brief_enhanced import DocumentAnalyzer, ProcessingResult
from ..jobs.models import JobPhase, ModelConfiguration, JobSummary
from .progress import ProgressReporter
logger = logging.getLogger(__name__)
class EnhancedDocumentAnalyzer(DocumentAnalyzer):
    """
    Enhanced DocumentAnalyzer with progress reporting capabilities

    Drives content extraction, parallel multi-model analysis and
    consolidation, and forwards phase, log and per-provider updates to an
    optional ProgressReporter. Helpers and attributes not defined here
    (``_extract_document_content``, ``classify_document``, ``_load_prompt``,
    ``provider_manager``, ``consolidation_processor``, ``token_usage``,
    ``primary_models``, ``consolidation_model``) are expected from the base
    class.
    """

    def __init__(
        self,
        model_config: ModelConfiguration,
        progress_reporter: Optional[ProgressReporter] = None
    ):
        """
        Args:
            model_config: Model selection for this run
            progress_reporter: Receiver for progress events; when None, the
                pipeline runs without emitting any
        """
        # Initialize base analyzer with model configuration
        primary_models = model_config.primary_models
        consolidation_model = model_config.consolidation_model
        super().__init__(primary_models, consolidation_model)
        self.progress = progress_reporter
        self.model_config = model_config

    async def _emit_failure_result(self, error_msg: str) -> ProcessingResult:
        """Report a failure (if a reporter is attached) and build the empty failure result"""
        if self.progress:
            await self.progress.emit_failure(error_msg)
        return ProcessingResult([], {}, 0.0, [error_msg], self.token_usage)

    async def process_document_with_progress(self, filepath: str) -> ProcessingResult:
        """
        Process document with progress reporting integration

        Failures are not raised: each stage's exception is reported through
        the progress reporter and returned as an empty ProcessingResult whose
        notes contain the error message.

        Args:
            filepath: Path to document file

        Returns:
            ProcessingResult with extracted data
        """
        try:
            if self.progress:
                await self.progress.emit(
                    JobPhase.EXTRACT_CONTENT,
                    10,
                    f"Starting analysis of {os.path.basename(filepath)}"
                )

            # Stage 1: Extract document content
            if self.progress:
                await self.progress.emit_log('INFO', "=== STAGE 1: Document Content Extraction ===")
            try:
                document_content = self._extract_document_content(filepath)
                if self.progress:
                    await self.progress.emit(
                        JobPhase.EXTRACT_CONTENT,
                        25,
                        "Document content extracted successfully"
                    )
                    await self.progress.emit_log('INFO', f"Extracted {len(document_content)} characters of content")
            except Exception as e:
                return await self._emit_failure_result(f"Content extraction failed: {e}")

            # Stage 2: Parallel multi-model analysis
            if self.progress:
                await self.progress.emit(
                    JobPhase.LLM_ANALYSIS,
                    30,
                    "Starting parallel multi-model analysis"
                )
                await self.progress.emit_log('INFO', "=== STAGE 2: Parallel Multi-Model Analysis ===")
                await self.progress.emit_log('INFO', f"Using models: {', '.join(self.primary_models)}")
            doc_type = self.classify_document(filepath)
            try:
                analysis_responses, analysis_metadata = await self._perform_parallel_analysis_with_progress(
                    document_content, doc_type
                )
                if self.progress:
                    await self.progress.emit(
                        JobPhase.LLM_ANALYSIS,
                        75,
                        f"Parallel analysis completed - {len(analysis_responses)} successful models"
                    )
                    await self.progress.emit_log('INFO',
                        f"Analysis complete: {len(analysis_responses)}/{len(self.primary_models)} models succeeded"
                    )
            except Exception as e:
                return await self._emit_failure_result(f"Parallel analysis failed: {e}")

            # Stage 3: Consolidation
            if self.progress:
                await self.progress.emit(
                    JobPhase.CONSOLIDATION,
                    80,
                    "Starting result consolidation"
                )
                await self.progress.emit_log('INFO', "=== STAGE 3: Result Consolidation ===")
                await self.progress.emit_log('INFO', f"Using consolidation model: {self.consolidation_model}")
            try:
                consolidation_result = await self.consolidation_processor.consolidate_results(
                    analysis_responses, self.consolidation_model, document_content
                )
                if self.progress:
                    await self.progress.emit(
                        JobPhase.CONSOLIDATION,
                        90,
                        f"Consolidation completed: {len(consolidation_result.expanded_assets)} final assets"
                    )
                    await self.progress.emit_log('INFO',
                        f"Consolidation complete: {len(consolidation_result.expanded_assets)} final deliverables"
                    )
            except Exception as e:
                return await self._emit_failure_result(f"Consolidation failed: {e}")

            # Stage 4: Prepare results
            if self.progress:
                await self.progress.emit(
                    JobPhase.CSV_GENERATION,
                    95,
                    "Preparing results for output"
                )
            # Convert expanded assets to dict format for compatibility
            extracted_data = [asset.model_dump() for asset in consolidation_result.expanded_assets]
            # Aggregate token usage from all models
            total_token_usage = self.provider_manager.get_aggregated_token_usage(analysis_responses)
            # Combine processing notes
            successful_count = analysis_metadata.get('successful_models', len(analysis_responses))
            total_count = analysis_metadata.get('total_models_attempted', len(self.primary_models))
            processing_notes = [f"Parallel analysis: {successful_count}/{total_count} models"]
            processing_notes.extend(consolidation_result.warnings)
            # Merge metadata
            combined_metadata = {
                'doc_type': doc_type.value,
                'primary_models_used': self.primary_models,
                'consolidation_model': self.consolidation_model,
                'analysis_metadata': analysis_metadata,
                'consolidation_metadata': consolidation_result.consolidation_metadata
            }
            result = ProcessingResult(
                raw_data=extracted_data,
                metadata=combined_metadata,
                confidence_score=0.9,  # Fixed value, not computed from the responses
                processing_notes=processing_notes,
                token_usage=total_token_usage
            )
            if self.progress:
                await self.progress.emit(
                    JobPhase.CSV_GENERATION,
                    100,
                    "Analysis completed successfully"
                )
                await self.progress.emit_log('INFO', "=== PROCESSING COMPLETED SUCCESSFULLY ===")
            return result
        except Exception as e:
            error_msg = f"Unexpected error during processing: {e}"
            logger.error(error_msg, exc_info=True)
            return await self._emit_failure_result(error_msg)

    async def _perform_parallel_analysis_with_progress(
        self,
        document_content: str,
        doc_type
    ) -> tuple:
        """
        Perform parallel analysis with progress reporting

        Args:
            document_content: Extracted document text
            doc_type: Document type classification (its ``.value`` is
                substituted into the prompt template)

        Returns:
            Tuple of (successful_responses, metadata)
        """
        # Load prompt from external file
        multi_perspective_prompt_template = self._load_prompt('multi_perspective_analysis')
        multi_perspective_prompt = multi_perspective_prompt_template.format(doc_type=doc_type.value)
        # Load system message from external file
        system_message = self._load_prompt('system_multi_perspective')
        # Prepare combined prompt
        combined_prompt = f"{multi_perspective_prompt}\n\nDocument Content:\n{document_content}"
        # Prepare messages for all providers
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": combined_prompt}
        ]
        # Get schema for structured output
        from core.process_brief_enhanced import UNIVERSAL_BASE_DELIVERABLE_SCHEMA
        # Provider events are forwarded only when a reporter is attached
        progress_callback = None
        if self.progress:
            progress_callback = self._create_provider_progress_callback()
        # Execute parallel analysis with progress reporting
        successful_responses, metadata = await self.provider_manager.execute_parallel_analysis(
            model_keys=self.primary_models,
            messages=messages,
            schema=UNIVERSAL_BASE_DELIVERABLE_SCHEMA,
            minimum_success_threshold=self.model_config.minimum_success_threshold,
            on_model_event=progress_callback
        )
        return successful_responses, metadata

    def _create_provider_progress_callback(self):
        """
        Create callback function for provider progress updates

        The callback never raises: any error inside it is logged so a
        reporting problem cannot abort the analysis itself.

        Returns:
            Async callback function taking (model_key, stage, data), where
            stage is 'start' or 'end'
        """
        async def on_model_event(model_key: str, stage: str, data: Any):
            if not self.progress:
                return
            # An event may carry no payload; treat that as empty so the 'end'
            # branch is as tolerant of None as the 'start' branch.
            payload = data if data else {}
            try:
                if stage == 'start':
                    await self.progress.emit_provider_update(model_key, {
                        'provider': self._get_provider_name(model_key),
                        'model': self._get_model_display_name(model_key),
                        'status': 'started',
                        'startedAt': payload.get('timestamp')
                    })
                    await self.progress.emit_log('INFO', f"Starting analysis with {model_key}")
                elif stage == 'end':
                    if 'error' in payload:
                        await self.progress.emit_provider_update(model_key, {
                            'provider': self._get_provider_name(model_key),
                            'model': self._get_model_display_name(model_key),
                            'status': 'error',
                            'error': str(payload['error']),
                            'completedAt': payload.get('timestamp')
                        })
                        await self.progress.emit_log('ERROR', f"Analysis failed for {model_key}: {payload['error']}")
                    else:
                        response = payload.get('response')
                        cost = payload.get('cost', 0)
                        if response:
                            await self.progress.emit_provider_update(model_key, {
                                'provider': self._get_provider_name(model_key),
                                'model': self._get_model_display_name(model_key),
                                'status': 'success',
                                'completedAt': payload.get('timestamp'),
                                'latencyMs': response.processing_time * 1000 if response.processing_time else None,
                                'tokensIn': response.token_usage.input_tokens,
                                'tokensOut': response.token_usage.output_tokens,
                                'tokensCached': response.token_usage.cached_input_tokens,
                                'costUsd': cost
                            })
                            await self.progress.emit_log('INFO', f"Analysis completed for {model_key} "
                                f"({response.token_usage.input_tokens + response.token_usage.output_tokens} tokens, ${cost:.4f})")
                    # Update overall progress
                    completed_count = len([
                        p for p in self.progress.job.provider_updates.values()
                        if p.status in ['success', 'error']
                    ])
                    total_count = len(self.primary_models)
                    # Calculate progress: 25% (extraction done) + (completed/total * 50%) for analysis
                    analysis_progress = await self.progress.calculate_analysis_progress(
                        base_progress=25,
                        completed_providers=completed_count,
                        total_providers=total_count,
                        analysis_weight=50
                    )
                    await self.progress.emit(
                        JobPhase.LLM_ANALYSIS,
                        analysis_progress,
                        f"Analysis progress: {completed_count}/{total_count} models complete"
                    )
            except Exception as e:
                logger.error(f"Error in provider progress callback: {e}")
        return on_model_event

    def _get_provider_name(self, model_key: str) -> str:
        """
        Get provider name from model key

        Falls back to the text before the first '-' in the key (or
        'unknown') when the core config cannot resolve the key.
        """
        from core.config import config
        try:
            provider_name, _ = config.get_model_info(model_key)
            return provider_name
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate
            return model_key.split('-')[0] if '-' in model_key else 'unknown'

    def _get_model_display_name(self, model_key: str) -> str:
        """Get display name for model; unknown keys are returned unchanged"""
        display_names = {
            'openai-gpt51': 'GPT-5.1',
            'anthropic-opus45': 'Claude Opus 4.5',
            'anthropic-sonnet45': 'Claude Sonnet 4.5',
            'google-gemini31': 'Gemini 3.1 Pro'
        }
        return display_names.get(model_key, model_key)

    def create_job_summary(self, result: ProcessingResult) -> JobSummary:
        """
        Create job summary from processing result

        Cost and token totals are read from the 'consolidation_metadata'
        entry of the result metadata and default to 0 when absent.

        Args:
            result: Processing result

        Returns:
            JobSummary object
        """
        # Extract cost information
        consolidation_metadata = result.metadata.get('consolidation_metadata', {})
        cost_breakdown = consolidation_metadata.get('cost_breakdown', {})
        token_usage = consolidation_metadata.get('token_usage', {})
        return JobSummary(
            doc_type=result.metadata.get('doc_type', 'unknown'),
            assets_extracted=len(result.raw_data),
            confidence_score=result.confidence_score,
            notes=result.processing_notes,
            cost_usd_total=cost_breakdown.get('total_cost', 0),
            tokens_total=token_usage.get('grand_total', 0),
            primary_models=result.metadata.get('primary_models_used', []),
            consolidation_model=result.metadata.get('consolidation_model', ''),
            processing_time_seconds=None  # Will be set by job runner
        )

View file

@ -0,0 +1,251 @@
"""
Job runner that orchestrates document processing with progress reporting
"""
import asyncio
import logging
import os
import time
from datetime import datetime
from typing import Dict, Any
from ..jobs.models import Job, JobPhase, JobSummary
from ..jobs.storage import StorageManager
from ..ws.manager import WebSocketManager
from .progress import ProgressReporter, create_job_logger
from core.process_brief_enhanced import DocumentAnalyzer
logger = logging.getLogger(__name__)
async def run_job(job: Job, ws_manager: WebSocketManager) -> bool:
    """
    Execute a document processing job with progress reporting

    Runs the analyzer on the uploaded file, writes the extracted records to a
    CSV file, and marks the job completed. Ordinary errors are reported as a
    job failure and turned into a False return; task cancellation is allowed
    to propagate.

    Args:
        job: Job to process
        ws_manager: WebSocket manager for real-time updates

    Returns:
        True if job completed successfully, False otherwise
    """
    start_time = time.time()
    # NOTE(review): the returned logger is not used below; the call is kept
    # because create_job_logger presumably registers log forwarding as a side
    # effect — confirm.
    job_logger = create_job_logger(job.id, ws_manager)
    try:
        # Create progress reporter
        progress = ProgressReporter(job, ws_manager)
        # Create analyzer with model configuration
        analyzer = DocumentAnalyzer(
            primary_models=job.model_config.primary_models,
            consolidation_model=job.model_config.consolidation_model
        )
        # Mark as GUI mode to suppress legacy print statements
        analyzer._is_gui_mode = True
        await progress.emit_log('INFO', f"Starting processing of {job.file_name}")
        await progress.emit_log('INFO', f"File size: {job.file_size:,} bytes")
        await progress.emit_log('INFO', f"Selected models: {', '.join(job.model_config.primary_models)}")
        await progress.emit_log('INFO', f"Consolidation model: {job.model_config.consolidation_model}")

        # Validate upload path exists
        if not job.upload_path or not os.path.exists(job.upload_path):
            error_msg = f"Upload file not found: {job.upload_path}"
            await progress.emit_failure(error_msg)
            return False

        # Process document
        result = await analyzer.process_document_multi_model(job.upload_path, progress)
        if not result.raw_data:
            error_msg = "No data extracted from document"
            await progress.emit_failure(error_msg)
            return False

        # Generate output CSV
        await progress.emit(JobPhase.CSV_GENERATION, 95, "Generating CSV output")
        storage = StorageManager()
        output_path = storage.get_output_path(job.id, job.file_name)

        # csv is only needed here, so it is imported locally
        import csv

        def _write_csv():
            """Blocking CSV write operation for thread pool"""
            with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
                if result.raw_data:
                    # Columns come from the first record; keys that appear
                    # only in later records are dropped (extrasaction='ignore')
                    headers = list(result.raw_data[0].keys())
                    writer = csv.DictWriter(csvfile, fieldnames=headers, extrasaction='ignore')
                    writer.writeheader()
                    writer.writerows(result.raw_data)

        # Run CSV writing in thread pool to avoid blocking event loop
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, _write_csv)

        # Create job summary
        processing_time = time.time() - start_time
        summary = create_job_summary(result, processing_time)
        # Generate CSV download URL
        result_csv_url = f"/api/jobs/{job.id}/download"
        # Mark job as completed
        job.mark_completed(result_csv_url, summary, output_path)
        # Emit completion event
        await progress.emit_completion(result_csv_url, summary.to_dict())
        await progress.emit_log('INFO', f"Processing completed in {processing_time:.1f} seconds")
        await progress.emit_log('INFO', f"Extracted {len(result.raw_data)} marketing assets")
        await progress.emit_log('INFO', f"Total cost: ${summary.cost_usd_total:.4f}")
        await progress.emit_log('INFO', f"Total tokens: {summary.tokens_total:,}")
        logger.info(f"Job {job.id} completed successfully: {len(result.raw_data)} assets, "
                    f"${summary.cost_usd_total:.4f}, {processing_time:.1f}s")
        return True
    except Exception as e:
        error_msg = f"Job processing failed: {str(e)}"
        logger.error(f"Job {job.id} failed: {error_msg}", exc_info=True)
        try:
            # A fresh reporter: the one above may not exist if its
            # construction was what failed.
            progress = ProgressReporter(job, ws_manager)
            await progress.emit_failure(error_msg)
        except Exception:
            # Fallback if progress reporter fails. Deliberately not a bare
            # except: asyncio.CancelledError raised while awaiting must
            # propagate so the worker can actually be cancelled.
            job.mark_failed(error_msg)
        return False
async def process_job_queue(job_manager, ws_manager: WebSocketManager):
    """
    Background worker that processes jobs from the queue.

    Loops until cancelled: waits for a job id on ``job_manager.queue``, looks
    the job up with ``job_manager.get_job`` and runs it through ``run_job``
    while holding ``job_manager.processing_semaphore``.

    Every id that was actually taken off the queue is acknowledged with
    ``task_done()`` exactly once, in a ``finally`` block, regardless of
    whether the job succeeded, failed, was missing, raised, or the worker was
    cancelled mid-job. ``task_done()`` is never called for an iteration in
    which ``queue.get()`` itself did not return an item.

    Args:
        job_manager: JobManager instance
        ws_manager: WebSocket manager for updates
    """
    logger.info("Starting job queue processor")
    while True:
        # Becomes True only once an item has really been removed from the
        # queue, so the finally block knows whether an acknowledgement is owed.
        dequeued = False
        try:
            # Get next job from queue (blocks until available)
            job_id = await job_manager.queue.get()
            dequeued = True

            # Get job details
            job = await job_manager.get_job(job_id)
            if not job:
                logger.warning(f"Job {job_id} not found in registry")
                continue

            logger.info(f"Processing job {job_id}: {job.file_name}")

            # Check queue size for debugging
            queue_size = job_manager.queue.qsize()
            logger.info(f"Queue size before processing: {queue_size}")

            # Acquire semaphore for concurrency control
            async with job_manager.processing_semaphore:
                success = await run_job(job, ws_manager)
                if success:
                    logger.info(f"Job {job_id} completed successfully")
                else:
                    logger.error(f"Job {job_id} failed")

            # Check queue size after processing
            remaining_queue_size = job_manager.queue.qsize()
            logger.info(f"Queue size after processing: {remaining_queue_size}")
        except asyncio.CancelledError:
            logger.info("Job queue processor cancelled")
            break
        except Exception as e:
            # Keep the worker alive so later jobs are still processed.
            logger.error(f"Error in job queue processor: {e}", exc_info=True)
        finally:
            if dequeued:
                try:
                    job_manager.queue.task_done()
                except ValueError:
                    # Raised by the queue when task_done() is called more
                    # often than items were fetched; report it instead of
                    # hiding it behind a bare except.
                    logger.warning("Queue task_done() called more times than items were fetched")
async def start_background_workers(job_manager, ws_manager: WebSocketManager, num_workers: int = 1):
    """
    Launch the asyncio tasks that consume the job queue.

    Each task runs ``process_job_queue`` and is named ``job-worker-<index>``.

    Args:
        job_manager: JobManager instance
        ws_manager: WebSocket manager
        num_workers: Number of worker tasks to start

    Returns:
        List of worker tasks
    """

    def _spawn(index):
        # Create one named worker task and record that it was started.
        task = asyncio.create_task(
            process_job_queue(job_manager, ws_manager),
            name=f"job-worker-{index}"
        )
        logger.info(f"Started job worker {index}")
        return task

    return [_spawn(index) for index in range(num_workers)]
async def stop_background_workers(workers):
    """
    Cancel the given worker tasks and wait until all of them have finished.

    Args:
        workers: List of worker tasks to stop
    """
    logger.info("Stopping background workers...")

    # Request cancellation of every task before waiting on any of them.
    for task in workers:
        task.cancel()

    # return_exceptions=True collects each task's outcome instead of raising
    # the first one out of gather().
    try:
        await asyncio.gather(*workers, return_exceptions=True)
    except Exception as exc:
        logger.warning(f"Error stopping workers: {exc}")

    logger.info("Background workers stopped")
def create_job_summary(result, processing_time: float) -> JobSummary:
    """
    Build the JobSummary for a finished job.

    Cost and token totals are read from the ``consolidation_metadata`` entry
    of ``result.metadata``; any missing level falls back to an empty dict and
    the totals to 0.

    Args:
        result: ProcessingResult from DocumentAnalyzer
        processing_time: Total processing time in seconds

    Returns:
        JobSummary object
    """
    metadata = result.metadata
    consolidation = metadata.get('consolidation_metadata', {})
    total_cost = consolidation.get('cost_breakdown', {}).get('total_cost', 0)
    total_tokens = consolidation.get('token_usage', {}).get('grand_total', 0)

    summary_fields = {
        'doc_type': metadata.get('doc_type', 'unknown'),
        'assets_extracted': len(result.raw_data),
        'confidence_score': result.confidence_score,
        'notes': result.processing_notes,
        'cost_usd_total': total_cost,
        'tokens_total': total_tokens,
        'primary_models': metadata.get('primary_models_used', []),
        'consolidation_model': metadata.get('consolidation_model', ''),
        'processing_time_seconds': processing_time,
    }
    return JobSummary(**summary_fields)

View file

@ -0,0 +1,301 @@
"""
Progress reporting for job processing with WebSocket integration
"""
import logging
from datetime import datetime
from typing import Dict, Any, Optional
from ..jobs.models import Job, JobPhase, ProviderUpdate
from ..ws.manager import WebSocketManager
logger = logging.getLogger(__name__)
class ProgressReporter:
    """
    Reports progress updates for job processing with WebSocket broadcasting.

    Each ``emit*`` coroutine updates the in-memory job, writes to a per-job
    logger and broadcasts an event through
    ``ws_manager.broadcast_job_update``. Exceptions raised inside an emitter
    are logged and swallowed so that a reporting problem never aborts the
    processing pipeline.
    """

    def __init__(self, job: Job, ws_manager: WebSocketManager):
        self.job = job
        self.ws_manager = ws_manager
        # Child logger named after the job id so log lines can be attributed.
        self.logger = logging.getLogger(f"{__name__}.{job.id}")

    async def emit(
        self,
        phase: JobPhase,
        progress_pct: int,
        message: str = "",
        step_label: str = ""
    ):
        """
        Emit progress update for job

        Args:
            phase: Current processing phase
            progress_pct: Progress percentage (0-100)
            message: Optional progress message
            step_label: Optional custom step label
        """
        try:
            # Update job progress
            self.job.update_progress(phase, progress_pct, step_label)

            # Add log entry
            if message:
                self.job.add_log('INFO', message)
                self.logger.info(message)

            # ``phase`` may be an enum member or an already-plain value.
            phase_name = phase.value if hasattr(phase, 'value') else phase

            # Broadcast progress update
            await self.ws_manager.broadcast_job_update(self.job.id, {
                'type': 'job.progress',
                'jobId': self.job.id,
                'phase': phase_name,
                'progressPct': progress_pct,
                'message': message,
                'stepLabel': self.job.step_label,
                'providerUpdates': {k: v.to_dict() for k, v in self.job.provider_updates.items()}
            })
            self.logger.debug(f"Progress update: {phase_name} {progress_pct}% - {message}")
        except Exception as e:
            self.logger.error(f"Failed to emit progress update: {e}")
            # Don't re-raise to avoid breaking the processing pipeline

    async def emit_provider_update(
        self,
        model_key: str,
        update_data: Dict[str, Any]
    ):
        """
        Emit provider-specific update

        Args:
            model_key: Model identifier (e.g., 'openai-gpt51')
            update_data: Provider update information, read with camelCase
                keys (``startedAt``, ``latencyMs``, ``tokensIn``, ...)
        """
        try:
            # Create provider update object
            provider_update = ProviderUpdate(
                provider=update_data.get('provider', ''),
                model=update_data.get('model', ''),
                status=update_data.get('status', ''),
                started_at=update_data.get('startedAt'),
                completed_at=update_data.get('completedAt'),
                latency_ms=update_data.get('latencyMs'),
                tokens_in=update_data.get('tokensIn'),
                tokens_out=update_data.get('tokensOut'),
                tokens_cached=update_data.get('tokensCached'),
                cost_usd=update_data.get('costUsd'),
                error=update_data.get('error')
            )

            # Update job
            self.job.update_provider(model_key, provider_update)

            # Log provider update; an attached error escalates it to ERROR.
            status_msg = f"Provider {model_key}: {provider_update.status}"
            if provider_update.error:
                status_msg += f" - {provider_update.error}"
                self.job.add_log('ERROR', status_msg)
                self.logger.error(status_msg)
            else:
                self.job.add_log('INFO', status_msg)
                self.logger.info(status_msg)

            # Broadcast provider update
            await self.ws_manager.broadcast_job_update(self.job.id, {
                'type': 'job.provider_update',
                'jobId': self.job.id,
                'modelKey': model_key,
                'update': provider_update.to_dict()
            })
            self.logger.debug(f"Provider update: {model_key} - {provider_update.status}")
        except Exception as e:
            self.logger.error(f"Failed to emit provider update for {model_key}: {e}")

    async def emit_log(self, level: str, message: str):
        """
        Emit log message with WebSocket streaming

        Args:
            level: Log level (DEBUG, INFO, WARNING, ERROR)
            message: Log message
        """
        try:
            # Add to job logs
            self.job.add_log(level, message)

            # Log to system logger. The level name is resolved to its numeric
            # value instead of looking up an attribute on the logger: an
            # attribute lookup would also match unrelated Logger attributes
            # such as "name" or "handlers", which are not callable.
            level_no = logging.getLevelName(level.upper())
            if not isinstance(level_no, int) or level_no <= 0:
                # Unknown level names fall back to INFO.
                level_no = logging.INFO
            self.logger.log(level_no, message)

            # Broadcast log entry
            await self.ws_manager.broadcast_job_update(self.job.id, {
                'type': 'job.log',
                'jobId': self.job.id,
                'logEntry': {
                    'timestamp': datetime.utcnow().isoformat(),
                    'level': level,
                    'message': message
                }
            })
        except Exception as e:
            self.logger.error(f"Failed to emit log message: {e}")

    async def calculate_analysis_progress(
        self,
        base_progress: int,
        completed_providers: int,
        total_providers: int,
        analysis_weight: int = 50
    ) -> int:
        """
        Calculate progress percentage for LLM analysis phase

        Args:
            base_progress: Starting progress percentage (usually 25)
            completed_providers: Number of completed providers
            total_providers: Total number of providers
            analysis_weight: Weight of analysis phase in total progress

        Returns:
            Updated progress percentage, capped at 100
        """
        # No providers means no analysis share to add (and avoids dividing by zero).
        if total_providers == 0:
            return base_progress
        analysis_progress = (completed_providers / total_providers) * analysis_weight
        return min(100, base_progress + int(analysis_progress))

    async def emit_completion(
        self,
        result_csv_url: str,
        summary_data: Dict[str, Any]
    ):
        """
        Emit job completion event

        Args:
            result_csv_url: URL to download CSV result
            summary_data: Job summary information
        """
        try:
            self.job.add_log('INFO', 'Processing completed successfully')

            # Broadcast completion
            await self.ws_manager.broadcast_job_update(self.job.id, {
                'type': 'job.completed',
                'jobId': self.job.id,
                'resultCsvUrl': result_csv_url,
                'summary': summary_data
            })
            self.logger.info(f"Job {self.job.id} completed successfully")
        except Exception as e:
            self.logger.error(f"Failed to emit completion event: {e}")

    async def emit_failure(self, error: str):
        """
        Mark the job as failed and emit job failure event

        Args:
            error: Error message
        """
        try:
            self.job.mark_failed(error)
            self.job.add_log('ERROR', f'Processing failed: {error}')

            # Broadcast failure
            await self.ws_manager.broadcast_job_update(self.job.id, {
                'type': 'job.failed',
                'jobId': self.job.id,
                'error': error
            })
            self.logger.error(f"Job {self.job.id} failed: {error}")
        except Exception as e:
            self.logger.error(f"Failed to emit failure event: {e}")
class JobLogHandler(logging.Handler):
    """
    Custom logging handler that routes job-specific logs to WebSocket clients.

    Records are formatted and scheduled as ``job.log`` broadcasts on the
    currently running asyncio event loop. When the handler is invoked where
    no loop is running, the record is skipped for WebSocket purposes.
    """

    def __init__(self, job_id: str, ws_manager: WebSocketManager):
        super().__init__()
        self.job_id = job_id
        self.ws_manager = ws_manager
        # Strong references to in-flight broadcast tasks. The event loop only
        # keeps weak references to tasks, so an unreferenced task could be
        # garbage-collected before it finishes.
        self._pending_tasks = set()
        # Set up formatter for log messages
        self.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        ))

    def emit(self, record):
        """
        Process a log record and send it via WebSocket

        Args:
            record: LogRecord to process
        """
        try:
            # Format the message
            message = self.format(record)

            # Create log entry
            log_entry = {
                'timestamp': datetime.utcnow().isoformat(),
                'level': record.levelname,
                'message': message,
                'logger': record.name
            }

            # Send via WebSocket (non-blocking)
            import asyncio
            try:
                # get_running_loop() raises RuntimeError when no loop is
                # running here, so a task is never scheduled on a loop that
                # would not execute it.
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # No event loop available, skip WebSocket update
                return

            task = loop.create_task(self.ws_manager.broadcast_job_update(self.job_id, {
                'type': 'job.log',
                'jobId': self.job_id,
                'logEntry': log_entry
            }))
            self._pending_tasks.add(task)
            task.add_done_callback(self._pending_tasks.discard)
        except Exception:
            # Don't let logging errors break the application; report through
            # the logging module's own error hook.
            self.handleError(record)
def create_job_logger(job_id: str, ws_manager: WebSocketManager) -> logging.Logger:
    """
    Build the logger ``job.<job_id>`` and attach a WebSocket-streaming handler.

    Any handlers already present on that logger are dropped first so repeated
    calls for the same job id do not stack duplicate handlers.

    Args:
        job_id: Job identifier
        ws_manager: WebSocket manager instance

    Returns:
        Logger instance with job-specific handler
    """
    job_logger = logging.getLogger(f"job.{job_id}")

    # Start from a clean slate to avoid duplicate output.
    job_logger.handlers.clear()

    # Handler that forwards INFO-and-above records to WebSocket clients.
    ws_handler = JobLogHandler(job_id, ws_manager)
    ws_handler.setLevel(logging.INFO)
    job_logger.addHandler(ws_handler)

    job_logger.setLevel(logging.INFO)
    return job_logger

View file

View file

@ -0,0 +1,157 @@
"""
Sheet management Python port of sheet_helpers.php.
File-based JSON storage, one metadata file + one data file per sheet.
"""
import json
import logging
import os
import re
import time
import random
from datetime import datetime, timezone
from typing import List, Optional, Dict
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
# Path of the single JSON index holding sheet metadata for all users; the
# functions below read and write it as a dict mapping user id -> list of
# sheet metadata dicts.
METADATA_FILE = os.path.join(server_config.DATA_DIR, 'sheets_metadata.json')
def _safe_user(user_id: str) -> str:
"""Sanitise user_id for use in filenames."""
return re.sub(r'[^a-zA-Z0-9_\-]', '_', user_id)
def _sheet_path(user_id: str, sheet_id: str) -> str:
    """
    Return the path of the JSON data file for one sheet.

    The file lives in ``server_config.SHEETS_DIR`` and is named
    ``<user>_<sheet>.json``. Both parts are restricted to
    ``[a-zA-Z0-9_-]`` so that neither a user id nor a sheet id can introduce
    path separators or ``..`` segments. Sheet ids produced by ``create_sheet``
    consist of digits only and are therefore unaffected.
    """
    safe_sheet = re.sub(r'[^a-zA-Z0-9_\-]', '_', str(sheet_id))
    return os.path.join(server_config.SHEETS_DIR, f"{_safe_user(user_id)}_{safe_sheet}.json")
def _load_metadata() -> Dict:
    """
    Load the sheet metadata index from ``METADATA_FILE``.

    Returns:
        The parsed index (user id -> list of sheet metadata dicts), or an
        empty dict when the file is missing, unreadable, or does not contain
        a JSON object. Read problems are logged rather than silently hidden,
        because a later save of the empty result replaces the file.
    """
    if not os.path.exists(METADATA_FILE):
        return {}
    try:
        with open(METADATA_FILE, 'r') as f:
            meta = json.load(f)
    except Exception as exc:
        logger.error(f"Failed to read sheet metadata from {METADATA_FILE}: {exc}")
        return {}
    if not isinstance(meta, dict):
        # Callers index the result by user id, so anything but an object is unusable.
        logger.error(f"Sheet metadata in {METADATA_FILE} is not a JSON object; ignoring it")
        return {}
    return meta
def _save_metadata(meta: Dict):
    """
    Persist the sheet metadata index to ``METADATA_FILE``.

    The JSON is written to a sibling temporary file and then moved over the
    real file with ``os.replace``, so an interrupted write cannot leave a
    truncated index behind.

    Args:
        meta: Full index (user id -> list of sheet metadata dicts)
    """
    tmp_path = f"{METADATA_FILE}.tmp"
    try:
        with open(tmp_path, 'w') as f:
            json.dump(meta, f, indent=2)
        os.replace(tmp_path, METADATA_FILE)
    finally:
        # After a successful replace the temp file no longer exists; this
        # only cleans up when writing or replacing failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def get_user_sheets(user_id: str) -> List[Dict]:
    """Return the metadata entries of all sheets recorded for ``user_id`` (empty list when the user has none)."""
    index = _load_metadata()
    if user_id in index:
        return index[user_id]
    return []
def create_sheet(user_id: str, name: str, data: List[dict] = None) -> Dict:
    """
    Create a new sheet for ``user_id`` and register it in the metadata index.

    The sheet id is the current Unix time in seconds followed by a random
    three-digit number. The rows are written to the sheet's own JSON file
    before the metadata index is updated.

    Args:
        user_id: Owner of the sheet
        name: Display name; when empty a timestamped "Untitled Sheet" name is used
        data: Initial rows (defaults to an empty list)

    Returns:
        The metadata dict of the new sheet
    """
    rows = [] if data is None else data

    timestamp_part = str(int(time.time()))
    random_part = str(random.randint(100, 999))
    sheet_id = timestamp_part + random_part

    created_at = datetime.now(timezone.utc).isoformat()
    display_name = name or f"Untitled Sheet — {datetime.now().strftime('%Y-%m-%d %H:%M')}"
    entry = {
        'id': sheet_id,
        'name': display_name,
        'created': created_at,
        'modified': created_at,
        'itemCount': len(rows),
        'user': user_id,
    }

    # Data file first, index second.
    with open(_sheet_path(user_id, sheet_id), 'w') as out:
        json.dump(rows, out, indent=2)

    index = _load_metadata()
    index.setdefault(user_id, []).append(entry)
    _save_metadata(index)
    return entry
def load_sheet_data(user_id: str, sheet_id: str) -> Optional[List[dict]]:
    """
    Read the rows of one sheet from its JSON data file.

    Returns:
        The parsed file content, or None when the file does not exist or
        cannot be read or parsed.
    """
    data_file = _sheet_path(user_id, sheet_id)
    if os.path.exists(data_file):
        try:
            with open(data_file, 'r') as src:
                return json.load(src)
        except Exception:
            # Unreadable or invalid JSON is reported the same way as a missing sheet.
            pass
    return None
def update_sheet(user_id: str, sheet_id: str, data: List[dict]) -> bool:
    """
    Overwrite a sheet's rows and refresh its metadata entry.

    The data file is always (re)written. If the user has an entry in the
    metadata index, the matching sheet's ``modified`` timestamp and
    ``itemCount`` are updated and the index is saved.

    Returns:
        Always True
    """
    with open(_sheet_path(user_id, sheet_id), 'w') as out:
        json.dump(data, out, indent=2)

    # Update metadata counts
    index = _load_metadata()
    if user_id not in index:
        return True

    entry = next((s for s in index[user_id] if s['id'] == sheet_id), None)
    if entry is not None:
        entry['modified'] = datetime.now(timezone.utc).isoformat()
        entry['itemCount'] = len(data)
    _save_metadata(index)
    return True
def delete_sheet(user_id: str, sheet_id: str):
    """Remove a sheet's data file (if present) and drop its entry from the user's list in the metadata index."""
    data_file = _sheet_path(user_id, sheet_id)
    if os.path.exists(data_file):
        os.remove(data_file)

    index = _load_metadata()
    if user_id not in index:
        return

    remaining = []
    for entry in index[user_id]:
        if entry['id'] != sheet_id:
            remaining.append(entry)
    index[user_id] = remaining
    _save_metadata(index)
def rename_sheet(user_id: str, sheet_id: str, new_name: str) -> bool:
    """
    Change the display name of a sheet in the metadata index.

    Returns:
        True when the sheet was found and renamed, False when the user or
        the sheet is not present in the index.
    """
    index = _load_metadata()
    target = next(
        (entry for entry in index.get(user_id, []) if entry['id'] == sheet_id),
        None,
    )
    if target is None:
        return False

    target['name'] = new_name
    target['modified'] = datetime.now(timezone.utc).isoformat()
    _save_metadata(index)
    return True
def duplicate_sheet(user_id: str, sheet_id: str) -> Optional[Dict]:
    """
    Create a copy of an existing sheet under the name "Copy of <original name>".

    Returns:
        Metadata of the new sheet, or None when the source sheet's data
        cannot be loaded.
    """
    rows = load_sheet_data(user_id, sheet_id)
    if rows is None:
        return None

    source = next(
        (entry for entry in _load_metadata().get(user_id, []) if entry['id'] == sheet_id),
        None,
    )
    # Generic name when the source sheet has no metadata entry.
    copy_name = f"Copy of {source['name']}" if source is not None else "Copy of Sheet"
    return create_sheet(user_id, copy_name, rows)
def generate_next_id(data: List[dict]) -> str:
    """
    Generate the next DEL-NNN id.

    Scans the ``Number`` field of every row, strips the ``DEL-`` prefix and
    takes the highest integer found. Rows whose ``Number`` is missing, None,
    or not numeric are ignored instead of raising.

    Args:
        data: Sheet rows

    Returns:
        ``DEL-`` followed by (highest number + 1), zero-padded to at least
        three digits; ``DEL-001`` when no row carries a usable number.
    """
    highest = 0
    for row in data:
        raw = row.get('Number', '')
        # The value may be None or a bare number rather than a string.
        text = '' if raw is None else str(raw)
        try:
            number = int(text.replace('DEL-', ''))
        except ValueError:
            continue
        highest = max(highest, number)
    return f"DEL-{str(highest + 1).zfill(3)}"

View file

@ -0,0 +1,73 @@
"""
Pydantic models for sheets and deliverables.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import List, Optional
from pydantic import BaseModel, Field
class Deliverable(BaseModel):
    """
    One deliverable row of a sheet.

    Three fields use Python-friendly attribute names with aliases that match
    the column names used in the stored row dicts ("Sub-media",
    "Supply date", "Live date"); ``populate_by_name`` lets them be set by
    either name.
    """
    Number: str = ""
    Title: str = ""
    Status: str = "Booked"
    Category: str = ""
    Media: str = ""
    SubMedia: str = Field(default="", alias="Sub-media")
    Format: str = ""
    SupplyDate: str = Field(default="", alias="Supply date")
    LiveDate: str = Field(default="", alias="Live date")
    Language: str = ""
    Country: str = ""
    Quantity: int = 1

    class Config:
        populate_by_name = True

    def to_dict(self) -> dict:
        """Return the row as a plain dict keyed by the column names (aliases), not the attribute names."""
        return {
            "Number": self.Number,
            "Title": self.Title,
            "Status": self.Status,
            "Category": self.Category,
            "Media": self.Media,
            "Sub-media": self.SubMedia,
            "Format": self.Format,
            "Supply date": self.SupplyDate,
            "Live date": self.LiveDate,
            "Language": self.Language,
            "Country": self.Country,
            "Quantity": self.Quantity,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Deliverable":
        """
        Build a Deliverable from a row dict keyed by column names.

        Missing keys fall back to the field defaults. ``Quantity`` is
        coerced to an int; values such as "2.0" are truncated, and values
        that cannot be interpreted as a number at all (empty string, None,
        free text) fall back to 1 instead of raising.
        """
        raw_quantity = d.get("Quantity", 1)
        try:
            quantity = int(raw_quantity)
        except (TypeError, ValueError):
            try:
                quantity = int(float(raw_quantity))
            except (TypeError, ValueError, OverflowError):
                quantity = 1

        return cls(
            Number=d.get("Number", ""),
            Title=d.get("Title", ""),
            Status=d.get("Status", "Booked"),
            Category=d.get("Category", ""),
            Media=d.get("Media", ""),
            # Aliased fields are passed by alias; the names are not valid identifiers.
            **{"Sub-media": d.get("Sub-media", "")},
            Format=d.get("Format", ""),
            **{"Supply date": d.get("Supply date", "")},
            **{"Live date": d.get("Live date", "")},
            Language=d.get("Language", ""),
            Country=d.get("Country", ""),
            Quantity=quantity,
        )
class SheetMeta(BaseModel):
    """Metadata describing one sheet (as opposed to its row data)."""
    # Sheet identifier.
    id: str
    # Display name of the sheet.
    name: str
    # Creation / last-modification timestamps, stored as strings.
    # NOTE(review): presumably ISO-8601 — confirm against the code that writes them.
    created: str
    modified: str
    # Number of rows in the sheet.
    itemCount: int
    # Owning user's id.
    user: str
class Sheet(BaseModel):
    """A sheet: its metadata together with its rows."""
    meta: SheetMeta
    data: List[dict]  # raw dicts for speed; validated on write

13
backend/server/ws/__init__.py Executable file
View file

@ -0,0 +1,13 @@
"""
WebSocket module for real-time communication
"""
from .manager import WebSocketManager
# Create global instance
# Package-level manager object exported for importers of this package.
# NOTE(review): presumably the same object as the one created in .manager
# (the class looks like a singleton) — confirm.
ws_manager = WebSocketManager()
# Public names of the package.
__all__ = [
    'WebSocketManager',
    'ws_manager'
]

300
backend/server/ws/manager.py Executable file
View file

@ -0,0 +1,300 @@
"""
WebSocket connection and message management
"""
import asyncio
import json
import logging
from datetime import datetime
from typing import Dict, Set, Any, Optional
import uuid
from weakref import WeakSet
from quart import websocket
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
class WebSocketClient:
    """
    One connected WebSocket client.

    Must be constructed while a WebSocket request is current, because the
    constructor captures the concrete object behind quart's ``websocket``
    proxy for later sends.
    """

    def __init__(self, client_id: str, user_id: Optional[str] = None):
        self.client_id = client_id
        # Connections without a user id are attributed to 'anonymous'.
        self.user_id = user_id if user_id else 'anonymous'
        self.connected_at = datetime.utcnow()
        self.last_ping = datetime.utcnow()
        self.websocket = websocket._get_current_object()

    async def send(self, message: Dict[str, Any]):
        """Serialise ``message`` to JSON and send it to this client; failures are logged and re-raised."""
        try:
            payload = json.dumps(message)
            await self.websocket.send(payload)
        except Exception as exc:
            logger.warning(f"Failed to send message to client {self.client_id}: {exc}")
            raise

    async def ping(self):
        """Send a ``ping`` message and, on success, record the time in ``last_ping``; failures are logged and re-raised."""
        try:
            ping_message = {'type': 'ping', 'timestamp': datetime.utcnow().isoformat()}
            await self.send(ping_message)
            self.last_ping = datetime.utcnow()
        except Exception as exc:
            logger.warning(f"Failed to ping client {self.client_id}: {exc}")
            raise
class WebSocketManager:
    """
    Manages WebSocket connections and broadcasts.

    Singleton for coordinating real-time updates: ``__new__`` always returns
    the same instance and ``__init__`` initialises it only once. The client
    registry is guarded by an asyncio lock.
    """
    _instance: Optional['WebSocketManager'] = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every WebSocketManager() call; only the first one
        # may set up state.
        if hasattr(self, '_initialized'):
            return
        self._initialized = True
        self.clients: Dict[str, WebSocketClient] = {}
        self._lock = asyncio.Lock()
        # Background task handles; populated by start_background_tasks()
        self.ping_task = None
        self.cleanup_task = None
        logger.info("WebSocketManager initialized")

    async def start_background_tasks(self):
        """Start background maintenance tasks (ping loop and stale-client cleanup) that are not already running"""
        if not self.ping_task:
            self.ping_task = asyncio.create_task(self._ping_clients_loop())
        if not self.cleanup_task:
            self.cleanup_task = asyncio.create_task(self._cleanup_disconnected_loop())

    async def stop_background_tasks(self):
        """
        Stop background maintenance tasks.

        The task handles are reset to None so that a later
        start_background_tasks() call creates fresh tasks instead of seeing
        the cancelled ones and doing nothing.
        """
        ping_task, self.ping_task = self.ping_task, None
        cleanup_task, self.cleanup_task = self.cleanup_task, None
        for task in (ping_task, cleanup_task):
            if not task:
                continue
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass

    async def register_client(self, user_id: Optional[str] = None) -> WebSocketClient:
        """
        Register a new WebSocket client

        Args:
            user_id: User identifier (optional for dev mode)

        Returns:
            WebSocketClient instance

        Raises:
            Exception: whatever the initial acknowledgment send raises; the
                client is removed from the registry again in that case.
        """
        client_id = str(uuid.uuid4())
        client = WebSocketClient(client_id, user_id)
        async with self._lock:
            self.clients[client_id] = client
        logger.info(f"Registered WebSocket client {client_id} for user {user_id}")

        # Send initial connection acknowledgment
        try:
            await client.send({
                'type': 'connection.established',
                'clientId': client_id,
                'userId': user_id,
                'connectedAt': client.connected_at.isoformat()
            })
        except Exception:
            # The connection is unusable; don't leave it in the registry.
            async with self._lock:
                self.clients.pop(client_id, None)
            raise
        return client

    async def unregister_client(self, client_id: str):
        """
        Unregister a WebSocket client

        Args:
            client_id: Client identifier
        """
        async with self._lock:
            if client_id in self.clients:
                client = self.clients.pop(client_id)
                logger.info(f"Unregistered WebSocket client {client_id} for user {client.user_id}")

    async def _deliver(self, payload: Dict[str, Any], wanted=None) -> int:
        """
        Send ``payload`` to registered clients and drop those whose send fails.

        Args:
            payload: Message to send
            wanted: Optional predicate taking a client; when given, only
                clients for which it returns true receive the message

        Returns:
            Number of clients the message was sent to successfully
        """
        delivered = 0
        async with self._lock:
            failed = []
            for client_id, client in self.clients.items():
                if wanted is not None and not wanted(client):
                    continue
                try:
                    await client.send(payload)
                    delivered += 1
                except Exception as e:
                    logger.warning(f"Failed to send to client {client_id}: {e}")
                    failed.append(client_id)
            # Remove failed clients
            for client_id in failed:
                self.clients.pop(client_id, None)
        return delivered

    async def broadcast_to_all(self, message: Dict[str, Any]):
        """
        Broadcast message to all connected clients

        A ``timestamp`` field is added to a copy of the message; the caller's
        dict is not modified.

        Args:
            message: Message to broadcast
        """
        if not self.clients:
            return
        payload = {**message, 'timestamp': datetime.utcnow().isoformat()}
        await self._deliver(payload)

    async def broadcast_to_user(self, user_id: str, message: Dict[str, Any]):
        """
        Broadcast message to all connections for a specific user

        A ``timestamp`` field is added to a copy of the message; the caller's
        dict is not modified.

        Args:
            user_id: User identifier
            message: Message to broadcast
        """
        if not self.clients:
            return
        payload = {**message, 'timestamp': datetime.utcnow().isoformat()}
        sent_count = await self._deliver(payload, lambda client: client.user_id == user_id)
        if sent_count > 0:
            logger.debug(f"Broadcast message to {sent_count} clients for user {user_id}")

    async def broadcast_job_update(self, job_id: str, message: Dict[str, Any]):
        """
        Broadcast job-specific update

        Args:
            job_id: Job identifier
            message: Message to broadcast
        """
        # For now, broadcast to all clients
        # In the future, we could implement job-specific subscriptions
        await self.broadcast_to_all({**message, 'jobId': job_id})

    async def send_queue_snapshot(self, client: WebSocketClient, jobs_data: list):
        """
        Send initial queue snapshot to a client

        Args:
            client: WebSocket client
            jobs_data: Serialized jobs data

        Raises:
            Exception: re-raised from the send after being logged
        """
        try:
            await client.send({
                'type': 'queue.snapshot',
                'jobs': jobs_data
            })
            logger.debug(f"Sent queue snapshot to client {client.client_id}")
        except Exception as e:
            logger.error(f"Failed to send queue snapshot to {client.client_id}: {e}")
            raise

    async def get_connection_stats(self) -> Dict[str, Any]:
        """
        Get WebSocket connection statistics

        Returns:
            Statistics dictionary; ``uptime_seconds`` is the age of the
            oldest currently registered connection (0 when there is none)
        """
        async with self._lock:
            user_counts = {}
            for client in self.clients.values():
                user_counts[client.user_id] = user_counts.get(client.user_id, 0) + 1
            return {
                'total_connections': len(self.clients),
                'unique_users': len(user_counts),
                'connections_per_user': user_counts,
                'uptime_seconds': (datetime.utcnow() -
                                   min((c.connected_at for c in self.clients.values()),
                                       default=datetime.utcnow())).total_seconds()
            }

    async def _ping_clients_loop(self):
        """Background task to ping clients periodically and drop those whose ping fails"""
        while True:
            try:
                await asyncio.sleep(server_config.WS_PING_INTERVAL_SECONDS)
                async with self._lock:
                    clients_to_remove = []
                    for client_id, client in self.clients.items():
                        try:
                            await client.ping()
                        except Exception:
                            clients_to_remove.append(client_id)
                    # Remove failed clients
                    for client_id in clients_to_remove:
                        self.clients.pop(client_id, None)
                        logger.debug(f"Removed unresponsive client {client_id}")
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in ping loop: {e}")

    async def _cleanup_disconnected_loop(self):
        """Background task to clean up clients whose last successful ping is older than three ping intervals"""
        while True:
            try:
                await asyncio.sleep(60)  # Check every minute
                async with self._lock:
                    # Clean up clients that haven't been pinged recently
                    cutoff = datetime.utcnow().timestamp() - (server_config.WS_PING_INTERVAL_SECONDS * 3)
                    clients_to_remove = []
                    for client_id, client in self.clients.items():
                        if client.last_ping.timestamp() < cutoff:
                            clients_to_remove.append(client_id)
                    for client_id in clients_to_remove:
                        self.clients.pop(client_id, None)
                        logger.debug(f"Cleaned up stale client {client_id}")
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in cleanup loop: {e}")
# Global instance
# WebSocketManager.__new__ always returns the same object, so any other
# WebSocketManager() call yields this very instance.
ws_manager = WebSocketManager()

60
docker-compose.yml Normal file
View file

@ -0,0 +1,60 @@
# Nginx reverse proxy config (add to your nginx site config):
#
# location /ac-helper/ {
# proxy_pass http://localhost:8000/;
# proxy_http_version 1.1;
# proxy_set_header Upgrade $http_upgrade;
# proxy_set_header Connection "upgrade";
# proxy_set_header Host $host;
# proxy_set_header X-Real-IP $remote_addr;
# }
#
# This strips /ac-helper/ prefix before forwarding to the container.
# The frontend uses /ac-helper/api and /ac-helper/ws which the proxy forwards
# as /api and /ws to the backend.
version: '3.9'
services:
app:
build: .
container_name: ac-tool
restart: unless-stopped
ports:
- "8000:8000"
volumes:
- ./data:/app/data
environment:
# Auth
AZURE_TENANT_ID: ${AZURE_TENANT_ID:-e519c2e6-bc6d-4fdf-8d9c-923c2f002385}
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-9079054c-9620-4757-a256-23413042f1ef}
AZURE_REDIRECT_URI: ${AZURE_REDIRECT_URI:-https://ai-sandbox.oliver.solutions/ac-helper/}
# Dev mode (set to false in production)
DEV_MODE: ${DEV_MODE:-false}
DEV_USER_ID: ${DEV_USER_ID:-dev-user-001}
DEV_USER_ROLE: ${DEV_USER_ROLE:-admin}
# Admin bootstrap
ADMIN_EMAIL: ${ADMIN_EMAIL:-daveporter@oliver.agency}
# AI providers
GEMINI_API_KEY: ${GEMINI_API_KEY}
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
LLAMA_CLOUD_API_KEY: ${LLAMA_CLOUD_API_KEY:-}
# Paths
DATA_DIR: /app/data
UPLOADS_DIR: /app/data/uploads
OUTPUTS_DIR: /app/data/outputs
SHEETS_DIR: /app/data/sheets
USERS_FILE: /app/data/users.json
DROPDOWNS_FILE: /app/data/dropdowns.json
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 20s

24
frontend/.gitignore vendored Normal file
View file

@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

73
frontend/README.md Normal file
View file

@ -0,0 +1,73 @@
# React + TypeScript + Vite
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
Currently, two official plugins are available:
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Oxc](https://oxc.rs)
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/)
## React Compiler
The React Compiler is not enabled in this template because of its impact on dev and build performance. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
## Expanding the ESLint configuration
If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
```js
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
// Other configs...
// Remove tseslint.configs.recommended and replace with this
tseslint.configs.recommendedTypeChecked,
// Alternatively, use this for stricter rules
tseslint.configs.strictTypeChecked,
// Optionally, add this for stylistic rules
tseslint.configs.stylisticTypeChecked,
// Other configs...
],
languageOptions: {
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
// other options...
},
},
])
```
You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
```js
// eslint.config.js
import reactX from 'eslint-plugin-react-x'
import reactDom from 'eslint-plugin-react-dom'
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
// Other configs...
// Enable lint rules for React
reactX.configs['recommended-typescript'],
// Enable lint rules for React DOM
reactDom.configs.recommended,
],
languageOptions: {
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
// other options...
},
},
])
```

23
frontend/eslint.config.js Normal file
View file

@ -0,0 +1,23 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
import { defineConfig, globalIgnores } from 'eslint/config'
// Files the TypeScript/React rule sets below apply to.
const typescriptSources = ['**/*.{ts,tsx}']

// Shared rule sets, applied in this order.
const sharedRuleSets = [
  js.configs.recommended,
  tseslint.configs.recommended,
  reactHooks.configs.flat.recommended,
  reactRefresh.configs.vite,
]

// Flat ESLint config: ignore the build output, then lint all TS/TSX sources
// against browser globals.
export default defineConfig([
  globalIgnores(['dist']),
  {
    files: typescriptSources,
    extends: sharedRuleSets,
    languageOptions: {
      ecmaVersion: 2020,
      globals: globals.browser,
    },
  },
])

13
frontend/index.html Normal file
View file

@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/ac-helper/favicon.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AC Tool — Oliver Agency</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

3925
frontend/package-lock.json generated Normal file

File diff suppressed because it is too large Load diff

44
frontend/package.json Normal file
View file

@ -0,0 +1,44 @@
{
"name": "frontend",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"@azure/msal-browser": "^4.30.0",
"@azure/msal-react": "^3.0.29",
"@handsontable/react": "^16.2.0",
"@tailwindcss/vite": "^4.2.2",
"@types/react-router-dom": "^5.3.3",
"autoprefixer": "^10.4.27",
"axios": "^1.13.6",
"handsontable": "^17.0.0",
"postcss": "^8.5.8",
"react": "^19.2.4",
"react-dom": "^19.2.4",
"react-dropzone": "^15.0.0",
"react-hot-toast": "^2.6.0",
"react-router-dom": "^7.13.1",
"tailwindcss": "^4.2.2",
"zustand": "^5.0.12"
},
"devDependencies": {
"@eslint/js": "^9.39.4",
"@types/node": "^24.12.0",
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"@vitejs/plugin-react": "^6.0.1",
"eslint": "^9.39.4",
"eslint-plugin-react-hooks": "^7.0.1",
"eslint-plugin-react-refresh": "^0.5.2",
"globals": "^17.4.0",
"typescript": "~5.9.3",
"typescript-eslint": "^8.57.0",
"vite": "^8.0.1"
}
}

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 9.3 KiB

24
frontend/public/icons.svg Normal file
View file

@ -0,0 +1,24 @@
<svg xmlns="http://www.w3.org/2000/svg">
<symbol id="bluesky-icon" viewBox="0 0 16 17">
<g clip-path="url(#bluesky-clip)"><path fill="#08060d" d="M7.75 7.735c-.693-1.348-2.58-3.86-4.334-5.097-1.68-1.187-2.32-.981-2.74-.79C.188 2.065.1 2.812.1 3.251s.241 3.602.398 4.13c.52 1.744 2.367 2.333 4.07 2.145-2.495.37-4.71 1.278-1.805 4.512 3.196 3.309 4.38-.71 4.987-2.746.608 2.036 1.307 5.91 4.93 2.746 2.72-2.746.747-4.143-1.747-4.512 1.702.189 3.55-.4 4.07-2.145.156-.528.397-3.691.397-4.13s-.088-1.186-.575-1.406c-.42-.19-1.06-.395-2.741.79-1.755 1.24-3.64 3.752-4.334 5.099"/></g>
<defs><clipPath id="bluesky-clip"><path fill="#fff" d="M.1.85h15.3v15.3H.1z"/></clipPath></defs>
</symbol>
<symbol id="discord-icon" viewBox="0 0 20 19">
<path fill="#08060d" d="M16.224 3.768a14.5 14.5 0 0 0-3.67-1.153c-.158.286-.343.67-.47.976a13.5 13.5 0 0 0-4.067 0c-.128-.306-.317-.69-.476-.976A14.4 14.4 0 0 0 3.868 3.77C1.546 7.28.916 10.703 1.231 14.077a14.7 14.7 0 0 0 4.5 2.306q.545-.748.965-1.587a9.5 9.5 0 0 1-1.518-.74q.191-.14.372-.293c2.927 1.369 6.107 1.369 8.999 0q.183.152.372.294-.723.437-1.52.74.418.838.963 1.588a14.6 14.6 0 0 0 4.504-2.308c.37-3.911-.63-7.302-2.644-10.309m-9.13 8.234c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.894 0 1.614.82 1.599 1.82.001 1-.705 1.82-1.6 1.82m5.91 0c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.893 0 1.614.82 1.599 1.82 0 1-.706 1.82-1.6 1.82"/>
</symbol>
<symbol id="documentation-icon" viewBox="0 0 21 20">
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="m15.5 13.333 1.533 1.322c.645.555.967.833.967 1.178s-.322.623-.967 1.179L15.5 18.333m-3.333-5-1.534 1.322c-.644.555-.966.833-.966 1.178s.322.623.966 1.179l1.534 1.321"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M17.167 10.836v-4.32c0-1.41 0-2.117-.224-2.68-.359-.906-1.118-1.621-2.08-1.96-.599-.21-1.349-.21-2.848-.21-2.623 0-3.935 0-4.983.369-1.684.591-3.013 1.842-3.641 3.428C3 6.449 3 7.684 3 10.154v2.122c0 2.558 0 3.838.706 4.726q.306.383.713.671c.76.536 1.79.64 3.581.66"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M3 10a2.78 2.78 0 0 1 2.778-2.778c.555 0 1.209.097 1.748-.047.48-.129.854-.503.982-.982.145-.54.048-1.194.048-1.749a2.78 2.78 0 0 1 2.777-2.777"/>
</symbol>
<symbol id="github-icon" viewBox="0 0 19 19">
<path fill="#08060d" fill-rule="evenodd" d="M9.356 1.85C5.05 1.85 1.57 5.356 1.57 9.694a7.84 7.84 0 0 0 5.324 7.44c.387.079.528-.168.528-.376 0-.182-.013-.805-.013-1.454-2.165.467-2.616-.935-2.616-.935-.349-.91-.864-1.143-.864-1.143-.71-.48.051-.48.051-.48.787.051 1.2.805 1.2.805.695 1.194 1.817.857 2.268.649.064-.507.27-.857.49-1.052-1.728-.182-3.545-.857-3.545-3.87 0-.857.31-1.558.8-2.104-.078-.195-.349-1 .077-2.078 0 0 .657-.208 2.14.805a7.5 7.5 0 0 1 1.946-.26c.657 0 1.328.092 1.946.26 1.483-1.013 2.14-.805 2.14-.805.426 1.078.155 1.883.078 2.078.502.546.799 1.247.799 2.104 0 3.013-1.818 3.675-3.558 3.87.284.247.528.714.528 1.454 0 1.052-.012 1.896-.012 2.156 0 .208.142.455.528.377a7.84 7.84 0 0 0 5.324-7.441c.013-4.338-3.48-7.844-7.773-7.844" clip-rule="evenodd"/>
</symbol>
<symbol id="social-icon" viewBox="0 0 20 20">
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M12.5 6.667a4.167 4.167 0 1 0-8.334 0 4.167 4.167 0 0 0 8.334 0"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M2.5 16.667a5.833 5.833 0 0 1 8.75-5.053m3.837.474.513 1.035c.07.144.257.282.414.309l.93.155c.596.1.736.536.307.965l-.723.73a.64.64 0 0 0-.152.531l.207.903c.164.715-.213.991-.84.618l-.872-.52a.63.63 0 0 0-.577 0l-.872.52c-.624.373-1.003.094-.84-.618l.207-.903a.64.64 0 0 0-.152-.532l-.723-.729c-.426-.43-.289-.864.306-.964l.93-.156a.64.64 0 0 0 .412-.31l.513-1.034c.28-.562.735-.562 1.012 0"/>
</symbol>
<symbol id="x-icon" viewBox="0 0 19 19">
<path fill="#08060d" fill-rule="evenodd" d="M1.893 1.98c.052.072 1.245 1.769 2.653 3.77l2.892 4.114c.183.261.333.48.333.486s-.068.089-.152.183l-.522.593-.765.867-3.597 4.087c-.375.426-.734.834-.798.905a1 1 0 0 0-.118.148c0 .01.236.017.664.017h.663l.729-.83c.4-.457.796-.906.879-.999a692 692 0 0 0 1.794-2.038c.034-.037.301-.34.594-.675l.551-.624.345-.392a7 7 0 0 1 .34-.374c.006 0 .93 1.306 2.052 2.903l2.084 2.965.045.063h2.275c1.87 0 2.273-.003 2.266-.021-.008-.02-1.098-1.572-3.894-5.547-2.013-2.862-2.28-3.246-2.273-3.266.008-.019.282-.332 2.085-2.38l2-2.274 1.567-1.782c.022-.028-.016-.03-.65-.03h-.674l-.3.342a871 871 0 0 1-1.782 2.025c-.067.075-.405.458-.75.852a100 100 0 0 1-.803.91c-.148.172-.299.344-.99 1.127-.304.343-.32.358-.345.327-.015-.019-.904-1.282-1.976-2.808L6.365 1.85H1.8zm1.782.91 8.078 11.294c.772 1.08 1.413 1.973 1.425 1.984.016.017.241.02 1.05.017l1.03-.004-2.694-3.766L7.796 5.75 5.722 2.852l-1.039-.004-1.039-.004z" clip-rule="evenodd"/>
</symbol>
</svg>

After

Width:  |  Height:  |  Size: 4.9 KiB

184
frontend/src/App.css Normal file
View file

@ -0,0 +1,184 @@
/* .counter: accent-coloured text on an accent background with rounded corners.
   The border is transparent by default so that showing it on hover does not shift layout. */
.counter {
font-size: 16px;
padding: 5px 10px;
border-radius: 5px;
color: var(--accent);
background: var(--accent-bg);
border: 2px solid transparent;
transition: border-color 0.3s;
margin-bottom: 24px;
/* Hover: fade the border in (animated by the transition above). */
&:hover {
border-color: var(--accent-border);
}
/* Keyboard focus: draw an offset outline ring. */
&:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
}
/* .hero: positioning context for three layered children (.base, .framework, .vite). */
.hero {
position: relative;
/* All three layers are horizontally centred. */
.base,
.framework,
.vite {
inset-inline: 0;
margin: 0 auto;
}
/* .base stays in normal flow and defines the stack's width. */
.base {
width: 170px;
position: relative;
z-index: 0;
}
/* The other two layers are taken out of flow and placed over .base. */
.framework,
.vite {
position: absolute;
}
/* Upper overlay: drawn above .base (z-index 1), rotated in 3D and scaled up. */
.framework {
z-index: 1;
top: 34px;
height: 28px;
transform: perspective(2000px) rotateZ(300deg) rotateX(44deg) rotateY(39deg)
scale(1.4);
}
/* Lower overlay: same stacking level as .base, rotated in 3D and scaled down. */
.vite {
z-index: 0;
top: 107px;
height: 26px;
width: auto;
transform: perspective(2000px) rotateZ(300deg) rotateX(40deg) rotateY(39deg)
scale(0.8);
}
}
/* #center: vertical flex column whose content is centred on both axes and which
   grows to fill the free space of its flex parent. */
#center {
display: flex;
flex-direction: column;
gap: 25px;
place-content: center;
place-items: center;
flex-grow: 1;
/* Viewports up to 1024px wide: add padding and tighten the gap. */
@media (max-width: 1024px) {
padding: 32px 20px 24px;
gap: 18px;
}
}
/* #next-steps: row of equal-width panels separated from the content above by a top border. */
#next-steps {
display: flex;
border-top: 1px solid var(--border);
text-align: left;
/* Each direct child div is one panel; `flex: 1 1 0` gives all panels equal width. */
& > div {
flex: 1 1 0;
padding: 32px;
@media (max-width: 1024px) {
padding: 24px 20px;
}
}
.icon {
margin-bottom: 16px;
width: 22px;
height: 22px;
}
/* Viewports up to 1024px wide: stack the panels vertically and centre their text. */
@media (max-width: 1024px) {
flex-direction: column;
text-align: center;
}
}
/* #docs: right-hand divider on wide viewports; on viewports up to 1024px the divider
   moves to the bottom edge instead. */
#docs {
border-right: 1px solid var(--border);
@media (max-width: 1024px) {
border-right: none;
border-bottom: 1px solid var(--border);
}
}
/* Lists inside #next-steps: unstyled horizontal row of link "chips". */
#next-steps ul {
list-style: none;
padding: 0;
display: flex;
gap: 8px;
margin: 32px 0 0;
.logo {
height: 18px;
}
/* Each link is a small flex row (icon + label) with a shadow on hover. */
a {
color: var(--text-h);
font-size: 16px;
border-radius: 6px;
background: var(--social-bg);
display: flex;
padding: 6px 12px;
align-items: center;
gap: 8px;
text-decoration: none;
transition: box-shadow 0.3s;
&:hover {
box-shadow: var(--shadow);
}
.button-icon {
height: 18px;
width: 18px;
}
}
/* Viewports up to 1024px wide: wrap into a centred two-column grid of full-width links. */
@media (max-width: 1024px) {
margin-top: 20px;
flex-wrap: wrap;
justify-content: center;
li {
/* Half the row minus the 8px gap, so two items fit per line. */
flex: 1 1 calc(50% - 8px);
}
a {
width: 100%;
justify-content: center;
box-sizing: border-box;
}
}
}
/* #spacer: fixed-height band with a top border; shorter on viewports up to 1024px wide. */
#spacer {
height: 88px;
border-top: 1px solid var(--border);
@media (max-width: 1024px) {
height: 48px;
}
}
/* .ticks: full-width element decorated with a small triangle at each end.
   The triangles are built from zero-content pseudo-elements whose 5px borders are
   transparent except for one coloured side. */
.ticks {
position: relative;
width: 100%;
&::before,
&::after {
content: '';
position: absolute;
top: -4.5px;
border: 5px solid transparent;
}
/* Left end: only the left border is coloured. */
&::before {
left: 0;
border-left-color: var(--border);
}
/* Right end: only the right border is coloured. */
&::after {
right: 0;
border-right-color: var(--border);
}
}

89
frontend/src/App.tsx Normal file
View file

@ -0,0 +1,89 @@
import { useEffect } from 'react'
import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom'
import { useMsal } from '@azure/msal-react'
import { InteractionStatus } from '@azure/msal-browser'
import { Toaster } from 'react-hot-toast'
import { useAuthStore } from './stores/useAuthStore'
import AppShell from './components/layout/AppShell'
import DashboardPage from './pages/DashboardPage'
import SheetPage from './pages/SheetPage'
import BriefUploadPage from './pages/BriefUploadPage'
import BriefReviewPage from './pages/BriefReviewPage'
import AdminUsersPage from './pages/admin/AdminUsersPage'
import AdminDropdownsPage from './pages/admin/AdminDropdownsPage'
import LoginPage from './pages/LoginPage'
/** OIDC scopes requested for both silent token acquisition and the redirect fallback. */
const AUTH_SCOPES = ['openid', 'profile', 'email']

/**
 * Authentication gate around the application.
 *
 * Once MSAL reports no interaction in progress, the effect either:
 *  - calls `fetchMe()` directly when running a dev build or when MSAL has no account, or
 *  - silently acquires a token for the first account, stores its ID token via `setToken`,
 *    and then calls `fetchMe()`.
 * If the silent path throws, the user is sent through `loginRedirect`.
 *
 * Renders a loading placeholder while the store is loading or MSAL is busy, the
 * login page when there is no user, and `children` otherwise.
 *
 * @param children - Content rendered only when a user is present in the auth store.
 */
function AuthGate({ children }: { children: React.ReactNode }) {
  const { instance, inProgress, accounts } = useMsal()
  const { user, loading, fetchMe, setToken } = useAuthStore()

  useEffect(() => {
    // Do nothing while MSAL is still handling a redirect/popup/silent interaction.
    if (inProgress !== InteractionStatus.None) return

    const acquire = async () => {
      // Dev mode: skip MSAL, just call /auth/me directly
      if (import.meta.env.DEV || accounts.length === 0) {
        if (!user && !loading) fetchMe()
        return
      }
      try {
        const result = await instance.acquireTokenSilent({
          account: accounts[0],
          scopes: [...AUTH_SCOPES],
        })
        setToken(result.idToken)
        if (!user && !loading) fetchMe()
      } catch {
        // Any failure on the silent path falls back to an interactive redirect.
        // loginRedirect returns a promise; handle its rejection explicitly so a failed
        // redirect is logged instead of surfacing as an unhandled promise rejection.
        instance.loginRedirect({ scopes: [...AUTH_SCOPES] }).catch((err: unknown) => {
          console.error('Login redirect failed', err)
        })
      }
    }

    // acquire() handles its own errors; `void` marks the call as deliberately un-awaited.
    void acquire()
    // The dependency list is deliberately limited to MSAL state: user/loading are only
    // read as a guard and must not retrigger token acquisition.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [inProgress, accounts.length])

  if (loading || inProgress !== InteractionStatus.None) {
    return (
      <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100vh', background: '#000' }}>
        <div style={{ color: 'var(--text-muted)' }}>Loading</div>
      </div>
    )
  }

  if (!user) {
    return <LoginPage />
  }

  return <>{children}</>
}
/**
 * Route guard for admin-only pages: renders `children` when the current user's role
 * is 'admin', otherwise redirects (with history replacement) to "/".
 */
function AdminRoute({ children }: { children: React.ReactNode }) {
  const { user } = useAuthStore()
  const isAdmin = user?.role === 'admin'
  return isAdmin ? <>{children}</> : <Navigate to="/" replace />
}
/**
 * Application root: router (mounted under "/ac-helper/"), toast container, auth gate,
 * layout shell and the route table. Unknown paths redirect to "/".
 */
export default function App() {
  // Shared appearance for every toast.
  const toastOptions = {
    style: { background: '#1a1a1a', color: '#fff', border: '1px solid #2a2a2a' },
  }
  // Wraps a page element in the admin-only guard.
  const adminOnly = (page: React.ReactNode) => <AdminRoute>{page}</AdminRoute>

  return (
    <BrowserRouter basename="/ac-helper/">
      <Toaster position="bottom-right" toastOptions={toastOptions} />
      <AuthGate>
        <AppShell>
          <Routes>
            <Route path="/" element={<DashboardPage />} />
            <Route path="/sheet/:sheetId" element={<SheetPage />} />
            <Route path="/brief/upload" element={<BriefUploadPage />} />
            <Route path="/brief/review/:jobId" element={<BriefReviewPage />} />
            <Route path="/admin/users" element={adminOnly(<AdminUsersPage />)} />
            <Route path="/admin/dropdowns" element={adminOnly(<AdminDropdownsPage />)} />
            <Route path="*" element={<Navigate to="/" replace />} />
          </Routes>
        </AppShell>
      </AuthGate>
    </BrowserRouter>
  )
}

20
frontend/src/api/admin.ts Normal file
View file

@ -0,0 +1,20 @@
import api from './client'
import type { User, CategoryData } from '../types'
/** GET /admin/users — resolves to the `users` array of the response body. */
export const listUsers = async () => {
  const { data } = await api.get<{ users: User[] }>('/admin/users')
  return data.users
}
/**
 * PATCH /admin/users/{id} with the given partial change.
 *
 * @param id - Identifier of the user to modify.
 * @param patch - Fields to change; `role` and `active` are both optional.
 * @returns The `user` object from the response body.
 */
export const updateUser = async (id: string, patch: { role?: User['role']; active?: boolean }) => {
  const response = await api.patch<{ success: boolean; user: User }>(`/admin/users/${id}`, patch)
  return response.data.user
}
/**
 * POST /admin/dropdowns/upload with the file sent as multipart field "file".
 *
 * @returns The response body (`success`, `total`, `active`).
 */
export const uploadDropdowns = async (file: File) => {
  const payload = new FormData()
  payload.append('file', file)
  const response = await api.post<{ success: boolean; total: number; active: number }>(
    '/admin/dropdowns/upload',
    payload,
  )
  return response.data
}
/**
 * POST /admin/dropdowns/preview with the file sent as multipart field "file".
 *
 * @returns The `categories` array from the response body.
 */
export const previewDropdowns = async (file: File) => {
  const payload = new FormData()
  payload.append('file', file)
  const response = await api.post<{ categories: CategoryData[] }>('/admin/dropdowns/preview', payload)
  return response.data.categories
}

14
frontend/src/api/ai.ts Normal file
View file

@ -0,0 +1,14 @@
import api from './client'
import type { Deliverable } from '../types'
/**
 * Response body of POST /sheets/{sheetId}/command (see `sendCommand`).
 * Only `success` is always present; every other field is optional.
 */
export interface CommandResult {
success: boolean
// One of the four operation kinds the backend can report.
operation?: 'create' | 'update' | 'batch_update' | 'question'
// NOTE(review): presumably the number of rows affected — confirm against the backend.
count?: number
// NOTE(review): presumably set when operation is 'question' — confirm against the backend.
question?: string
data?: Deliverable[]
error?: string
}
/**
 * POST /sheets/{sheetId}/command.
 *
 * @param sheetId - Sheet the command targets.
 * @param command - Command text sent as `command`.
 * @param yoloMode - Sent to the backend as `yolo_mode`.
 * @param history - Sent as `history`.
 * @returns The response body.
 */
export const sendCommand = async (
  sheetId: string,
  command: string,
  yoloMode: boolean,
  history: string,
): Promise<CommandResult> => {
  const payload = { command, yolo_mode: yoloMode, history }
  const response = await api.post<CommandResult>(`/sheets/${sheetId}/command`, payload)
  return response.data
}

View file

@ -0,0 +1,18 @@
import axios from 'axios'
// Shared axios instance. The base URL is derived from Vite's BASE_URL so requests use
// the same path prefix as the app itself, e.g. BASE_URL '/ac-helper/' → '/ac-helper/api'.
const api = axios.create({ baseURL: import.meta.env.BASE_URL + 'api' })

// Request interceptor: when a token is stored under 'ac_access_token' in sessionStorage,
// send it as a Bearer Authorization header; otherwise leave the request untouched.
api.interceptors.request.use(request => {
  const stored = sessionStorage.getItem('ac_access_token')
  if (stored) request.headers.Authorization = `Bearer ${stored}`
  return request
})

export default api

View file

@ -0,0 +1,5 @@
import api from './client'
import type { CategoryData } from '../types'
/**
 * GET /dropdowns/categories.
 *
 * @param activeOnly - Sent as the `active` query parameter (defaults to true).
 * @returns The `categories` array from the response body.
 */
export const getCategories = async (activeOnly = true) => {
  const url = `/dropdowns/categories?active=${activeOnly}`
  const response = await api.get<{ categories: CategoryData[] }>(url)
  return response.data.categories
}

25
frontend/src/api/jobs.ts Normal file
View file

@ -0,0 +1,25 @@
import api from './client'
import type { Job, ModelConfiguration, Deliverable } from '../types'
/**
 * GET /jobs.
 *
 * @param limit - Sent as the `limit` query parameter (defaults to 50).
 * @returns The `jobs` array from the response body.
 */
export const listJobs = async (limit = 50) => {
  const response = await api.get<{ jobs: Job[] }>(`/jobs?limit=${limit}`)
  return response.data.jobs
}
/** GET /jobs/{id} — resolves to the `job` object of the response body. */
export const getJob = async (id: string) => {
  const response = await api.get<{ job: Job }>(`/jobs/${id}`)
  return response.data.job
}
/**
 * POST /jobs with the given files as multipart form data.
 *
 * Files are sent as fields `file_0`, `file_1`, …; when `modelConfig` is provided it is
 * sent JSON-encoded in the `modelConfig` field.
 *
 * No Content-Type header is set by hand: a hand-written `multipart/form-data` value
 * carries no boundary parameter, so the header is left to the HTTP stack, which derives
 * it (boundary included) from the FormData body. This matches the other FormData uploads
 * in this codebase (admin dropdown upload/preview).
 *
 * @param files - Files to upload, in order.
 * @param modelConfig - Optional model configuration.
 * @returns The `jobs` array from the response body.
 */
export const createJob = (files: File[], modelConfig?: ModelConfiguration) => {
  const form = new FormData()
  files.forEach((f, i) => form.append(`file_${i}`, f))
  if (modelConfig) form.append('modelConfig', JSON.stringify(modelConfig))
  return api.post<{ jobs: Job[] }>('/jobs', form).then(r => r.data.jobs)
}
/** DELETE /jobs/{id} — resolves to the raw axios response. */
export const deleteJob = (id: string) => {
  const url = `/jobs/${id}`
  return api.delete(url)
}
/** GET /jobs/{id}/deliverables — resolves to the response body (`deliverables`, `count`). */
export const getJobDeliverables = async (id: string) => {
  const response = await api.get<{ deliverables: Deliverable[]; count: number }>(`/jobs/${id}/deliverables`)
  return response.data
}
/** GET /jobs/stats — resolves to the `stats` field of the (untyped) response body. */
export const getJobStats = async () => {
  const response = await api.get('/jobs/stats')
  return response.data.stats
}

View file

@ -0,0 +1,30 @@
import api from './client'
import type { SheetMeta, Deliverable } from '../types'
/** GET /sheets — resolves to the `sheets` array of the response body. */
export const listSheets = async () => {
  const response = await api.get<{ sheets: SheetMeta[] }>('/sheets')
  return response.data.sheets
}
/**
 * POST /sheets.
 *
 * @param name - Name of the new sheet.
 * @param data - Initial rows (defaults to an empty list).
 * @returns The `sheet` metadata object from the response body.
 */
export const createSheet = async (name: string, data: Deliverable[] = []) => {
  const response = await api.post<{ sheet: SheetMeta }>('/sheets', { name, data })
  return response.data.sheet
}
/** GET /sheets/{id} — resolves to the `data` array (the sheet's rows) of the response body. */
export const loadSheet = async (id: string) => {
  const response = await api.get<{ data: Deliverable[] }>(`/sheets/${id}`)
  return response.data.data
}
/** PUT /sheets/{id} with body `{ data }` — resolves to the raw axios response. */
export const updateSheet = (id: string, data: Deliverable[]) => {
  const body = { data }
  return api.put(`/sheets/${id}`, body)
}
/** DELETE /sheets/{id} — resolves to the raw axios response. */
export const deleteSheet = (id: string) => {
  const url = `/sheets/${id}`
  return api.delete(url)
}
/** PATCH /sheets/{id} with body `{ name }` — resolves to the raw axios response. */
export const renameSheet = (id: string, name: string) => {
  const body = { name }
  return api.patch(`/sheets/${id}`, body)
}
/** POST /sheets/{id}/duplicate — resolves to the `sheet` metadata object of the response body. */
export const duplicateSheet = async (id: string) => {
  const response = await api.post<{ sheet: SheetMeta }>(`/sheets/${id}/duplicate`)
  return response.data.sheet
}
/**
 * POST /sheets/{sheetId}/import.
 *
 * @param sheetId - Target sheet.
 * @param deliverables - Rows to import.
 * @param mode - 'append' (default) or 'replace'; sent to the backend as `mode`.
 * @returns The response body.
 */
export const importDeliverables = async (
  sheetId: string,
  deliverables: Deliverable[],
  mode: 'append' | 'replace' = 'append',
) => {
  const response = await api.post(`/sheets/${sheetId}/import`, { deliverables, mode })
  return response.data
}
/**
 * Opens the sheet's export endpoint (`api/sheets/{id}/export` under Vite's BASE_URL)
 * in a new browser tab.
 *
 * A new tab cannot carry the Authorization header the axios client adds, so the token
 * stored in sessionStorage under 'ac_access_token' (if any) is passed as the `_token`
 * query parameter instead.
 *
 * Both the sheet id and the token are percent-encoded so that characters with special
 * meaning in a URL (e.g. '&', '#', '+', '/') cannot truncate or corrupt the request.
 *
 * NOTE(review): a token in the query string can end up in browser history and server
 * or proxy logs; consider a short-lived, single-use export token if that matters here.
 *
 * @param id - Identifier of the sheet to export.
 */
export const exportSheet = (id: string) => {
  const token = sessionStorage.getItem('ac_access_token')
  const query = token ? `?_token=${encodeURIComponent(token)}` : ''
  const url = `${import.meta.env.BASE_URL}api/sheets/${encodeURIComponent(id)}/export${query}`
  window.open(url, '_blank')
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 
13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 
44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>

After

Width:  |  Height:  |  Size: 4 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 8.5 KiB

View file

@ -0,0 +1,52 @@
import { useCallback } from 'react'
import { useDropzone } from 'react-dropzone'
/** Props for FileDropzone. */
interface Props {
// Called with the accepted files of a drop/selection; never called with an empty list.
onFiles: (files: File[]) => void
// While true the dropzone and its button are disabled and the area is dimmed.
loading: boolean
}
// MIME type → file extensions map passed as react-dropzone's `accept` option:
// PDF, PowerPoint (.pptx), Word (.docx) and Excel (.xlsx).
const ACCEPTED = {
'application/pdf': ['.pdf'],
'application/vnd.openxmlformats-officedocument.presentationml.presentation': ['.pptx'],
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
}
/**
 * Drag-and-drop upload area for brief documents.
 *
 * Accepts the types listed in ACCEPTED up to 200 MB each and forwards non-empty
 * batches of accepted files to `onFiles`. While `loading` is true the dropzone is
 * disabled and rendered at half opacity.
 */
export default function FileDropzone({ onFiles, loading }: Props) {
  // Forward only non-empty batches of accepted files.
  const onDrop = useCallback(
    (accepted: File[]) => {
      if (accepted.length) onFiles(accepted)
    },
    [onFiles],
  )

  const { getRootProps, getInputProps, isDragActive } = useDropzone({
    onDrop,
    accept: ACCEPTED,
    disabled: loading,
    maxSize: 200 * 1024 * 1024,
  })

  // Highlight the border and background while a drag hovers over the area.
  const borderColor = isDragActive ? 'var(--accent)' : 'var(--border)'
  const containerStyle = {
    border: `2px dashed ${borderColor}`,
    background: isDragActive ? 'rgba(255,196,7,0.05)' : 'var(--bg-card)',
    opacity: loading ? 0.5 : 1,
  }
  const headline = isDragActive ? 'Drop brief here' : 'Drag & drop your brief'

  return (
    <div
      {...getRootProps()}
      className="rounded-xl p-12 text-center cursor-pointer transition-colors"
      style={containerStyle}
    >
      <input {...getInputProps()} />
      <div className="text-4xl mb-3">📄</div>
      <p className="font-medium mb-1" style={{ color: 'var(--text-primary)' }}>
        {headline}
      </p>
      <p className="text-sm" style={{ color: 'var(--text-muted)' }}>
        PDF, PPTX, DOCX, XLSX up to 200 MB
      </p>
      <button
        className="mt-4 px-4 py-2 rounded text-sm font-medium"
        style={{ background: 'var(--accent)', color: '#000' }}
        disabled={loading}
      >
        Browse files
      </button>
    </div>
  )
}

View file

@ -0,0 +1,103 @@
import type { Job } from '../../types'
import { useNavigate } from 'react-router-dom'
// Display labels for job phase codes. Phases missing from this map are shown
// as their raw code by JobProgressCard.
const PHASE_LABELS: Record<string, string> = {
QUEUED: 'Queued',
EXTRACT_CONTENT: 'Extracting Content',
LLM_ANALYSIS: 'AI Analysis',
CONSOLIDATION: 'Consolidating',
CSV_GENERATION: 'Generating CSV',
COMPLETED: 'Completed',
FAILED: 'Failed',
}
// Props for JobProgressCard: the job to display and an optional delete callback
// (the delete button is rendered only when `onDelete` is provided).
interface Props { job: Job; onDelete?: (id: string) => void }
/**
 * Card showing one extraction job: file name and size, phase badge, progress bar,
 * per-provider status chips, and — depending on the phase — a "Review Results" action,
 * the error message, or a summary line.
 *
 * @param job - Job to display.
 * @param onDelete - Optional callback; when provided a delete button is rendered and
 *   invokes it with the job id.
 */
export default function JobProgressCard({ job, onDelete }: Props) {
  const navigate = useNavigate()
  const isDone = job.phase === 'COMPLETED'
  const isFailed = job.phase === 'FAILED'

  // Clamp to [0, 100] and fall back to 0 for missing/non-numeric values so the bar can
  // never receive an invalid CSS width such as "NaN%" or overflow its track.
  const rawPct = Number(job.progress_pct)
  const progressPct = Number.isFinite(rawPct) ? Math.min(100, Math.max(0, rawPct)) : 0

  // Computed once and reused for both the emptiness check and the rendering.
  const providerUpdates = job.provider_updates ? Object.entries(job.provider_updates) : []

  // Build the summary from the parts that are actually present. Rendering optional
  // values inline would otherwise leave a dangling "$" or "s" when one is missing.
  const summaryParts: string[] = []
  if (job.summary) {
    const { assets_extracted, cost_usd_total, processing_time_seconds } = job.summary
    summaryParts.push(`${assets_extracted ?? 0} assets`)
    if (cost_usd_total != null) summaryParts.push(`$${cost_usd_total.toFixed(4)}`)
    if (processing_time_seconds != null) summaryParts.push(`${processing_time_seconds.toFixed(1)}s`)
  }

  return (
    <div className="rounded-lg p-4" style={{ background: 'var(--bg-card)', border: '1px solid var(--border)' }}>
      <div className="flex items-start justify-between mb-2">
        <div>
          <div className="font-medium text-sm truncate max-w-xs" style={{ color: 'var(--text-primary)' }}>
            {job.file_name}
          </div>
          <div className="text-xs mt-0.5" style={{ color: 'var(--text-muted)' }}>
            {(job.file_size / 1024 / 1024).toFixed(1)} MB
          </div>
        </div>
        <div className="flex items-center gap-2">
          <span
            className="text-xs px-2 py-1 rounded font-medium"
            style={{
              background: isDone ? 'rgba(34,197,94,0.15)' : isFailed ? 'rgba(239,68,68,0.15)' : 'rgba(255,196,7,0.15)',
              color: isDone ? 'var(--success)' : isFailed ? 'var(--danger)' : 'var(--accent)',
            }}
          >
            {PHASE_LABELS[job.phase] || job.phase}
          </span>
          {onDelete && (
            // The button has no text content, so give it an accessible name and tooltip.
            <button
              onClick={() => onDelete(job.id)}
              className="text-xs"
              style={{ color: 'var(--text-muted)' }}
              aria-label="Delete job"
              title="Delete job"
            ></button>
          )}
        </div>
      </div>
      {/* Progress bar */}
      {!isFailed && (
        <div className="h-1 rounded-full mb-2" style={{ background: 'var(--border)' }}>
          <div
            className="h-full rounded-full transition-all duration-500"
            style={{ width: `${progressPct}%`, background: isDone ? 'var(--success)' : 'var(--accent)' }}
          />
        </div>
      )}
      {job.step_label && (
        <div className="text-xs mb-2" style={{ color: 'var(--text-muted)' }}>{job.step_label}</div>
      )}
      {/* Provider updates */}
      {providerUpdates.length > 0 && (
        <div className="flex gap-2 flex-wrap mb-2">
          {providerUpdates.map(([key, pu]) => (
            <span key={key} className="text-xs px-2 py-0.5 rounded" style={{
              background: pu.status === 'success' ? 'rgba(34,197,94,0.1)' : pu.status === 'error' ? 'rgba(239,68,68,0.1)' : 'rgba(255,255,255,0.05)',
              color: pu.status === 'success' ? 'var(--success)' : pu.status === 'error' ? 'var(--danger)' : 'var(--text-muted)',
              border: '1px solid var(--border)',
            }}>
              {pu.provider} {pu.status === 'success' ? '✓' : pu.status === 'error' ? '✗' : '…'}
            </span>
          ))}
        </div>
      )}
      {/* Action buttons */}
      {isDone && (
        <div className="flex gap-2 mt-2">
          <button
            onClick={() => navigate(`/brief/review/${job.id}`)}
            className="px-3 py-1.5 rounded text-xs font-medium"
            style={{ background: 'var(--accent)', color: '#000' }}
          >
            Review Results
          </button>
        </div>
      )}
      {isFailed && job.error && (
        <div className="text-xs mt-1 p-2 rounded" style={{ background: 'rgba(239,68,68,0.1)', color: 'var(--danger)' }}>
          {job.error}
        </div>
      )}
      {isDone && job.summary && (
        <div className="text-xs mt-2" style={{ color: 'var(--text-muted)' }}>
          {summaryParts.join(' · ')}
        </div>
      )}
    </div>
  )
}

View file

@ -0,0 +1,25 @@
import React, { useEffect } from 'react'
import Sidebar from './Sidebar'
import TopBar from './TopBar'
import { useSheetStore } from '../../stores/useSheetStore'
/** Props for AppShell. */
interface Props {
// Page content rendered inside the scrollable <main> region.
children: React.ReactNode
}
/**
 * Application chrome: sidebar on the left, top bar above a scrollable main area
 * that hosts `children`. Triggers a sheet-list fetch once when mounted.
 */
export default function AppShell({ children }: Props) {
  const loadSheetList = useSheetStore(state => state.fetchSheets)

  // Empty dependency list: fetch only on mount.
  useEffect(() => {
    loadSheetList()
  }, [])

  const content = (
    <div className="flex-1 flex flex-col overflow-hidden">
      <TopBar />
      <main className="flex-1 overflow-auto p-4">{children}</main>
    </div>
  )

  return (
    <div className="flex h-screen overflow-hidden" style={{ background: 'var(--bg-color)' }}>
      <Sidebar />
      {content}
    </div>
  )
}

View file

@ -0,0 +1,163 @@
import { useState } from 'react'
import { useNavigate, useLocation } from 'react-router-dom'
import { useSheetStore } from '../../stores/useSheetStore'
import { useAuthStore } from '../../stores/useAuthStore'
/**
 * Left-hand navigation panel: logo, "Upload Brief" button, the list of sheets
 * (select, inline rename, right-click context menu with Rename / Duplicate / Delete)
 * and, for admin users, a link to the admin area.
 */
export default function Sidebar() {
const navigate = useNavigate()
const location = useLocation()
const { sheets, activeSheetId, loadSheet, createSheet, renameSheet, deleteSheet, duplicateSheet } = useSheetStore()
const user = useAuthStore(s => s.user)
// Id of the sheet currently being renamed inline, or null when not renaming.
const [renamingId, setRenamingId] = useState<string | null>(null)
// Current text of the inline rename input.
const [renameValue, setRenameValue] = useState('')
// Target sheet id and viewport coordinates of the open context menu, or null when closed.
const [contextMenu, setContextMenu] = useState<{ id: string; x: number; y: number } | null>(null)
// Create a sheet named after today's locale-formatted date, then navigate to it
// using the value the store's createSheet resolves to as the sheet id.
const handleNewSheet = async () => {
const name = `Sheet ${new Date().toLocaleDateString()}`
const id = await createSheet(name)
navigate(`/sheet/${id}`)
}
// Load the sheet through the store first, then navigate to its editor route.
const handleSelect = async (id: string) => {
await loadSheet(id)
navigate(`/sheet/${id}`)
}
// Suppress the browser's own context menu and open ours at the cursor position.
const handleContextMenu = (e: React.MouseEvent, id: string) => {
e.preventDefault()
setContextMenu({ id, x: e.clientX, y: e.clientY })
}
// Enter inline-rename mode for a sheet, seeding the input with its current name,
// and close the context menu.
const handleRename = (id: string, current: string) => {
setRenamingId(id)
setRenameValue(current)
setContextMenu(null)
}
// Persist the trimmed name when it is non-blank; always leave rename mode afterwards.
const commitRename = async (id: string) => {
if (renameValue.trim()) await renameSheet(id, renameValue.trim())
setRenamingId(null)
}
return (
<div
className="flex flex-col w-64 flex-shrink-0 border-r overflow-hidden"
style={{ background: 'var(--bg-sidebar)', borderColor: 'var(--border)' }}
onClick={() => setContextMenu(null)}
>
{/* Logo */}
<div className="p-4 border-b" style={{ borderColor: 'var(--border)' }}>
<div className="flex items-center gap-2">
<div className="w-7 h-7 rounded flex items-center justify-center font-bold text-xs"
style={{ background: 'var(--accent)', color: '#000' }}>AC</div>
<span className="font-semibold text-sm" style={{ color: 'var(--text-primary)' }}>AC Tool</span>
</div>
</div>
{/* Upload Brief */}
<div className="p-3 border-b" style={{ borderColor: 'var(--border)' }}>
<button
onClick={() => navigate('/brief/upload')}
className="w-full py-2 px-3 rounded text-sm font-medium transition-colors"
style={{ background: 'var(--accent)', color: '#000' }}
>
+ Upload Brief
</button>
</div>
{/* Sheets list */}
<div className="flex-1 overflow-y-auto p-2">
<div className="flex items-center justify-between px-2 py-1 mb-1">
<span className="text-xs font-semibold uppercase tracking-wider" style={{ color: 'var(--text-muted)' }}>
Sheets
</span>
<button
onClick={handleNewSheet}
className="text-lg leading-none hover:opacity-70 transition-opacity"
style={{ color: 'var(--accent)' }}
title="New Sheet"
>+</button>
</div>
{sheets.map(sheet => (
<div
key={sheet.id}
onContextMenu={e => handleContextMenu(e, sheet.id)}
className="group flex items-center gap-2 px-2 py-2 rounded cursor-pointer mb-1 transition-colors"
style={{
background: activeSheetId === sheet.id ? 'rgba(255,196,7,0.12)' : 'transparent',
borderLeft: activeSheetId === sheet.id ? '2px solid var(--accent)' : '2px solid transparent',
}}
onClick={() => handleSelect(sheet.id)}
>
{renamingId === sheet.id ? (
<input
autoFocus
value={renameValue}
onChange={e => setRenameValue(e.target.value)}
onBlur={() => commitRename(sheet.id)}
onKeyDown={e => { if (e.key === 'Enter') commitRename(sheet.id); if (e.key === 'Escape') setRenamingId(null) }}
className="flex-1 bg-transparent text-sm outline-none border-b"
style={{ color: 'var(--text-primary)', borderColor: 'var(--accent)' }}
onClick={e => e.stopPropagation()}
/>
) : (
<>
<span className="text-xs" style={{ color: 'var(--text-muted)' }}>📋</span>
<div className="flex-1 min-w-0">
<div className="text-sm truncate" style={{ color: activeSheetId === sheet.id ? 'var(--accent)' : 'var(--text-primary)' }}>
{sheet.name}
</div>
<div className="text-xs" style={{ color: 'var(--text-muted)' }}>{sheet.itemCount} items</div>
</div>
</>
)}
</div>
))}
{sheets.length === 0 && (
<div className="px-2 py-4 text-center text-xs" style={{ color: 'var(--text-muted)' }}>
No sheets yet.<br />Click + to create one.
</div>
)}
</div>
{/* Admin link */}
{user?.role === 'admin' && (
<div className="p-3 border-t" style={{ borderColor: 'var(--border)' }}>
<button
onClick={() => navigate('/admin/users')}
className="w-full py-2 px-3 rounded text-xs transition-colors text-left"
style={{ color: 'var(--text-secondary)', background: location.pathname.startsWith('/admin') ? 'rgba(255,255,255,0.05)' : 'transparent' }}
>
Admin
</button>
</div>
)}
{/* Context menu */}
{contextMenu && (
<div
className="fixed z-50 rounded shadow-lg py-1"
style={{ top: contextMenu.y, left: contextMenu.x, background: 'var(--bg-card)', border: '1px solid var(--border)', minWidth: 140 }}
onClick={e => e.stopPropagation()}
>
{[
{ label: 'Rename', action: () => { const s = sheets.find(sh => sh.id === contextMenu.id); if (s) handleRename(s.id, s.name) } },
{ label: 'Duplicate', action: async () => { await duplicateSheet(contextMenu.id); setContextMenu(null) } },
{ label: 'Delete', action: async () => { if (confirm('Delete this sheet?')) { await deleteSheet(contextMenu.id); setContextMenu(null) } }, danger: true },
].map(item => (
<button
key={item.label}
onClick={item.action}
className="w-full text-left px-3 py-2 text-sm hover:opacity-70 transition-opacity"
style={{ color: item.danger ? 'var(--danger)' : 'var(--text-primary)' }}
>
{item.label}
</button>
))}
</div>
)}
</div>
)
}

View file

@ -0,0 +1,61 @@
import { useNavigate, useLocation } from 'react-router-dom'
import { useAuthStore } from '../../stores/useAuthStore'
import api from '../../api/client'
/**
 * Header bar: a section title derived from the current path on the left; on the right
 * (when a user is present) the user's name or email, an ADMIN badge for admins, and a
 * "Sign out" button.
 */
export default function TopBar() {
  const user = useAuthStore(s => s.user)
  const logout = useAuthStore(s => s.logout)
  const navigate = useNavigate()
  const location = useLocation()

  // Ask the backend to log out, clear local auth state, then either follow the
  // logout URL the backend returned or navigate to /login. On any error, still clear
  // local state and navigate to /login.
  const handleLogout = async () => {
    try {
      const redirectUri = window.location.origin + '/ac-helper/'
      const res = await api.post('/auth/logout', { redirectUri })
      logout()
      if (res.data.logoutUrl) {
        window.location.href = res.data.logoutUrl
      } else {
        navigate('/login')
      }
    } catch {
      logout()
      navigate('/login')
    }
  }

  // Map the current path to a section title; anything unmatched is the dashboard.
  const { pathname } = location
  let breadcrumb = 'Dashboard'
  if (pathname.startsWith('/sheet/')) {
    breadcrumb = 'Sheet Editor'
  } else if (pathname.startsWith('/brief/review/')) {
    breadcrumb = 'Review Brief'
  } else if (pathname === '/brief/upload') {
    breadcrumb = 'Upload Brief'
  } else if (pathname.startsWith('/admin')) {
    breadcrumb = 'Admin'
  }

  return (
    <header
      className="flex items-center justify-between px-4 py-3 border-b flex-shrink-0"
      style={{ background: 'var(--bg-sidebar)', borderColor: 'var(--border)' }}
    >
      <div className="text-sm font-medium" style={{ color: 'var(--text-secondary)' }}>
        {breadcrumb}
      </div>
      <div className="flex items-center gap-3">
        {user && (
          <>
            <span className="text-xs" style={{ color: 'var(--text-muted)' }}>
              {user.name || user.email}
              {user.role === 'admin' && (
                <span className="ml-1 px-1 py-0.5 rounded text-xs font-bold" style={{ background: 'rgba(255,196,7,0.2)', color: 'var(--accent)' }}>
                  ADMIN
                </span>
              )}
            </span>
            <button
              onClick={handleLogout}
              className="text-xs px-2 py-1 rounded transition-colors"
              style={{ color: 'var(--text-muted)', border: '1px solid var(--border)' }}
            >
              Sign out
            </button>
          </>
        )}
      </div>
    </header>
  )
}

View file

@ -0,0 +1,45 @@
import { useState } from 'react'
/** One line of the AI activity log. */
interface LogEntry {
// Timestamp text, rendered verbatim inside square brackets.
time: string
// Entry category; rendered upper-cased as a tag and used to pick the tag colour.
type: 'command' | 'success' | 'error' | 'question'
// Message body shown after the tag.
text: string
}
interface Props {
entries: LogEntry[]
}
/**
 * Collapsible, terminal-styled panel listing AI activity entries.
 *
 * The header button always shows the entry count and toggles the body;
 * the body (collapsed by default) prints one line per entry as
 * `[time] [TYPE] text`, or a placeholder when there are no entries.
 */
export default function AIActivityLog({ entries }: Props) {
  const [open, setOpen] = useState(false)

  // Label colour per entry type: errors red, questions amber, the rest green.
  const labelColor = (kind: LogEntry['type']) => {
    if (kind === 'error') return '#ff4444'
    if (kind === 'question') return '#ffc407'
    return '#00cc44'
  }

  const toggle = () => setOpen(prev => !prev)

  return (
    <div className="rounded" style={{ border: '1px solid var(--border)' }}>
      <button
        onClick={toggle}
        className="w-full flex items-center justify-between px-3 py-2 text-xs font-medium"
        style={{ color: 'var(--accent)', background: '#0a1a0a' }}
      >
        <span>AI Activity Log ({entries.length})</span>
        <span>{open ? '▲' : '▼'}</span>
      </button>
      {open && (
        <div className="p-3 max-h-48 overflow-y-auto font-mono text-xs" style={{ background: '#050f05', color: '#00cc44' }}>
          {entries.length === 0 && <div style={{ color: '#555' }}>No activity yet.</div>}
          {entries.map((entry, idx) => (
            <div key={idx} className="mb-1">
              <span style={{ color: '#555' }}>[{entry.time}]</span>{' '}
              <span style={{ color: labelColor(entry.type) }}>
                [{entry.type.toUpperCase()}]
              </span>{' '}
              {entry.text}
            </div>
          ))}
        </div>
      )}
    </div>
  )
}
export type { LogEntry }

View file

@ -0,0 +1,46 @@
import { useState } from 'react'
/** Props accepted by the AIQuestionModal component. */
interface Props {
/** Question text shown in the modal body. */
question: string
/** Called with the trimmed, non-empty answer when the user presses Enter or clicks Send. */
onAnswer: (answer: string) => void
/** Called when the user clicks Cancel. */
onDismiss: () => void
}
/**
 * Full-screen modal asking the user to answer a clarification question.
 *
 * The answer is submitted (trimmed) via Enter or the Send button, and only
 * when it contains non-whitespace text; Cancel invokes `onDismiss`.
 */
export default function AIQuestionModal({ question, onAnswer, onDismiss }: Props) {
  const [answer, setAnswer] = useState('')

  // Whitespace-only input counts as empty everywhere below.
  const trimmed = answer.trim()

  const submit = () => {
    if (trimmed) onAnswer(trimmed)
  }

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center" style={{ background: 'rgba(0,0,0,0.7)' }}>
      <div className="rounded-lg p-6 w-full max-w-md shadow-2xl" style={{ background: 'var(--bg-card)', border: '1px solid var(--border)' }}>
        <div className="mb-2 text-xs font-semibold uppercase tracking-wider" style={{ color: 'var(--accent)' }}>
          AI needs clarification
        </div>
        <p className="mb-4 text-sm" style={{ color: 'var(--text-primary)' }}>{question}</p>
        <input
          autoFocus
          value={answer}
          onChange={event => setAnswer(event.target.value)}
          onKeyDown={event => {
            if (event.key === 'Enter') submit()
          }}
          placeholder="Your answer…"
          className="w-full px-3 py-2 rounded text-sm outline-none mb-4"
          style={{ background: '#1a1a1a', color: 'var(--text-primary)', border: '1px solid var(--border)' }}
        />
        <div className="flex gap-2 justify-end">
          <button onClick={onDismiss} className="px-3 py-2 text-sm rounded" style={{ color: 'var(--text-secondary)' }}>
            Cancel
          </button>
          <button
            onClick={submit}
            disabled={!trimmed}
            className="px-4 py-2 text-sm font-medium rounded disabled:opacity-40"
            style={{ background: 'var(--accent)', color: '#000' }}
          >
            Send
          </button>
        </div>
      </div>
    </div>
  )
}

View file

@ -0,0 +1,107 @@
import { useState, useRef } from 'react'
import { useSpeechRecognition } from '../../hooks/useSpeechRecognition'
/** Props accepted by the CommandBar component. */
interface Props {
/** Called with the trimmed command text and the current `yolo` value when the user sends. */
onCommand: (command: string, yolo: boolean) => void
/** While true the text input is disabled and sending is blocked. */
loading: boolean
/** Current state of the YOLO toggle (controlled by the parent). */
yolo: boolean
/** Called with the negated `yolo` value when the toggle is clicked. */
onYoloChange: (val: boolean) => void
}
/**
 * AI command input row: free-text field, optional hold-to-speak microphone
 * button, Send button, YOLO toggle, and a row of quick-starter prompts.
 *
 * @param onCommand    invoked with the trimmed command and current `yolo` flag
 * @param loading      disables the input and blocks sending while true
 * @param yolo         controlled value of the YOLO toggle
 * @param onYoloChange invoked with the negated value when the toggle is clicked
 */
export default function CommandBar({ onCommand, loading, yolo, onYoloChange }: Props) {
  const [input, setInput] = useState('')
  const inputRef = useRef<HTMLInputElement>(null)

  // Recognised speech replaces the current input text.
  const { listening, start, stop, supported } = useSpeechRecognition((text) => {
    setInput(text)
  })

  const handleSend = () => {
    const cmd = input.trim()
    if (!cmd || loading) return
    onCommand(cmd, yolo)
    setInput('')
  }

  // "Hold to speak": if the pointer is dragged off the button before release,
  // the button never receives mouseup, which would leave recognition running.
  // Treat leaving the button while listening as a release.
  const stopIfListening = () => {
    if (listening) stop()
  }

  const quickStarters = [
    'Add 5 social media banners for UK',
    'Add 3 email newsletters for DE, FR, ES',
    'Create 10 OOH Print A4 deliverables',
  ]

  return (
    <div className="space-y-2">
      <div className="flex gap-2 items-center">
        <input
          ref={inputRef}
          value={input}
          onChange={e => setInput(e.target.value)}
          onKeyDown={e => { if (e.key === 'Enter' && !e.shiftKey) handleSend() }}
          placeholder="Type a command… e.g. 'Add 5 social banners for UK'"
          disabled={loading}
          className="flex-1 px-3 py-2 rounded text-sm outline-none"
          style={{
            background: 'var(--bg-card)',
            color: 'var(--text-primary)',
            border: '1px solid var(--border)',
          }}
        />
        {supported && (
          <button
            onMouseDown={start}
            onMouseUp={stop}
            onMouseLeave={stopIfListening}
            className="px-3 py-2 rounded text-sm transition-colors"
            style={{
              background: listening ? 'var(--danger)' : 'var(--bg-card)',
              color: listening ? '#fff' : 'var(--text-secondary)',
              border: '1px solid var(--border)',
            }}
            title="Hold to speak"
          >
            🎤
          </button>
        )}
        <button
          onClick={handleSend}
          disabled={loading || !input.trim()}
          className="px-4 py-2 rounded text-sm font-medium transition-colors disabled:opacity-40"
          style={{ background: 'var(--accent)', color: '#000' }}
        >
          {loading ? '…' : 'Send'}
        </button>
        <label className="flex items-center gap-1 cursor-pointer select-none" title="YOLO mode — AI never asks questions">
          <div
            className="relative w-10 h-5 rounded-full transition-colors"
            style={{ background: yolo ? 'var(--accent)' : 'var(--border)' }}
            onClick={() => onYoloChange(!yolo)}
          >
            <div
              className="absolute top-0.5 w-4 h-4 rounded-full transition-transform"
              style={{
                background: yolo ? '#000' : 'var(--text-muted)',
                transform: yolo ? 'translateX(22px)' : 'translateX(2px)',
              }}
            />
          </div>
          <span className="text-xs" style={{ color: 'var(--text-secondary)' }}>YOLO</span>
        </label>
      </div>
      <div className="flex flex-wrap gap-2">
        {quickStarters.map(qs => (
          <button
            key={qs}
            // Quick starters only prefill the field; the user still has to send.
            onClick={() => { setInput(qs); inputRef.current?.focus() }}
            className="px-2 py-1 rounded text-xs transition-colors hover:opacity-80"
            style={{ background: 'var(--bg-card)', color: 'var(--text-secondary)', border: '1px solid var(--border)' }}
          >
            {qs}
          </button>
        ))}
      </div>
    </div>
  )
}

View file

@ -0,0 +1,48 @@
import { useState, useRef, useCallback } from 'react'
/** Value returned by {@link useSpeechRecognition}. */
interface SpeechRecognitionHook {
  /** Final transcript accumulated during the most recent session. */
  transcript: string
  /** True between `start()` and the recognition session ending. */
  listening: boolean
  /** Begin a new recognition session (no-op when unsupported). */
  start: () => void
  /** Ask the current session to stop. */
  stop: () => void
  /** Whether the browser exposes a SpeechRecognition constructor. */
  supported: boolean
}

/**
 * Thin wrapper around the browser Web Speech API (prefixed or unprefixed).
 *
 * @param onResult called with the full final transcript of the current
 *                 session each time a new final segment arrives
 * @returns transcript/listening state plus `start`/`stop` controls
 */
export function useSpeechRecognition(onResult: (text: string) => void): SpeechRecognitionHook {
  const [transcript, setTranscript] = useState('')
  const [listening, setListening] = useState(false)
  const recognitionRef = useRef<any>(null)
  const supported = 'webkitSpeechRecognition' in window || 'SpeechRecognition' in window

  const start = useCallback(() => {
    if (!supported) return

    // Never leave an earlier session running in the background: detach its
    // handlers (so its late onend cannot flip `listening` off) and abort it.
    const previous = recognitionRef.current
    if (previous) {
      previous.onresult = null
      previous.onend = null
      previous.abort()
    }

    const SpeechRecognition = (window as any).webkitSpeechRecognition || (window as any).SpeechRecognition
    const recognition = new SpeechRecognition()
    recognition.continuous = true
    recognition.interimResults = true
    recognition.lang = 'en-US'
    recognition.onresult = (e: any) => {
      // Only react when this event finalised something new; interim-only
      // updates are ignored.
      let hasNewFinal = false
      for (let i = e.resultIndex; i < e.results.length; i++) {
        if (e.results[i].isFinal) hasNewFinal = true
      }
      if (!hasNewFinal) return

      // In continuous mode `results` holds every segment of the session.
      // Rebuild the transcript from all final segments so a later phrase
      // extends, rather than replaces, what was said before a pause.
      let final = ''
      for (let i = 0; i < e.results.length; i++) {
        if (e.results[i].isFinal) final += e.results[i][0].transcript
      }
      if (final) {
        setTranscript(final)
        onResult(final)
      }
    }
    recognition.onend = () => setListening(false)
    recognitionRef.current = recognition
    recognition.start()
    setListening(true)
  }, [supported, onResult])

  const stop = useCallback(() => {
    recognitionRef.current?.stop()
    setListening(false)
  }, [])

  return { transcript, listening, start, stop, supported }
}

View file

@ -0,0 +1,47 @@
import { useEffect, useRef } from 'react'
import { useJobStore } from '../stores/useJobStore'
import type { Job } from '../types'
/**
 * Opens the job-progress WebSocket for the lifetime of the calling component.
 *
 * - `queue.snapshot` messages trigger a full job refetch.
 * - `job.progress` / `job.completed` / `job.failed` messages carrying a `job`
 *   payload update that job in the store.
 * - A ping is sent every 25 s while the socket is open.
 * - A closed socket is re-opened after 3 s, until the component unmounts.
 */
export function useWebSocket() {
  const wsRef = useRef<WebSocket | null>(null)
  const updateJob = useJobStore(s => s.updateJob)
  const fetchJobs = useJobStore(s => s.fetchJobs)

  useEffect(() => {
    // Build WS URL using the same base path so it's proxied correctly in production
    const base = import.meta.env.BASE_URL.replace(/\/$/, '') // strip trailing slash

    // Set by the cleanup below. Closing the socket fires onclose, which would
    // otherwise schedule a reconnect and keep reconnecting after unmount.
    let disposed = false
    let reconnectTimer: ReturnType<typeof setTimeout> | undefined

    const connect = () => {
      if (disposed) return

      // Read the token on every attempt so a reconnect picks up a refreshed
      // one, and encode it so it cannot break or alter the query string.
      const token = sessionStorage.getItem('ac_access_token')
      const wsUrl = `${location.protocol === 'https:' ? 'wss' : 'ws'}://${location.host}${base}/ws${token ? `?token=${encodeURIComponent(token)}` : ''}`

      const ws = new WebSocket(wsUrl)
      wsRef.current = ws

      ws.onmessage = (e) => {
        try {
          const msg = JSON.parse(e.data)
          if (msg.type === 'queue.snapshot') {
            fetchJobs()
          } else if (msg.type === 'job.progress' || msg.type === 'job.completed' || msg.type === 'job.failed') {
            if (msg.job) updateJob(msg.job as Job)
          }
        } catch { /* ignore malformed frames */ }
      }

      // Keepalive ping
      const ping = setInterval(() => {
        if (ws.readyState === WebSocket.OPEN) ws.send(JSON.stringify({ type: 'ping' }))
      }, 25000)

      ws.onclose = () => {
        clearInterval(ping)
        if (!disposed) reconnectTimer = setTimeout(connect, 3000)
      }
    }

    connect()

    return () => {
      disposed = true
      if (reconnectTimer !== undefined) clearTimeout(reconnectTimer)
      wsRef.current?.close()
    }
  }, [])
}

98
frontend/src/index.css Normal file
View file

@ -0,0 +1,98 @@
@import "tailwindcss";
/* Oliver Agency dark theme */
/* Design tokens: referenced as var(--…) by the rules in this stylesheet. */
:root {
/* Surface backgrounds */
--bg-color: #000000;
--bg-card: #121212;
--bg-sidebar: #0a0a0a;
/* Brand accent and its hover shade */
--accent: #FFC407;
--accent-hover: #e6b000;
/* Text colours, brightest to dimmest */
--text-primary: #ffffff;
--text-secondary: #888888;
--text-muted: #555555;
/* Border colours */
--border: #222222;
--border-light: #333333;
/* Status colours */
--success: #22c55e;
--danger: #ef4444;
--warning: #f59e0b;
--info: #3b82f6;
}
/* Size every element including its padding and border. */
* { box-sizing: border-box; }
/* Page defaults: black background, white text, 14px base font. */
body {
margin: 0;
background: var(--bg-color);
color: var(--text-primary);
font-family: 'Montserrat', 'Inter', system-ui, sans-serif;
font-size: 14px;
line-height: 1.5;
-webkit-font-smoothing: antialiased;
}
/* Mount node: at least one viewport tall, children stacked vertically. */
#root {
min-height: 100vh;
display: flex;
flex-direction: column;
}
/* Scrollbar styling */
/* (::-webkit-scrollbar pseudo-elements only; thin 6px bars in theme colours) */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg-card); }
::-webkit-scrollbar-thumb { background: var(--border-light); border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: #444; }
/* Handsontable v17 dark theme via CSS variables */
/* Force dark color-scheme so light-dark() resolves to dark variant */
.ht-theme-main {
color-scheme: dark;
/* Oliver accent colour instead of blue */
--ht-colors-primary-100: #FFC407;
--ht-colors-primary-200: #FFC407;
--ht-colors-primary-300: #e6b000;
--ht-colors-primary-400: #e6b000;
--ht-colors-primary-500: #cc9d00;
--ht-colors-primary-600: #b38900;
/* Dark palette */
/* NOTE(review): 800 is white while its neighbours are dark greys — presumably the
   slot the theme uses for text; confirm against the Handsontable theme variables. */
--ht-colors-palette-950: #121212;
--ht-colors-palette-900: #1a1a1a;
--ht-colors-palette-800: #ffffff;
--ht-colors-palette-700: #222222;
--ht-colors-palette-600: #333333;
--ht-colors-palette-500: #555555;
--ht-colors-palette-400: #888888;
--ht-colors-palette-300: #aaaaaa;
--ht-colors-palette-200: #cccccc;
--ht-colors-palette-100: #222222;
--ht-colors-palette-50: #1a1a1a;
/* The theme's "white" is remapped to the dark card colour. */
--ht-colors-white: #121212;
/* Font */
font-family: 'Montserrat', 'Inter', system-ui, sans-serif;
font-size: 13px;
}
/* Column headers: uppercase accent */
/* (!important is used throughout to win over the library's own rules) */
.ht-theme-main .ht_clone_top th,
.ht-theme-main .ht_clone_left th,
.ht-theme-main th {
font-size: 11px !important;
font-weight: 600 !important;
text-transform: uppercase !important;
letter-spacing: 0.5px !important;
color: var(--accent) !important;
}
/* Context menu dark */
/* These selectors are not scoped under .ht-theme-main. */
.htContextMenu .wtHolder {
background: #1a1a1a !important;
border: 1px solid var(--border) !important;
}
.htContextMenu td {
color: var(--text-primary) !important;
}
/* Highlighted (current) menu item: translucent accent background. */
.htContextMenu td.current {
background: rgba(255,196,7,0.15) !important;
}

25
frontend/src/main.tsx Normal file
View file

@ -0,0 +1,25 @@
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import { MsalProvider } from '@azure/msal-react'
import { PublicClientApplication } from '@azure/msal-browser'
import './index.css'
import App from './App'
// MSAL is configured dynamically from /api/auth/config.
// The instance created here is only a placeholder so <MsalProvider> has
// something to hold; the real config is loaded in App.tsx.
const redirectUri = window.location.origin + '/ac-helper/'

const msalInstance = new PublicClientApplication({
  auth: {
    clientId: '9079054c-9620-4757-a256-23413042f1ef',
    authority: 'https://login.microsoftonline.com/e519c2e6-bc6d-4fdf-8d9c-923c2f002385',
    redirectUri,
  },
  cache: { cacheLocation: 'sessionStorage' },
})

// Mount the app into #root, wrapped in StrictMode and the MSAL provider.
const rootElement = document.getElementById('root')!
const root = createRoot(rootElement)

root.render(
  <StrictMode>
    <MsalProvider instance={msalInstance}>
      <App />
    </MsalProvider>
  </StrictMode>,
)

View file

@ -0,0 +1,175 @@
import { useEffect, useState } from 'react'
import { useParams, useNavigate } from 'react-router-dom'
import { getJobDeliverables } from '../api/jobs'
import { importDeliverables } from '../api/sheets'
import { useSheetStore } from '../stores/useSheetStore'
import type { Deliverable } from '../types'
import toast from 'react-hot-toast'
/** Deliverable fields shown as editable columns, in display order. */
const EDITABLE_FIELDS: (keyof Deliverable)[] = ['Title', 'Category', 'Media', 'Sub-media', 'Format', 'Supply date', 'Live date', 'Language', 'Country', 'Status']

/**
 * Review screen for deliverables extracted from a brief.
 *
 * Loads the deliverables of the job named in the route, lets the user edit
 * cells and (de)select rows, then imports the selected rows into an existing
 * sheet or a newly created one and navigates to that sheet.
 */
export default function BriefReviewPage() {
  const { jobId } = useParams<{ jobId: string }>()
  const navigate = useNavigate()
  const { sheets, fetchSheets, createSheet } = useSheetStore()
  const [rows, setRows] = useState<Deliverable[]>([])
  const [loading, setLoading] = useState(true)
  const [selected, setSelected] = useState<Set<number>>(new Set())
  const [targetSheetId, setTargetSheetId] = useState<string>('new')
  const [newSheetName, setNewSheetName] = useState('Brief Import')
  const [importing, setImporting] = useState(false)

  useEffect(() => {
    fetchSheets()
    if (!jobId) {
      // Nothing to fetch: leave the loading state instead of spinning forever.
      setLoading(false)
      return
    }

    // Guards against a response arriving after unmount or after jobId
    // changed, which would otherwise overwrite the newer state.
    let cancelled = false
    setLoading(true)
    getJobDeliverables(jobId)
      .then(r => {
        if (cancelled) return
        setRows(r.deliverables)
        // Every extracted row starts selected.
        setSelected(new Set(r.deliverables.map((_, i) => i)))
      })
      .catch(() => {
        if (!cancelled) toast.error('Failed to load deliverables')
      })
      .finally(() => {
        if (!cancelled) setLoading(false)
      })

    return () => {
      cancelled = true
    }
  }, [jobId])

  const updateCell = (rowIdx: number, field: keyof Deliverable, value: string) => {
    setRows(prev => prev.map((r, i) => i === rowIdx ? { ...r, [field]: value } : r))
  }

  const toggleRow = (i: number) => {
    setSelected(prev => {
      const next = new Set(prev)
      next.has(i) ? next.delete(i) : next.add(i)
      return next
    })
  }

  // "Select all" is only meaningful (and only shown checked) when rows exist.
  const allSelected = rows.length > 0 && selected.size === rows.length

  const handleImport = async () => {
    const toImport = rows.filter((_, i) => selected.has(i))
    if (!toImport.length) { toast.error('Select at least one row'); return }
    setImporting(true)
    try {
      let sheetId = targetSheetId
      if (targetSheetId === 'new') {
        // A blank name would create an unlabelled sheet; fall back to the default.
        sheetId = await createSheet(newSheetName.trim() || 'Brief Import')
      }
      await importDeliverables(sheetId, toImport, 'append')
      toast.success(`Imported ${toImport.length} deliverables`)
      navigate(`/sheet/${sheetId}`)
    } catch {
      toast.error('Import failed')
    } finally {
      setImporting(false)
    }
  }

  if (loading) {
    return <div className="text-center py-12" style={{ color: 'var(--text-muted)' }}>Loading deliverables</div>
  }

  return (
    <div className="max-w-full">
      <div className="flex items-center justify-between mb-4">
        <div>
          <button onClick={() => navigate(-1)} className="text-xs mb-1 hover:opacity-70 block" style={{ color: 'var(--text-muted)' }}>
            Back
          </button>
          <h1 className="text-xl font-bold" style={{ color: 'var(--text-primary)' }}>
            Review Extracted Deliverables
          </h1>
          <p className="text-sm" style={{ color: 'var(--text-muted)' }}>
            {rows.length} deliverables found. Edit, deselect unwanted rows, then import.
          </p>
        </div>
        {/* Import controls */}
        <div className="flex items-center gap-3">
          <select
            value={targetSheetId}
            onChange={e => setTargetSheetId(e.target.value)}
            className="px-3 py-2 rounded text-sm"
            style={{ background: 'var(--bg-card)', color: 'var(--text-primary)', border: '1px solid var(--border)' }}
          >
            <option value="new">+ Create new sheet</option>
            {sheets.map(s => <option key={s.id} value={s.id}>{s.name}</option>)}
          </select>
          {targetSheetId === 'new' && (
            <input
              value={newSheetName}
              onChange={e => setNewSheetName(e.target.value)}
              className="px-3 py-2 rounded text-sm"
              style={{ background: 'var(--bg-card)', color: 'var(--text-primary)', border: '1px solid var(--border)' }}
              placeholder="Sheet name"
            />
          )}
          <button
            onClick={handleImport}
            disabled={importing || selected.size === 0}
            className="px-4 py-2 rounded text-sm font-medium disabled:opacity-40"
            style={{ background: 'var(--accent)', color: '#000' }}
          >
            {importing ? 'Importing…' : `Import ${selected.size} rows →`}
          </button>
        </div>
      </div>
      {/* Table */}
      <div className="overflow-auto rounded" style={{ border: '1px solid var(--border)', maxHeight: 'calc(100vh - 200px)' }}>
        <table className="w-full text-xs border-collapse">
          <thead className="sticky top-0">
            <tr style={{ background: '#1a1a1a' }}>
              <th className="px-2 py-2 text-left w-8">
                <input
                  type="checkbox"
                  checked={allSelected}
                  onChange={() => setSelected(allSelected ? new Set() : new Set(rows.map((_, i) => i)))}
                />
              </th>
              {EDITABLE_FIELDS.map(f => (
                <th key={f} className="px-2 py-2 text-left font-semibold uppercase tracking-wider" style={{ color: 'var(--accent)' }}>
                  {f}
                </th>
              ))}
            </tr>
          </thead>
          <tbody>
            {rows.map((row, i) => (
              <tr
                key={i}
                className="border-t"
                style={{
                  borderColor: 'var(--border)',
                  background: selected.has(i) ? 'var(--bg-card)' : '#0a0a0a',
                  opacity: selected.has(i) ? 1 : 0.4,
                }}
              >
                <td className="px-2 py-1.5">
                  <input type="checkbox" checked={selected.has(i)} onChange={() => toggleRow(i)} />
                </td>
                {EDITABLE_FIELDS.map(field => (
                  <td key={field} className="px-1 py-1">
                    <input
                      value={String(row[field] ?? '')}
                      onChange={e => updateCell(i, field, e.target.value)}
                      className="w-full px-1 py-0.5 rounded text-xs outline-none"
                      style={{
                        background: 'transparent',
                        color: 'var(--text-primary)',
                        border: '1px solid transparent',
                        minWidth: 80,
                      }}
                      onFocus={e => { (e.target as HTMLElement).style.borderColor = 'var(--accent)' }}
                      onBlur={e => { (e.target as HTMLElement).style.borderColor = 'transparent' }}
                    />
                  </td>
                ))}
              </tr>
            ))}
          </tbody>
        </table>
      </div>
    </div>
  )
}

View file

@ -0,0 +1,76 @@
import { useEffect } from 'react'
import { useNavigate } from 'react-router-dom'
import { useJobStore } from '../stores/useJobStore'
import FileDropzone from '../components/brief/FileDropzone'
import JobProgressCard from '../components/brief/JobProgressCard'
import { useWebSocket } from '../hooks/useWebSocket'
import toast from 'react-hot-toast'
/**
 * Brief upload screen: a file dropzone plus two lists of extraction jobs
 * ("In Progress" and "Completed"), kept live through the job WebSocket.
 */
export default function BriefUploadPage() {
  const navigate = useNavigate()
  const { jobs, uploadFiles, deleteJob, fetchJobs, loading } = useJobStore()

  useWebSocket()
  useEffect(() => { fetchJobs() }, [])

  // Queue the dropped files; when exactly one job was created, try to scroll
  // its card into view.
  const handleFiles = async (files: File[]) => {
    try {
      const created = await uploadFiles(files)
      toast.success(`${files.length} brief(s) queued for extraction`)
      if (created.length === 1) {
        const card = document.getElementById(`job-${created[0].id}`)
        card?.scrollIntoView({ behavior: 'smooth' })
      }
    } catch (e: any) {
      toast.error(e?.response?.data?.message || 'Upload failed')
    }
  }

  // A job is finished once it reached either terminal phase.
  const isFinished = (job: (typeof jobs)[number]) => ['COMPLETED', 'FAILED'].includes(job.phase)
  const activeJobs = jobs.filter(job => !isFinished(job))
  const doneJobs = jobs.filter(job => isFinished(job))

  return (
    <div className="max-w-2xl mx-auto">
      <div className="mb-6">
        <button onClick={() => navigate(-1)} className="text-xs mb-3 hover:opacity-70" style={{ color: 'var(--text-muted)' }}>
          Back
        </button>
        <h1 className="text-xl font-bold mb-1" style={{ color: 'var(--text-primary)' }}>Upload Brief</h1>
        <p className="text-sm" style={{ color: 'var(--text-muted)' }}>
          Upload a PDF, PPTX, DOCX or XLSX brief. AI will extract all deliverables.
        </p>
      </div>
      <FileDropzone onFiles={handleFiles} loading={loading} />
      {activeJobs.length > 0 && (
        <div className="mt-6">
          <h2 className="text-sm font-semibold uppercase tracking-wider mb-3" style={{ color: 'var(--text-muted)' }}>
            In Progress
          </h2>
          <div className="space-y-3">
            {activeJobs.map(job => (
              <div key={job.id} id={`job-${job.id}`}>
                <JobProgressCard job={job} onDelete={deleteJob} />
              </div>
            ))}
          </div>
        </div>
      )}
      {doneJobs.length > 0 && (
        <div className="mt-6">
          <h2 className="text-sm font-semibold uppercase tracking-wider mb-3" style={{ color: 'var(--text-muted)' }}>
            Completed
          </h2>
          <div className="space-y-3">
            {doneJobs.map(job => (
              <JobProgressCard key={job.id} job={job} onDelete={deleteJob} />
            ))}
          </div>
        </div>
      )}
    </div>
  )
}

View file

@ -0,0 +1,102 @@
import { useEffect } from 'react'
import { useNavigate } from 'react-router-dom'
import { useSheetStore } from '../stores/useSheetStore'
import { useJobStore } from '../stores/useJobStore'
import JobProgressCard from '../components/brief/JobProgressCard'
/**
 * Landing page: two quick actions (upload a brief, create a blank sheet)
 * followed by the five first sheets and the three first extraction jobs
 * from their respective stores.
 */
export default function DashboardPage() {
  const navigate = useNavigate()
  const { sheets, createSheet, loadSheet, fetchSheets } = useSheetStore()
  const { jobs, fetchJobs } = useJobStore()

  // Refresh both lists once on mount.
  useEffect(() => {
    fetchSheets()
    fetchJobs()
  }, [])

  // Create a sheet named after today's date and open it.
  const handleNewSheet = async () => {
    const sheetName = `Sheet ${new Date().toLocaleDateString()}`
    const id = await createSheet(sheetName)
    navigate(`/sheet/${id}`)
  }

  // Start loading the sheet, then navigate to its editor.
  const openSheet = (id: (typeof sheets)[number]['id']) => {
    loadSheet(id)
    navigate(`/sheet/${id}`)
  }

  const recentSheets = sheets.slice(0, 5)
  const recentJobs = jobs.slice(0, 3)

  return (
    <div className="max-w-5xl mx-auto">
      <div className="mb-6">
        <h1 className="text-2xl font-bold mb-1" style={{ color: 'var(--text-primary)' }}>Dashboard</h1>
        <p className="text-sm" style={{ color: 'var(--text-muted)' }}>
          Manage your Activation Calendar sheets or extract deliverables from a brief.
        </p>
      </div>
      {/* Quick actions */}
      <div className="grid grid-cols-2 gap-4 mb-8">
        <button
          onClick={() => navigate('/brief/upload')}
          className="rounded-xl p-6 text-left transition-colors hover:opacity-90"
          style={{ background: 'var(--accent)', color: '#000' }}
        >
          <div className="text-2xl mb-2">📄</div>
          <div className="font-bold mb-1">Upload Brief</div>
          <div className="text-sm opacity-70">Extract deliverables from PDF, PPTX, DOCX, XLSX</div>
        </button>
        <button
          onClick={handleNewSheet}
          className="rounded-xl p-6 text-left transition-colors hover:opacity-90"
          style={{ background: 'var(--bg-card)', border: '1px solid var(--border)', color: 'var(--text-primary)' }}
        >
          <div className="text-2xl mb-2">📋</div>
          <div className="font-bold mb-1">New Sheet</div>
          <div className="text-sm" style={{ color: 'var(--text-muted)' }}>Start a blank Activation Calendar</div>
        </button>
      </div>
      <div className="grid grid-cols-2 gap-6">
        {/* Recent sheets */}
        <div>
          <h2 className="text-sm font-semibold uppercase tracking-wider mb-3" style={{ color: 'var(--text-muted)' }}>
            Recent Sheets
          </h2>
          <div className="space-y-2">
            {recentSheets.map(sheet => (
              <button
                key={sheet.id}
                onClick={() => openSheet(sheet.id)}
                className="w-full rounded-lg p-3 text-left flex items-center justify-between transition-colors hover:opacity-80"
                style={{ background: 'var(--bg-card)', border: '1px solid var(--border)' }}
              >
                <div>
                  <div className="text-sm font-medium" style={{ color: 'var(--text-primary)' }}>{sheet.name}</div>
                  <div className="text-xs" style={{ color: 'var(--text-muted)' }}>
                    {sheet.itemCount} items · {new Date(sheet.modified).toLocaleDateString()}
                  </div>
                </div>
                <span style={{ color: 'var(--accent)' }}></span>
              </button>
            ))}
            {sheets.length === 0 && (
              <div className="text-sm" style={{ color: 'var(--text-muted)' }}>No sheets yet.</div>
            )}
          </div>
        </div>
        {/* Recent jobs */}
        <div>
          <h2 className="text-sm font-semibold uppercase tracking-wider mb-3" style={{ color: 'var(--text-muted)' }}>
            Recent Brief Extractions
          </h2>
          <div className="space-y-2">
            {recentJobs.map(job => (
              <JobProgressCard key={job.id} job={job} />
            ))}
            {recentJobs.length === 0 && (
              <div className="text-sm" style={{ color: 'var(--text-muted)' }}>No extractions yet.</div>
            )}
          </div>
        </div>
      </div>
    </div>
  )
}

View file

@ -0,0 +1,49 @@
import { useMsal } from '@azure/msal-react'
import { useAuthStore } from '../stores/useAuthStore'
/**
 * Centered sign-in card with a single "Sign in with Microsoft" button.
 *
 * In a dev build the button just asks the auth store to fetch the current
 * user; otherwise it starts an MSAL redirect login.
 */
export default function LoginPage() {
  const { instance } = useMsal()
  const { fetchMe } = useAuthStore()

  const handleLogin = async () => {
    if (!import.meta.env.DEV) {
      instance.loginRedirect({ scopes: ['openid', 'profile', 'email'] })
      return
    }
    // In dev mode (no MSAL accounts), just call fetchMe — backend uses DEV_MODE
    await fetchMe()
  }

  return (
    <div
      style={{
        display: 'flex',
        alignItems: 'center',
        justifyContent: 'center',
        height: '100vh',
        background: '#000',
      }}
    >
      <div
        style={{
          background: 'var(--bg-card)',
          border: '1px solid var(--border)',
          borderRadius: 12,
          padding: '48px 56px',
          textAlign: 'center',
          maxWidth: 380,
        }}
      >
        <div style={{ fontSize: 40, marginBottom: 16 }}>📋</div>
        <h1 style={{ color: 'var(--text-primary)', fontSize: 20, fontWeight: 700, marginBottom: 8 }}>
          AC Helper
        </h1>
        <p style={{ color: 'var(--text-muted)', fontSize: 14, marginBottom: 32, lineHeight: 1.5 }}>
          Activation Calendar management &amp; brief extraction tool
        </p>
        <button
          onClick={handleLogin}
          style={{
            background: 'var(--accent)',
            color: '#000',
            border: 'none',
            borderRadius: 8,
            padding: '12px 32px',
            fontSize: 14,
            fontWeight: 600,
            cursor: 'pointer',
            width: '100%',
          }}
        >
          Sign in with Microsoft
        </button>
        <p style={{ color: 'var(--text-muted)', fontSize: 11, marginTop: 16 }}>
          Oliver Agency · ai-sandbox.oliver.solutions
        </p>
      </div>
    </div>
  )
}

View file

@ -0,0 +1,217 @@
import { useEffect, useRef, useState, useCallback } from 'react'
import { useParams } from 'react-router-dom'
import Handsontable from 'handsontable'
import { HotTable } from '@handsontable/react'
import 'handsontable/styles/ht-theme-main.css'
import { useSheetStore } from '../stores/useSheetStore'
import { useDropdownStore } from '../stores/useDropdownStore'
import { sendCommand } from '../api/ai'
import { exportSheet } from '../api/sheets'
import CommandBar from '../components/sheet/CommandBar'
import AIQuestionModal from '../components/sheet/AIQuestionModal'
import AIActivityLog, { type LogEntry } from '../components/sheet/AIActivityLog'
import type { Deliverable } from '../types'
import toast from 'react-hot-toast'
/** Allowed values for the Status dropdown column. */
const STATUS_OPTIONS = ['Booked', 'To-do', 'In Progress', 'Done']

/**
 * Sheet editor: a Handsontable grid bound to the active sheet's deliverables,
 * an AI command bar, an activity log and a clarification modal.
 *
 * Grid edits are auto-saved through a 1 s debounce. Any pending auto-save is
 * flushed before an AI command is sent and when the page unmounts or switches
 * sheet, and is discarded when the sheet is cleared, so a late timer can never
 * write stale rows over newer data.
 */
export default function SheetPage() {
  const { sheetId } = useParams<{ sheetId: string }>()
  const { sheets, activeSheetId, deliverables, loadSheet, saveSheet, saving } = useSheetStore()
  const { categories, fetch: fetchCategories } = useDropdownStore()
  const hotRef = useRef<any>(null)
  const [aiLoading, setAiLoading] = useState(false)
  const [aiQuestion, setAiQuestion] = useState<string | null>(null)
  const [yolo, setYolo] = useState(false)
  const [history, setHistory] = useState('')
  const [logs, setLogs] = useState<LogEntry[]>([])
  const saveTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
  // Rows waiting for the debounce timer; null when nothing is pending.
  const pendingSaveRef = useRef<Deliverable[] | null>(null)
  const sheetMeta = sheets.find(s => s.id === sheetId)

  // Drop the debounce timer and whatever it was going to save.
  const cancelPendingSave = () => {
    if (saveTimeoutRef.current) {
      clearTimeout(saveTimeoutRef.current)
      saveTimeoutRef.current = null
    }
    pendingSaveRef.current = null
  }

  // Save pending rows right now instead of waiting for the timer.
  const flushPendingSave = async () => {
    const pending = pendingSaveRef.current
    cancelPendingSave()
    if (pending) await saveSheet(pending)
  }

  useEffect(() => {
    fetchCategories()
    if (sheetId && activeSheetId !== sheetId) {
      loadSheet(sheetId)
    }
    // On sheet switch / unmount, write out unsaved edits immediately rather
    // than letting the timer fire later against a different page state.
    // NOTE(review): assumes saveSheet targets the store's active sheet as of
    // the moment it is called — confirm in useSheetStore.
    return () => {
      void flushPendingSave()
    }
  }, [sheetId])

  const categoryNames = categories.map(c => c.name)

  const columns: Handsontable.ColumnSettings[] = [
    { data: 'Number', title: '#', width: 70, readOnly: true },
    { data: 'Title', title: 'Title', width: 200 },
    {
      data: 'Status', title: 'Status', width: 110,
      type: 'dropdown', source: STATUS_OPTIONS,
    },
    {
      data: 'Category', title: 'Category', width: 180,
      type: 'autocomplete', source: categoryNames, strict: false, filter: true,
    },
    {
      data: 'Media', title: 'Media', width: 180,
      type: 'autocomplete', strict: false, filter: true,
      source(_query: string, process: (items: string[]) => void) {
        // Dynamic source — resolved in cells() callback below
        process([])
      },
    },
    { data: 'Sub-media', title: 'Sub-media', width: 120 },
    { data: 'Format', title: 'Format', width: 100 },
    { data: 'Supply date', title: 'Supply Date', width: 110, type: 'date', dateFormat: 'YYYY-MM-DD' },
    { data: 'Live date', title: 'Live Date', width: 110, type: 'date', dateFormat: 'YYYY-MM-DD' },
    { data: 'Language', title: 'Lang', width: 60 },
    { data: 'Country', title: 'Country', width: 70 },
  ]

  const cells = useCallback((row: number, col: number): Handsontable.CellMeta => {
    // Column index 4 is the Media column in `columns` above.
    if (col === 4) {
      // Media column — filter based on current row's Category
      const hot = hotRef.current?.hotInstance
      const category = hot?.getDataAtRowProp(row, 'Category') as string | undefined
      const cat = categories.find(c => c.name === category)
      return { source: cat?.mediaTypes ?? [] }
    }
    return {}
  }, [categories])

  const handleAfterChange = useCallback((changes: Handsontable.CellChange[] | null) => {
    if (!changes) return
    const hot = hotRef.current?.hotInstance
    if (!hot) return
    // Rebuild row objects from the grid, keyed by each column's `data` prop.
    const newData: Deliverable[] = hot.getData().map((row: any[]) => {
      const obj: any = {}
      columns.forEach((col, i) => { if (col.data) obj[col.data as string] = row[i] })
      return obj as Deliverable
    })
    // Debounced auto-save
    if (saveTimeoutRef.current) clearTimeout(saveTimeoutRef.current)
    pendingSaveRef.current = newData
    saveTimeoutRef.current = setTimeout(() => {
      saveTimeoutRef.current = null
      pendingSaveRef.current = null
      saveSheet(newData)
    }, 1000)
  }, [columns, saveSheet])

  const addLog = (type: LogEntry['type'], text: string) => {
    setLogs(prev => [...prev, {
      time: new Date().toLocaleTimeString(),
      type,
      text,
    }])
  }

  const handleCommand = async (command: string, yolo: boolean) => {
    if (!sheetId) return
    setAiLoading(true)
    addLog('command', command)
    try {
      // Persist unsaved grid edits first so the command runs against current
      // rows and a late auto-save cannot overwrite what the command produced.
      await flushPendingSave()
      const result = await sendCommand(sheetId, command, yolo, history)
      if (result.operation === 'question') {
        setAiQuestion(result.question || '')
        setHistory(prev => prev + `\nUser: ${command}\nAI: ${result.question}`)
        addLog('question', result.question || '')
      } else if (result.data) {
        await loadSheet(sheetId)
        addLog('success', `${result.operation}: ${result.count} item(s)`)
        setHistory('')
        toast.success(`${result.count} item(s) ${result.operation === 'create' ? 'created' : 'updated'}`)
      } else if (result.error) {
        addLog('error', result.error)
        toast.error(result.error)
      }
    } catch (e: any) {
      const msg = e?.response?.data?.message || 'Command failed'
      addLog('error', msg)
      toast.error(msg)
    } finally {
      setAiLoading(false)
    }
  }

  const handleAnswer = (answer: string) => {
    setAiQuestion(null)
    handleCommand(answer, yolo)
    setHistory(prev => prev + `\nUser answer: ${answer}`)
  }

  const handleClear = async () => {
    if (!sheetId || !confirm('Clear all rows?')) return
    // A pending auto-save would otherwise re-save the old rows after the clear.
    cancelPendingSave()
    await saveSheet([])
    await loadSheet(sheetId)
    setLogs([])
    toast.success('Sheet cleared')
  }

  return (
    <div className="flex flex-col h-full gap-3">
      {/* Header */}
      <div className="flex items-center justify-between flex-shrink-0">
        <div>
          <h1 className="text-lg font-bold" style={{ color: 'var(--text-primary)' }}>
            {sheetMeta?.name || 'Sheet'}
          </h1>
          <div className="text-xs" style={{ color: 'var(--text-muted)' }}>
            {deliverables.length} items{saving ? ' · Saving…' : ''}
          </div>
        </div>
        <div className="flex items-center gap-2">
          <button
            onClick={handleClear}
            className="px-3 py-1.5 rounded text-xs transition-colors"
            style={{ color: 'var(--danger)', border: '1px solid var(--border)' }}
          >
            Clear
          </button>
          <button
            onClick={() => sheetId && exportSheet(sheetId)}
            className="px-3 py-1.5 rounded text-xs font-medium"
            style={{ background: 'var(--accent)', color: '#000' }}
          >
            Export CSV
          </button>
        </div>
      </div>
      {/* AI Command */}
      <div className="flex-shrink-0">
        <CommandBar onCommand={handleCommand} loading={aiLoading} yolo={yolo} onYoloChange={setYolo} />
      </div>
      {/* Spreadsheet */}
      <div className="flex-1 overflow-hidden rounded" style={{ border: '1px solid var(--border)' }}>
        {deliverables !== undefined && (
          <HotTable
            ref={hotRef}
            data={deliverables as any[]}
            columns={columns}
            cells={cells}
            rowHeaders={true}
            colHeaders={true}
            contextMenu={true}
            manualColumnResize={true}
            afterChange={handleAfterChange}
            height="100%"
            width="100%"
            licenseKey="non-commercial-and-evaluation"
            stretchH="last"
            themeName="ht-theme-main"
          />
        )}
      </div>
      {/* Activity log */}
      <div className="flex-shrink-0">
        <AIActivityLog entries={logs} />
      </div>
      {/* AI Question Modal */}
      {aiQuestion && (
        <AIQuestionModal
          question={aiQuestion}
          onAnswer={handleAnswer}
          onDismiss={() => setAiQuestion(null)}
        />
      )}
    </div>
  )
}

View file

@ -0,0 +1,163 @@
import { useEffect, useState, useCallback } from 'react'
import { useDropzone } from 'react-dropzone'
import { previewDropdowns, uploadDropdowns } from '../../api/admin'
import { useDropdownStore } from '../../stores/useDropdownStore'
import type { CategoryData } from '../../types'
import toast from 'react-hot-toast'
/**
 * Read-only two-column table listing each category next to its media types.
 * Shared by the upload preview and the "Current Data" section.
 */
function CategoryTable({ rows }: { rows: CategoryData[] }) {
  return (
    <div className="overflow-auto rounded" style={{ border: '1px solid var(--border)', maxHeight: 400 }}>
      <table className="w-full text-xs border-collapse">
        <thead className="sticky top-0" style={{ background: '#1a1a1a' }}>
          <tr>
            <th className="px-3 py-2 text-left font-semibold uppercase tracking-wider" style={{ color: 'var(--accent)' }}>Category</th>
            <th className="px-3 py-2 text-left font-semibold uppercase tracking-wider" style={{ color: 'var(--accent)' }}>Media Types</th>
          </tr>
        </thead>
        <tbody>
          {rows.map(cat => (
            <tr key={cat.name} className="border-t" style={{ borderColor: 'var(--border)' }}>
              <td className="px-3 py-2" style={{ color: 'var(--text-primary)' }}>{cat.name}</td>
              <td className="px-3 py-2" style={{ color: 'var(--text-muted)' }}>
                {/* An empty media type list joins to '' and falls back to the dash. */}
                {cat.mediaTypes.join(', ') || '—'}
              </td>
            </tr>
          ))}
        </tbody>
      </table>
    </div>
  )
}

/**
 * Admin page for replacing the Category / Media Type dropdown data.
 *
 * Flow: a dropped .xlsx file is passed to `previewDropdowns` and the returned
 * categories are shown as a preview; "Apply Changes" passes the same file to
 * `uploadDropdowns` and then reloads the dropdown store so the "Current Data"
 * table reflects the upload.
 */
export default function AdminDropdownsPage() {
  const { categories, fetch: fetchCategories } = useDropdownStore()
  const [preview, setPreview] = useState<CategoryData[] | null>(null)
  const [previewFile, setPreviewFile] = useState<File | null>(null)
  const [uploading, setUploading] = useState(false)
  const [previewing, setPreviewing] = useState(false)

  // Load the current categories once when the page mounts.
  useEffect(() => { fetchCategories() }, [])

  // Keep the dropped file (needed again on apply) and request its preview.
  const onDrop = useCallback(async (files: File[]) => {
    if (!files[0]) return
    const file = files[0]
    setPreviewFile(file)
    setPreviewing(true)
    try {
      const result = await previewDropdowns(file)
      setPreview(result)
    } catch {
      toast.error('Failed to parse Excel file')
      setPreview(null)
    } finally {
      setPreviewing(false)
    }
  }, [])

  const { getRootProps, getInputProps, isDragActive } = useDropzone({
    onDrop,
    accept: { 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'] },
    maxFiles: 1,
  })

  // Upload the previewed file, refresh the store, and clear the preview state.
  const handleApply = async () => {
    if (!previewFile) return
    setUploading(true)
    try {
      await uploadDropdowns(previewFile)
      // The store's fetch() returns early once `loaded` is true, which the
      // mount effect has normally already caused. Clear the flag first so the
      // call below really reloads the categories instead of being a no-op.
      useDropdownStore.setState({ loaded: false })
      await fetchCategories()
      setPreview(null)
      setPreviewFile(null)
      toast.success('Dropdowns updated successfully')
    } catch {
      toast.error('Upload failed')
    } finally {
      setUploading(false)
    }
  }

  return (
    <div className="max-w-4xl">
      <div className="mb-6">
        <h1 className="text-xl font-bold mb-1" style={{ color: 'var(--text-primary)' }}>Dropdown Data</h1>
        <p className="text-sm" style={{ color: 'var(--text-muted)' }}>
          Upload a new Excel file to update the Category / Media hierarchy used across all sheets.
        </p>
      </div>
      {/* Upload zone */}
      <div
        {...getRootProps()}
        className="rounded-xl p-8 text-center cursor-pointer mb-6 transition-colors"
        style={{
          border: `2px dashed ${isDragActive ? 'var(--accent)' : 'var(--border)'}`,
          background: isDragActive ? 'rgba(255,196,7,0.05)' : 'var(--bg-card)',
        }}
      >
        <input {...getInputProps()} />
        <div className="text-3xl mb-3">📊</div>
        <p className="text-sm font-medium" style={{ color: 'var(--text-primary)' }}>
          {isDragActive ? 'Drop Excel file here' : 'Drag & drop an .xlsx file, or click to select'}
        </p>
        <p className="text-xs mt-1" style={{ color: 'var(--text-muted)' }}>
          Expected format: columns "Category" and "Media Type" (one row per category/media pair)
        </p>
      </div>
      {previewing && (
        <div className="text-sm text-center py-4" style={{ color: 'var(--text-muted)' }}>Parsing file</div>
      )}
      {/* Preview */}
      {preview && (
        <div className="mb-6">
          <div className="flex items-center justify-between mb-3">
            <h2 className="text-sm font-semibold uppercase tracking-wider" style={{ color: 'var(--text-muted)' }}>
              Preview {preview.length} categories
            </h2>
            <div className="flex gap-2">
              <button
                onClick={() => { setPreview(null); setPreviewFile(null) }}
                className="px-3 py-1.5 rounded text-xs"
                style={{ border: '1px solid var(--border)', color: 'var(--text-muted)' }}
              >
                Cancel
              </button>
              <button
                onClick={handleApply}
                disabled={uploading}
                className="px-4 py-1.5 rounded text-xs font-medium disabled:opacity-40"
                style={{ background: 'var(--accent)', color: '#000' }}
              >
                {uploading ? 'Applying…' : 'Apply Changes'}
              </button>
            </div>
          </div>
          <CategoryTable rows={preview} />
        </div>
      )}
      {/* Current data */}
      <div>
        <h2 className="text-sm font-semibold uppercase tracking-wider mb-3" style={{ color: 'var(--text-muted)' }}>
          Current Data {categories.length} categories
        </h2>
        <CategoryTable rows={categories} />
      </div>
    </div>
  )
}

View file

@ -0,0 +1,104 @@
import { useEffect, useState } from 'react'
import { listUsers, updateUser } from '../../api/admin'
import type { User } from '../../types'
import toast from 'react-hot-toast'
/**
 * Admin table of all users, with an inline role selector and an
 * activate/deactivate toggle per row. Each change is sent through
 * `updateUser`, and the row is refreshed from the response.
 */
export default function AdminUsersPage() {
  const [users, setUsers] = useState<User[]>([])
  const [loading, setLoading] = useState(true)

  // Fetch the user list once on mount; the loading flag is cleared either way.
  useEffect(() => {
    const pending = listUsers()
    pending
      .then(setUsers)
      .catch(() => toast.error('Failed to load users'))
      .finally(() => setLoading(false))
  }, [])

  // Send a partial update for one user and merge the response into that row.
  const handleUpdate = async (id: string, patch: Partial<User>) => {
    try {
      const updated = await updateUser(id, patch)
      setUsers(current =>
        current.map(existing => {
          if (existing.id !== id) return existing
          return { ...existing, ...updated }
        })
      )
      toast.success('User updated')
    } catch {
      toast.error('Update failed')
    }
  }

  if (loading) {
    return <div style={{ color: 'var(--text-muted)', padding: 32 }}>Loading users</div>
  }

  const columnTitles = ['Name', 'Email', 'Role', 'Status', 'Last Login', 'Actions']

  return (
    <div className="max-w-4xl">
      <div className="mb-6">
        <h1 className="text-xl font-bold mb-1" style={{ color: 'var(--text-primary)' }}>User Management</h1>
        <p className="text-sm" style={{ color: 'var(--text-muted)' }}>
          {users.length} registered users
        </p>
      </div>
      <div className="overflow-auto rounded" style={{ border: '1px solid var(--border)' }}>
        <table className="w-full text-sm border-collapse">
          <thead>
            <tr style={{ background: '#1a1a1a' }}>
              {columnTitles.map(title => (
                <th
                  key={title}
                  className="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider"
                  style={{ color: 'var(--accent)' }}
                >{title}</th>
              ))}
            </tr>
          </thead>
          <tbody>
            {users.map(user => {
              // Status badge and action button colours both follow the active flag.
              const badgeStyle = {
                background: user.active ? 'rgba(34,197,94,0.15)' : 'rgba(239,68,68,0.15)',
                color: user.active ? 'var(--success)' : 'var(--danger)',
              }
              const toggleStyle = {
                border: '1px solid var(--border)',
                color: user.active ? 'var(--danger)' : 'var(--success)',
              }
              return (
                <tr key={user.id} className="border-t" style={{ borderColor: 'var(--border)' }}>
                  <td className="px-4 py-3" style={{ color: 'var(--text-primary)' }}>
                    {user.name || '—'}
                  </td>
                  <td className="px-4 py-3" style={{ color: 'var(--text-muted)' }}>
                    {user.email}
                  </td>
                  <td className="px-4 py-3">
                    <select
                      value={user.role}
                      onChange={e => handleUpdate(user.id, { role: e.target.value as User['role'] })}
                      className="text-xs px-2 py-1 rounded"
                      style={{
                        background: 'var(--bg-card)', color: 'var(--text-primary)',
                        border: '1px solid var(--border)',
                      }}
                    >
                      <option value="user">user</option>
                      <option value="admin">admin</option>
                    </select>
                  </td>
                  <td className="px-4 py-3">
                    <span className="text-xs px-2 py-0.5 rounded font-medium" style={badgeStyle}>
                      {user.active ? 'Active' : 'Inactive'}
                    </span>
                  </td>
                  <td className="px-4 py-3 text-xs" style={{ color: 'var(--text-muted)' }}>
                    {user.last_seen ? new Date(user.last_seen).toLocaleDateString() : '—'}
                  </td>
                  <td className="px-4 py-3">
                    <button
                      onClick={() => handleUpdate(user.id, { active: !user.active })}
                      className="text-xs px-3 py-1 rounded transition-opacity hover:opacity-70"
                      style={toggleStyle}
                    >
                      {user.active ? 'Deactivate' : 'Activate'}
                    </button>
                  </td>
                </tr>
              )
            })}
          </tbody>
        </table>
      </div>
    </div>
  )
}

View file

@ -0,0 +1,37 @@
import { create } from 'zustand'
import type { User } from '../types'
import api from '../api/client'
/** State and actions exposed by the authentication store. */
interface AuthStore {
  /** The signed-in user, or null when none has been loaded. */
  user: User | null
  /** True while `fetchMe` is in flight (and initially, before the first call). */
  loading: boolean
  /** Loads the current user from `/auth/me` into `user`. */
  fetchMe: () => Promise<void>
  /** Stores the access token in sessionStorage. */
  setToken: (token: string) => void
  /** Removes the stored token and clears `user`. */
  logout: () => void
}

// sessionStorage key under which the access token is kept.
const ACCESS_TOKEN_KEY = 'ac_access_token'

export const useAuthStore = create<AuthStore>((set) => ({
  user: null,
  // Starts true so the login screen does not flash before fetchMe completes.
  loading: true,

  setToken: (token: string) => {
    sessionStorage.setItem(ACCESS_TOKEN_KEY, token)
  },

  fetchMe: async () => {
    set({ loading: true })
    try {
      const { data } = await api.get('/auth/me')
      set({ user: data })
    } catch {
      // Any failure is treated as "no signed-in user".
      set({ user: null })
    } finally {
      set({ loading: false })
    }
  },

  logout: () => {
    sessionStorage.removeItem(ACCESS_TOKEN_KEY)
    set({ user: null })
  },
}))

View file

@ -0,0 +1,30 @@
import { create } from 'zustand'
import type { CategoryData } from '../types'
import { getCategories } from '../api/dropdowns'
/** State and actions exposed by the dropdown (category / media type) store. */
interface DropdownStore {
  /** Categories with their media types; empty until a load succeeds. */
  categories: CategoryData[]
  /** True once categories have been loaded successfully. */
  loaded: boolean
  /**
   * Loads categories via `getCategories`. Does nothing when they are already
   * loaded unless `force` is exactly `true`, in which case they are reloaded.
   */
  fetch: (force?: boolean) => Promise<void>
  /** Media types of the named category, or an empty array if it is unknown. */
  getMediaTypes: (categoryName: string) => string[]
}

export const useDropdownStore = create<DropdownStore>((set, get) => ({
  categories: [],
  loaded: false,

  fetch: async (force = false) => {
    // Compare strictly with `true` so an accidental truthy argument (for
    // example an event object from a handler) does not trigger a reload.
    if (force !== true && get().loaded) return
    try {
      const categories = await getCategories(true)
      set({ categories, loaded: true })
    } catch (err: unknown) {
      // Best-effort: keep whatever is already in the store (empty arrays on a
      // first load) and leave `loaded` unchanged, but log the failure so it
      // is not silently lost.
      console.warn('Failed to load dropdown categories', err)
    }
  },

  getMediaTypes: (categoryName: string) => {
    const cat = get().categories.find(c => c.name === categoryName)
    return cat?.mediaTypes ?? []
  },
}))

View file

@ -0,0 +1,45 @@
import { create } from 'zustand'
import type { Job, ModelConfiguration } from '../types'
import * as jobsApi from '../api/jobs'
/** State and actions exposed by the job store. */
interface JobStore {
  /** Jobs currently held by the client. */
  jobs: Job[]
  /** True while `fetchJobs` is in flight. */
  loading: boolean
  /** Replaces `jobs` with the result of `jobsApi.listJobs`. */
  fetchJobs: () => Promise<void>
  /** Creates jobs for the given files and prepends them to `jobs`. */
  uploadFiles: (files: File[], modelConfig?: ModelConfiguration) => Promise<Job[]>
  /** Deletes the job through the API, then drops it from `jobs`. */
  deleteJob: (id: string) => Promise<void>
  /** Replaces the stored job that has the same id as `job`. */
  updateJob: (job: Job) => void
}

export const useJobStore = create<JobStore>((set) => ({
  jobs: [],
  loading: false,

  fetchJobs: async () => {
    set({ loading: true })
    try {
      const fetched = await jobsApi.listJobs()
      set({ jobs: fetched })
    } finally {
      // Errors propagate to the caller; the flag is cleared regardless.
      set({ loading: false })
    }
  },

  uploadFiles: async (files: File[], modelConfig?: ModelConfiguration) => {
    const created = await jobsApi.createJob(files, modelConfig)
    // Newly created jobs go in front of the existing ones.
    set(state => {
      return { jobs: [...created, ...state.jobs] }
    })
    return created
  },

  deleteJob: async (id: string) => {
    await jobsApi.deleteJob(id)
    set(state => {
      const remaining = state.jobs.filter(existing => existing.id !== id)
      return { jobs: remaining }
    })
  },

  updateJob: (job: Job) => {
    set(state => {
      const next = state.jobs.map(existing => {
        if (existing.id === job.id) return job
        return existing
      })
      return { jobs: next }
    })
  },
}))

Some files were not shown because too many files have changed in this diff Show more