Compare commits

...

No commits in common. "legacy-php-backup" and "main" have entirely different histories.

141 changed files with 18931 additions and 3822 deletions

81
.env.example Normal file
View file

@ -0,0 +1,81 @@
# AC Tool — production environment configuration
# Copy to .env and fill in the required values:
# cp .env.example .env && nano .env
# ── Application ───────────────────────────────────────────────────────────────
# Host port Docker will bind to (container always listens on 8000 internally)
APP_PORT=8100
# ── Azure AD / MSAL (SPA PKCE — no client secret required) ───────────────────
AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
AZURE_REDIRECT_URI=https://ai-sandbox.oliver.solutions/ac-helper/
# ── Admin bootstrap ───────────────────────────────────────────────────────────
# First login with these emails automatically receives the admin role (comma-separated)
ADMIN_EMAIL=
ADMIN_EMAILS=
# ── Emergency access (bypass SSO) ────────────────────────────────────────────
# Set EMERGENCY_TOKEN to a long random string to allow token-based login when
# Azure AD / 2FA is unavailable. Leave blank to disable entirely.
# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"
EMERGENCY_TOKEN=
EMERGENCY_USER_EMAIL=
EMERGENCY_USER_NAME=Emergency Access
# ── OpenAI ────────────────────────────────────────────────────────────────────
OPENAI_API_KEY=
OPENAI_MODEL=gpt-4.1
OPENAI_REASONING_EFFORT=medium
OPENAI_TIMEOUT=3600
OPENAI_MAX_RETRIES=2
# ── Google Gemini ─────────────────────────────────────────────────────────────
# Required: used for AI spreadsheet commands
GEMINI_API_KEY=
GOOGLE_MODEL=gemini-2.0-flash-exp
GOOGLE_TEMPERATURE=0.7
GOOGLE_MAX_OUTPUT_TOKENS=100000
GOOGLE_THINKING_BUDGET=12000
GOOGLE_TIMEOUT=3600
# ── Anthropic Claude ──────────────────────────────────────────────────────────
ANTHROPIC_API_KEY=
ANTHROPIC_MODEL_OPUS=claude-opus-4-5-20251101
ANTHROPIC_MODEL_SONNET=claude-sonnet-4-5-20250929
ANTHROPIC_TEMPERATURE=1
ANTHROPIC_MAX_TOKENS=32000
ANTHROPIC_THINKING_BUDGET=12000
ANTHROPIC_TIMEOUT=300
# ── LlamaCloud (PDF parsing) ──────────────────────────────────────────────────
LLAMA_CLOUD_API_KEY=
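# ── Brief extraction processing ───────────────────────────────────────────────
# Model keys are looked up in MODEL_MAPPINGS (backend/core/config.py); the keys
# defined there are openai-gpt51, anthropic-opus45, anthropic-sonnet45 and
# google-gemini31. Unknown keys raise "Unknown model key".
# NOTE(review): google-gemini20 below is not one of those keys — confirm it is intended.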
DEFAULT_PRIMARY_MODELS=anthropic-sonnet45,google-gemini20
DEFAULT_CONSOLIDATION_MODEL=anthropic-sonnet45
MINIMUM_SUCCESS_THRESHOLD=1
ENABLE_COST_ESTIMATION=true
MAX_PROCESSING_COST_USD=10.00
MAX_CONCURRENT_JOBS=5
# ── PostgreSQL ────────────────────────────────────────────────────────────────
# Password for the ac-tool DB user. Change before deploying.
# Generate with: python3 -c "import secrets; print(secrets.token_hex(24))"
POSTGRES_PASSWORD=
# ── Security ──────────────────────────────────────────────────────────────────
# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"
SESSION_SECRET=
# ── CORS ──────────────────────────────────────────────────────────────────────
ALLOWED_ORIGINS=https://ai-sandbox.oliver.solutions
# ── File upload ───────────────────────────────────────────────────────────────
MAX_UPLOAD_SIZE_MB=200
FILE_RETENTION_HOURS=24
WS_PING_INTERVAL_SECONDS=30
# ── Dev mode (must be false in production) ────────────────────────────────────
DEV_MODE=false

72
.gitignore vendored
View file

@ -1,50 +1,36 @@
# These are some examples of commonly ignored file patterns.
# You should customize this list as applicable to your project.
# Learn more about .gitignore:
# https://www.atlassian.com/git/tutorials/saving-changes/gitignore
# Node artifact files
node_modules/
dist/
# Compiled Java class files
*.class
# Compiled Python bytecode
# Python
__pycache__/
*.py[cod]
*.so
.env
.env.*
!.env.example
venv/
.venv/
*.egg-info/
dist/
build/
# Log files
# Node / Frontend
node_modules/
frontend/dist/
frontend/.vite/
# Data (user data, uploads — never commit)
data/uploads/
data/outputs/
data/sheets/
data/*.json
# Logs
*.log
# Package files
*.jar
# Maven
target/
dist/
# JetBrains IDE
.idea/
# Unit test reports
TEST*.xml
# Generated by MacOS
# OS
.DS_Store
# Generated by Windows
Thumbs.db
# Applications
*.app
*.exe
*.war
# Large media files
*.mp4
*.tiff
*.avi
*.flv
*.mov
*.wmv
# IDE
.idea/
.vscode/
*.swp
*.bak

View file

@ -1,19 +0,0 @@
# Protect sensitive files
# Access control is expressed twice so the rule works on both Apache 2.4+
# (mod_authz_core, "Require") and legacy 2.2 servers ("Order"/"Deny").
# Using only the 2.2 directives on a 2.4 server without mod_access_compat
# results in a 500 error for every request.
<FilesMatch "^(config\.php|sheet_helpers\.php|.*\.json|.*\.log|^\..*|README\.md)">
    <IfModule mod_authz_core.c>
        Require all denied
    </IfModule>
    <IfModule !mod_authz_core.c>
        Order Allow,Deny
        Deny from all
    </IfModule>
</FilesMatch>
# Prevent directory listing
Options -Indexes
# Allow access to specific public files if needed, but the above rule is specific enough.
# The above rule blocks:
# - config.php
# - sheet_helpers.php
# - Any .json file (data.json, sheets/*.json)
# - Any .log file (activity.log)
# - Any hidden file (.git, .DS_Store)
# - README.md
# Ensure index.php is the default index
DirectoryIndex index.php

40
Dockerfile Normal file
View file

@ -0,0 +1,40 @@
# ── Stage 1: Build React frontend ────────────────────────────────────────────
# Throwaway build stage: only its /app/frontend/dist output is copied into the
# runtime image below.
FROM node:22-alpine AS frontend-builder
WORKDIR /app/frontend
# Copy the package manifests on their own first so the dependency-install layer
# can be reused from cache when only application source changes.
COPY frontend/package*.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build
# ── Stage 2: Python runtime ───────────────────────────────────────────────────
FROM python:3.11-slim
# System deps for document processing
# (apt package lists are removed in the same layer to keep the image small)
RUN apt-get update && apt-get install -y --no-install-recommends \
libmagic1 \
libreoffice-core \
libreoffice-writer \
libreoffice-impress \
poppler-utils \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Install Python dependencies
# (requirements.txt is copied before the rest of the backend for layer caching)
COPY backend/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# Copy backend source
COPY backend/ ./
# Copy built frontend into static directory
COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
# Create data directory (will be mounted as volume in production)
RUN mkdir -p data/uploads data/outputs data/sheets
# Port the server binds to inside the container (see --bind below)
EXPOSE 8000
# Run the app factory server.app:create_app() under Hypercorn with the asyncio worker
CMD ["python", "-m", "hypercorn", "server.app:create_app()", "--bind", "0.0.0.0:8000", "--worker-class", "asyncio"]

View file

@ -1,3 +0,0 @@
Sep=,
"Number","Title","Status","Category","Media","Sub media","Destination","Format","Supply date","Live date","End date","Special instructions","Language","Country","Quantity"
"","New activation event","Booked","","","","","","","","","","","","1.00"
Can't render this file because it has a wrong number of fields in line 2.

View file

@ -1,50 +0,0 @@
# Activation Calendar Helper
A web-based tool for managing marketing deliverables with AI-powered automation.
## Features
* **AI Command Interface:** Create and update deliverables using natural language (powered by Gemini).
* **Smart Spreadsheet:** Interactive grid with sorting, filtering, and dependent dropdowns.
* **Multi-Sheet Support:** Create, rename, duplicate, and delete multiple sheets.
* **Strict Data Hierarchy:** Enforced 3-level dependency (Category -> Media -> Sub-media).
* **Export:** Export data to CSV.
* **Secure:** Server-side protection for sensitive files.
## Installation
1. **Clone the repository:**
```bash
git clone git@bitbucket.org:zlalani/ac-helper.git
```
2. **Configure:**
* Ensure `config.php` is present with your `$GEMINI_API_KEY`.
* Ensure the `sheets/` directory is writable by the web server.
3. **Run:**
* Serve the application using a PHP server (e.g., Apache, Nginx, or `php -S localhost:8000`).
## File Structure
* `index.php`: Main application entry point.
* `api.php`: Backend API handling commands and data operations.
* `script.js`: Frontend logic (UI, API calls, Jspreadsheet integration).
* `style.css`: Application styling.
* `sheet_helpers.php`: Helper functions for file-based sheet management.
* `sheets/`: Directory storing individual sheet JSON files.
* `sheets_metadata.json`: Registry of all sheets.
* `config.php`: Configuration and secrets (Excluded from git).
* `.htaccess`: Security rules.
## Security
* `.htaccess` prevents direct access to:
* `config.php`
* `sheet_helpers.php`
* `*.json` (Data files)
* `*.log` (Log files)
* `README.md` (Documentation)
* Hidden files (starting with `.`)
## Usage
See `help.php` (accessible via the "Help" button in the app) for user guide.

View file

@ -1,16 +0,0 @@
Number,Title,Status,Category,Media,Sub-media,Supply date,Live date,Language,Country
DEL-052,HP banners 2026 winter campaign 1,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-053,HP banners 2026 winter campaign 2,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-054,HP banners 2026 winter campaign 3,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-055,HP banners 2026 winter campaign 4,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-056,HP banners 2026 winter campaign 5,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-057,HP banners 2026 winter campaign 6,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-058,HP banners 2026 winter campaign 7,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-059,HP banners 2026 winter campaign 8,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-060,HP banners 2026 winter campaign 9,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-061,HP banners 2026 winter campaign 10,Booked,OOH,OOH,Print,11/1/25,12/15/15,EN,GB
DEL-062,HP banners 2026 winter campaign 11,Booked,OOH,OOH,Print,11/1/25,12/15/15,ES,ES
DEL-063,HP banners 2026 winter campaign 12,Booked,OOH,OOH,Print,11/1/25,12/15/15,ES,ES
DEL-064,HP banners 2026 winter campaign 13,Booked,OOH,OOH,Print,11/1/25,12/15/15,ES,ES
DEL-065,HP banners 2026 winter campaign 14,Booked,OOH,OOH,Print,11/1/25,12/15/15,ES,ES
DEL-066,HP banners 2026 winter campaign 15,Booked,OOH,OOH,Print,11/1/25,12/15/15,ES,ES
1 Number Title Status Category Media Sub-media Supply date Live date Language Country
2 DEL-052 HP banners 2026 winter campaign 1 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
3 DEL-053 HP banners 2026 winter campaign 2 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
4 DEL-054 HP banners 2026 winter campaign 3 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
5 DEL-055 HP banners 2026 winter campaign 4 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
6 DEL-056 HP banners 2026 winter campaign 5 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
7 DEL-057 HP banners 2026 winter campaign 6 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
8 DEL-058 HP banners 2026 winter campaign 7 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
9 DEL-059 HP banners 2026 winter campaign 8 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
10 DEL-060 HP banners 2026 winter campaign 9 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
11 DEL-061 HP banners 2026 winter campaign 10 Booked OOH OOH Print 11/1/25 12/15/15 EN GB
12 DEL-062 HP banners 2026 winter campaign 11 Booked OOH OOH Print 11/1/25 12/15/15 ES ES
13 DEL-063 HP banners 2026 winter campaign 12 Booked OOH OOH Print 11/1/25 12/15/15 ES ES
14 DEL-064 HP banners 2026 winter campaign 13 Booked OOH OOH Print 11/1/25 12/15/15 ES ES
15 DEL-065 HP banners 2026 winter campaign 14 Booked OOH OOH Print 11/1/25 12/15/15 ES ES
16 DEL-066 HP banners 2026 winter campaign 15 Booked OOH OOH Print 11/1/25 12/15/15 ES ES

490
api.php
View file

@ -1,490 +0,0 @@
<?php
header('Content-Type: application/json');
require_once 'config.php';
require_once 'sheet_helpers.php';
$dataFile = 'data.json';
$logFile = 'activity.log';
/**
 * Append a single entry to the activity log.
 *
 * The entry has the form "[Y-m-d H:i:s] [TYPE] [user] message" and always
 * occupies exactly one line of the file named by the global $logFile.
 *
 * @param string $message Text to record. May contain user-supplied content.
 * @param string $type    Entry category tag, e.g. 'INFO', 'COMMAND', 'ERROR'.
 * @return void
 */
function logActivity($message, $type = 'INFO') {
    global $logFile, $CURRENT_USER;
    // Messages can embed raw user commands; collapse line breaks so a crafted
    // command cannot inject what looks like additional, genuine log entries.
    $singleLine = str_replace(["\r\n", "\r", "\n"], ' ', (string) $message);
    $timestamp = date('Y-m-d H:i:s');
    $logEntry = "[$timestamp] [$type] [$CURRENT_USER] $singleLine\n";
    // LOCK_EX keeps concurrent requests from interleaving partial entries.
    file_put_contents($logFile, $logEntry, FILE_APPEND | LOCK_EX);
}
/**
 * Load the legacy single-file data set from the path in the global $dataFile.
 *
 * @return array Decoded rows. An empty array is returned when the file does
 *               not exist, cannot be read, contains invalid JSON, or holds a
 *               JSON value that is not an array/object.
 */
function getData() {
    global $dataFile;
    if (!file_exists($dataFile)) return [];
    $content = file_get_contents($dataFile);
    if ($content === false) return [];
    $decoded = json_decode($content, true);
    // A bare JSON scalar (e.g. "text" or 5) decodes successfully but is not a
    // row list; callers iterate the result, so normalise it to an empty array.
    return is_array($decoded) ? $decoded : [];
}
/**
 * Persist the legacy single-file data set to the path in the global $dataFile.
 *
 * @param mixed $data Value to serialise as pretty-printed JSON.
 * @return bool True when the file was written; false when the value could not
 *              be encoded or the write failed. On an encoding failure the
 *              existing file is left untouched.
 */
function saveData($data) {
    global $dataFile;
    $json = json_encode($data, JSON_PRETTY_PRINT);
    if ($json === false) {
        // Writing the boolean false would truncate the file and lose all data.
        return false;
    }
    // LOCK_EX prevents two concurrent saves from interleaving their output.
    return file_put_contents($dataFile, $json, LOCK_EX) !== false;
}
/**
 * Produce the next deliverable identifier for a row set.
 *
 * Scans every row's 'Number' field (rows without one count as 0), takes the
 * largest numeric suffix after 'DEL-', and returns that value plus one,
 * left-padded with zeros to at least three digits.
 *
 * @param array $data Rows of the current data set.
 * @return string Identifier such as 'DEL-001'.
 */
function generateId($data) {
    $highest = 0;
    foreach ($data as $entry) {
        $suffix = str_replace('DEL-', '', $entry['Number'] ?? '0');
        $highest = max($highest, intval($suffix));
    }
    $next = $highest + 1;
    return 'DEL-' . str_pad($next, 3, '0', STR_PAD_LEFT);
}
// The requested operation is always taken from the query string, for both
// GET and POST requests.
$action = isset($_GET['action']) ? $_GET['action'] : '';

// ── Read-only endpoints ──────────────────────────────────────────────────────
if ($_SERVER['REQUEST_METHOD'] === 'GET') {
    if ($action === 'load') {
        // Legacy single-file data set.
        echo json_encode(getData());
        exit;
    } elseif ($action === 'list_sheets') {
        // Sheet Management Endpoints
        echo json_encode([
            'success' => true,
            'sheets' => getUserSheets($CURRENT_USER),
        ]);
        exit;
    } elseif ($action === 'load_sheet') {
        $requestedId = $_GET['id'] ?? '';
        $sheetRows = loadSheetData($CURRENT_USER, $requestedId);
        // loadSheetData() signals "no such sheet" with null.
        $payload = ($sheetRows === null)
            ? ['success' => false, 'message' => 'Sheet not found']
            : ['success' => true, 'data' => $sheetRows];
        echo json_encode($payload);
        exit;
    }
}
if ($_SERVER['REQUEST_METHOD'] === 'POST') {
$input = json_decode(file_get_contents('php://input'), true);
// Legacy endpoint: replace the single-file data set with the posted rows.
if ($action === 'save') {
    if (isset($input['data']) && is_array($input['data'])) {
        saveData($input['data']);
        echo json_encode(['success' => true]);
    } else {
        echo json_encode(['success' => false, 'message' => 'Invalid data format']);
    }
    exit;
}
// Sheet Management POST Endpoints
// Create a new sheet for the current user, seeded from the legacy data set.
if ($action === 'save_sheet') {
    $name = $input['name'] ?? '';
    $data = getData(); // Get current data
    $sheet = createSheet($CURRENT_USER, $name, $data);
    echo json_encode(['success' => true, 'sheet' => $sheet]);
    exit;
}
// Copy an existing sheet; duplicateSheet() returns a falsy value on failure.
if ($action === 'duplicate_sheet') {
    $sheetId = $input['id'] ?? '';
    $sheet = duplicateSheet($CURRENT_USER, $sheetId);
    if ($sheet) {
        echo json_encode(['success' => true, 'sheet' => $sheet]);
    } else {
        echo json_encode(['success' => false, 'message' => 'Failed to duplicate sheet']);
    }
    exit;
}
if ($action === 'delete_sheet') {
    $sheetId = $input['id'] ?? '';
    deleteSheet($CURRENT_USER, $sheetId);
    echo json_encode(['success' => true]);
    exit;
}
if ($action === 'rename_sheet') {
    $sheetId = $input['id'] ?? '';
    $newName = $input['name'] ?? '';
    $success = renameSheet($CURRENT_USER, $sheetId, $newName);
    echo json_encode(['success' => $success]);
    exit;
}
// Replace the full contents of a sheet with the posted rows.
if ($action === 'update_sheet') {
    $sheetId = $input['id'] ?? '';
    // Require an explicit array payload. Previously a missing or malformed
    // 'data' field silently defaulted to [] and wiped the whole sheet.
    // Sending an empty array on purpose is still accepted.
    if (!isset($input['data']) || !is_array($input['data'])) {
        echo json_encode(['success' => false, 'message' => 'Invalid data format']);
        exit;
    }
    updateSheet($CURRENT_USER, $sheetId, $input['data']);
    echo json_encode(['success' => true]);
    exit;
}
if ($action === 'command') {
$sheetId = $input['sheet_id'] ?? '';
if (empty($sheetId)) {
echo json_encode(['success' => false, 'message' => 'Please create or select a sheet first.']);
exit;
}
$data = loadSheetData($CURRENT_USER, $sheetId) ?? [];
$command = trim($input['command']);
// Log the incoming command
logActivity("Command received: $command", 'COMMAND');
$commandLower = strtolower($command);
// Pre-processing: Common speech-to-text corrections
// Each phrase is matched as a whole word/phrase. Plain substring replacement
// corrupted already-correct input: "deliverables" contains "liver" and became
// "dedeliverablesables"; "rows" contains "row" and became "rowss".
// NOTE(review): $commandLower is not read again in the visible part of this
// handler (the prompt is built from $command) — confirm whether the normalised
// text is meant to be sent to the model.
$correctionMap = [
    'delivery balls' => 'deliverables',
    'delivery ball' => 'deliverable',
    'delivery' => 'deliverables',
    'liver' => 'deliverables',
    'rose' => 'rows',
    'row' => 'rows',
    'oh oh h' => 'OOH',
    'out of home' => 'OOH'
];
foreach ($correctionMap as $wrong => $right) {
    $commandLower = preg_replace('/\b' . preg_quote($wrong, '/') . '\b/', $right, $commandLower);
}
// Pre-processing: Convert number words to digits
$numberMap = [
    'one' => '1', 'two' => '2', 'three' => '3', 'four' => '4', 'five' => '5',
    'six' => '6', 'seven' => '7', 'eight' => '8', 'nine' => '9', 'ten' => '10',
    'eleven' => '11', 'twelve' => '12', 'twenty' => '20', 'thirty' => '30'
];
foreach ($numberMap as $word => $digit) {
    $commandLower = preg_replace('/\b' . $word . '\b/', $digit, $commandLower);
}
// require_once 'config.php'; // Moved to top
// If API Key is present, use LLM
// Debug: Check if key is loaded
if (isset($GEMINI_API_KEY) && $GEMINI_API_KEY !== 'YOUR_API_KEY_HERE') {
// 1. Construct Prompt
$currentDate = date('Y-m-d');
$dataContext = json_encode($data);
$yoloMode = isset($input['yolo_mode']) && $input['yolo_mode'] ? 'TRUE' : 'FALSE';
// Get conversation history from input (if provided)
$conversationHistory = isset($input['history']) ? $input['history'] : '';
$prompt = "
You are an intelligent assistant managing an Activation Calendar.
Current Date: $currentDate
YOLO MODE: $yoloMode
CONVERSATION HISTORY:
$conversationHistory
CURRENT DATA (Context for your actions):
$dataContext
Data Schema:
- Number (Auto-generated, do not invent)
- Title (String)
- Status (Enum: Booked, To-do, In Progress, Done) - Default to 'Booked'
- Category (Enum: 'Digital', 'Print', 'Out of Home', 'Video')
- Media (Dependent on Category - see below)
- Sub-media (Dependent on Category AND Media - see below)
HIERARCHY RULES (Strictly follow these combinations):
1. Category: Digital
- Media: Online Advertising -> Sub-media: Banner, Rich Media, Landing Page, Static Image, Video, Push notifications, .com
- Media: Social -> Sub-media: GIF, Video, Static Image, Multi-Asset Build
- Media: Community management -> Sub-media: (None)
- Media: POS -> Sub-media: Digital
2. Category: Print
- Media: POS -> Sub-media: Print
- Media: Out of Home -> Sub-media: Print
- Media: Direct Mail -> Sub-media: Print
- Media: Packaging -> Sub-media: Print
3. Category: Out of Home
- Media: Out of Home -> Sub-media: Print, Digital
4. Category: Video
- Media: POS -> Sub-media: Video
- Media: Online Advertising -> Sub-media: Video
- Media: Social -> Sub-media: Video
- Media: Broadcast -> Sub-media: TV, Cinema, Radio, VOD
- Format (String) - Extract sizes/dimensions here! e.g., '300x250', 'A4', '10x15cm', 'Full Page', '1080p'.
- Supply date (YYYY-MM-DD)
- Live date (YYYY-MM-DD)
- Language (ISO 2-letter code, UPPERCASE, e.g., 'EN', 'FR', 'ES')
- Country (ISO 2-letter code, UPPERCASE, e.g., 'GB', 'FR', 'ES')
- Quantity (Integer)
Supported Operations:
1. 'create': Create new items.
Output: { \"operation\": \"create\", \"items\": [ { \"Title\": \"...\", \"Format\": \"300x250\", ... } ] }
2. 'update': Update existing items.
Output: { \"operation\": \"update\", \"target_ids\": [\"DEL-001\"], \"values\": { \"Status\": \"Done\" } }
3. 'batch_update': Update multiple items with DIFFERENT values.
Output: { \"operation\": \"batch_update\", \"updates\": [ { \"Number\": \"DEL-001\", \"values\": { \"Title\": \"Row 1\" } } ] }
4. 'question': Ask for clarification (ONLY if YOLO MODE is FALSE).
Output: { \"operation\": \"question\", \"text\": \"Did you mean 2025 or 2026?\" }
IMPORTANT BRAIN RULES:
0. **CRITICAL - MULTIPLE ITEMS vs QUANTITY**:
- When user says \"add 10 deliverables\" or \"create 5 banners\", you MUST create that many SEPARATE items in the array.
- NEVER use Quantity field to represent the count. Quantity should always be 1 unless explicitly stated otherwise.
- Example: \"Add 10 digital deliverables\" = Create 10 separate objects in the items array, each with Quantity=1.
- **MATH VALIDATION (MANDATORY)**:
* BEFORE creating items, COUNT how many items your pattern will create.
* If the user says \"X items\" but your pattern creates Y items where X ≠ Y, you MUST use 'question' operation.
* Example: User says \"10 print ads, 2 formats, 4 countries, alternating men/women\"
Your calculation: 2 formats × 4 countries × 2 titles = 16 items
Since 16 ≠ 10, return: {\"operation\": \"question\", \"text\": \"I calculated 16 items (2 formats × 4 countries × 2 titles). Did you mean 16 items total, or should I create only 10 items with a different pattern?\"}
* ONLY create items if the math matches OR if the user didn't specify a total count.
* **EXCEPTION - CONFIRMATION**: If the user's input is a confirmation of the count (e.g., \"Yes\", \"20\", \"Correct\"), DO NOT ask again. EXECUTE with the confirmed count.
* **CHECK HISTORY**: If you just asked \"I calculated X...\", and user says \"X\", PROCEED immediately.
- If the request is vague (no title, format, etc.), ask for clarification using the 'question' operation.
1. **FORMAT EXTRACTION (Ultra Complex Logic)**:
- **STRICT RULE**: ALWAYS use 'x' as the separator for dimensions. NEVER use 'by'.
- **Pixel Dimensions**: Convert '300 by 250' -> '300x250'. Convert '1920 by 1080' -> '1920x1080'.
- **Physical Dimensions**: Convert '30 by 30 cm' -> '30x30cm'. Convert '10 x 15 cm' -> '10x15cm'. Keep the unit attached or separated by space, but use 'x' for the numbers.
- **Print**: 'A4', 'A3', 'Letter', 'Full Page', 'Half Page'.
- **Social**: If user says \"social media sizes\", INFER standard sizes (e.g., 1080x1080 for Instagram) if possible, or ASK if unsure (unless YOLO is ON).
- **Examples**:
* \"300 by 300\" -> \"300x300\"
* \"30x30 cm\" -> \"30x30cm\"
* \"30 by 30 cm\" -> \"30x30cm\"
2. **YOLO MODE (HIGHEST PRIORITY)**:
- If YOLO MODE is TRUE: **YOU ARE FORBIDDEN FROM ASKING QUESTIONS.**
- You MUST GUESS any missing information.
- Example: If user says \"banners\" and nothing else, create 1 banner with default settings.
- Example: If user says \"2027\" in YOLO mode, assume it's the date for the previous request and EXECUTE.
- NEVER return 'question' operation when YOLO is TRUE.
3. **CLARIFICATION RECOVERY (CRITICAL)**:
- **CONTEXT MERGING**: The user's current input is likely an ANSWER to your previous question.
- **DO NOT** treat the input (e.g., \"2027\", \"300x300\") as a standalone command.
- **COMBINE** it with the previous user messages in the history to form a complete request.
- **Example Flow**:
1. User: \"Create ads\" (Missing format)
2. AI: \"What format?\"
3. User: \"300x250\"
-> **INTERNAL THOUGHT**: \"User said 300x250. Previous was 'Create ads'. Combined: 'Create ads 300x250'.\"
-> **ACTION**: Execute the creation. DO NOT ask again.
- If the user provides *some* missing info but not *all*, AND YOLO is FALSE, you *can* ask for the remaining info, but acknowledge what you received.
- **BUT**: If the user seems frustrated or repeats info, JUST EXECUTE with best guesses.
4. **CONTEXT IS KING**: Use 'CURRENT DATA' to resolve references like \"the French ones\".
5. **PRECISE TARGETING**: Use `target_ids` for updates.
5. **INFER FIELDS**:
- \"OOH\" -> Category='OOH', Media='OOH'.
- \"Instagram Story\" -> Category='Digital', Media='Social', Sub-media='Instagram', Format='9:16'.
- \"English\" -> Language='EN', \"French\" -> Language='FR', \"Spanish\" -> Language='ES'
- \"Great Britain\" or \"UK\" -> Country='GB', \"France\" -> Country='FR', \"Spain\" -> Country='ES'
6. **PATTERN RECOGNITION**:
- If user says \"repeat 3 times\" with different languages, create 3 sets of items.
- Extract formats from phrases like \"200 by 200\", \"300x300\", \"400x400 banner\".
- Pay attention to sequences: \"first 5\", \"next 4\", \"remaining\" for language/country assignments.
CRITICAL: You MUST respond with ONLY valid JSON. No explanations, no conversational text, no markdown.
Your response must be a single JSON object starting with { and ending with }.
If you need clarification, use the 'question' operation:
{ \\\"operation\\\": \\\"question\\\", \\\"text\\\": \\\"Your question here\\\" }
User Command: \"$command\"
";
// 2. Call Gemini API
// User mentioned 2.5, likely referring to the new 2.0 Flash Experimental
$url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key=" . $GEMINI_API_KEY;
$dataPayload = [
"contents" => [
[
"parts" => [
["text" => $prompt]
]
]
]
];
// POST the prompt to the Gemini endpoint and capture the raw response body.
$ch = curl_init($url);
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => json_encode($dataPayload),
    CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
    // Certificate verification stays ON: the API key is part of the URL, so an
    // unverified connection would expose it to any man-in-the-middle. Local
    // stacks without a CA bundle should set curl.cainfo in php.ini instead of
    // disabling verification here.
    CURLOPT_SSL_VERIFYPEER => true,
    CURLOPT_SSL_VERIFYHOST => 2,
    // Bound the request so a stalled upstream cannot hold the worker forever.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 120,
]);
$response = curl_exec($ch);
if ($response === false) {
    $curlError = curl_error($ch);
    curl_close($ch); // release the handle on the error path as well
    echo json_encode(['success' => false, 'message' => 'Curl error: ' . $curlError]);
    exit;
}
curl_close($ch);
// 2. Parse LLM Response
$responseObj = json_decode($response, true);
// Check for API Level Errors (e.g. Invalid Key)
if (isset($responseObj['error'])) {
$errorMsg = $responseObj['error']['message'] ?? 'Unknown Error';
logActivity("Gemini API Error: $errorMsg", 'ERROR');
echo json_encode([
'success' => false,
'message' => "Gemini API Error: " . ($responseObj['error']['message'] ?? 'Unknown Error'),
'debug_raw' => $response
]);
exit;
}
$llmText = $responseObj['candidates'][0]['content']['parts'][0]['text'] ?? '';
if (empty($llmText)) {
echo json_encode([
'success' => false,
'message' => "AI returned an empty response.",
'debug_raw' => $response
]);
exit;
}
// Robust JSON Extraction
// Robust JSON Extraction
$start = strpos($llmText, '{');
$end = strrpos($llmText, '}');
if ($start !== false && $end !== false) {
$responseText = substr($llmText, $start, $end - $start + 1);
} else {
$responseText = $llmText; // Fallback
}
$llmAction = json_decode($responseText, true);
// Add debug info to response
$debugInfo = [
'debug_llm' => $llmText,
'debug_extracted' => $responseText,
'debug_json_error' => json_last_error_msg()
];
if (json_last_error() !== JSON_ERROR_NONE || !$llmAction) {
echo json_encode(array_merge([
'success' => false,
'message' => "Invalid JSON from AI",
], $debugInfo));
exit;
}
// 3. Execute Action
// $llmAction is the decoded JSON object returned by the model. Dispatch on its
// 'operation' field; each mutating branch edits $data in memory and then
// persists the result with updateSheet(). Every field read from the model is
// validated first, because a missing key would emit a PHP warning into the
// output and break the JSON response.
$operation = (isset($llmAction['operation']) && is_string($llmAction['operation']))
    ? $llmAction['operation']
    : '';
if ($operation === 'create') {
    $newItems = $llmAction['items'] ?? [];
    if (!is_array($newItems)) {
        $newItems = [];
    }
    // Find the highest existing DEL-nnn suffix once, then count upwards for
    // each new item (same numbering as rescanning $data per item, but linear).
    $lastId = 0;
    foreach ($data as $row) {
        $num = intval(str_replace('DEL-', '', $row['Number'] ?? '0'));
        if ($num > $lastId) $lastId = $num;
    }
    $count = 0;
    foreach ($newItems as $item) {
        if (!is_array($item)) {
            continue; // ignore malformed entries instead of failing the batch
        }
        $lastId++;
        $item['Number'] = 'DEL-' . str_pad($lastId, 3, '0', STR_PAD_LEFT);
        // Ensure defaults
        if (empty($item['Status'])) $item['Status'] = 'Booked';
        if (empty($item['Quantity'])) $item['Quantity'] = 1;
        $data[] = $item;
        $count++;
    }
    updateSheet($CURRENT_USER, $sheetId, $data);
    logActivity("Created $count items via AI", 'SUCCESS');
    echo json_encode(array_merge(['success' => true, 'message' => "Created $count items.", 'count' => $count], $debugInfo));
} elseif ($operation === 'update') {
    $updates = $llmAction['values'] ?? [];
    if (!is_array($updates)) {
        $updates = [];
    }
    $targetIds = $llmAction['target_ids'] ?? [];
    if (!is_array($targetIds)) {
        $targetIds = [];
    }
    $count = 0;
    foreach ($data as &$row) {
        // With no target_ids the update applies to every row (existing
        // fallback behaviour); otherwise only to the listed Numbers.
        $match = empty($targetIds) || in_array($row['Number'] ?? null, $targetIds);
        if ($match) {
            foreach ($updates as $key => $val) {
                $row[$key] = $val;
            }
            $count++;
        }
    }
    unset($row); // drop the dangling reference into $data
    updateSheet($CURRENT_USER, $sheetId, $data);
    logActivity("Updated $count items via AI", 'SUCCESS');
    echo json_encode(array_merge(['success' => true, 'message' => "Updated $count items.", 'count' => $count], $debugInfo));
} elseif ($operation === 'batch_update') {
    $updates = $llmAction['updates'] ?? [];
    if (!is_array($updates)) {
        $updates = [];
    }
    $count = 0;
    foreach ($updates as $update) {
        // Each entry must name a row and carry a map of new values.
        if (!is_array($update) || !isset($update['Number']) || !is_array($update['values'] ?? null)) {
            continue;
        }
        $id = $update['Number'];
        $values = $update['values'];
        foreach ($data as &$row) {
            if (($row['Number'] ?? null) === $id) {
                foreach ($values as $key => $val) {
                    $row[$key] = $val;
                }
                $count++;
                break;
            }
        }
        unset($row); // drop the dangling reference into $data
    }
    updateSheet($CURRENT_USER, $sheetId, $data);
    logActivity("Batch updated $count items via AI", 'SUCCESS');
    echo json_encode(array_merge(['success' => true, 'message' => "Batch updated $count items.", 'count' => $count], $debugInfo));
} elseif ($operation === 'question') {
    // The model needs clarification; relay its question without touching data.
    $questionText = $llmAction['text'] ?? '';
    logActivity("AI asked question: " . $questionText, 'QUESTION');
    echo json_encode(array_merge(['success' => true, 'question' => $questionText], $debugInfo));
} else {
    echo json_encode(array_merge(['success' => false, 'message' => 'Unknown operation: ' . $operation], $debugInfo));
}
exit;
} // End if (isset($GEMINI_API_KEY))
} // End if ($action === 'command')
} // End if ($_SERVER['REQUEST_METHOD'] === 'POST')
?>

16
backend/.gitignore vendored Normal file
View file

@ -0,0 +1,16 @@
.venv/
__pycache__/
*.pyc
*.pyo
.env
# Data files — never commit user data
data/uploads/*
data/outputs/*
data/sheets/*
data/*.json
!data/uploads/.gitkeep
!data/outputs/.gitkeep
!data/sheets/.gitkeep
data/client_dropdowns/*
!data/client_dropdowns/.gitkeep

150
backend/core/config.py Executable file
View file

@ -0,0 +1,150 @@
"""
Configuration management for Enhanced Brief Processing System
Loads environment variables and provides configuration validation
"""
import os
from typing import List, Dict, Any, Optional
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class Config:
"""Centralized configuration management"""
# API Keys — support both docker-compose names and legacy names
OPENAI_API_KEY: str = os.getenv('OPENAI_API_KEY', '')
ANTHROPIC_API_KEY: str = os.getenv('ANTHROPIC_API_KEY', '')
# GEMINI_API_KEY is the docker-compose / .env name; GOOGLE_API_KEY is the legacy name
GOOGLE_API_KEY: str = os.getenv('GEMINI_API_KEY') or os.getenv('GOOGLE_API_KEY', '')
# LLAMA_CLOUD_API_KEY is the docker-compose / official SDK name
LLAMACLOUD_API_KEY: str = os.getenv('LLAMA_CLOUD_API_KEY') or os.getenv('LLAMACLOUD_API_KEY', '')
# OpenAI Configuration
OPENAI_MODEL: str = os.getenv('OPENAI_MODEL', 'gpt-5.1')
OPENAI_REASONING_EFFORT: str = os.getenv('OPENAI_REASONING_EFFORT', 'medium')
OPENAI_TIMEOUT: int = int(os.getenv('OPENAI_TIMEOUT', '3600'))
OPENAI_MAX_RETRIES: int = int(os.getenv('OPENAI_MAX_RETRIES', '2'))
# Google Configuration
GOOGLE_MODEL: str = os.getenv('GOOGLE_MODEL', 'gemini-3.1-pro-preview')
GOOGLE_TEMPERATURE: float = float(os.getenv('GOOGLE_TEMPERATURE', '0.1'))
GOOGLE_MAX_OUTPUT_TOKENS: int = int(os.getenv('GOOGLE_MAX_OUTPUT_TOKENS', '8192'))
GOOGLE_THINKING_BUDGET: int = int(os.getenv('GOOGLE_THINKING_BUDGET', '12000'))
GOOGLE_TIMEOUT: int = int(os.getenv('GOOGLE_TIMEOUT', '300'))
# Anthropic Configuration
ANTHROPIC_MODEL_OPUS: str = os.getenv('ANTHROPIC_MODEL_OPUS', 'claude-opus-4-5-20251101')
ANTHROPIC_MODEL_SONNET: str = os.getenv('ANTHROPIC_MODEL_SONNET', 'claude-sonnet-4-5-20250929')
ANTHROPIC_TEMPERATURE: float = float(os.getenv('ANTHROPIC_TEMPERATURE', '0.1'))
ANTHROPIC_MAX_TOKENS: int = int(os.getenv('ANTHROPIC_MAX_TOKENS', '32000'))
ANTHROPIC_THINKING_BUDGET: int = int(os.getenv('ANTHROPIC_THINKING_BUDGET', '12000'))
ANTHROPIC_TIMEOUT: int = int(os.getenv('ANTHROPIC_TIMEOUT', '300'))
# Processing Configuration
DEFAULT_PRIMARY_MODELS: str = os.getenv('DEFAULT_PRIMARY_MODELS', 'openai-gpt51,anthropic-sonnet45,google-gemini31')
DEFAULT_CONSOLIDATION_MODEL: str = os.getenv('DEFAULT_CONSOLIDATION_MODEL', 'openai-gpt51')
MINIMUM_SUCCESS_THRESHOLD: int = int(os.getenv('MINIMUM_SUCCESS_THRESHOLD', '1'))
ENABLE_COST_ESTIMATION: bool = os.getenv('ENABLE_COST_ESTIMATION', 'true').lower() == 'true'
MAX_PROCESSING_COST_USD: float = float(os.getenv('MAX_PROCESSING_COST_USD', '10.00'))
# Model Pricing (per 1M tokens)
PRICING = {
'openai-gpt51': {
'input': 1.25,
'cached_input': 0.625,
'output': 10.00
},
'anthropic-opus45': {
'input': 5.00,
'output': 25.00
},
'anthropic-sonnet45': {
'input': 3.00,
'output': 15.00
},
'google-gemini31': {
'input': 1.25,
'output': 5.00
}
}
# Model mappings for CLI compatibility
MODEL_MAPPINGS = {
'openai-gpt51': ('openai', OPENAI_MODEL),
'anthropic-opus45': ('anthropic', ANTHROPIC_MODEL_OPUS),
'anthropic-sonnet45': ('anthropic', ANTHROPIC_MODEL_SONNET),
'google-gemini31': ('google', GOOGLE_MODEL)
}
@classmethod
def validate_api_keys(cls) -> Dict[str, bool]:
"""Validate that required API keys are set"""
return {
'openai': bool(cls.OPENAI_API_KEY and cls.OPENAI_API_KEY != 'your-openai-api-key-here'),
'anthropic': bool(cls.ANTHROPIC_API_KEY and cls.ANTHROPIC_API_KEY != 'your-anthropic-api-key-here'),
'google': bool(cls.GOOGLE_API_KEY and cls.GOOGLE_API_KEY != 'your-google-api-key-here'),
'llamacloud': bool(cls.LLAMACLOUD_API_KEY and cls.LLAMACLOUD_API_KEY != 'your-llamacloud-api-key-here')
}
@classmethod
def get_provider_config(cls, provider: str) -> Dict[str, Any]:
    """Return the settings dictionary for one provider.

    Args:
        provider: 'openai', 'google' or 'anthropic'.

    Returns:
        Option name -> configured value for that provider.

    Raises:
        ValueError: if ``provider`` is none of the names above.
    """
    # (provider, ((option name, class attribute), ...)) — only the attributes
    # of the matching provider are read.
    layout = (
        ('openai', (
            ('api_key', 'OPENAI_API_KEY'),
            ('model', 'OPENAI_MODEL'),
            ('reasoning_effort', 'OPENAI_REASONING_EFFORT'),
            ('timeout', 'OPENAI_TIMEOUT'),
            ('max_retries', 'OPENAI_MAX_RETRIES'),
        )),
        ('google', (
            ('api_key', 'GOOGLE_API_KEY'),
            ('model', 'GOOGLE_MODEL'),
            ('temperature', 'GOOGLE_TEMPERATURE'),
            ('max_output_tokens', 'GOOGLE_MAX_OUTPUT_TOKENS'),
            ('thinking_budget', 'GOOGLE_THINKING_BUDGET'),
            ('timeout', 'GOOGLE_TIMEOUT'),
        )),
        ('anthropic', (
            ('api_key', 'ANTHROPIC_API_KEY'),
            ('model_opus', 'ANTHROPIC_MODEL_OPUS'),
            ('model_sonnet', 'ANTHROPIC_MODEL_SONNET'),
            ('temperature', 'ANTHROPIC_TEMPERATURE'),
            ('max_tokens', 'ANTHROPIC_MAX_TOKENS'),
            ('thinking_budget', 'ANTHROPIC_THINKING_BUDGET'),
            ('timeout', 'ANTHROPIC_TIMEOUT'),
        )),
    )
    for provider_name, options in layout:
        if provider == provider_name:
            return {option: getattr(cls, attribute) for option, attribute in options}
    raise ValueError(f"Unknown provider: {provider}")
@classmethod
def get_default_primary_models(cls) -> List[str]:
    """Get default list of primary analysis models.

    Splits ``DEFAULT_PRIMARY_MODELS`` on commas. Whitespace around each key is
    removed and empty entries are dropped, so values such as ``"a, b"`` or a
    trailing comma do not produce keys that ``get_model_info`` would reject.

    Returns:
        Model keys in the order they were configured; empty when the setting
        is blank.
    """
    stripped = (key.strip() for key in cls.DEFAULT_PRIMARY_MODELS.split(','))
    return [key for key in stripped if key]
@classmethod
def get_model_info(cls, model_key: str) -> tuple:
    """Look up the ``MODEL_MAPPINGS`` entry for a model key.

    Args:
        model_key: A key of ``MODEL_MAPPINGS``.

    Returns:
        The mapped value, a (provider, model name) pair.

    Raises:
        ValueError: if the key is not registered; the message lists the
            available keys.
    """
    mappings = cls.MODEL_MAPPINGS
    if model_key in mappings:
        return mappings[model_key]
    raise ValueError(f"Unknown model key: {model_key}. Available: {list(mappings.keys())}")
@classmethod
def estimate_cost(cls, model_key: str, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
    """Estimate the cost of one request from the ``PRICING`` table.

    Args:
        model_key: Key into ``PRICING``.
        input_tokens: Tokens billed at the 'input' rate.
        output_tokens: Tokens billed at the 'output' rate.
        cached_tokens: Tokens billed at the 'cached_input' rate, or at the
            'input' rate when the entry defines no 'cached_input'.

    Returns:
        Sum of the three components (rates are per 1M tokens), or 0.0 when
        ``model_key`` has no pricing entry.
    """
    price_table = cls.PRICING
    if model_key not in price_table:
        return 0.0

    rates = price_table[model_key]
    # Same evaluation and summation order as the components are listed.
    components = [
        (input_tokens / 1_000_000) * rates['input'],
        (output_tokens / 1_000_000) * rates['output'],
        (cached_tokens / 1_000_000) * rates.get('cached_input', rates['input']),
    ]
    return components[0] + components[1] + components[2]
# Global config instance
# Created once when this module is imported; other modules in this package
# obtain it with `from .config import config`.
config = Config()

View file

@ -0,0 +1,353 @@
"""
Consolidation processor for merging multiple LLM analysis results
"""
import json
import logging
from typing import List, Dict, Any, Tuple
from dataclasses import dataclass
import os
from .llm_service import ProviderManager, LLMResponse
from .config import config
@dataclass
class ConsolidationResult:
    """Outcome of one consolidation run, as returned by the processor."""

    # Base deliverables parsed from the consolidation model's output (BaseDeliverable)
    consolidated_deliverables: List[Any]
    # Individual assets produced by expanding the base deliverables (MarketingAsset)
    expanded_assets: List[Any]
    # Statistics about the run: models, token usage, costs, timings
    consolidation_metadata: Dict[str, Any]
    # Warnings reported while expanding deliverables
    warnings: List[str]
class ConsolidationProcessor:
    """Processes multiple LLM analysis results into a single consolidated output"""

    def __init__(self):
        """Create the class-named logger and the provider manager used for model lookups."""
        self.logger = logging.getLogger(self.__class__.__name__)
        self.provider_manager = ProviderManager()

    async def consolidate_results(
        self,
        analysis_responses: List[LLMResponse],
        consolidation_model: str,
        document_content: str = ""
    ) -> ConsolidationResult:
        """
        Consolidate multiple analysis results using the specified consolidation model

        Args:
            analysis_responses: List of LLM responses from primary analysis
            consolidation_model: Model key for consolidation (e.g., 'anthropic-opus45')
            document_content: Accepted for interface compatibility; this method
                does not currently read it.

        Returns:
            ConsolidationResult with final consolidated deliverables

        Raises:
            Exception: when the consolidation model reports failure.
            json.JSONDecodeError: when the model output is not valid JSON.
            KeyError: when the parsed output has no 'assets' key.
            Any error raised while preparing the prompt or expanding deliverables
            is logged and re-raised unchanged.
        """
        self.logger.info(f"Starting consolidation with {len(analysis_responses)} model results using {consolidation_model}")

        # Log individual model deliverable counts
        deliverable_counts = []
        for i, response in enumerate(analysis_responses):
            if response.success:
                count = self._count_deliverables_in_response(response.content)
                deliverable_counts.append(count)
                self.logger.info(f"Model {i+1} ({response.provider} {response.model_used}): {count} base deliverables")

        if deliverable_counts:
            avg_deliverables = sum(deliverable_counts) / len(deliverable_counts)
            self.logger.info(f"Average deliverables across {len(deliverable_counts)} models: {avg_deliverables:.1f}")
        else:
            self.logger.warning("No successful model responses to analyze")

        # Extract and format results from all models
        formatted_results = self._format_model_results(analysis_responses)

        # Prepare consolidation prompt
        consolidation_prompt = await self._prepare_consolidation_prompt(formatted_results)

        # Load system message for consolidation
        system_message = self._load_consolidation_system_prompt()

        # Execute consolidation using specified model
        try:
            provider = self.provider_manager.get_provider(consolidation_model)
            messages = provider.prepare_messages(system_message, consolidation_prompt)

            # Use the universal base deliverable schema for structured output
            from .process_brief_enhanced import UNIVERSAL_BASE_DELIVERABLE_SCHEMA

            consolidation_response = await provider.generate_response(
                messages=messages,
                schema=UNIVERSAL_BASE_DELIVERABLE_SCHEMA
            )

            if not consolidation_response.success:
                raise Exception(f"Consolidation failed: {consolidation_response.error}")

            # Parse the consolidated results - import here to avoid circular import
            from .process_brief_enhanced import BaseDeliverable, expand_deliverables

            try:
                consolidated_data = json.loads(consolidation_response.content)

                if 'assets' not in consolidated_data:
                    # PROBLEM DETECTED - Log everything verbosely
                    self.logger.error("[CONSOLIDATION] ========== MISSING 'assets' KEY - VERBOSE DEBUG ==========")
                    self.logger.error(f"[CONSOLIDATION] Model: {consolidation_model}")
                    self.logger.error(f"[CONSOLIDATION] Response success: {consolidation_response.success}")
                    self.logger.error(f"[CONSOLIDATION] Response content length: {len(consolidation_response.content)} chars")
                    self.logger.error(f"[CONSOLIDATION] Response content type: {type(consolidation_response.content)}")
                    self.logger.error(f"[CONSOLIDATION] Full raw content: {consolidation_response.content}")
                    self.logger.error(f"[CONSOLIDATION] Parsed data type: {type(consolidated_data)}")
                    self.logger.error(f"[CONSOLIDATION] Parsed data keys: {list(consolidated_data.keys()) if isinstance(consolidated_data, dict) else 'N/A'}")
                    self.logger.error(f"[CONSOLIDATION] Full parsed data: {consolidated_data}")

                    # Save debug file
                    self._save_consolidation_debug(consolidation_response, consolidated_data, analysis_responses)
                    raise KeyError("Response missing 'assets' key")

                # SUCCESS - Just log summary
                self.logger.info(f"Consolidation completed: {len(consolidated_data['assets'])} base deliverables")
                base_deliverables = [BaseDeliverable(**item) for item in consolidated_data['assets']]

            except json.JSONDecodeError as e:
                self.logger.error("[CONSOLIDATION] ========== JSON PARSE ERROR ==========")
                self.logger.error(f"[CONSOLIDATION] Parse error: {e}")
                self.logger.error(f"[CONSOLIDATION] Full response content: {consolidation_response.content}")
                raise
            except KeyError:
                # Already logged in detail above; kept separate so the generic
                # handler below does not log the full content a second time.
                raise
            except Exception as e:
                self.logger.error(f"[CONSOLIDATION] Error processing consolidation response: {e}")
                self.logger.error(f"[CONSOLIDATION] Full response content: {consolidation_response.content}")
                raise

            # Expand consolidated base deliverables into individual assets
            expanded_assets, expansion_warnings = expand_deliverables(base_deliverables)
            self.logger.info(f"Expansion completed: {len(expanded_assets)} individual assets")

            # Create consolidation metadata
            metadata = self._create_consolidation_metadata(
                analysis_responses,
                consolidation_response,
                base_deliverables,
                expanded_assets
            )

            return ConsolidationResult(
                consolidated_deliverables=base_deliverables,
                expanded_assets=expanded_assets,
                consolidation_metadata=metadata,
                warnings=expansion_warnings
            )

        except Exception as e:
            self.logger.error(f"Consolidation failed: {e}")
            raise

    def _count_deliverables_in_response(self, content: str) -> int:
        """Count the entries under 'assets' in a model's JSON response.

        Returns 0 when the content is not valid JSON, is not a JSON object,
        or has no 'assets' key.
        """
        try:
            data = json.loads(content)
            if isinstance(data, dict) and 'assets' in data:
                return len(data['assets'])
            return 0
        except (json.JSONDecodeError, KeyError, TypeError):
            return 0

    def _format_model_results(self, responses: List[LLMResponse]) -> str:
        """Format analysis results from multiple models for the consolidation prompt.

        Successful responses become a "**MODEL n: ...**" header followed by a
        fenced block (pretty-printed when the content parses as JSON, raw
        otherwise). Failed responses are skipped with a warning; the numbering
        still reflects each response's position in ``responses``.
        """
        formatted_results = []
        for i, response in enumerate(responses):
            if not response.success:
                self.logger.warning(f"Skipping failed response from {response.provider} {response.model_used}: {response.error}")
                continue

            model_info = f"**MODEL {i+1}: {response.provider.upper()} {response.model_used}**"
            try:
                # Round-trip through the parser to validate and pretty-print
                formatted_content = json.dumps(json.loads(response.content), indent=2)
            except json.JSONDecodeError:
                # Fallback to raw content if not valid JSON
                formatted_content = response.content
            formatted_results.append(f"{model_info}\n```json\n{formatted_content}\n```")
        return "\n\n".join(formatted_results)

    async def _prepare_consolidation_prompt(self, formatted_results: str) -> str:
        """Load the consolidation prompt template and insert the model results.

        The template is read from ``prompts/consolidation_analysis.txt`` one
        directory above this file's directory, in the default executor so the
        event loop is not blocked. Its ``{models_results}`` placeholder is
        filled via ``str.format``.

        Raises:
            FileNotFoundError: when the template file is missing.
            Exception: any other read or formatting error, logged and re-raised.
        """
        import asyncio

        def _read_template():
            """Blocking template read operation for thread pool"""
            # Go up one level from this file's directory to find prompts/
            prompt_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'prompts', 'consolidation_analysis.txt')
            with open(prompt_path, 'r', encoding='utf-8') as f:
                return f.read()

        try:
            loop = asyncio.get_running_loop()
            template = await loop.run_in_executor(None, _read_template)
            return template.format(models_results=formatted_results)
        except FileNotFoundError:
            self.logger.error("Consolidation prompt template not found")
            raise
        except Exception as e:
            self.logger.error(f"Error preparing consolidation prompt: {e}")
            raise

    def _load_consolidation_system_prompt(self) -> str:
        """Return the fixed system prompt used for the consolidation call."""
        return """You are an expert data consolidation specialist. Your task is to intelligently merge multiple LLM analysis results into the most complete and accurate dataset possible. Follow the consolidation strategy provided in the user prompt, with emphasis on completeness and thoroughness. Return only valid JSON in the specified format."""

    def _save_consolidation_debug(self, consolidation_response, consolidated_data, analysis_responses):
        """Write a JSON snapshot of a failed consolidation to the temp directory.

        Best effort: any failure while building or writing the snapshot is
        logged and swallowed so it never masks the original error.
        """
        try:
            import tempfile
            from datetime import datetime

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            debug_file = os.path.join(tempfile.gettempdir(), f"consolidation_debug_{timestamp}.json")

            debug_info = {
                "timestamp": timestamp,
                "consolidation_model": consolidation_response.model_used,
                "consolidation_provider": consolidation_response.provider,
                "raw_content": consolidation_response.content,
                "parsed_data": consolidated_data,
                "response_success": consolidation_response.success,
                "response_error": consolidation_response.error,
                "token_usage": {
                    "input": consolidation_response.token_usage.input_tokens,
                    "output": consolidation_response.token_usage.output_tokens,
                    "total": consolidation_response.token_usage.get_total()
                },
                "primary_analysis_results": [
                    {
                        "provider": r.provider,
                        "model": r.model_used,
                        "success": r.success,
                        "deliverable_count": self._count_deliverables_in_response(r.content) if r.success else 0,
                        "content_preview": r.content[:500] if r.success else r.error
                    }
                    for r in analysis_responses
                ]
            }

            # Explicit encoding keeps the dump independent of the platform default;
            # default=str keeps one non-serializable value from losing the whole dump.
            with open(debug_file, 'w', encoding='utf-8') as f:
                json.dump(debug_info, f, indent=2, default=str)

            self.logger.error(f"[CONSOLIDATION] Debug info saved to: {debug_file}")
        except Exception as e:
            self.logger.error(f"[CONSOLIDATION] Failed to save debug info: {e}")

    def _find_model_key(self, provider_name, model_name):
        """Return the MODEL_MAPPINGS key whose (provider, model) pair matches, or None."""
        for key in config.MODEL_MAPPINGS.keys():
            mapped_provider, mapped_model = config.get_model_info(key)
            if mapped_provider == provider_name and mapped_model == model_name:
                return key
        return None

    def _estimate_response_cost(self, response) -> float:
        """Best-effort cost estimate for one response.

        Returns 0.0 when no model key matches the response or when estimation
        fails; failures are logged rather than silently discarded.
        """
        try:
            model_key = self._find_model_key(response.provider, response.model_used)
            if not model_key:
                return 0.0
            provider = self.provider_manager.get_provider(model_key)
            return float(provider.estimate_cost(
                response.token_usage.input_tokens,
                response.token_usage.output_tokens,
                response.token_usage.cached_input_tokens
            ))
        except Exception as e:
            self.logger.warning(
                f"Cost estimation failed for {response.provider} {response.model_used}: {e}"
            )
            return 0.0

    def _create_consolidation_metadata(
        self,
        analysis_responses: List[LLMResponse],
        consolidation_response: LLMResponse,
        base_deliverables: List[Any],
        expanded_assets: List[Any]
    ) -> Dict[str, Any]:
        """Create metadata about the consolidation process.

        Returns:
            Dictionary with the consolidation model/provider, success counts,
            deliverable and asset counts, per-model statistics keyed by
            "<provider>_<model>", token totals, a cost breakdown rounded to
            four decimals, and processing times.
        """
        model_stats = {}
        total_primary_tokens = 0
        total_primary_cost = 0.0

        for response in analysis_responses:
            model_key = f"{response.provider}_{response.model_used}"
            if response.success:
                tokens = response.token_usage.get_total()
                cost = self._estimate_response_cost(response)
                model_stats[model_key] = {
                    'tokens_used': tokens,
                    'processing_time': response.processing_time,
                    'success': True,
                    'estimated_cost': cost
                }
                total_primary_tokens += tokens
                total_primary_cost += cost
            else:
                model_stats[model_key] = {
                    'tokens_used': 0,
                    'processing_time': response.processing_time,
                    'success': False,
                    'error': response.error,
                    'estimated_cost': 0.0
                }

        consolidation_cost = self._estimate_response_cost(consolidation_response)
        consolidation_tokens = consolidation_response.token_usage.get_total()
        successful = [r for r in analysis_responses if r.success]

        return {
            'consolidation_model': consolidation_response.model_used,
            'consolidation_provider': consolidation_response.provider,
            'primary_models_used': len(successful),
            'total_models_attempted': len(analysis_responses),
            'base_deliverables_count': len(base_deliverables),
            'final_assets_count': len(expanded_assets),
            'model_statistics': model_stats,
            'token_usage': {
                'primary_analysis_total': total_primary_tokens,
                'consolidation_tokens': consolidation_tokens,
                'grand_total': total_primary_tokens + consolidation_tokens
            },
            'cost_breakdown': {
                'primary_analysis_cost': round(total_primary_cost, 4),
                'consolidation_cost': round(consolidation_cost, 4),
                'total_cost': round(total_primary_cost + consolidation_cost, 4)
            },
            'processing_times': {
                'consolidation_time': consolidation_response.processing_time,
                # max(1, ...) avoids dividing by zero when nothing succeeded
                'primary_models_avg_time': sum(r.processing_time for r in successful) / max(1, len(successful))
            }
        }

View file

@ -0,0 +1,20 @@
"""
LLM Service module for Enhanced Brief Processing System
Provides abstracted access to multiple LLM providers
"""
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from .openai_provider import OpenAIProvider
from .google_provider import GoogleProvider
from .anthropic_provider import AnthropicProvider
from .provider_manager import ProviderManager
__all__ = [
'BaseLLMProvider',
'LLMResponse',
'TokenUsage',
'OpenAIProvider',
'GoogleProvider',
'AnthropicProvider',
'ProviderManager'
]

View file

@ -0,0 +1,375 @@
"""
Anthropic provider implementation for Claude Opus 4.5 and Sonnet 4.5
"""
import time
import json
import logging
from typing import List, Dict, Any, Optional
try:
from anthropic import AsyncAnthropic
anthropic = AsyncAnthropic # Keep reference for compatibility checks
except ImportError:
AsyncAnthropic = None
anthropic = None
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from ..config import config
class AnthropicProvider(BaseLLMProvider):
    """Anthropic Claude provider supporting Opus and Sonnet models"""

    def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, **kwargs):
        """Resolve settings from arguments/config and create the async client.

        Args:
            api_key: Overrides the configured Anthropic API key when given.
            model_name: Explicit model id; when omitted the model is chosen from
                ``kwargs['model_variant']`` (default 'sonnet').
            **kwargs: Optional overrides for 'temperature', 'max_tokens',
                'thinking_budget' and 'timeout'; all kwargs are also passed to
                the base class.

        Raises:
            ImportError: when the ``anthropic`` package is not installed.
        """
        if AsyncAnthropic is None:
            raise ImportError("anthropic package not installed. Run: pip install anthropic>=0.67.0")

        provider_config = config.get_provider_config('anthropic')
        super().__init__(
            api_key=api_key or provider_config['api_key'],
            model_name=model_name or self._select_model(kwargs.get('model_variant', 'sonnet'), provider_config),
            **kwargs
        )
        self.temperature = kwargs.get('temperature', provider_config['temperature'])
        self.max_tokens = kwargs.get('max_tokens', provider_config['max_tokens'])
        self.thinking_budget = kwargs.get('thinking_budget', provider_config['thinking_budget'])
        self.timeout = kwargs.get('timeout', provider_config['timeout'])
        self.client = None
        self._setup_client()

    def _select_model(self, variant: str, provider_config: Dict[str, Any]) -> str:
        """Select the configured Claude model id for a variant name.

        Opus aliases map to 'model_opus'; everything else (including unknown
        names) maps to 'model_sonnet'.
        """
        if variant.lower() in ['opus', 'opus4', 'opus45']:
            return provider_config['model_opus']
        # Sonnet aliases and any unrecognised variant: default to Sonnet for
        # better cost-performance ratio
        return provider_config['model_sonnet']

    def _setup_client(self):
        """Initialize AsyncAnthropic client"""
        try:
            self.client = AsyncAnthropic(
                api_key=self.api_key,
                timeout=self.timeout
            )
            self.logger.info(f"AsyncAnthropic client initialized - Model: {self.model_name}")
        except Exception as e:
            self.logger.error(f"Failed to initialize AsyncAnthropic client: {e}")
            raise

    async def generate_response(
        self,
        messages: List[Dict[str, str]],
        schema: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> LLMResponse:
        """Generate response using Anthropic Claude.

        Uses the two-call pattern when both a thinking budget and a schema are
        present, otherwise a single call. Errors are reported through the
        returned ``LLMResponse`` (``success=False``), not raised.
        """
        start_time = time.time()

        # Thinking and forced tool use are handled in separate calls
        if self.thinking_budget > 0 and schema:
            self.logger.info(f"Anthropic Two-Call Request - Model: {self.model_name} (thinking: {self.thinking_budget} budget + schema)")
            return await self._two_call_with_thinking(messages, schema, start_time, **kwargs)

        self.logger.info(f"Anthropic Single-Call Request - Model: {self.model_name}")
        return await self._single_call(messages, schema, start_time, **kwargs)

    async def _two_call_with_thinking(
        self,
        messages: List[Dict[str, str]],
        schema: Dict[str, Any],
        start_time: float,
        **kwargs
    ) -> LLMResponse:
        """Execute two-call pattern: thinking analysis + schema formatting.

        Call A runs with thinking enabled and no tools; Call B converts Call A's
        text into the schema through a forced tool call. Token usage of both
        calls is summed in the returned response.
        """
        try:
            system_message, user_messages = self._prepare_messages(messages)

            # === CALL A: Analysis with Thinking (No Forced Tools) ===
            self.logger.info(" Call A: Analysis with thinking (no forced tools)")
            enhanced_messages = self._add_schema_guidance_to_messages(user_messages, schema)

            # No 'temperature' here: the Messages API rejects a custom temperature
            # while extended thinking is enabled. A caller can still pass one
            # explicitly through kwargs.
            call_a_params = {
                'model': self.model_name,
                'messages': enhanced_messages,
                'max_tokens': self.max_tokens,
                'thinking': {"type": "enabled", "budget_tokens": self.thinking_budget},
                **kwargs
            }
            if system_message:
                call_a_params['system'] = system_message

            analysis_response = await self.client.messages.create(**call_a_params)

            analysis_text = self._extract_text_content(analysis_response.content)
            if not analysis_text:
                raise Exception("Call A produced no analysis text")
            self.logger.info(f" Call A completed: {len(analysis_text)} chars analysis")

            # === CALL B: Schema Formatting (No Thinking) ===
            self.logger.info(" Call B: Schema formatting (no thinking)")
            formatting_prompt = f"Convert the following analysis into the required JSON schema. Call extract_structured_data exactly once with the final result.\n\nAnalysis:\n{analysis_text}"
            call_b_params = {
                'model': self.model_name,
                'messages': [{"role": "user", "content": formatting_prompt}],
                'max_tokens': self.max_tokens,
                'temperature': self.temperature,
                'tools': [self._create_tool_from_schema(schema)],
                'tool_choice': {"type": "tool", "name": "extract_structured_data"},
                **kwargs
            }
            format_response = await self.client.messages.create(**call_b_params)

            structured_content = self._extract_tool_response(format_response.content)
            if not structured_content:
                raise Exception("Call B failed to produce structured output")
            self.logger.info(" Call B completed: Structured JSON extracted")

            # Combine token usage from both calls
            combined_token_usage = TokenUsage()
            self._record_usage(combined_token_usage, analysis_response)
            self._record_usage(combined_token_usage, format_response)

            llm_response = LLMResponse(
                content=structured_content,
                raw_response={'call_a': analysis_response, 'call_b': format_response},
                token_usage=combined_token_usage,
                model_used=self.model_name,
                provider="anthropic",
                success=True,
                processing_time=time.time() - start_time
            )
            # Same summary log line the single-call path emits
            self.log_response(llm_response)
            return llm_response

        except Exception as e:
            self.logger.error(f"Anthropic two-call request failed: {e}")
            return self._failure_response(e, start_time)

    async def _single_call(
        self,
        messages: List[Dict[str, str]],
        schema: Optional[Dict[str, Any]],
        start_time: float,
        **kwargs
    ) -> LLMResponse:
        """Execute single-call pattern, used when thinking is off or no schema is given.

        With a schema the call forces the ``extract_structured_data`` tool;
        without one, thinking is enabled when a budget is configured.
        """
        try:
            system_message, user_messages = self._prepare_messages(messages)

            request_params = {
                'model': self.model_name,
                'messages': user_messages,
                'max_tokens': self.max_tokens,
                **kwargs
            }

            # Add thinking only if no schema (forced tool use conflicts with it)
            if not schema and self.thinking_budget > 0:
                request_params['thinking'] = {"type": "enabled", "budget_tokens": self.thinking_budget}
            else:
                # Custom temperature is only sent when thinking is off; an
                # explicit value in kwargs still wins.
                request_params.setdefault('temperature', self.temperature)

            if system_message:
                request_params['system'] = system_message

            # Handle structured output using tools if schema provided
            if schema:
                request_params['tools'] = [self._create_tool_from_schema(schema)]
                request_params['tool_choice'] = {"type": "tool", "name": "extract_structured_data"}

            response = await self.client.messages.create(**request_params)

            if schema and response.content:
                content = self._extract_tool_response(response.content)
            else:
                # Join text blocks instead of reading content[0]: with thinking
                # enabled the first block is not a text block.
                content = self._join_text_blocks(response.content or [])

            token_usage = TokenUsage()
            self._record_usage(token_usage, response)

            llm_response = LLMResponse(
                content=content,
                raw_response=response,
                token_usage=token_usage,
                model_used=self.model_name,
                provider="anthropic",
                success=True,
                processing_time=time.time() - start_time
            )
            self.log_response(llm_response)
            return llm_response

        except Exception as e:
            self.logger.error(f"Anthropic single-call request failed: {e}")
            return self._failure_response(e, start_time)

    def _record_usage(self, token_usage: TokenUsage, response: Any) -> None:
        """Add one API response's usage counters to ``token_usage``.

        The API's ``cache_read_input_tokens`` is passed under the
        'cached_input_tokens' key, which is the name ``TokenUsage.add_usage`` reads.
        """
        if not hasattr(response, 'usage'):
            return
        usage = response.usage
        token_usage.add_usage({
            'input_tokens': getattr(usage, 'input_tokens', 0),
            'output_tokens': getattr(usage, 'output_tokens', 0),
            'cached_input_tokens': getattr(usage, 'cache_read_input_tokens', 0)
        })

    def _failure_response(self, error: Exception, start_time: float) -> LLMResponse:
        """Build the standard unsuccessful response for a failed request."""
        return LLMResponse(
            content="",
            raw_response=None,
            token_usage=TokenUsage(),
            model_used=self.model_name,
            provider="anthropic",
            success=False,
            error=str(error),
            processing_time=time.time() - start_time
        )

    def _add_schema_guidance_to_messages(self, user_messages: List[Dict[str, str]], schema: Dict[str, Any]) -> List[Dict[str, str]]:
        """Return a copy of the messages with schema guidance appended to the last one.

        The input list and its dictionaries are not modified.
        """
        enhanced_messages = user_messages.copy()
        schema_description = schema.get('description', 'structured data')

        if enhanced_messages:
            last_message = enhanced_messages[-1]
            schema_guidance = f"\n\nPlease analyze this document and provide your findings according to this schema structure: {schema_description}. Focus on extracting base deliverables with multiplier arrays as specified in the schema."
            # Replace rather than mutate so the caller's dict stays untouched
            enhanced_messages[-1] = {
                'role': last_message['role'],
                'content': last_message['content'] + schema_guidance
            }
        return enhanced_messages

    @staticmethod
    def _join_text_blocks(content: List[Any]) -> str:
        """Concatenate the ``text`` of every block whose type is 'text'."""
        return "".join(
            block.text
            for block in content
            if hasattr(block, 'type') and block.type == 'text'
        )

    def _extract_text_content(self, content: List[Any]) -> str:
        """Extract text content from Anthropic response, ignoring thinking blocks"""
        return self._join_text_blocks(content).strip()

    def _prepare_messages(self, messages: List[Dict[str, str]]) -> tuple:
        """Separate system messages from user/assistant messages for Anthropic format.

        Returns:
            (system_message, other_messages). When several system messages are
            present the last one wins; ``system_message`` is None when there is none.
        """
        system_message = None
        user_messages = []
        for message in messages:
            if message['role'] == 'system':
                system_message = message['content']
            else:
                user_messages.append({
                    'role': message['role'],
                    'content': message['content']
                })
        return system_message, user_messages

    def _create_tool_from_schema(self, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Convert JSON schema to Anthropic tool format for structured output.

        Accepts either a wrapper with a 'schema' entry or the schema itself.
        """
        return {
            "name": "extract_structured_data",
            "description": schema.get('description', 'Extract structured data from the document'),
            "input_schema": schema.get('schema', schema)
        }

    def _extract_tool_response(self, content: List[Any]) -> str:
        """Return the first tool_use block's input as JSON, else the joined text blocks."""
        for block in content:
            if hasattr(block, 'type') and block.type == 'tool_use':
                return json.dumps(block.input)
        # Fallback to text content
        return self._join_text_blocks(content)

    def validate_config(self) -> bool:
        """Validate Anthropic configuration (API key present, package installed)."""
        if not self.api_key or self.api_key == 'your-anthropic-api-key-here':
            self.logger.error("Anthropic API key not configured")
            return False
        if AsyncAnthropic is None:
            self.logger.error("anthropic package not installed")
            return False
        return True

    def estimate_cost(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
        """Estimate cost using the Opus pricing entry when the model name contains 'opus', else Sonnet's."""
        pricing_key = 'anthropic-opus45' if 'opus' in self.model_name.lower() else 'anthropic-sonnet45'
        return config.estimate_cost(pricing_key, input_tokens, output_tokens, cached_tokens)

    def get_max_tokens(self) -> int:
        """Get maximum token limit for Claude models"""
        # NOTE(review): fixed value for every model this class serves — confirm
        # against the limits of the configured Opus/Sonnet models.
        return 200000

    def get_model_variant(self) -> str:
        """Get the model variant (opus or sonnet)"""
        return 'opus' if 'opus' in self.model_name.lower() else 'sonnet'

View file

@ -0,0 +1,116 @@
"""
Base provider class for LLM service abstraction
Defines common interface that all providers must implement
"""
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass
from enum import Enum
import logging
class ModelType(Enum):
    """Model identifiers known to the LLM service layer."""

    GPT51 = "gpt-5.1"
    CLAUDE_OPUS = "claude-opus-4-5"
    CLAUDE_SONNET = "claude-sonnet-4-5"
    GEMINI_PRO = "gemini-3.1-pro"
@dataclass
class TokenUsage:
    """Token usage tracking across different providers"""

    input_tokens: int = 0
    output_tokens: int = 0
    cached_input_tokens: int = 0

    @staticmethod
    def _first_count(usage_dict: Dict[str, int], *names: str) -> int:
        """Return the first truthy value found under ``names``, or 0.

        Missing keys, ``None`` and 0 are all skipped, so a provider that
        reports ``None`` for a counter does not break the running totals.
        """
        for name in names:
            value = usage_dict.get(name)
            if value:
                return value
        return 0

    def add_usage(self, usage_dict: Dict[str, int]):
        """Add token usage from a provider response to the running totals.

        Accepted key names, checked in order:
            input:  'input_tokens', 'prompt_tokens'
            output: 'output_tokens', 'completion_tokens'
            cached: 'cached_input_tokens', 'prompt_tokens_cached',
                    'cache_read_input_tokens' (the name Anthropic's usage
                    object uses, so such dictionaries are no longer dropped)
        """
        self.input_tokens += self._first_count(usage_dict, 'input_tokens', 'prompt_tokens')
        self.output_tokens += self._first_count(usage_dict, 'output_tokens', 'completion_tokens')
        self.cached_input_tokens += self._first_count(
            usage_dict, 'cached_input_tokens', 'prompt_tokens_cached', 'cache_read_input_tokens'
        )

    def get_total(self) -> int:
        """Get total token count (input + output + cached input).

        NOTE(review): this assumes providers report cached tokens separately
        from input tokens — confirm per provider.
        """
        return self.input_tokens + self.output_tokens + self.cached_input_tokens
@dataclass
class LLMResponse:
    """Provider-independent result of one LLM request."""

    # Response text (JSON text when structured output was requested)
    content: str
    # Provider-specific response object(s); None on failure
    raw_response: Any
    # Token counters accumulated for this request
    token_usage: TokenUsage
    # Model id the request was sent to
    model_used: str
    # Provider name, e.g. "anthropic"
    provider: str
    # False when the request failed; `error` then carries the message
    success: bool = True
    error: Optional[str] = None
    # Elapsed time measured by the provider implementation
    processing_time: float = 0.0
class BaseLLMProvider(ABC):
    """Abstract base class for all LLM providers"""

    def __init__(self, api_key: str, model_name: str, **kwargs):
        """Store credentials, model id and extra options; create a class-named logger.

        Args:
            api_key: Provider API key.
            model_name: Model id requests are sent to.
            **kwargs: Provider-specific options, kept as ``self.config``.
        """
        self.api_key = api_key
        self.model_name = model_name
        self.config = kwargs
        self.logger = logging.getLogger(f"{self.__class__.__name__}")

    @abstractmethod
    async def generate_response(
        self,
        messages: List[Dict[str, str]],
        schema: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> LLMResponse:
        """
        Generate response from the LLM provider

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            schema: Optional JSON schema for structured output
            **kwargs: Provider-specific parameters

        Returns:
            LLMResponse object with standardized format
        """
        pass

    @abstractmethod
    def validate_config(self) -> bool:
        """Validate provider configuration"""
        pass

    @abstractmethod
    def estimate_cost(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
        """Estimate cost for token usage.

        Args:
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.
            cached_tokens: Number of cached input tokens. Optional so existing
                two-argument calls keep working; declared here because callers
                pass it positionally.
        """
        pass

    @abstractmethod
    def get_max_tokens(self) -> int:
        """Get maximum token limit for this provider/model"""
        pass

    def get_provider_name(self) -> str:
        """Get provider name: the class name without 'Provider', lowercased."""
        return self.__class__.__name__.replace('Provider', '').lower()

    def prepare_messages(self, system_prompt: str, user_prompt: str) -> List[Dict[str, str]]:
        """Prepare messages in standard format: one system message, then one user message."""
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

    def log_response(self, response: LLMResponse, request_info: str = ""):
        """Log a one-line summary (model, tokens, time, success) at INFO level.

        Args:
            response: The response to summarise.
            request_info: Optional label appended as ", Request: ...".
        """
        self.logger.info(
            f"{self.get_provider_name().title()} Response - "
            f"Model: {response.model_used}, "
            f"Tokens: {response.token_usage.input_tokens} input / {response.token_usage.output_tokens} output, "
            f"Time: {response.processing_time:.2f}s, "
            f"Success: {response.success}"
            + (f", Request: {request_info}" if request_info else "")
        )

View file

@ -0,0 +1,256 @@
"""
Google provider implementation for Gemini 2.5 Pro using the new google-genai SDK
"""
import time
import json
import logging
from typing import List, Dict, Any, Optional
try:
from google import genai
from google.genai.types import GenerateContentConfig, ThinkingConfig
except ImportError:
genai = None
GenerateContentConfig = None
ThinkingConfig = None
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from ..config import config
class GoogleProvider(BaseLLMProvider):
"""Google Gemini 2.5 Pro provider using new google-genai SDK"""
def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, **kwargs):
    """Resolve settings from arguments/config and create the GenAI client.

    Args:
        api_key: Overrides the configured Google API key when given.
        model_name: Overrides the configured model id when given.
        **kwargs: Optional overrides for 'temperature', 'max_output_tokens',
            'thinking_budget' and 'timeout'; all kwargs are also passed to
            the base class.

    Raises:
        ImportError: when the ``google-genai`` package is not installed.
    """
    if genai is None:
        raise ImportError("google-genai package not installed. Run: pip install google-genai")

    defaults = config.get_provider_config('google')
    resolved_key = api_key or defaults['api_key']
    resolved_model = model_name or defaults['model']
    super().__init__(api_key=resolved_key, model_name=resolved_model, **kwargs)

    # A per-instance keyword argument wins over the configured default.
    for option in ('temperature', 'max_output_tokens', 'thinking_budget', 'timeout'):
        setattr(self, option, kwargs.get(option, defaults[option]))

    self.client = None
    self._setup_client()
def _setup_client(self):
    """Create the Google GenAI client from ``self.api_key`` and store it on ``self.client``.

    Any initialization error is logged and re-raised.
    """
    try:
        self.client = genai.Client(api_key=self.api_key)
        self.logger.info(f"Google GenAI client initialized - Model: {self.model_name}")
    except Exception as init_error:
        self.logger.error(f"Failed to initialize Google GenAI client: {init_error}")
        raise
async def generate_response(
    self,
    messages: List[Dict[str, str]],
    schema: Optional[Dict[str, Any]] = None,
    **kwargs
) -> LLMResponse:
    """Send ``messages`` to the configured Gemini model and wrap the result.

    Args:
        messages: Chat messages as ``{'role': ..., 'content': ...}`` dicts.
        schema: Optional JSON schema. When given, JSON output is requested and
            the schema is converted with ``_convert_schema_to_google_format``.
        **kwargs: Accepted for interface compatibility; not used by this method.

    Returns:
        An ``LLMResponse``. Exceptions are not propagated: any failure is
        logged and reported as ``success=False`` with ``error`` set.
    """
    start_time = time.time()
    try:
        self.logger.info(f"Google Request - Model: {self.model_name} (thinking enabled: {self.thinking_budget} budget)")

        # Convert messages to Google format
        request_contents = self._prepare_content(messages)

        # Configure generation with thinking capabilities
        config_dict = {
            'temperature': self.temperature,
            'max_output_tokens': self.max_output_tokens,
            'thinking_config': ThinkingConfig(thinking_budget=self.thinking_budget) if ThinkingConfig else None,
        }

        # Add JSON schema for structured output if provided
        if schema:
            config_dict['response_mime_type'] = 'application/json'
            # Google GenAI SDK expects response_schema, not response_json_schema
            config_dict['response_schema'] = self._convert_schema_to_google_format(schema)
            self.logger.info("Using structured output with converted schema")

        generation_config = GenerateContentConfig(**config_dict)

        # Generate response using native async API
        response = await self.client.aio.models.generate_content(
            model=self.model_name,
            contents=request_contents,
            config=generation_config
        )

        # Kept in a separate name so the request payload is not shadowed.
        response_text = self._extract_response_text(response)

        # Extract token usage
        token_usage = TokenUsage()
        if hasattr(response, 'usage_metadata'):
            # `or 0` covers both a missing attribute and an explicit None.
            usage_metadata = response.usage_metadata
            usage_dict = {
                'input_tokens': getattr(usage_metadata, 'prompt_token_count', None) or 0,
                'output_tokens': getattr(usage_metadata, 'candidates_token_count', None) or 0,
                'cached_input_tokens': getattr(usage_metadata, 'cached_content_token_count', None) or 0
            }
            self.logger.debug(f"Google token usage: {usage_dict}")
            token_usage.add_usage(usage_dict)
        else:
            self.logger.warning("No usage_metadata found in Google response")

        processing_time = time.time() - start_time
        llm_response = LLMResponse(
            content=response_text,
            raw_response=response,
            token_usage=token_usage,
            model_used=self.model_name,
            provider="google",
            success=True,
            processing_time=processing_time
        )
        self.log_response(llm_response)
        return llm_response
    except Exception as e:
        processing_time = time.time() - start_time
        self.logger.error(f"Google request failed: {e}")
        return LLMResponse(
            content="",
            raw_response=None,
            token_usage=TokenUsage(),
            model_used=self.model_name,
            provider="google",
            success=False,
            error=str(e),
            processing_time=processing_time
        )

def _extract_response_text(self, response) -> str:
    """Return the text of a generate_content response.

    ``response.text`` is used when it is not None. Otherwise the text parts of
    the first candidate that has any are joined (parts flagged as ``thought``
    are skipped).

    Raises:
        ValueError: If no text can be found. The caller turns this into a
            failed ``LLMResponse`` instead of reporting success with no content.
    """
    text = getattr(response, 'text', None)
    if text is not None:
        return text

    for candidate in getattr(response, 'candidates', None) or []:
        parts = getattr(getattr(candidate, 'content', None), 'parts', None) or []
        joined = "".join(
            getattr(part, 'text', None) or ""
            for part in parts
            if not getattr(part, 'thought', False)
        )
        if joined:
            return joined

    raise ValueError("Google response contained no text content")
def _prepare_content(self, messages: List[Dict[str, str]]) -> List[Dict[str, Any]]:
"""Convert standard messages to Google GenAI format"""
contents = []
for message in messages:
role = message['role']
text = message['content']
# Map roles to Google format
if role == 'system':
# System messages go into parts directly
contents.append({
'role': 'user', # Google doesn't have explicit system role
'parts': [{'text': f"System: {text}"}]
})
elif role == 'user':
contents.append({
'role': 'user',
'parts': [{'text': text}]
})
elif role == 'assistant':
contents.append({
'role': 'model',
'parts': [{'text': text}]
})
return contents
def _convert_schema_to_google_format(self, schema: Dict[str, Any]) -> Dict[str, Any]:
"""Convert OpenAI JSON schema to Google GenAI format"""
def convert_type(openai_type: str) -> str:
"""Convert OpenAI type to Google GenAI type"""
type_mapping = {
'string': 'STRING',
'array': 'ARRAY',
'object': 'OBJECT',
'integer': 'INTEGER',
'number': 'NUMBER',
'boolean': 'BOOLEAN'
}
return type_mapping.get(openai_type.lower(), 'STRING')
def convert_schema_node(node):
if isinstance(node, dict):
converted = {}
for key, value in node.items():
if key == 'type':
# Convert type to Google format
converted['type'] = convert_type(value)
elif key == 'oneOf':
# Google doesn't support oneOf - use the string type option
if isinstance(value, list) and len(value) > 0:
string_option = next((item for item in value if item.get('type') == 'string'), value[0])
return convert_schema_node(string_option)
elif key == 'items':
# Convert array items
converted['items'] = convert_schema_node(value)
elif key == 'properties':
# Convert object properties
converted['properties'] = {}
for prop_name, prop_schema in value.items():
converted['properties'][prop_name] = convert_schema_node(prop_schema)
elif key == 'required':
# Keep required fields as-is
converted['required'] = value
elif key == 'additionalProperties':
# Skip additionalProperties - not supported by Gemini API
self.logger.debug(f"Skipping unsupported 'additionalProperties' in Google schema")
continue
elif key in ['description', 'title']:
# Keep description and title
converted[key] = value
# Skip other OpenAI-specific fields like 'name'
return converted
elif isinstance(node, list):
return [convert_schema_node(item) for item in node]
else:
return node
# Extract the actual schema from OpenAI format
if 'schema' in schema:
google_schema = convert_schema_node(schema['schema'])
else:
google_schema = convert_schema_node(schema)
return google_schema
def validate_config(self) -> bool:
    """Report whether this provider is usable.

    Returns:
        False (after logging an error) when the API key is empty or still the
        placeholder value, or when google-genai is not installed; True otherwise.
    """
    key_missing = (not self.api_key) or self.api_key == 'your-google-api-key-here'
    if key_missing:
        self.logger.error("Google API key not configured")
        return False

    sdk_missing = genai is None
    if sdk_missing:
        self.logger.error("google-genai package not installed")
        return False

    return True
def estimate_cost(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
    """Delegate cost estimation to ``config`` using the 'google-gemini31' pricing key."""
    pricing_key = 'google-gemini31'
    return config.estimate_cost(pricing_key, input_tokens, output_tokens, cached_tokens)
def get_max_tokens(self) -> int:
    """Return the hard-coded token limit for this provider (2,000,000).

    The original author labels this as the Gemini 3.1 Pro context window.
    """
    context_window = 2_000_000
    return context_window

View file

@ -0,0 +1,309 @@
"""
OpenAI provider implementation for GPT-5 with reasoning effort support
"""
import time
import json
import logging
from typing import List, Dict, Any, Optional
from openai import AsyncOpenAI
from pydantic import BaseModel
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from ..config import config
class OpenAIProvider(BaseLLMProvider):
"""OpenAI GPT-5 provider with reasoning effort support"""
def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, **kwargs):
    """Configure the provider from explicit arguments, falling back to config.

    Args:
        api_key: API key; defaults to the 'openai' provider config value.
        model_name: Model identifier; defaults to the 'openai' provider config value.
        **kwargs: Forwarded to the base class. ``reasoning_effort``, ``timeout``
            and ``max_retries`` are also read here as per-instance overrides.
    """
    defaults = config.get_provider_config('openai')
    super().__init__(
        api_key=api_key or defaults['api_key'],
        model_name=model_name or defaults['model'],
        **kwargs
    )

    # Each tunable comes from kwargs when supplied, otherwise from config.
    for option in ('reasoning_effort', 'timeout', 'max_retries'):
        setattr(self, option, kwargs.get(option, defaults[option]))

    self.client = None
    self._setup_client()
def _setup_client(self):
    """Create the ``AsyncOpenAI`` client and store it on ``self.client``.

    The client is given this instance's API key, timeout and retry count.
    Any failure is logged and re-raised unchanged.
    """
    try:
        client_options = {
            'api_key': self.api_key,
            'timeout': self.timeout,
            'max_retries': self.max_retries,
        }
        self.client = AsyncOpenAI(**client_options)
        self.logger.info(f"AsyncOpenAI client initialized - Model: {self.model_name}, Reasoning: {self.reasoning_effort}")
    except Exception as init_error:
        self.logger.error(f"Failed to initialize AsyncOpenAI client: {init_error}")
        raise
async def generate_response(
    self,
    messages: List[Dict[str, str]],
    schema: Optional[Dict[str, Any]] = None,
    **kwargs
) -> LLMResponse:
    """Send ``messages`` to OpenAI and wrap the result in an ``LLMResponse``.

    With ``schema`` the Responses API ``parse`` call is used with the
    configured reasoning effort, and the content is a JSON string validated
    to contain an ``assets`` key. Without ``schema`` a plain chat completion
    is made and ``**kwargs`` are forwarded to it.

    Args:
        messages: Chat messages as ``{'role': ..., 'content': ...}`` dicts.
        schema: Optional JSON schema describing the structured output.
        **kwargs: Extra arguments for the chat-completion call only.

    Returns:
        An ``LLMResponse``. Exceptions are not propagated: any failure is
        logged and reported as ``success=False`` with ``error`` set.
    """
    start_time = time.time()
    try:
        self.logger.info(f"OpenAI Request - Model: {self.model_name}, Reasoning: {self.reasoning_effort}")
        if schema:
            # The stage tag only labels log lines; it is inferred from the prompt text.
            stage_tag = "[CONSOLIDATION]" if "MODELS' ANALYSIS RESULTS" in str(messages) else "[INITIAL]"
            self.logger.info(f"{stage_tag} Using structured output with schema: {schema.get('name', 'unknown')}")
            schema_model = self._create_pydantic_model(schema)
            self.logger.debug(f"{stage_tag} Created Pydantic model: {schema_model.__name__}")
            response = await self.client.responses.parse(
                model=self.model_name,
                input=messages,
                reasoning={"effort": self.reasoning_effort},
                text_format=schema_model
            )
            content = self._extract_structured_content(response, stage_tag)
        else:
            # Use regular chat completion
            response = await self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                **kwargs
            )
            content = response.choices[0].message.content

        # Extract token usage
        token_usage = TokenUsage()
        if hasattr(response, 'usage'):
            token_usage.add_usage(self._usage_to_dict(response.usage))

        processing_time = time.time() - start_time
        llm_response = LLMResponse(
            content=content,
            raw_response=response,
            token_usage=token_usage,
            model_used=self.model_name,
            provider="openai",
            success=True,
            processing_time=processing_time
        )
        self.log_response(llm_response, f"Reasoning: {self.reasoning_effort}")
        return llm_response
    except Exception as e:
        processing_time = time.time() - start_time
        self.logger.error(f"OpenAI request failed: {e}")
        return LLMResponse(
            content="",
            raw_response=None,
            token_usage=TokenUsage(),
            model_used=self.model_name,
            provider="openai",
            success=False,
            error=str(e),
            processing_time=processing_time
        )

def _extract_structured_content(self, response, stage_tag: str) -> str:
    """Return the JSON string for a structured (``responses.parse``) result.

    When ``output_parsed`` is present it is dumped to JSON and checked for an
    ``assets`` key. An empty object is replaced by ``{"assets": []}``; any
    other object without the key raises ``KeyError`` after a debug dump.
    When ``output_parsed`` is missing, the raw text (``output_text``, then
    ``choices``) is used if it is valid JSON, otherwise ``{"assets": []}``.
    """
    empty_result = json.dumps({"assets": []})
    parsed_model = getattr(response, 'output_parsed', None)

    if parsed_model is not None:
        try:
            # Extract JSON from Pydantic model
            content = parsed_model.model_dump_json()
            try:
                parsed_content = json.loads(content)
                if not isinstance(parsed_content, dict):
                    self.logger.error(f"{stage_tag} Structured output is not a dict: {type(parsed_content)}")
                    raise ValueError("Expected dict structure")
                if 'assets' not in parsed_content:
                    # PROBLEM DETECTED - Log everything verbosely
                    self.logger.error(f"{stage_tag} ========== MISSING 'assets' KEY - VERBOSE DEBUG ==========")
                    self.logger.error(f"{stage_tag} Response type: {type(response).__name__}")
                    self.logger.error(f"{stage_tag} Has output_parsed: {hasattr(response, 'output_parsed')}")
                    self.logger.error(f"{stage_tag} output_parsed type: {type(response.output_parsed)}")
                    self.logger.error(f"{stage_tag} Raw output_parsed value: {response.output_parsed}")
                    self.logger.error(f"{stage_tag} Extracted JSON length: {len(content)} chars")
                    self.logger.error(f"{stage_tag} Full JSON content: {content}")
                    self.logger.error(f"{stage_tag} Parsed data keys: {list(parsed_content.keys())}")
                    self.logger.error(f"{stage_tag} Full parsed content: {parsed_content}")
                    if not parsed_content:  # Empty dict
                        self.logger.warning(f"{stage_tag} Got empty dict, creating default structure")
                        content = empty_result
                        self.logger.info(f"{stage_tag} Fixed content: {content}")
                    else:
                        # Save to file and fail
                        self._save_debug_response(response, content, stage_tag)
                        raise KeyError("Missing assets key")
                else:
                    assets_count = len(parsed_content.get('assets', []))
                    self.logger.info(f"{stage_tag} Structured output validated: {assets_count} assets")
            except json.JSONDecodeError as je:
                self.logger.error(f"Failed to parse structured output as JSON: {je}")
                self.logger.error(f"Raw structured content: {content[:500]}...")
                raise
        except Exception as e:
            self.logger.error(f"Error processing structured output: {e}")
            self.logger.error(f"Raw response object: {str(response)[:500]}...")
            raise
        return content

    self.logger.error(f"{stage_tag} No structured output found in response (output_parsed is None)")
    self.logger.error(f"{stage_tag} Response attributes: {dir(response)}")
    self._save_debug_response(response, None, stage_tag)

    # A Responses API result carries its text in `output_text`; `choices` is
    # kept as a secondary source for chat-completion shaped objects.
    fallback_content = getattr(response, 'output_text', None)
    fallback_source = 'output_text'
    if not fallback_content and getattr(response, 'choices', None):
        fallback_content = response.choices[0].message.content
        fallback_source = 'choices'

    if not fallback_content:
        self.logger.error(f"{stage_tag} No fallback content available in response")
        self.logger.error(f"{stage_tag} Response has choices: {hasattr(response, 'choices')}")
        return empty_result

    self.logger.warning(f"{stage_tag} Using fallback content from {fallback_source}: {len(fallback_content)} chars")
    try:
        json.loads(fallback_content)
    except json.JSONDecodeError:
        self.logger.error(f"{stage_tag} Fallback content is not valid JSON: {fallback_content[:500]}")
        return empty_result
    self.logger.info(f"{stage_tag} Successfully parsed fallback content as JSON")
    return fallback_content

def _usage_to_dict(self, usage) -> Dict[str, int]:
    """Normalise a usage object from either API into the ``add_usage`` dict.

    Responses API names (``input_tokens``/``output_tokens``) are tried before
    chat-completion names (``prompt_tokens``/``completion_tokens``). Cached
    tokens are read from the legacy flat attributes first, then from the
    nested ``*_tokens_details.cached_tokens`` field. Missing or None values
    count as 0, including when ``usage`` itself is None.
    """
    def first_value(*names) -> int:
        for name in names:
            value = getattr(usage, name, None)
            if value is not None:
                return value
        return 0

    cached = first_value('input_tokens_cached', 'prompt_tokens_cached')
    if not cached:
        for details_name in ('input_tokens_details', 'prompt_tokens_details'):
            details = getattr(usage, details_name, None)
            cached = getattr(details, 'cached_tokens', None) or 0
            if cached:
                break

    return {
        'input_tokens': first_value('input_tokens', 'prompt_tokens'),
        'output_tokens': first_value('output_tokens', 'completion_tokens'),
        'cached_input_tokens': cached
    }
def _create_pydantic_model(self, schema: Dict[str, Any]) -> BaseModel:
    """Return the Pydantic model class used for structured output.

    ``BaseExtractionResult`` from ``process_brief_enhanced`` is returned
    whenever it can be imported; ``schema`` is only consulted when that import
    fails. In that case a model is generated from the schema's ``assets``
    array definition, with progressively simpler fallbacks.

    Args:
        schema: Wrapper dict with the JSON schema under the ``'schema'`` key.

    Returns:
        A Pydantic model class (not an instance).
    """
    try:
        # For base deliverable extraction, we can use the existing models
        from ..process_brief_enhanced import BaseExtractionResult
        return BaseExtractionResult
    except ImportError as import_error:
        self.logger.warning(f"Failed to import BaseExtractionResult: {import_error}, using dynamic model")

    # Fallback: create dynamic model with proper nested structure
    from pydantic import create_model

    try:
        top_level_props = schema.get('schema', {}).get('properties', {})

        # Handle the assets array specifically
        if 'assets' in top_level_props:
            assets_spec = top_level_props['assets']
            if assets_spec.get('type') == 'array':
                item_props = assets_spec.get('items', {}).get('properties', {})

                # Array-typed fields become optional string lists; all others optional strings.
                asset_fields = {
                    field_name: (
                        (Optional[List[str]], [])
                        if field_spec.get('type') == 'array'
                        else (Optional[str], "")
                    )
                    for field_name, field_spec in item_props.items()
                }
                asset_model = create_model('DynamicAssetModel', **asset_fields)
                return create_model('DynamicResponseModel', assets=(List[asset_model], ...))

        # Fallback to simple structure
        simple_fields = {'assets': (List[Any], ...)}
        return create_model('DynamicModel', **simple_fields)
    except Exception as schema_error:
        self.logger.error(f"Failed to create dynamic model from schema: {schema_error}")
        # Ultimate fallback
        return create_model('FallbackModel', assets=(List[Any], ...))
def validate_config(self) -> bool:
    """Report whether this provider is usable, normalising reasoning effort.

    Returns:
        False (after logging an error) when the API key is empty or still the
        placeholder value; True otherwise. An unrecognised ``reasoning_effort``
        is not fatal: it is logged and reset to ``'medium'``.
    """
    key_missing = (not self.api_key) or self.api_key == 'your-openai-api-key-here'
    if key_missing:
        self.logger.error("OpenAI API key not configured")
        return False

    allowed_efforts = ['high', 'medium', 'low', 'minimal']
    if self.reasoning_effort not in allowed_efforts:
        self.logger.warning(f"Invalid reasoning effort: {self.reasoning_effort}, using 'medium'")
        self.reasoning_effort = 'medium'

    return True
def estimate_cost(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> float:
    """Delegate cost estimation to ``config`` using the 'openai-gpt51' pricing key."""
    pricing_key = 'openai-gpt51'
    return config.estimate_cost(pricing_key, input_tokens, output_tokens, cached_tokens)
def get_max_tokens(self) -> int:
    """Return the hard-coded token limit for this provider (200,000).

    The original author labels this as the GPT-5.1 context window.
    """
    context_window = 200_000
    return context_window
def set_reasoning_effort(self, effort: str):
    """Change the reasoning effort used for subsequent structured requests.

    Args:
        effort: One of 'high', 'medium', 'low' or 'minimal'. Any other value
            is rejected with a warning and the current setting is kept.
    """
    if effort not in ['high', 'medium', 'low', 'minimal']:
        self.logger.warning(f"Invalid reasoning effort: {effort}, keeping current: {self.reasoning_effort}")
        return

    self.reasoning_effort = effort
    self.logger.info(f"Updated reasoning effort to: {effort}")
def _save_debug_response(self, response, content, stage_tag):
"""Save debug information about problematic responses"""
try:
import tempfile
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
debug_file = os.path.join(tempfile.gettempdir(), f"openai_debug_{stage_tag.strip('[]')}_{timestamp}.txt")
with open(debug_file, 'w') as f:
f.write(f"=== OpenAI Response Debug {stage_tag} ===\n")
f.write(f"Timestamp: {timestamp}\n")
f.write(f"Model: {self.model_name}\n")
f.write(f"Reasoning: {self.reasoning_effort}\n\n")
f.write("=== Response Object ===\n")
f.write(f"Type: {type(response)}\n")
f.write(f"Dir: {dir(response)}\n\n")
if hasattr(response, 'output_parsed'):
f.write(f"output_parsed: {response.output_parsed}\n")
f.write(f"output_parsed type: {type(response.output_parsed)}\n\n")
if hasattr(response, 'choices'):
f.write(f"Has choices: {len(response.choices) if response.choices else 0}\n")
if response.choices:
f.write(f"choices[0]: {response.choices[0]}\n\n")
f.write("=== Extracted Content ===\n")
f.write(f"Content: {content}\n\n")
f.write("=== Full Response ===\n")
f.write(f"{response}\n")
self.logger.error(f"{stage_tag} Debug info saved to: {debug_file}")
except Exception as e:
self.logger.error(f"{stage_tag} Failed to save debug info: {e}")

View file

@ -0,0 +1,293 @@
"""
Provider manager for coordinating parallel execution across multiple LLM providers
"""
import asyncio
import logging
from typing import List, Dict, Any, Optional, Tuple
import time
from .base_provider import BaseLLMProvider, LLMResponse, TokenUsage
from .openai_provider import OpenAIProvider
from .google_provider import GoogleProvider
from .anthropic_provider import AnthropicProvider
from ..config import config
class ProviderManager:
"""Manages multiple LLM providers and coordinates parallel execution"""
def __init__(self):
self.providers: Dict[str, BaseLLMProvider] = {}
self.logger = logging.getLogger(self.__class__.__name__)
def create_provider(self, model_key: str) -> BaseLLMProvider:
    """Build a new provider instance for ``model_key``.

    The provider name and model name are resolved through
    ``config.get_model_info``. Anthropic providers also receive a variant:
    'opus' when the key contains "opus", otherwise 'sonnet'.

    Raises:
        ValueError: If the resolved provider name is not recognised.
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        provider_name, model_name = config.get_model_info(model_key)

        if provider_name == 'openai':
            return OpenAIProvider(model_name=model_name)
        if provider_name == 'google':
            return GoogleProvider(model_name=model_name)
        if provider_name == 'anthropic':
            # Extract variant from model key for Anthropic
            variant = 'opus' if 'opus' in model_key else 'sonnet'
            return AnthropicProvider(model_name=model_name, model_variant=variant)

        raise ValueError(f"Unknown provider: {provider_name}")
    except Exception as creation_error:
        self.logger.error(f"Failed to create provider for {model_key}: {creation_error}")
        raise
def get_provider(self, model_key: str) -> BaseLLMProvider:
    """Return the cached provider for ``model_key``, creating it on first use."""
    registry = self.providers
    if model_key in registry:
        return registry[model_key]

    registry[model_key] = self.create_provider(model_key)
    return registry[model_key]
async def execute_parallel_analysis(
    self,
    model_keys: List[str],
    messages: List[Dict[str, str]],
    schema: Optional[Dict[str, Any]] = None,
    minimum_success_threshold: int = 1,
    on_model_event: Optional[callable] = None
) -> Tuple[List[LLMResponse], Dict[str, Any]]:
    """Execute analysis across multiple models in parallel.

    Args:
        model_keys: List of model identifiers to use.
        messages: Messages to send to all models.
        schema: Optional JSON schema for structured output.
        minimum_success_threshold: Minimum number of successful responses required.
        on_model_event: Optional callback for model start/end events.

    Returns:
        Tuple of (successful_responses, metadata). ``metadata`` holds the
        counts, total time, per-model results and the list of
        ``(model_key, error)`` failures.

    Raises:
        ValueError: If no model passes provider creation and validation.
        RuntimeError: If fewer than ``minimum_success_threshold`` models succeed.
    """
    self.logger.info(f"Starting parallel analysis with models: {model_keys}")
    start_time = time.time()

    # Keep only the models whose provider can be created and is configured.
    valid_model_keys = []
    for model_key in model_keys:
        try:
            provider = self.get_provider(model_key)
            if provider.validate_config():
                valid_model_keys.append(model_key)
            else:
                self.logger.warning(f"Skipping {model_key} due to configuration issues")
        except Exception as e:
            self.logger.error(f"Failed to validate {model_key}: {e}")

    if len(valid_model_keys) == 0:
        raise ValueError("No valid models available for analysis")
    if len(valid_model_keys) < minimum_success_threshold:
        self.logger.warning(
            f"Only {len(valid_model_keys)} valid models, but minimum threshold is {minimum_success_threshold}"
        )

    # Create tasks for parallel execution
    tasks = []
    for model_key in valid_model_keys:
        provider = self.get_provider(model_key)
        task = asyncio.create_task(
            self._execute_with_provider(provider, model_key, messages, schema, on_model_event)
        )
        tasks.append((model_key, task))

    # Await all tasks simultaneously; failures come back as values, not raises.
    task_results = await asyncio.gather(*[task for _, task in tasks], return_exceptions=True)

    results = []
    successful_responses = []
    failed_responses = []
    for (model_key, _), result in zip(tasks, task_results):
        # BaseException, not Exception: a cancelled task is returned as
        # CancelledError, which does not derive from Exception.
        if isinstance(result, BaseException):
            self.logger.error(f"Task for {model_key} raised exception: {result}")
            # Some exceptions (e.g. CancelledError) have an empty message.
            failed_responses.append((model_key, str(result) or type(result).__name__))
            continue

        response = result
        results.append((model_key, response))
        if response.success:
            successful_responses.append(response)
            # Try to parse the response to count deliverables
            deliverable_count = self._count_deliverables_in_response(response.content)
            self.logger.info(f"{model_key} analysis completed successfully - found {deliverable_count} deliverables")
        else:
            failed_responses.append((model_key, response.error))
            self.logger.warning(f"{model_key} analysis failed: {response.error}")

    total_time = time.time() - start_time

    # Check if we meet minimum success threshold
    if len(successful_responses) < minimum_success_threshold:
        raise RuntimeError(
            f"Only {len(successful_responses)} models succeeded, "
            f"but minimum threshold is {minimum_success_threshold}"
        )

    # Compile metadata
    metadata = {
        'total_models_requested': len(model_keys),
        'valid_models': len(valid_model_keys),
        'successful_models': len(successful_responses),
        'failed_models': len(failed_responses),
        'total_processing_time': total_time,
        'model_results': {
            model_key: {
                'success': response.success,
                'processing_time': response.processing_time,
                'tokens_used': response.token_usage.get_total(),
                'provider': response.provider,
                'model': response.model_used,
                'error': response.error
            } for model_key, response in results
        },
        'failures': failed_responses
    }
    self.logger.info(
        f"Parallel analysis completed - {len(successful_responses)}/{len(valid_model_keys)} "
        f"models succeeded in {total_time:.2f}s"
    )
    return successful_responses, metadata
def _count_deliverables_in_response(self, content: str) -> int:
"""Count the number of deliverables in a model's JSON response"""
try:
import json
data = json.loads(content)
if isinstance(data, dict) and 'assets' in data:
return len(data['assets'])
return 0
except (json.JSONDecodeError, KeyError, TypeError):
return 0
async def _execute_with_provider(
self,
provider: BaseLLMProvider,
model_key: str,
messages: List[Dict[str, str]],
schema: Optional[Dict[str, Any]] = None,
on_model_event: Optional[callable] = None
) -> LLMResponse:
"""Execute analysis with a single provider"""
import time
from datetime import datetime
try:
self.logger.debug(f"Starting analysis with {model_key}")
# Notify start event
if on_model_event:
await on_model_event(model_key, 'start', {
'timestamp': datetime.utcnow().isoformat()
})
start_time = time.time()
response = await provider.generate_response(messages, schema)
processing_time = time.time() - start_time
# Calculate cost if possible
cost = 0.0
try:
cost = provider.estimate_cost(
response.token_usage.input_tokens,
response.token_usage.output_tokens,
response.token_usage.cached_input_tokens
)
except:
pass
# Notify success event
if on_model_event:
await on_model_event(model_key, 'end', {
'response': response,
'cost': cost,
'processing_time': processing_time,
'timestamp': datetime.utcnow().isoformat()
})
return response
except Exception as e:
self.logger.error(f"Provider {model_key} execution failed: {e}")
# Notify error event
if on_model_event:
await on_model_event(model_key, 'end', {
'error': str(e),
'timestamp': datetime.utcnow().isoformat()
})
return LLMResponse(
content="",
raw_response=None,
token_usage=TokenUsage(),
model_used=model_key,
provider=provider.get_provider_name(),
success=False,
error=str(e)
)
def estimate_total_cost(self, model_keys: List[str], estimated_input_tokens: int, estimated_output_tokens: int) -> Dict[str, float]:
    """Estimate the cost of running the same request on every listed model.

    Returns:
        A dict mapping each model key to its estimated cost, plus a ``'total'``
        entry. A model whose estimate fails is logged and recorded as 0.0.
    """
    breakdown = {}
    running_total = 0.0
    for key in model_keys:
        try:
            estimate = self.get_provider(key).estimate_cost(
                estimated_input_tokens, estimated_output_tokens
            )
            breakdown[key] = estimate
            running_total += estimate
        except Exception as estimate_error:
            self.logger.warning(f"Could not estimate cost for {key}: {estimate_error}")
            breakdown[key] = 0.0
    breakdown['total'] = running_total
    return breakdown
def get_aggregated_token_usage(self, responses: List[LLMResponse]) -> TokenUsage:
    """Sum input, output and cached-input token counts over ``responses``.

    Returns:
        A new ``TokenUsage`` holding the three totals.
    """
    combined = TokenUsage()
    for item in responses:
        usage = item.token_usage
        combined.input_tokens += usage.input_tokens
        combined.output_tokens += usage.output_tokens
        combined.cached_input_tokens += usage.cached_input_tokens
    return combined
def get_actual_cost_breakdown(self, responses: List[LLMResponse]) -> Dict[str, float]:
    """Calculate actual costs from the token usage recorded on each response.

    The provider cache is keyed by model key, while providers report their
    model name in ``response.model_used``. Each response is therefore matched
    first by key and then by a cached provider's ``model_name``.

    Returns:
        A dict mapping ``model_used`` to its summed cost, plus a ``'total'``
        entry. Responses with no matching provider, or whose cost calculation
        fails, are logged and contribute nothing.
    """
    cost_breakdown = {}
    total_cost = 0.0
    for response in responses:
        try:
            provider = self.providers.get(response.model_used)
            if provider is None:
                # Fall back to matching on the provider's own model name.
                provider = next(
                    (candidate for candidate in self.providers.values()
                     if getattr(candidate, 'model_name', None) == response.model_used),
                    None
                )
            if provider is None:
                self.logger.warning(f"No provider found for {response.model_used}; cost not included")
                continue

            cost = provider.estimate_cost(
                response.token_usage.input_tokens,
                response.token_usage.output_tokens,
                response.token_usage.cached_input_tokens
            )
            # Accumulate so repeated models stay consistent with 'total'.
            cost_breakdown[response.model_used] = cost_breakdown.get(response.model_used, 0.0) + cost
            total_cost += cost
        except Exception as e:
            self.logger.warning(f"Could not calculate cost for {response.model_used}: {e}")
    cost_breakdown['total'] = total_cost
    return cost_breakdown

File diff suppressed because it is too large Load diff

View file

View file

View file

25
backend/hypercorn.toml Executable file
View file

@ -0,0 +1,25 @@
# Hypercorn configuration for the backend ASGI server.
# NOTE(review): Hypercorn's documented TOML format uses flat top-level keys
# (e.g. `bind`, `workers`, `accesslog`, `keep_alive_timeout`). Confirm that the
# tables below are actually read by whatever loads this file. TODO verify.
[application]
# Application factory, given as "module:callable()".
module = "server.app:create_app()"
[server]
# Listen on all interfaces, port 8000.
bind = ["0.0.0.0:8000"]
workers = 2
worker_class = "asyncio"
[websockets]
# presumably seconds; confirm against the Hypercorn version in use
ping_interval = 30
ping_timeout = 10
[timeouts]
# presumably seconds; confirm against the Hypercorn version in use
keep_alive = 5
graceful_timeout = 30
[logging]
# "-" is presumably stdout/stderr; confirm
access_log = "-"
error_log = "-"
log_level = "info"
[ssl]
# Enable for production
# certfile = "path/to/cert.pem"
# keyfile = "path/to/key.pem"

56
backend/prompts/README.md Executable file
View file

@ -0,0 +1,56 @@
# AI Prompts Directory
This directory contains the AI prompts used by the Enhanced Brief Processing System, extracted from the main Python code for better maintainability and editability.
## Files Overview
### Core Analysis Prompts
- **`multi_perspective_analysis.txt`** - Main prompt for extracting marketing deliverables from documents
- Used in `_perform_multi_perspective_analysis()` method
- Contains comprehensive extraction rules and multiplier handling logic
- Template parameter: `{doc_type}` (e.g., "powerpoint", "word", "pdf")
- **`validation_analysis.txt`** - Quality assurance prompt for validating extractions
- Used in `_enhance_and_validate_results()` method
- Validates completeness and accuracy of initial extraction
- Template parameters: `{asset_count}`, `{doc_type}`
### System Messages
- **`system_multi_perspective.txt`** - System message for main analysis
- **`system_validation.txt`** - System message for validation phase
## Usage in Code
The prompts are loaded dynamically using the `_load_prompt()` method in the `DocumentAnalyzer` class:
```python
# Load and format prompts
prompt_template = self._load_prompt('multi_perspective_analysis')
prompt = prompt_template.format(doc_type=doc_type.value)
system_message = self._load_prompt('system_multi_perspective')
```
## Benefits of External Prompts
1. **Easy Editing** - Modify prompts without touching Python code
2. **Version Control** - Track prompt changes separately from code changes
3. **Readability** - View full prompts in text editors with proper formatting
4. **Collaboration** - Non-programmers can review and modify prompts
5. **Testing** - Easier to A/B test different prompt variations
## Template Variables
### multi_perspective_analysis.txt
- `{doc_type}` - Document type (powerpoint, word, pdf, excel)
### validation_analysis.txt
- `{asset_count}` - Number of assets found in initial extraction
- `{doc_type}` - Document type from metadata
## Modifying Prompts
1. Edit the `.txt` files directly
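2. Use standard Python `str.format` placeholders for variables: `{variable_name}`. Literal braces (for example in JSON samples) must be doubled as `{{` and `}}`, otherwise `.format()` treats them as placeholders and fails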
3. Test changes by running the processing script
4. No code changes required when modifying prompt content

View file

@ -0,0 +1,79 @@
You are an intelligent assistant managing an Activation Calendar for an advertising agency.
Current Date: {current_date}
YOLO MODE: {yolo_mode}
CONVERSATION HISTORY:
{conversation_history}
CURRENT DATA (Context for your actions):
{data_context}
Data Schema:
- Number (Auto-generated, do not invent)
- Title (String)
- Status (Enum: Booked, To-do, In Progress, Done) - Default to 'Booked'
- Category (String — must be one of the valid categories listed below)
- Media (String — must be a valid media type for the chosen Category)
- Sub-media (String — free text, optional)
- Format (String) - Extract sizes/dimensions here! e.g., '300x250', 'A4', '10x15cm', 'Full Page', '1080p'.
- Supply date (YYYY-MM-DD)
- Live date (YYYY-MM-DD)
- Language (ISO 2-letter code, UPPERCASE, e.g., 'EN', 'FR', 'ES')
- Country (ISO 2-letter code, UPPERCASE, e.g., 'GB', 'FR', 'ES')
- Quantity (Integer)
VALID CATEGORY → MEDIA TYPES (use these exact values):
{hierarchy_rules}
Supported Operations:
1. 'create': Create new items.
Output: {{ "operation": "create", "items": [ {{ "Title": "...", "Category": "...", "Media": "...", "Format": "300x250", ... }} ] }}
2. 'update': Update existing items.
Output: {{ "operation": "update", "target_ids": ["DEL-001"], "values": {{ "Status": "Done" }} }}
3. 'batch_update': Update multiple items with DIFFERENT values.
Output: {{ "operation": "batch_update", "updates": [ {{ "Number": "DEL-001", "values": {{ "Title": "Row 1" }} }} ] }}
4. 'question': Ask for clarification (ONLY if YOLO MODE is FALSE).
Output: {{ "operation": "question", "text": "Did you mean 2025 or 2026?" }}
IMPORTANT BRAIN RULES:
0. **CRITICAL - MULTIPLE ITEMS vs QUANTITY**:
- When user says "add 10 deliverables" or "create 5 banners", create that many SEPARATE items in the array.
- NEVER use Quantity field to represent the count. Quantity should always be 1 unless explicitly stated.
- **MATH VALIDATION (MANDATORY)**:
* BEFORE creating items, COUNT how many items your pattern will create.
* If the user says "X items" but your pattern creates Y ≠ X, use 'question' operation.
* EXCEPTION: If the user confirms a count, EXECUTE immediately without asking again.
1. **FORMAT EXTRACTION**:
- ALWAYS use 'x' as separator for dimensions. NEVER use 'by'.
- '300 by 250' → '300x250', '30 by 30 cm' → '30x30cm'.
- Print sizes: 'A4', 'A3', 'Full Page', 'Half Page'.
2. **YOLO MODE (HIGHEST PRIORITY)**:
- If YOLO MODE is TRUE: YOU ARE FORBIDDEN FROM ASKING QUESTIONS.
- Always guess missing information. NEVER return 'question' operation.
3. **CLARIFICATION RECOVERY**:
- The user's current input is likely an ANSWER to your previous question.
- COMBINE it with previous user messages in the history to form a complete request.
- If the user confirms the count, EXECUTE immediately.
4. **CONTEXT IS KING**: Use CURRENT DATA to resolve references like "the French ones".
5. **INFER FIELDS**:
- "UK" or "Great Britain" → Country='GB'
- "English" → Language='EN', "French" → Language='FR', "Spanish" → Language='ES'
- Match category/media names to the VALID CATEGORY list above as closely as possible.
6. **PATTERN RECOGNITION**:
- Extract formats from phrases like "200 by 200", "300x300", "400x400 banner".
- Sequences: "first 5", "next 4", "remaining" for language/country assignments.
CRITICAL: Respond with ONLY valid JSON. No explanations, no markdown.
Your response must be a single JSON object starting with {{ and ending with }}.
User Command: "{command}"

View file

@ -0,0 +1,124 @@
You are an expert data consolidation specialist tasked with intelligently merging multiple LLM analysis results into a single, comprehensive dataset of marketing deliverables. Your goal is to create the most complete and accurate final output by combining the best elements from each model's analysis.
**CONSOLIDATION STRATEGY - BIAS TOWARD COMPLETENESS:**
1. **INCLUSION PHILOSOPHY**: "If ANY model found it, include it" - better to capture all potential deliverables than miss important ones
2. **SMART DEDUPLICATION**: Remove true duplicates while preserving legitimate variations
3. **QUALITY ENHANCEMENT**: Use the most detailed/accurate specifications from any model
4. **COMPLETENESS VERIFICATION**: Ensure no deliverables discovered by any model are lost
**INPUT ANALYSIS:**
You will receive multiple JSON arrays from different LLM models, each containing their analysis of the same document. Each model may have:
- Found different deliverables that others missed
- Provided varying levels of detail for the same deliverables
- Made different interpretation choices for specifications
- Captured different multiplier arrays (sizes, markets, languages, etc.)
**CONSOLIDATION PROCESS:**
**STEP 1: COMPREHENSIVE INVENTORY**
- Extract ALL unique deliverable titles found across all models
- Note which models identified each deliverable
- Identify potential duplicates vs. legitimate variations
**STEP 2: INTELLIGENT DEDUPLICATION WITH UNIQUENESS ANALYSIS**
- **DUPLICATE IDENTIFICATION CRITERIA**: Compare deliverables across ALL data points:
- Title/name (normalized for minor variations)
- Technical specifications (dimensions, formats, requirements)
- Markets/countries served
- Languages supported
- Asset types and media formats
- Creative direction and requirements
- Any other distinguishing characteristics
- **UNIQUENESS DECISION MATRIX**:
- **IDENTICAL DUPLICATES**: All major data points substantially the same → MERGE into single deliverable
- **LEGITIMATE VARIATIONS**: At least ONE significant data point differs → KEEP as separate deliverable
- **TITLE NORMALIZATION**: Standardize similar titles ("Social Media Assets" vs "Social Assets") but preserve unique specifications
- **SPECIFICATION CONSOLIDATION**: For true duplicates, combine the most comprehensive specs from all models
- **SIGNIFICANT DIFFERENCE EXAMPLES**:
- Different technical specs: "1080x1080" vs "1080x1920" = UNIQUE
- Different markets: "UK,DE,FR" vs "ES,IT,NL" = UNIQUE (if one market list is a subset of the other, treat as the same deliverable and merge the markets)
- Different asset types: "JPG" vs "PNG" = UNIQUE
- Different creative requirements: "Static banner" vs "Animated banner" = UNIQUE
- Different quantities/scales: "5 assets" vs "20 assets" = UNIQUE
- **SUBTLE DUPLICATE EXAMPLES**:
- "Social Media Posts" vs "Social Posts" with identical specs = DUPLICATE (merge)
- "Display Banner Set" vs "Display Banners" with same dimensions = DUPLICATE (merge)
- Same deliverable found by multiple models with identical specs = DUPLICATE (merge)
**STEP 3: QUALITY ENHANCEMENT FOR UNIQUE DELIVERABLES**
For each confirmed unique deliverable, select the BEST information available:
- **Most Complete Technical Specifications**: Use the model that provided the most detailed specs
- **Comprehensive Markets/Languages**: Combine all markets/languages found by any model for THIS deliverable
- **Best Multiplier Arrays**: Merge arrays to capture all variations discovered for THIS deliverable
- **Richest Context**: Use the most descriptive creative direction and reference material
- **Optimal Naming**: Choose the clearest, most descriptive title from all model variants
**CONSOLIDATION EXAMPLES:**
**Example 1 - Combining Multiplier Arrays:**
Model A found: `"technical_specifications": ["1080x1920", "1200x1500"]`
Model B found: `"technical_specifications": ["1080x1920", "1080x1080", "1200x1500"]`
Model C found: `"technical_specifications": ["1080x1920", "1200x1500", "1000x1000"]`
**RESULT**: `"technical_specifications": ["1080x1920", "1200x1500", "1080x1080", "1000x1000"]`
**Example 2 - Market Consolidation:**
Model A: `"country": ["UK", "DE", "FR"]`
Model B: `"country": ["UK", "DE", "FR", "ES", "IT"]`
Model C: `"country": ["UK", "DE"]`
**RESULT**: `"country": ["UK", "DE", "FR", "ES", "IT"]` (most comprehensive)
**Example 3 - Avoiding False Duplicates (SIGNIFICANT DIFFERENCE):**
Model A: `"title": "Social Media Assets", "technical_specifications": ["1080x1080", "1080x1920"]`
Model B: `"title": "Social Media Banners", "technical_specifications": ["728x90", "300x250"]`
**ANALYSIS**: Technical specs are completely different (social vs display dimensions)
**RESULT**: Keep both - these are different asset types with unique specifications
**Example 4 - True Duplicate Resolution (IDENTICAL CORE):**
Model A: `"title": "Display Banners", "technical_specifications": ["728x90", "300x250"], "country": ["UK", "DE"]`
Model B: `"title": "Display Banner Set", "technical_specifications": ["728x90", "300x250", "970x250"], "country": ["UK", "DE", "FR"]`
**ANALYSIS**: Same asset type, overlapping specs, overlapping markets - Model B has additional specs/markets
**RESULT**: Merge into one with enhanced specs: `"title": "Display Banners", "technical_specifications": ["728x90", "300x250", "970x250"], "country": ["UK", "DE", "FR"]`
**Example 5 - Intelligent Duplicate Detection:**
Model A: `"title": "Instagram Stories", "technical_specifications": ["1080x1920"], "country": ["UK", "DE"], "asset_type": "JPG"`
Model B: `"title": "Instagram Story Graphics", "technical_specifications": ["1080x1920"], "country": ["UK", "DE"], "asset_type": "JPG"`
Model C: `"title": "Instagram Stories", "technical_specifications": ["1080x1920"], "country": ["UK", "DE", "FR"], "asset_type": "JPG"`
**ANALYSIS**: All refer to same deliverable type with identical core specs - Model C has additional market
**RESULT**: Merge into one: `"title": "Instagram Stories", "technical_specifications": ["1080x1920"], "country": ["UK", "DE", "FR"], "asset_type": "JPG"`
**Example 6 - Preserving Legitimate Variations:**
Model A: `"title": "YouTube Thumbnails", "technical_specifications": ["1280x720"], "country": ["UK"], "asset_type": "JPG"`
Model B: `"title": "YouTube Thumbnails", "technical_specifications": ["1280x720"], "country": ["UK"], "asset_type": "PNG"`
**ANALYSIS**: Same deliverable but different file format requirement - significant difference
**RESULT**: Keep both as separate deliverables - different asset_type is a significant difference
**FINAL QUALITY CHECKS:**
- **Uniqueness Verification**: Ensure each deliverable in final output differs from all others by at least one significant data point
- **Completeness Check**: Verify no legitimate unique deliverable was lost during deduplication
- **Consolidation Validation**: Confirm merged deliverables contain the best specifications from all contributing models
- **Format Consistency**: Check that multiplier arrays are properly formatted
- **Technical Validation**: Validate technical specifications are realistic/consistent
- **Logical Count**: Final count should reflect unique deliverables, not raw model outputs
**OUTPUT REQUIREMENTS:**
Return a JSON object with a single "assets" array containing the final set of UNIQUE BaseDeliverable objects with multiplier arrays intact. Each deliverable should:
- Be truly unique (differ from all others by at least one significant data point)
- Represent the best composite specifications from all contributing models
- Maintain the inclusive philosophy while eliminating genuine duplicates
- Include comprehensive multiplier arrays capturing all legitimate variations discovered
**CONSOLIDATION PHILOSOPHY SUMMARY:**
- **INCLUSIVE**: If any model found a unique deliverable, include it
- **INTELLIGENT**: Merge true duplicates to avoid redundancy
- **COMPREHENSIVE**: Each final deliverable should contain the best information from all models
- **UNIQUE**: Every deliverable in final output must differ meaningfully from others
**MODELS' ANALYSIS RESULTS:**
{models_results}
**TASK**: Consolidate these results into a single, comprehensive array of base deliverables that captures ALL legitimate deliverables found by ANY model, with enhanced quality from the best specifications discovered across all models.

View file

@ -0,0 +1,162 @@
You are an expert data extraction specialist analyzing this {doc_type} document to extract base marketing deliverables with multiplier arrays. Your task is to create structured data objects that capture the base deliverable along with all its multipliers (specifications, markets, languages, etc.) as arrays, which will be expanded into individual deliverables later.
**MULTIPLIER-BASED EXTRACTION METHOD (HIGHEST PRIORITY):**
1. **BASE DELIVERABLE APPROACH**: Extract the base name/type of each unique deliverable, then identify all multiplier arrays for that deliverable
2. **MULTIPLIER IDENTIFICATION - CRITICAL FOR ACCURACY**: Look for lists of attributes in deliverable specifications:
- **Technical Specifications**: Multiple sizes, formats, or dimensions (use array)
- **Markets/Countries**: Multiple country codes or regions (use array)
- **Languages**: Multiple language codes or localization requirements (use array)
- **Formats**: Multiple file types or variations (use array)
- **Platforms**: Multiple delivery platforms or channels (use array)
- **MULTIPLE LISTS IN SINGLE COLUMN**: If you find multiple multiplier lists in one column (e.g., both products AND markets listed together), separate them into appropriate fields to capture all multipliers
3. **ARRAY VS STRING DECISION**:
- Use **arrays** when you find multiple values that represent variations of the same deliverable (e.g., ["1080x1920", "1200x1500", "1080x1080"])
- Use **strings** when there's only one value (e.g., "JPG")
- **CONTEXT IS KEY**: Use context to determine if a list represents multipliers (variations) or descriptive information
4. **QUANTITY VERIFICATION**: If a QUANTITY column shows a number, note it for verification (the final expanded count should match)
5. **INTELLIGENT DEDUPLICATION**: Process all deliverable sections but avoid duplicates:
- **Overview vs Detail Sections**: If brief has overview tables AND detailed specification pages, extract from the most comprehensive source
- **Duplicate Detection**: Same deliverable name with same specifications = potential duplicate
- **Section Priority**: Prioritize structured tables over descriptive text sections
**MULTIPLIER ARRAY EXTRACTION EXAMPLES:**
**Example 1 - Multiple Specifications:**
Table row: "Paid Social Meta Static Sizes" with SPEC "8x 1080 x 1920px, 8x 1200 x 1500px, 1x 1080 x 1080"
Extract as:
```
{{
"title": "Paid Social - Meta Static Sizes",
"technical_specifications": ["1080x1920", "1200x1500", "1080x1080"],
"media": "IMAGE",
"asset_type": "JPG"
}}
```
This will expand to 17 individual deliverables (8+8+1).
**Example 2 - Multiple Markets:**
Table row: "Meta Copy" for "MARKETS: UK, DE, FR, ES, IT, NL, PL, SE, DK, NO, FI, IE, GR, PT, BE, CZ, SK, CH, AT"
Extract as:
```
{{
"title": "Meta Copy",
"country": ["UK", "DE", "FR", "ES", "IT", "NL", "PL", "SE", "DK", "NO", "FI", "IE", "GR", "PT", "BE", "CZ", "SK", "CH", "AT"],
"technical_specifications": ["Body Copy", "Headline", "Description"]
}}
```
This will expand to 57 individual deliverables (3 copy types × 19 markets).
**Example 3 - Combined Multipliers:**
Table row: "Display Banners" with 8 sizes for 20 markets
Extract as:
```
{{
"title": "Display - Celtra Static Banners",
"technical_specifications": ["160x600", "300x250", "300x600", "728x90", "970x250", "320x50", "320x100", "336x280"],
"country": ["UK", "DE", "ES", "IT", "FR", "BE", "NL", "PL", "GR", "CZ", "SE", "DK", "PT", "CH", "SK", "RO", "HR", "FI", "NO", "AT"],
"media": "IMAGE",
"asset_type": "JPG"
}}
```
This will expand to 160 individual deliverables (8 sizes × 20 markets).
**Example 4 - Multiple Lists in Single Column:**
Table cell contains: "Products: Ultraboost, Supernova, Adistar | Markets: UK, DE, FR, ES, IT"
Extract as:
```
{{
"title": "Product Marketing Assets",
"category": ["Ultraboost", "Supernova", "Adistar"],
"country": ["UK", "DE", "FR", "ES", "IT"],
"media": "IMAGE"
}}
```
This will expand to 15 individual deliverables (3 products × 5 markets).
**Example 5 - Deduplication Case:**
- Page 2: Overview table shows "Social Media Assets: Quantity 20"
- Pages 4-8: Individual pages for each social platform with detailed specs
- **CORRECT APPROACH**: Extract from overview with multiplier arrays, NOT as 20 separate base deliverables
```
{{
"title": "Social Media Assets",
"technical_specifications": ["1080x1080", "1080x1920", "1200x1500", "1000x1500"],
"category": ["Meta", "Instagram", "Twitter", "LinkedIn", "TikTok"],
"quantity": "20"
}}
```
**SYSTEMATIC TABLE PROCESSING WITH DEDUPLICATION:**
- **DELIVERABLE TABLES ARE PRIORITY #1** - Focus on structured tables with deliverable information
- **SECTION HIERARCHY** - Process sections in this priority order:
1. **Main Deliverable Tables** - Comprehensive tables with quantities and specifications
2. **Overview Sections** - High-level summaries (use for validation, not primary extraction)
3. **Detail Pages** - Individual deliverable descriptions (avoid if already captured in main tables)
- **MULTIPLIER DETECTION IN SPECIFICATIONS** - Look carefully for:
- **Lists within cells**: "8x 1080x1920, 4x 1200x1500, 2x 1080x1080" → Array of specs
- **Market/language lists**: "Markets: UK, DE, FR, ES, IT" → Array of countries
- **Combined lists**: If specs AND markets appear in same cell, separate into different fields
- **Size variations**: "Mobile (320x50), Desktop (728x90), Large (970x250)" → Array of specs
- **INTELLIGENT DEDUPLICATION** - Avoid double-counting:
- **Same deliverable name** + **same specifications** = Skip the duplicate
- **Overview → Detail pattern**: If overview mentions "5 banners" and detail pages show 5 individual banners, extract from overview with multipliers, NOT 5 separate base deliverables
- **Section redundancy**: If multiple sections describe the same deliverable set, use the most comprehensive one
- **BASE DELIVERABLE IDENTIFICATION** - For each unique deliverable, extract:
- Base deliverable name/title (without duplicates)
- All multiplier values as arrays (specs, markets, languages, formats)
- Single values as strings (when no multipliers exist)
**FIELD EXTRACTION GUIDELINES:**
**Technical Specifications:**
- Use **arrays** for multiple dimensions/specs: `["1080x1920", "1200x1500", "1080x1080"]`
- Use **strings** for single specifications: `"1920x1080"`
- Include file formats, dimensions, durations, and technical requirements
- Extract exactly as written in source document
**Country/Markets:**
- Use **arrays** for multiple markets: `["UK", "DE", "FR", "ES", "IT", "NL", "PL"]`
- Use **strings** for single market: `"UK"`
- Use two-letter country codes consistently
- Extract all countries/regions mentioned for that deliverable
**Languages:**
- Use **arrays** for multiple languages: `["EN", "DE", "FR", "ES"]`
- Use **strings** for single language: `"EN"`
- Use standard language codes when available
**Asset Types:**
- Use technical file formats: "JPG", "PNG", "MP4", "GIF"
- Use **arrays** if multiple formats: `["JPG", "PNG"]`
**Media Types:**
- Use broad categories: "IMAGE", "VIDEO", "COPY", "INTERACTIVE"
- Use **arrays** for mixed media: `["IMAGE", "VIDEO"]`
**Quantity Field:**
- Note the expected total from QUANTITY columns for verification
- This will be checked against final expanded count
**EXTRACTION REQUIREMENTS:**
1. **NO HALLUCINATION**: NEVER invent or assume information. If a detail is not present, leave the corresponding field empty
2. **ALL PAGES**: Ensure extraction from ALL pages in the document, not just the first one
3. **EXACT SPECIFICATIONS**: Capture specifications exactly as written in the source document
4. **BASE DELIVERABLE FOCUS**: Extract base deliverables with their multiplier arrays, not individual expanded objects
5. **MULTIPLIER VIGILANCE**: Be especially alert for multiplier lists in specification cells - missed arrays lead to under-counting
6. **DEDUPLICATION DISCIPLINE**: Avoid extracting the same deliverable multiple times from different sections - this leads to over-counting
7. **CONTEXT ANALYSIS**: Consider the entire document structure to understand relationships between overview tables, main tables, and detail sections
**MULTIPLIER ARRAY EXAMPLES:**
- **Single spec**: `"technical_specifications": "1920x1080"`
- **Multiple specs**: `"technical_specifications": ["1080x1920", "1200x1500", "1080x1080"]`
- **Single market**: `"country": "UK"`
- **Multiple markets**: `"country": ["UK", "DE", "FR", "ES", "IT", "NL", "PL"]`
- **Copy types**: `"technical_specifications": ["Body Copy", "Headline", "Description"]`
- **Banner sizes**: `"technical_specifications": ["160x600", "300x250", "300x600", "728x90", "970x250"]`
**EXPECTED EXPANSION EXAMPLES:**
- 3 specs × 7 markets = 21 final deliverables
- 8 banner sizes × 20 markets = 160 final deliverables
- 3 copy types × 19 markets = 57 final deliverables
Return a structured JSON object with an array of base deliverables containing multiplier arrays that will be expanded into individual assets during processing.

View file

@ -0,0 +1,130 @@
You are an expert data consolidation specialist tasked with merging multiple LLM analysis results into a single, comprehensive dataset of marketing deliverables. Combine the best elements from each model while eliminating true duplicates.
**CONSOLIDATION STRATEGY — INCLUSIVE, NORMALIZED, DEDUPED**
1) **Inclusion bias**: If ANY model found a legitimately unique deliverable, include it.
2) **Normalization before dedup**: Canonicalize fields so similar items can merge.
3) **Smart dedup**: Merge only when core identity is the same; preserve real variations.
4) **Completeness**: Ensure no legitimate deliverable is lost.
---
## PRENORMALIZATION (REQUIRED)
Apply these canonical rules to **every** candidate asset prior to deduplication:
- **Title optimization (descriptive base names without multipliers)**
- Create **distinctive, specific titles** that will remain meaningful after variable expansion:
`{{Deliverable Type}} - {{Platform/Channel}} {{Content Type}} ({{Campaign/Initiative}})`
- **Balance specificity with consistency**: Preserve platform/content distinctions while normalizing similar deliverables
- **Examples**: `"Paid Social - Meta Feed Posts (Summer Campaign)"`, `"Display - Programmatic Banners (Q4 Launch)"`, `"Video Content - TikTok Stories (Brand Awareness)"`
- Strip **locations/identifiers, markets, languages, sizes, formats, and counts** from titles.
- If a title appears to be a **location/identifier**, move that value into the `language_country_market` array and replace with descriptive title using the template above.
- **Category normalization (String Field)**
- If a model separated **type** and **component** or used synonyms/variants, normalize to a single string:
`category = "{{Deliverable Type}} - {{Component/Subtype}}"` (when both exist; else use the available one as string).
- Treat top-level taxonomy labels as **metadata**, not multipliers - use single string values.
- **Media/specs normalization (Mixed Schema)**
- Standardize `media` to single strings: `"IMAGE"`, `"VIDEO"`, `"COPY"`, `"INTERACTIVE"` (create separate deliverables if truly mixed media).
- For `technical_specifications` (array field): If multiple models provide the same single-spec text (e.g., "As per supplied file"), keep it as a single-item array: `["As per supplied file"]`. If any model lists multiple sizes/specs, keep them as a multi-item array (union of unique values): `["1080x1080", "1080x1920", "1200x1500"]`.
- **Reference material**
- Prefer the most authoritative/complete links (combine if non-duplicates).
- **Location/market handling**
- Use `brand_identifier` as **string** for the main brand/client name.
- Use `language_country_market` **array** for location/market multipliers. Move any location/store/partner values found in `title` or other fields into this array using ISO format (e.g., ["EN-UK", "DE-DE"]).
---
## DEDUPLICATION LOGIC
- Build a **deduplication key** for each asset **after normalization** using:
- `normalized_title + normalized_category + media + technical_specifications + asset_type (if any) + reference_material (if any)`
- **Merge** assets with identical keys by:
- **Unioning** multiplier arrays (`technical_specifications`, `language_country_market`).
- Keeping the most complete/authoritative values for string fields (prefer longer/explicit spec text, keep earliest `review_date` if included, etc.).
- **Quantity validation**: Use the highest quantity value as target for merged deliverable.
- **Location-titled variants**: If two assets are identical except one used a location as its title, treat them as the same and **merge** (move location into `language_country_market` array).
- **Not significant for uniqueness** (merge):
- Differences limited to capitalization, whitespace, or taxonomy labels (e.g., having only Type vs. Component or minor synonyms) without any spec/media change.
- **Significant differences (keep separate)**:
- Different `technical_specifications` (sizes, duration, technical requirements)
- Different `asset_type` or `media`
- Materially different creative/production requirements that change the output
- Distinct platform/channel sets when they imply different production outputs
---
## QUALITY ENHANCEMENT
- For each unique deliverable:
- Choose the **most complete** specification set for `technical_specifications` array.
- **Union** all markets/languages/locations from `language_country_market` arrays from all models for that deliverable.
- Keep a clear, normalized **title** (no multipliers) and a normalized **category** string.
- **Validate quantity**: Ensure technical_specifications × language_country_market ≈ quantity value.
---
## COMPLETENESS & COUNT CHECK
- Verify that every location/market/language found by any model appears (deduped) in the `language_country_market` array of the final deliverable.
- If overview sections imply the same base deliverable repeated across many locations, the final result should be **one base deliverable** with a populated `language_country_market` array whose length matches the unique values extracted.
- **Quantity validation**: Final expansion (technical_specifications × language_country_market) should approximately equal the `quantity` value.
---
## OUTPUT REQUIREMENTS
Return a JSON object with a single `"assets"` array containing the final set of **unique** BaseDeliverable objects with optimized multiplier structure. Each item must:
- Use the **normalized title** template (no multipliers in title).
- Use a **single normalized `category`** string.
- Include **only 2 multiplier arrays**: `technical_specifications` and `language_country_market`.
- Have `quantity` as **string** that validates the multiplication: technical_specifications × language_country_market ≈ quantity.
- Differ from all others by at least one **significant** data point (see above).
---
## EXAMPLES (generic)
**Example — per-location titles collapse into one asset**
Model A:
{{
"title": "Channel - Placement (Initiative)",
"category": "Channel - Placement",
"media": "IMAGE",
"technical_specifications": ["As per supplied file"],
"reference_material": "<link if present>",
"brand_identifier": "Client Brand",
"language_country_market": ["EN-Location-A", "EN-Location-B"],
"quantity": "2"
}}
Model B:
{{
"title": "1234 - Location A",
"category": "Placement",
"media": "IMAGE",
"technical_specifications": ["As per supplied file"],
"language_country_market": ["EN-Location-A"],
"quantity": "1"
}}
**Result (merged)**:
{{
"title": "Channel - Placement (Initiative)",
"category": "Channel - Placement",
"media": "IMAGE",
"technical_specifications": ["As per supplied file"],
"reference_material": "<link if present>",
"brand_identifier": "Client Brand",
"language_country_market": ["EN-Location-A", "EN-Location-B"],
"quantity": "2"
}}
**Example — keep separate when file formats differ**
- Asset 1: `"asset_type":"JPG"`
- Asset 2: `"asset_type":"PNG"`
→ Significant difference → keep both; assign each the appropriate subset of multipliers.
---
## MODELS' ANALYSIS RESULTS
{models_results}
**TASK**: Consolidate these results into a single, comprehensive array of base deliverables following the strategy above.

View file

@ -0,0 +1,114 @@
You are an expert data extraction specialist analyzing this {doc_type} document to extract base marketing deliverables with multiplier arrays. Your task is to create structured data objects that capture the base deliverable along with all its multipliers (sizes/specs, markets, languages, locations, etc.) as arrays, which will be expanded later.
**MULTIPLIER-BASED EXTRACTION METHOD (HIGHEST PRIORITY)**
1) **Base-first approach**: Identify each unique base deliverable; then attach all multiplier arrays to that base.
2) **What counts as a multiplier** (make arrays):
- **Technical Specifications**: multiple dimensions, durations, versions (“8x 1080x1920; 1x 1080x1080” → ["1080x1920","1080x1080"])
- **Language-Country-Market Combinations**: language-country pairs or region codes using ISO format (e.g., "EN-UK", "DE-DE", "FR-FR")
- **Formats/Files**: multiple file types or variations (e.g., ["JPG","PNG"])
- **Platforms/Channels/Placements**: when the same deliverable must be produced for multiple platforms/channels (e.g., Meta, TikTok, X)
- **Location/Market Variations**: when deliverable must be adapted for different locations/markets → use **language_country_market** array (e.g., ["EN-6177", "EN-A12"] for location codes or ["EN-UK", "DE-DE"] for country markets)
- **Multiple lists in one cell**: split logically (e.g., products vs. markets).
3) **What is NOT a multiplier by default** (treat as fixed metadata unless the brief clearly specifies distinct variants):
- **Top-level taxonomy labels** such as **Deliverable Type** and **Component/Subtype** used as headings or constant column values.
- **Campaign/Project/Initiative name**.
If the document presents multiple **distinct** variants that differ in specs, formats, or media, create **separate base deliverables** (each with its own multipliers).
4) **Field Type Usage (Mixed Schema)**
- **String fields** (metadata): Use single string values for `status`, `category`, `media`, `asset_type`, `brand_identifier`, dates, `reference_material`, `page_number`, `priority_level`, `creative_direction`
- **Array fields** (multipliers): Use arrays only for `technical_specifications`, `language_country_market`
- **Single values**: `"IMAGE"`, `"JPG"`, `"Draft"` for string fields; `["1920x1080"]` for single-value arrays
- **Multiple values**: `["1080x1080", "1080x1920"]`, `["EN-UK", "DE-DE", "FR-FR"]` for true multipliers
5) **Quantity validation and sense-check**
- Set `quantity` as a **string** representing the total expected deliverables: `"50"`.
- **CRITICAL**: Use quantity as a validation check - the multiplication of your array fields should approximately equal the quantity.
- **Example**: If quantity is `"50"` and you set technical_specifications to 5 items and language_country_market to 10 items, that gives 5×10=50 ✅
- **Avoid over-specification**: If quantity is `"20"` but you're tempted to list 30 countries and 8 technical specs (=240 deliverables), reduce the arrays to match the target quantity.
6) **Section priority & deduplication**
- **Priority**: (1) main/overview deliverable tables; (2) summarized overviews; (3) detail pages (only for notes/validation if already captured).
- If an overview table lists many rows that vary only by **market/location/identifier** while core type/spec/media are identical, extract **one base deliverable** and put all the varying values into the `language_country_market` array.
- Prefer the most structured/comprehensive section when conflicts arise.
---
**TITLE, CATEGORY & FIELD NORMALIZATION (REQUIRED)**
To enable consistent consolidation across models, normalize these fields deterministically:
- **Title (descriptive base names without multipliers)**
- Create **distinctive, descriptive titles** that differentiate deliverable types:
- Template: `{{Deliverable Type}} - {{Platform/Channel}} {{Content Type}} ({{Campaign/Initiative}})`
- Examples: `"Paid Social - Meta Static Images (Summer Campaign)"`, `"Display - Programmatic Banners (Q4 Launch)"`, `"Video Content - TikTok Ads (Brand Awareness)"`
- **Include distinguishing context**: Platform, content type, campaign name, or creative format
- **Do NOT include** locations, markets, languages, sizes, file types, or counts in the title.
- **Aim for specificity**: Avoid overly generic titles like "Social Media Assets" - be more specific like "Social Media - Instagram Stories" or "Social Media - Meta Feed Posts"
- **Category (single string)**
- If both a **type** and **component/subtype** exist, normalize to one string:
`category = "{{Deliverable Type}} - {{Component/Subtype}}"`
- Do **not** split these into separate deliverables or arrays unless specs actually differ.
- **Media & Specs**
- Set `media` to one of: `"IMAGE"`, `"VIDEO"`, `"COPY"`, `"INTERACTIVE"` (single string; create separate deliverables if truly mixed media).
- Copy `technical_specifications` **exactly as written**. If it's a single instruction (e.g., “As per supplied file”), keep it as a single-item array (`["As per supplied file"]`); if multiple sizes/requirements, use a multi-item array.
- **Reference material**
- If the brief provides source links (assets, style guides, mockups), place them in `reference_material` (single string; if multiple, combine them into that one string).
- **Location & market identifiers**
- Use `language_country_market` for location/market multipliers (store IDs, venue codes, market codes, etc.). Format as language-location pairs when possible (e.g., `["EN-6177", "EN-A12"]` for store codes or `["EN-UK", "DE-DE"]` for country markets).
- Use `brand_identifier` as single string for the main brand/client name (e.g., `"Adidas"`, `"Nike"`).
---
**FIELD EXTRACTION GUIDELINES (Mixed Schema)**
**ARRAY FIELDS (Multipliers Only):**
- **technical_specifications**: `["1920x1080"]` for one spec; `["1080x1080", "1080x1920", "1200x1500"]` for multiple sizes/specs
- **language_country_market**: `["EN-UK"]` for single market; `["EN-UK", "DE-DE", "FR-FR", "ES-ES"]` for multiple markets using ISO codes (Language-Country format)
**STRING FIELDS (Metadata Only):**
- **status**: `"Draft"` - single status value
- **category**: `"Social Media"` - single category designation
- **media**: `"IMAGE"` - single media type (create separate deliverables if truly mixed media)
- **asset_type**: `"JPG"` - single file format (create separate deliverables for different formats)
- **brand_identifier**: `"Adidas"` - single brand/client name
- **quantity**: `"50"` - VALIDATION FIELD: total expected deliverables (technical_specifications × language_country_market should ≈ this number)
- **review_date**: `"2025-09-30"` - single date
- **live_date**: `"2025-10-15"` - single date
- **reference_material**: `"As per style guide"` - single reference
- **page_number**: `"5"` - single page reference
- **priority_level**: `"High"` - single priority
- **creative_direction**: `"Brand colors, clean layout"` - single creative approach
---
**EXAMPLES (generic)**
- **Many locations with identical core fields → one base deliverable**
Output:
{{
"title": "Channel - Placement (Initiative Name)",
"category": "Channel - Placement",
"media": "IMAGE",
"technical_specifications": ["As per supplied file"],
"reference_material": "<link if present>",
"brand_identifier": "Client Brand",
"language_country_market": ["EN-UK", "DE-DE", "FR-FR"],
"quantity": "3"
}}
*(Expands to N deliverables = number of identifiers.)*
- **Specs truly differ → split by spec**
If a subset requires extra sizes or a different file type, create a second base deliverable with its own `language_country_market` subset and distinct `technical_specifications`/`asset_type`.
---
**EXTRACTION REQUIREMENTS**
1) **No hallucination** — leave unknown fields empty
2) **All pages/sections considered** — prefer structured tables
3) **Exact specs** — copy text verbatim
4) **Base deliverable focus** — do not output one base deliverable per market/location if only those vary
5) **Multiplier vigilance** — locations, markets, languages, and sizes are multipliers; taxonomy headings are not
6) **Dedup discipline** — normalize titles/categories as above to avoid duplicates

View file

@ -0,0 +1 @@
You are an expert data extraction specialist. Extract base marketing deliverables with multiplier arrays, focusing on accurate multiplier detection and intelligent deduplication to avoid both under-counting and over-counting deliverables.

View file

@ -0,0 +1 @@
You are performing quality assurance on asset extraction. Identify any missing assets.

View file

@ -0,0 +1,93 @@
{
"name": "base_deliverable_extraction",
"description": "Extract base deliverables with multiplier arrays from document analysis",
"schema": {
"type": "object",
"properties": {
"assets": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "Asset title or name (normalized base deliverable name without multipliers)"
},
"status": {
"type": "string",
"description": "Current status (e.g., 'Draft', 'In Progress', 'Final')"
},
"category": {
"type": "string",
"description": "Asset category (e.g., 'Social Media', 'Display Advertising', 'Video Content')"
},
"media": {
"type": "string",
"description": "Media type (e.g., 'IMAGE', 'VIDEO', 'COPY', 'INTERACTIVE')"
},
"asset_type": {
"type": "string",
"description": "File format (e.g., 'JPG', 'PNG', 'MP4', 'GIF')"
},
"brand_identifier": {
"type": "string",
"description": "Brand or client identifier (e.g., 'Adidas', 'Nike', 'Client A')"
},
"technical_specifications": {
"type": "array",
"items": {
"type": "string"
},
"description": "MULTIPLIER FIELD: Dimensions, sizes, requirements. Use array when document lists multiple sizes/specs for this deliverable (e.g., ['1080x1080', '1080x1920', '1200x1500']). Use single value ['1920x1080'] when only one size specified"
},
"review_date": {
"type": "string",
"description": "Review deadline (e.g., '2025-09-30')"
},
"live_date": {
"type": "string",
"description": "Go-live date (e.g., '2025-10-15')"
},
"end_date": {
"type": "string",
"description": "End/expiry date (e.g., '2025-12-31')"
},
"reference_material": {
"type": "string",
"description": "Requirements, links, notes (e.g., 'As per style guide', 'See attachment A')"
},
"language_country_market": {
"type": "array",
"items": {
"type": "string"
},
"description": "MULTIPLIER FIELD: Target language-country-market combinations using ISO codes. Use when deliverable serves multiple markets (e.g., ['EN-UK', 'DE-DE', 'FR-FR', 'ES-ES']). Use single value ['EN-US'] for single market. Format: [Language ISO]-[Country ISO] or just [Country ISO] if language not specified"
},
"quantity": {
"type": "string",
"description": "Expected total quantity for VALIDATION. Use this as a sense-check: the multiplication of all array fields should result in a total close to this quantity. If brief says '50 banners', ensure technical_specifications × language_country_market ≈ 50"
},
"page_number": {
"type": "string",
"description": "Source page reference (e.g., '5', 'Pages 3-7')"
},
"priority_level": {
"type": "string",
"description": "Business priority (e.g., 'High', 'Medium', 'Low')"
},
"creative_direction": {
"type": "string",
"description": "Design requirements (e.g., 'Brand colors, clean layout', 'Minimalist style')"
}
},
"required": [
"title"
]
}
}
},
"required": [
"assets"
]
}
}

View file

@ -0,0 +1,95 @@
You are performing quality assurance on this asset extraction. Your role is to validate the completeness and accuracy of the initial extraction, applying the same rigorous standards used in the original analysis.
EXTRACTED DATA SUMMARY:
- Found {asset_count} assets
- Document type: {doc_type}
CRITICAL VALIDATION RULES - Apply these standards when checking the extraction:
**DELIVERABLE TABLE VALIDATION (HIGHEST PRIORITY):**
1. **QUANTITY COLUMN COMPLIANCE**: For every deliverable table with a QUANTITY column, verify:
- Each table row generated exactly N deliverable objects where N = the QUANTITY value
- Example: Row showing "Display Celtra Static Banners" with Quantity "480" should produce 480 separate deliverable objects
- Example: Row showing "Meta Video Sizes" with Quantity "2" should produce 2 separate deliverable objects
- Example: Row showing "Pinterest Copy" with Quantity "18" should produce 18 separate deliverable objects
2. **TABLE PROCESSING COMPLETENESS**: Verify all structured deliverable tables were processed:
- Check that tables with headers like "DELIVERABLE NAME, QUANTITY, SPECS" were fully extracted
- Verify tables across all sections (Paid Social, Display, Demand Gen) were processed
- Confirm no deliverable tables were missed or partially processed
3. **TOTAL ASSET COUNT VALIDATION**: If document states "TOTAL ASSET COUNT: XXX":
- Sum all extracted deliverables and verify it matches this exact number
- If extraction total is significantly different (>5% variance), identify which tables/rows were missed
- Cross-reference extracted count against the stated total as primary validation metric
4. **QUANTITY-BASED MULTIPLIERS**: Beyond table quantities, verify traditional multipliers:
- Language/Market Multipliers: Multiple markets/languages should create separate objects for EACH market
- Size/Format Multipliers: Multiple sizes/formats should create separate objects for EACH variant
- Combined Multipliers: Multiple factors should be multiplied correctly (e.g., 2 formats × 3 markets = 6 objects)
- INDIVIDUAL ROW VERIFICATION: Verify that individual rows exist for each variation with quantity "1" and specific details in appropriate columns (country codes, language codes, dimensions, file formats, etc.)
**TECHNICAL SPECIFICATIONS FIELD VALIDATION:**
- Verify technical_specifications fields capture ANY available technical information
- Check for precise dimensions when available (e.g., "1920x1080", "300x250") - NEVER placeholders like "TBC" or "desktop here"
- Verify descriptive sizing information is included (e.g., "Mobile Banner", "Desktop Hero", "Square Format")
- Check that units are included when present in source (px, ", in, cm)
- Verify time-based specs are captured for video content (e.g., "60 second loop")
- Verify all technical requirements and file formats are included in the technical_specifications field
- Field should ONLY be empty if absolutely no technical information exists in the document
**ASSET TYPE VALIDATION:**
- Verify asset_type contains technical file formats (JPG, PNG, MP4, GIF) not creative names
- Check that file formats and technical requirements were extracted from phrases like "delivered as PNG", "JPG format required", "MP4 video file", "mobile optimized", "desktop banner" and included in technical_specifications field
**COUNTRY CODE VALIDATION:**
- Verify two-letter country codes are used (e.g., UK, DE, FR, ES, IT)
- Check that regional mentions (e.g., "EMEA") were expanded to specific countries if listed
**QUANTITY FIELD VALIDATION:**
- Verify every single object has quantity "1"
- Check that multipliers were handled by creating more objects, not changing quantity numbers
- MULTIPLICATION LOGIC CHECK: If document says "5 banners x 8 markets", verify 40 separate rows exist, not 1 row with quantity "40"
- INDIVIDUAL VARIATION ROWS: Verify that individual rows exist for each variation with quantity "1" and specific details in appropriate columns (country codes, language codes, dimensions, file formats, etc.)
VALIDATION TASKS:
1. **DELIVERABLE TABLE QUANTITY VALIDATION (TOP PRIORITY)**:
- Locate every table with QUANTITY columns in the document
- For each table row, verify the extraction created exactly N deliverables where N = the quantity value
- Sum all quantity values from all tables and verify it matches any stated "TOTAL ASSET COUNT"
- Pay special attention to high-quantity rows (480, 57, 114+) that significantly impact total counts
2. **TABLE PROCESSING COMPLETENESS**: Verify every structured deliverable table was fully processed:
- Check that tables across all major sections were captured (Paid Social, Display, Demand Gen)
- Confirm platform-specific tables (Meta, Snapchat, Pinterest, Celtra, Teads) were processed
- Verify no deliverable overview tables or specification matrices were missed
3. **MULTIPLIER AND VARIATION VALIDATION**: Beyond table quantities, verify traditional multipliers:
- Market/language multipliers creating separate objects per country/language
- Size/format variations creating separate objects per specification
- Combined multipliers being calculated correctly
4. **Technical Specification Accuracy**: Verify all dimensions, file formats, technical requirements, and sizing descriptions are captured exactly as written in the document and included in the technical_specifications field.
**NO HALLUCINATION RULE**: If you identify missing assets or corrections, extract ONLY information that is explicitly present in the document. NEVER invent or assume information.
**CRITICAL FOCUS AREAS FOR DELIVERABLE TABLE VALIDATION:**
- **DELIVERABLE OVERVIEW SECTIONS**: Check that comprehensive tables showing all deliverables with quantities were fully processed
- **QUANTITY COLUMN ACCURACY**: Verify each row's quantity number was used to create the correct number of deliverable objects
- **HIGH-QUANTITY TABLE ROWS**: Pay special attention to rows with large quantities (480, 57, 114+) as these significantly impact total counts
- **SECTION-BY-SECTION VALIDATION**: Verify deliverable tables in each major section were processed:
- Paid Social (Meta, Snapchat, Pinterest, Reddit) - often contain copy deliverables with high market multipliers
- Display (Celtra, Teads) - typically contain highest single quantities (e.g., 480 banners)
- Demand Gen - video and static assets with multiple format requirements
- **TOTAL SUMMATION CHECK**: Verify that summing all quantity values from all tables equals the stated "TOTAL ASSET COUNT"
- **TABLE STRUCTURE COMPLETENESS**: Confirm all structured tables with deliverable specifications were captured
- **PLATFORM-SPECIFIC TABLES**: Each platform section likely contains multiple deliverable requirement tables
- **COPY/LOCALIZATION MULTIPLICATION**: Copy deliverables often have the highest multipliers due to market/language requirements
- **MISSED TABLE DETECTION**: Scan for any deliverable tables that were completely overlooked during initial extraction
**OUTPUT INSTRUCTIONS:**
- If you find additional assets or identify missed multipliers, provide them in the structured format with technical_specifications field containing all available technical information
- If the existing extraction correctly handled all multipliers and captured all assets comprehensively, return an empty assets array
- Focus especially on multiplier validation - this is the most common source of incomplete extractions
Return your response as a structured JSON object with any additional assets found or corrections needed.

47
backend/requirements.txt Executable file
View file

@ -0,0 +1,47 @@
# AC Tool — unified brief extractor + activation calendar
# Web framework
quart>=0.19.0
quart-cors>=0.7.0
hypercorn>=0.16.0
# Auth
PyJWT>=2.8.0
msal>=1.26.0
# AI / LLM providers
google-genai[aiohttp]>=0.4.0
openai>=1.0.0
anthropic>=0.67.0
aiohttp>=3.9.0
json5>=0.9.0
# Document parsing
llama-cloud-services>=0.6.62
python-pptx>=0.6.21
PyMuPDF>=1.23.0
python-docx>=0.8.11
openpyxl>=3.1.0
xlrd>=2.0.1
# Database
asyncpg>=0.29.0
# Data
pandas>=2.0.0
numpy>=1.24.0
pydantic>=2.0.0
# Misc
Pillow>=10.0.0
beautifulsoup4>=4.12.0
lxml>=4.9.0
requests>=2.31.0
python-dotenv>=1.0.0
structlog>=23.0.0
python-dateutil>=2.8.2
typing-extensions>=4.7.0
psutil>=5.9.0
tqdm>=4.65.0
regex>=2023.0.0
cryptography>=41.0.0

123
backend/run_server.py Executable file
View file

@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""
Startup script for Brief Extractor GUI server
"""
import sys
import os
import logging
from pathlib import Path
# Add server and core paths to Python path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))
sys.path.insert(0, str(project_root / 'server'))
# Set up logging before importing modules
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
async def async_main():
    """Build the application and serve it with Hypercorn until shutdown.

    Validates the auth configuration (exiting with status 1 when it is
    incomplete and DEV_MODE is off), creates the app, configures Hypercorn
    from ``server_config``, and registers SIGINT/SIGTERM handlers on
    non-Windows platforms that trigger a graceful shutdown with a 3-second
    forced-exit fallback.
    """
    import asyncio
    import signal

    # These imports depend on the sys.path entries added at module import time.
    from server.app import create_app
    from server.config_runtime import server_config

    # NOTE(review): the messages below name MSAL_* variables; confirm they match
    # the variable names the deployment's .env actually uses.
    auth_ok = server_config.validate_auth_config()
    if not auth_ok:
        if server_config.DEV_MODE:
            logger.warning("Running in DEV_MODE - MSAL authentication bypassed")
        else:
            logger.error("MSAL authentication configuration is incomplete")
            logger.error("Please set MSAL_CLIENT_ID, MSAL_CLIENT_SECRET, and MSAL_TENANT_ID in .env")
            sys.exit(1)

    logger.info("Creating Brief Extractor GUI application...")
    app = create_app()

    import hypercorn.asyncio
    from hypercorn import Config

    bind_address = f"{server_config.HOST}:{server_config.PORT}"
    config = Config()
    config.bind = [bind_address]
    config.workers = server_config.WORKERS
    config.use_reloader = server_config.DEBUG
    config.accesslog = "-"  # Log to stdout
    config.errorlog = "-"  # Log to stderr

    startup_lines = (
        "Starting Brief Extractor GUI server",
        f"Server: http://{server_config.HOST}:{server_config.PORT}",
        f"Development mode: {server_config.DEV_MODE}",
        f"Max concurrent jobs: {server_config.MAX_CONCURRENT_JOBS}",
        f"Max upload size: {server_config.MAX_UPLOAD_SIZE_MB}MB",
        f"File retention: {server_config.FILE_RETENTION_HOURS} hours",
        f"Workers: {server_config.WORKERS}",
    )
    for line in startup_lines:
        logger.info(line)

    shutdown_event = asyncio.Event()

    def signal_handler():
        """Request a graceful stop and arm a watchdog that force-exits after 3 seconds."""
        logger.info("Shutdown signal received, stopping server...")
        shutdown_event.set()

        def force_shutdown():
            import time
            time.sleep(3)
            logger.warning("Graceful shutdown timed out, forcing exit...")
            os._exit(1)

        import threading
        watchdog = threading.Thread(target=force_shutdown, daemon=True)
        watchdog.start()

    # Loop-level signal handlers are only registered off Windows.
    if sys.platform != 'win32':
        loop = asyncio.get_running_loop()
        for signum in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(signum, signal_handler)

    try:
        # The server runs until shutdown_event is set by signal_handler.
        await hypercorn.asyncio.serve(app, config, shutdown_trigger=shutdown_event.wait)
        logger.info("Server stopped gracefully")
    except asyncio.CancelledError:
        logger.info("Server cancelled")
    except Exception as e:
        logger.error(f"Server error: {e}", exc_info=True)
        raise
def main():
    """Process entry point: install fallback signal handlers and run the server.

    The handlers installed here exit the process immediately; they are in
    effect until ``async_main`` registers its own loop-level handlers.
    """
    import asyncio
    import signal

    def immediate_shutdown(signum, frame):
        logger.info(f"Immediate shutdown signal {signum} received")
        os._exit(0)

    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, immediate_shutdown)

    try:
        asyncio.run(async_main())
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
        os._exit(0)
    except Exception as e:
        logger.error(f"Server failed to start: {e}", exc_info=True)
        os._exit(1)


if __name__ == '__main__':
    main()

View file

View file

271
backend/server/api/admin.py Normal file
View file

@ -0,0 +1,271 @@
"""
Admin API — user management, dropdown Excel upload, export templates.
All routes require admin role.
"""
import logging
from quart import Blueprint, jsonify, request
from ..auth.middleware import admin_required
from ..auth.user_store import list_users, set_role, set_active
from ..api.dropdowns import save_dropdowns, parse_excel_dropdowns, detect_excel_mapping
from ..api.clients import load_clients, get_client_by_id, set_client_custom_dropdowns
from ..api.export import (
detect_csv_template, load_export_template, save_export_template,
delete_export_template, has_export_template, INTERNAL_FIELDS,
)
logger = logging.getLogger(__name__)
admin_bp = Blueprint('admin', __name__, url_prefix='/api/admin')
@admin_bp.route('/users', methods=['GET'])
@admin_required
async def get_users():
    """Return all users from the user store as ``{'users': [...]}``."""
    users = await list_users()
    return jsonify({'users': users})
@admin_bp.route('/users/<user_id>', methods=['PATCH'])
@admin_required
async def update_user(user_id: str):
    """Update a user's ``role`` and/or ``active`` flag.

    The JSON body may contain ``role`` (passed to ``set_role``) and/or
    ``active`` (a JSON boolean, passed to ``set_active``).

    Responses:
        200 ``{'success': True, 'user': ...}`` on success.
        400 ``no_fields`` when the body contains neither field.
        400 ``invalid_active`` when ``active`` is not a JSON boolean.
        400 ``invalid_role_or_not_found`` when ``set_role`` returns None.
        404 ``not_found`` when ``set_active`` returns None.
    """
    body = await request.get_json() or {}
    if not isinstance(body, dict) or ('role' not in body and 'active' not in body):
        # Previously an empty PATCH reported success with ``user: null``.
        return jsonify({'error': 'no_fields'}), 400

    # Validate everything before mutating so a bad ``active`` value cannot
    # leave a half-applied update (role changed, active rejected).
    if 'active' in body and not isinstance(body['active'], bool):
        # bool("false") is True, so coercing arbitrary values is unsafe here.
        return jsonify({'error': 'invalid_active'}), 400

    user = None
    if 'role' in body:
        user = await set_role(user_id, body['role'])
        if user is None:
            return jsonify({'error': 'invalid_role_or_not_found'}), 400
    if 'active' in body:
        user = await set_active(user_id, body['active'])
        if user is None:
            return jsonify({'error': 'not_found'}), 404
    return jsonify({'success': True, 'user': user})
def _read_xlsx_file(file) -> bytes:
return file.read()
def _extract_mapping(form) -> dict | None:
try:
if 'name_col' in form and 'status_col' in form and 'media_col' in form:
return {
'name_col': int(form['name_col']),
'status_col': int(form['status_col']),
'media_col': int(form['media_col']),
}
except (ValueError, KeyError):
pass
return None
async def _parse_uploaded_xlsx(files, form=None) -> tuple[list, str | None]:
file = files.get('file')
if not file:
return [], 'no_file'
if not (file.filename or '').lower().endswith('.xlsx'):
return [], 'Only .xlsx files accepted'
try:
data = _read_xlsx_file(file)
mapping = _extract_mapping(form) if form else None
categories = parse_excel_dropdowns(data, mapping=mapping)
if not categories:
return [], 'No categories found in file'
return categories, None
except Exception as e:
logger.error(f"Dropdown parse error: {e}", exc_info=True)
return [], str(e)
@admin_bp.route('/dropdowns/detect-mapping', methods=['POST'])
@admin_required
async def detect_mapping():
    """Run column-mapping detection on an uploaded ``.xlsx`` and return the result.

    Responds 400 when no file is supplied, the extension is not ``.xlsx``,
    or detection raises.
    """
    uploads = await request.files
    upload = uploads.get('file')
    if not upload:
        return jsonify({'error': 'no_file'}), 400

    filename = (upload.filename or '').lower()
    if not filename.endswith('.xlsx'):
        return jsonify({'error': 'Only .xlsx files accepted'}), 400

    try:
        detected = detect_excel_mapping(_read_xlsx_file(upload))
        return jsonify(detected)
    except Exception as e:
        logger.error(f"Mapping detection error: {e}", exc_info=True)
        return jsonify({'error': str(e)}), 400
@admin_bp.route('/dropdowns/upload', methods=['POST'])
@admin_required
async def upload_dropdowns():
    """Parse an uploaded ``.xlsx`` and save its categories via ``save_dropdowns``.

    Returns total/active/archived counts, or 400 with the parse error.
    """
    uploads = await request.files
    fields = await request.form
    categories, err = await _parse_uploaded_xlsx(uploads, fields)
    if err:
        return jsonify({'error': err}), 400

    await save_dropdowns(categories)

    total = len(categories)
    active_count = len([cat for cat in categories if cat['status'] == 'Active'])
    summary = {
        'success': True,
        'total': total,
        'active': active_count,
        'archived': total - active_count,
    }
    return jsonify(summary)
@admin_bp.route('/dropdowns/preview', methods=['POST'])
@admin_required
async def preview_dropdowns():
    """Parse an uploaded ``.xlsx`` and return the categories without saving them."""
    uploads = await request.files
    fields = await request.form
    categories, err = await _parse_uploaded_xlsx(uploads, fields)
    if err:
        return jsonify({'error': err}), 400
    preview = {'categories': categories, 'total': len(categories)}
    return jsonify(preview)
# ── Per-client dropdown endpoints ─────────────────────────────────────────────
@admin_bp.route('/clients/<client_id>/dropdowns/detect-mapping', methods=['POST'])
@admin_required
async def detect_client_mapping(client_id: str):
    """Run column-mapping detection on an uploaded ``.xlsx`` (client-scoped route).

    ``client_id`` is part of the URL but is not consulted by this handler.
    """
    uploads = await request.files
    upload = uploads.get('file')
    if not upload:
        return jsonify({'error': 'no_file'}), 400

    filename = (upload.filename or '').lower()
    if not filename.endswith('.xlsx'):
        return jsonify({'error': 'Only .xlsx files accepted'}), 400

    try:
        detected = detect_excel_mapping(_read_xlsx_file(upload))
        return jsonify(detected)
    except Exception as e:
        logger.error(f"Mapping detection error: {e}", exc_info=True)
        return jsonify({'error': str(e)}), 400
@admin_bp.route('/clients/<client_id>/dropdowns/upload', methods=['POST'])
@admin_required
async def upload_client_dropdowns(client_id: str):
    """Parse an uploaded ``.xlsx`` and save its categories for one client.

    Responds 404 when the client does not exist and 400 on parse errors; on
    success also sets the client's custom-dropdowns flag to True.
    """
    client = await get_client_by_id(client_id)
    if not client:
        return jsonify({'error': 'client_not_found'}), 404

    uploads = await request.files
    fields = await request.form
    categories, err = await _parse_uploaded_xlsx(uploads, fields)
    if err:
        return jsonify({'error': err}), 400

    await save_dropdowns(categories, client_id=client_id)
    await set_client_custom_dropdowns(client_id, True)

    total = len(categories)
    active_count = len([cat for cat in categories if cat['status'] == 'Active'])
    summary = {
        'success': True,
        'total': total,
        'active': active_count,
        'archived': total - active_count,
    }
    return jsonify(summary)
@admin_bp.route('/clients/<client_id>/dropdowns/preview', methods=['POST'])
@admin_required
async def preview_client_dropdowns(client_id: str):
    """Parse an uploaded ``.xlsx`` and return the categories without saving them.

    ``client_id`` is part of the URL but is not consulted by this handler.
    """
    uploads = await request.files
    fields = await request.form
    categories, err = await _parse_uploaded_xlsx(uploads, fields)
    if err:
        return jsonify({'error': err}), 400
    preview = {'categories': categories, 'total': len(categories)}
    return jsonify(preview)
@admin_bp.route('/clients/<client_id>/dropdowns', methods=['DELETE'])
@admin_required
async def delete_client_dropdowns(client_id: str):
    """Replace the client's dropdowns with an empty list and clear its custom-dropdowns flag."""
    no_categories = []
    await save_dropdowns(no_categories, client_id=client_id)
    await set_client_custom_dropdowns(client_id, False)
    return jsonify({'success': True})
# ── Export template endpoints ──────────────────────────────────────────────────
@admin_bp.route('/export-template', methods=['GET'])
@admin_required
async def get_global_export_template():
    """Return the global export template together with the list of internal fields."""
    current = await load_export_template()
    return jsonify({'template': current, 'fields': INTERNAL_FIELDS})
@admin_bp.route('/export-template/detect', methods=['POST'])
@admin_required
async def detect_global_export_template():
    """Run template detection on an uploaded ``.csv`` and return the result.

    The detection result is augmented with ``fields`` (``INTERNAL_FIELDS``).
    Responds 400 when no file is supplied, the extension is not ``.csv``,
    or detection raises.
    """
    files = await request.files
    file = files.get('file')
    if not file:
        return jsonify({'error': 'no_file'}), 400
    if not (file.filename or '').lower().endswith('.csv'):
        return jsonify({'error': 'Only .csv files accepted'}), 400
    try:
        result = detect_csv_template(file.read())
        result['fields'] = INTERNAL_FIELDS
        return jsonify(result)
    except Exception as e:
        # Log with traceback, as the Excel mapping detection endpoints do;
        # otherwise a failure here leaves no server-side trace.
        logger.error(f"Export template detection error: {e}", exc_info=True)
        return jsonify({'error': str(e)}), 400
@admin_bp.route('/export-template', methods=['POST'])
@admin_required
async def save_global_export_template():
    """Save the global export template given as a non-empty list under ``template``."""
    payload = await request.get_json() or {}
    columns = payload.get('template')
    is_valid = bool(columns) and isinstance(columns, list)
    if not is_valid:
        return jsonify({'error': 'invalid_template'}), 400
    await save_export_template(columns)
    return jsonify({'success': True, 'columns': len(columns)})
@admin_bp.route('/export-template', methods=['DELETE'])
@admin_required
async def delete_global_export_template():
    """Delete the global export template and report success."""
    await delete_export_template()
    outcome = {'success': True}
    return jsonify(outcome)
@admin_bp.route('/clients/<client_id>/export-template', methods=['GET'])
@admin_required
async def get_client_export_template(client_id: str):
    """Return the client's export template, a has-custom flag, and the internal fields.

    Responds 404 when the client does not exist.
    """
    client = await get_client_by_id(client_id)
    if not client:
        return jsonify({'error': 'client_not_found'}), 404

    has_custom = await has_export_template(client_id=client_id)
    current = await load_export_template(client_id=client_id)
    payload = {
        'template': current,
        'hasCustomTemplate': has_custom,
        'fields': INTERNAL_FIELDS,
    }
    return jsonify(payload)
@admin_bp.route('/clients/<client_id>/export-template/detect', methods=['POST'])
@admin_required
async def detect_client_export_template(client_id: str):
    """Run template detection on an uploaded ``.csv`` (client-scoped route).

    The detection result is augmented with ``fields`` (``INTERNAL_FIELDS``).
    ``client_id`` is part of the URL but is not consulted by this handler.
    Responds 400 when no file is supplied, the extension is not ``.csv``,
    or detection raises.
    """
    files = await request.files
    file = files.get('file')
    if not file:
        return jsonify({'error': 'no_file'}), 400
    if not (file.filename or '').lower().endswith('.csv'):
        return jsonify({'error': 'Only .csv files accepted'}), 400
    try:
        result = detect_csv_template(file.read())
        result['fields'] = INTERNAL_FIELDS
        return jsonify(result)
    except Exception as e:
        # Log with traceback, as the Excel mapping detection endpoints do;
        # otherwise a failure here leaves no server-side trace.
        logger.error(f"Export template detection error: {e}", exc_info=True)
        return jsonify({'error': str(e)}), 400
@admin_bp.route('/clients/<client_id>/export-template', methods=['POST'])
@admin_required
async def save_client_export_template(client_id: str):
    """Save a client-specific export template given as a non-empty list under ``template``.

    Responds 404 when the client does not exist and 400 for an invalid template.
    """
    client = await get_client_by_id(client_id)
    if not client:
        return jsonify({'error': 'client_not_found'}), 404

    payload = await request.get_json() or {}
    columns = payload.get('template')
    is_valid = bool(columns) and isinstance(columns, list)
    if not is_valid:
        return jsonify({'error': 'invalid_template'}), 400

    await save_export_template(columns, client_id=client_id)
    return jsonify({'success': True, 'columns': len(columns)})
@admin_bp.route('/clients/<client_id>/export-template', methods=['DELETE'])
@admin_required
async def delete_client_export_template(client_id: str):
    """Delete the export template stored for the given client and report success."""
    await delete_export_template(client_id=client_id)
    outcome = {'success': True}
    return jsonify(outcome)

View file

@ -0,0 +1,186 @@
"""
AI command API — processes natural language commands against a sheet.
"""
import json
import logging
import os
import re
import aiohttp
from datetime import date
from quart import Blueprint, jsonify, request
from ..auth.middleware import auth_required, get_user_id
from ..sheets.manager import load_sheet_data, update_sheet, generate_next_id, get_sheet_client_id
from ..api.dropdowns import _load_dropdowns
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
ai_bp = Blueprint('ai', __name__, url_prefix='/api/sheets')
SPEECH_CORRECTIONS = {
'delivery balls': 'deliverables',
'delivery ball': 'deliverable',
'delivery': 'deliverables',
'liver': 'deliverables',
'rose': 'rows',
'oh oh h': 'OOH',
'out of home': 'OOH',
}
NUMBER_WORDS = {
'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10',
'eleven': '11', 'twelve': '12', 'twenty': '20', 'thirty': '30',
}
_PROMPT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'prompts', 'ac_command.txt')
def _load_prompt_template() -> str:
try:
with open(_PROMPT_PATH, 'r') as f:
return f.read()
except Exception:
return ""
def _preprocess(command: str) -> str:
cmd = command.lower()
for wrong, right in SPEECH_CORRECTIONS.items():
cmd = cmd.replace(wrong, right)
for word, digit in NUMBER_WORDS.items():
cmd = re.sub(r'\b' + word + r'\b', digit, cmd)
return cmd
async def _build_hierarchy_rules(client_id: str = None) -> str:
    """Render the active dropdown categories as ``- name: media, media`` lines.

    Categories whose ``status`` is not ``'Active'`` are skipped; the lines are
    joined with newlines.
    """
    categories = await _load_dropdowns(client_id)
    return '\n'.join(
        f"- {entry['name']}: {', '.join(entry.get('mediaTypes', []))}"
        for entry in categories
        if entry.get('status') == 'Active'
    )
async def _call_gemini(prompt: str) -> dict:
    """POST ``prompt`` to the Gemini ``generateContent`` endpoint and return the decoded JSON.

    The API key is sent in the ``x-goog-api-key`` header rather than the URL
    query string. aiohttp exceptions can embed the request URL in their
    message, and the caller returns ``str(e)`` to the client, so a key in the
    URL could be leaked in error responses and access logs.

    Raises whatever aiohttp raises on connection or decoding failures.
    """
    model = server_config.GEMINI_MODEL
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
    # Header values must be strings; fall back to '' so a missing key yields an
    # API error response instead of a client-side TypeError.
    headers = {"x-goog-api-key": server_config.GEMINI_API_KEY or ""}
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers) as resp:
            return await resp.json()
def _extract_json(text: str) -> dict:
start = text.find('{')
end = text.rfind('}')
if start == -1 or end == -1:
raise ValueError("No JSON object found in response")
return json.loads(text[start:end + 1])
def _first_candidate_text(gemini_resp: dict) -> str:
    """Return the text of the first part of the first candidate, or '' if absent.

    Tolerates responses where ``candidates`` or ``parts`` is missing or an
    empty list; indexing ``[0]`` on those directly raises IndexError.
    """
    candidates = gemini_resp.get('candidates') or []
    if not candidates:
        return ''
    parts = (candidates[0].get('content') or {}).get('parts') or []
    if not parts:
        return ''
    return parts[0].get('text', '') or ''


@ai_bp.route('/<sheet_id>/command', methods=['POST'])
@auth_required
async def run_command(sheet_id: str):
    """Interpret a natural-language command with Gemini and apply it to a sheet.

    Request JSON: ``command`` (required), ``yolo_mode`` (optional flag) and
    ``history`` (optional conversation history).

    The model's reply must contain a JSON object with an ``operation`` of
    ``create``, ``update``, ``batch_update`` or ``question``. Mutating
    operations persist the sheet through ``update_sheet`` and return the new
    data.

    Responses:
        400 ``empty_command`` / ``unknown_operation``
        404 ``sheet_not_found``
        500 ``prompt_template_missing``
        502 ``ai_error`` / ``gemini_error`` / ``empty_ai_response`` / ``invalid_ai_json``
    """
    user_id = get_user_id()
    body = await request.get_json() or {}
    # Coerce defensively: a null or non-string ``command`` must not crash on .strip().
    raw_command = str(body.get('command') or '').strip()
    yolo_mode = bool(body.get('yolo_mode', False))
    history = body.get('history', '')
    if not raw_command:
        return jsonify({'error': 'empty_command'}), 400

    data = await load_sheet_data(user_id, sheet_id)
    if data is None:
        return jsonify({'error': 'sheet_not_found'}), 404

    command = _preprocess(raw_command)
    template = _load_prompt_template()
    if not template:
        # _load_prompt_template returns '' on failure; do not send an empty prompt.
        logger.error("AI command prompt template is missing or empty")
        return jsonify({'error': 'prompt_template_missing'}), 500

    client_id = await get_sheet_client_id(user_id, sheet_id)
    hierarchy = await _build_hierarchy_rules(client_id)
    prompt = template.format(
        current_date=date.today().isoformat(),
        yolo_mode='TRUE' if yolo_mode else 'FALSE',
        conversation_history=history or '(none)',
        data_context=json.dumps(data),
        hierarchy_rules=hierarchy,
        command=command,
    )

    try:
        gemini_resp = await _call_gemini(prompt)
    except Exception as e:
        logger.error(f"Gemini API error: {e}")
        return jsonify({'error': 'ai_error', 'message': str(e)}), 502

    if 'error' in gemini_resp:
        msg = gemini_resp['error'].get('message', 'Unknown error')
        return jsonify({'error': 'gemini_error', 'message': msg}), 502

    llm_text = _first_candidate_text(gemini_resp)
    if not llm_text:
        return jsonify({'error': 'empty_ai_response'}), 502

    try:
        action = _extract_json(llm_text)
    except Exception:
        return jsonify({'error': 'invalid_ai_json', 'debug_llm': llm_text}), 502

    operation = action.get('operation')

    if operation == 'create':
        items = action.get('items', [])
        for item in items:
            # The ID is generated after each append so consecutive items get distinct numbers.
            item['Number'] = generate_next_id(data)
            item.setdefault('Status', 'Booked')
            item.setdefault('Quantity', 1)
            data.append(item)
        await update_sheet(user_id, sheet_id, data)
        return jsonify({'success': True, 'operation': 'create', 'count': len(items), 'data': data})

    elif operation == 'update':
        values = action.get('values', {})
        target_ids = action.get('target_ids', [])
        count = 0
        for row in data:
            # An empty target list applies the values to every row.
            if not target_ids or row.get('Number') in target_ids:
                row.update(values)
                count += 1
        await update_sheet(user_id, sheet_id, data)
        return jsonify({'success': True, 'operation': 'update', 'count': count, 'data': data})

    elif operation == 'batch_update':
        updates = action.get('updates', [])
        count = 0
        for upd in updates:
            num = upd.get('Number')
            vals = upd.get('values', {})
            for row in data:
                if row.get('Number') == num:
                    row.update(vals)
                    count += 1
                    break  # only the first row with this Number is updated
        await update_sheet(user_id, sheet_id, data)
        return jsonify({'success': True, 'operation': 'batch_update', 'count': count, 'data': data})

    elif operation == 'question':
        return jsonify({'success': True, 'operation': 'question', 'question': action.get('text', '')})

    return jsonify({'error': 'unknown_operation', 'operation': operation}), 400

View file

@ -0,0 +1,82 @@
"""
Auth API endpoints.
"""
import logging
from quart import Blueprint, jsonify, request
from ..auth.msal_auth import msal_auth
from ..auth.middleware import auth_required, get_current_user
from ..auth.user_store import upsert_user, get_user
logger = logging.getLogger(__name__)
auth_bp = Blueprint('auth', __name__, url_prefix='/api/auth')
@auth_bp.route('/config', methods=['GET'])
async def get_auth_config():
    """Return the MSAL client configuration and the dev-mode flag."""
    payload = {
        'config': msal_auth.get_client_config(),
        'devMode': msal_auth.is_dev_mode(),
    }
    return jsonify(payload)
@auth_bp.route('/validate', methods=['POST'])
async def validate_token():
    """Validate a posted access token and upsert the corresponding user.

    Responses:
        200 ``{'valid': True, 'user': {...}}`` with the stored role.
        400 when ``accessToken`` is missing.
        401 when the token does not validate.
        500 ``validation_error`` on any exception.
    """
    try:
        payload = await request.get_json()
        access_token = (payload or {}).get('accessToken')
        if not access_token:
            return jsonify({'error': 'invalid_request', 'message': 'accessToken required'}), 400

        claims = await msal_auth.validate_token(access_token)
        if not claims:
            return jsonify({'valid': False, 'error': 'invalid_token'}), 401

        oid = claims['oid']
        stored = await upsert_user(
            oid,
            claims.get('preferred_username', ''),
            claims.get('name', ''),
        )
        profile = {
            'id': oid,
            'email': claims.get('preferred_username'),
            'name': claims.get('name'),
            'role': stored.get('role', 'user'),
        }
        return jsonify({'valid': True, 'user': profile})
    except Exception as e:
        logger.error(f"Token validation error: {e}")
        return jsonify({'error': 'validation_error'}), 500
@auth_bp.route('/me', methods=['GET'])
@auth_required
async def me():
    """Return the current user's profile.

    Identity fields and ``role`` come from the authenticated user object;
    ``active``, ``created`` and ``last_seen`` come from the user store.
    """
    user = await get_current_user()
    record = await get_user(user['oid']) or {}
    profile = {
        'id': user['oid'],
        'email': user.get('preferred_username'),
        'name': user.get('name'),
        'role': user.get('role', 'user'),
        'active': record.get('active', True),
        'created': record.get('created'),
        'last_seen': record.get('last_seen'),
    }
    return jsonify(profile)
@auth_bp.route('/user', methods=['GET'])
@auth_required
async def get_current_user_info():
    """Return id, username, name and role of the authenticated user under ``user``."""
    user = await get_current_user()
    summary = {
        'id': user['oid'],
        'username': user.get('preferred_username'),
        'name': user.get('name'),
        'role': user.get('role', 'user'),
    }
    return jsonify({'user': summary})
@auth_bp.route('/logout', methods=['POST'])
async def logout():
    """Return the logout URL for the optional ``redirectUri`` in the request body."""
    payload = await request.get_json() or {}
    redirect_uri = payload.get('redirectUri')
    logout_url = await msal_auth.get_logout_url(redirect_uri)
    return jsonify({'logoutUrl': logout_url})

View file

@ -0,0 +1,101 @@
"""
Client management API — PostgreSQL-backed.
"""
import logging
import time
import random
from datetime import datetime, timezone
from quart import Blueprint, jsonify, request
from ..auth.middleware import auth_required, admin_required
from ..db.pool import get_pool
logger = logging.getLogger(__name__)
clients_bp = Blueprint('clients', __name__, url_prefix='/api/clients')
async def load_clients() -> list:
    """Return every row of ``clients`` ordered by name, as API dicts."""
    async with get_pool().acquire() as conn:
        records = await conn.fetch('SELECT * FROM clients ORDER BY name')
    clients = []
    for record in records:
        clients.append(_row_to_dict(record))
    return clients
async def get_client_by_id(client_id: str) -> dict | None:
    """Return the client with the given id as an API dict, or None when no row matches."""
    async with get_pool().acquire() as conn:
        record = await conn.fetchrow('SELECT * FROM clients WHERE id = $1', client_id)
    if not record:
        return None
    return _row_to_dict(record)
async def set_client_custom_dropdowns(client_id: str, value: bool):
    """Set the ``has_custom_dropdowns`` flag on the client with the given id."""
    statement = 'UPDATE clients SET has_custom_dropdowns = $2 WHERE id = $1'
    async with get_pool().acquire() as conn:
        await conn.execute(statement, client_id, value)
def _row_to_dict(row) -> dict:
return {
'id': row['id'],
'name': row['name'],
'hasCustomDropdowns': row['has_custom_dropdowns'],
'created': row['created_at'].isoformat() if row['created_at'] else None,
}
@clients_bp.route('', methods=['GET'])
@auth_required
async def list_clients():
    """Return all clients under the ``clients`` key (any authenticated user)."""
    clients = await load_clients()
    return jsonify({'clients': clients})
@clients_bp.route('', methods=['POST'])
@admin_required
async def create_client():
    """Create a client (admin only).

    Expects a JSON object with a non-empty string ``name``.

    Returns:
        201 with ``{"client": {...}}`` on success; 400 ``name_required`` when
        the name is missing, blank, or not a string.
    """
    body = await request.get_json() or {}
    # A JSON null/number for "name" (or a non-object body) used to raise on
    # .get()/.strip() and surface as a 500; treat those as a missing name.
    raw_name = body.get('name') if isinstance(body, dict) else None
    name = raw_name.strip() if isinstance(raw_name, str) else ''
    if not name:
        return jsonify({'error': 'name_required', 'message': 'Client name is required'}), 400

    # Id = epoch seconds followed by a 3-digit random suffix.
    client_id = f"client_{int(time.time())}{random.randint(100, 999)}"
    pool = get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow('''
            INSERT INTO clients (id, name, has_custom_dropdowns)
            VALUES ($1, $2, FALSE)
            RETURNING *
        ''', client_id, name)
    return jsonify({'client': _row_to_dict(row)}), 201
@clients_bp.route('/<client_id>', methods=['DELETE'])
@admin_required
async def delete_client(client_id: str):
    """Delete a client and its client-scoped export template (admin only).

    Both DELETE statements run inside one transaction so that a failure of
    the second cannot leave the client removed while its export template
    remains. Responds ``{"success": true}`` whether or not a row matched.
    """
    pool = get_pool()
    async with pool.acquire() as conn:
        async with conn.transaction():
            # Cascades to dropdown_categories via FK; export templates are
            # keyed by scope string, so they are removed explicitly.
            await conn.execute('DELETE FROM clients WHERE id = $1', client_id)
            await conn.execute("DELETE FROM export_templates WHERE scope = $1", f'client:{client_id}')
    return jsonify({'success': True})
@clients_bp.route('/<client_id>', methods=['PATCH'])
@admin_required
async def update_client(client_id: str):
    """Rename a client (admin only).

    Expects an optional string ``name`` in the JSON body. A missing, null or
    blank name keeps the current name.

    Returns:
        ``{"client": {...}}`` on success; 404 ``not_found`` when the client
        does not exist (or disappears before the update); 400
        ``invalid_name`` when ``name`` is present but not a string.
    """
    body = await request.get_json() or {}
    requested = body.get('name') if isinstance(body, dict) else None
    pool = get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow('SELECT * FROM clients WHERE id = $1', client_id)
        if not row:
            return jsonify({'error': 'not_found'}), 404
        # A non-string name used to raise on .strip() and surface as a 500.
        if requested is not None and not isinstance(requested, str):
            return jsonify({'error': 'invalid_name', 'message': 'Client name must be a string'}), 400
        candidate = row['name'] if requested is None else requested
        name = candidate.strip() or row['name']
        row = await conn.fetchrow(
            'UPDATE clients SET name = $2 WHERE id = $1 RETURNING *', client_id, name
        )
        if not row:
            # The client was deleted between the SELECT and the UPDATE.
            return jsonify({'error': 'not_found'}), 404
    return jsonify({'client': _row_to_dict(row)})

273
backend/server/api/config.py Executable file
View file

@ -0,0 +1,273 @@
"""
Configuration API endpoints for model selection and system settings
"""
import logging
from quart import Blueprint, jsonify, request, g
from ..auth.middleware import dev_mode_bypass, get_user_id
from ..jobs.models import ModelConfiguration
from ..jobs.manager import JobManager
logger = logging.getLogger(__name__)
config_bp = Blueprint('config', __name__, url_prefix='/api/config')
@config_bp.route('/models', methods=['GET'])
@dev_mode_bypass
async def get_available_models():
    """
    List the models reported by ``JobManager.get_available_models()``.

    Returns:
        JSON ``{"models": [...]}`` where each item is ``model.to_dict()``;
        a 500 ``configuration_error`` payload if anything raises.
    """
    try:
        serialized = []
        for model in JobManager.get_available_models():
            serialized.append(model.to_dict())
        return jsonify({'models': serialized})
    except Exception as e:
        logger.error(f"Failed to get available models: {e}")
        failure = {
            'error': 'configuration_error',
            'message': 'Failed to retrieve available models',
        }
        return jsonify(failure), 500
@config_bp.route('/defaults', methods=['GET'])
@dev_mode_bypass
async def get_default_config():
    """
    Return the default model configuration.

    Returns:
        JSON ``{"config": {...}}`` built from
        ``JobManager.get_default_model_config().to_dict()``; a 500
        ``configuration_error`` payload if anything raises.
    """
    try:
        defaults = JobManager.get_default_model_config()
        body = {'config': defaults.to_dict()}
        return jsonify(body)
    except Exception as e:
        logger.error(f"Failed to get default config: {e}")
        failure = {
            'error': 'configuration_error',
            'message': 'Failed to retrieve default configuration',
        }
        return jsonify(failure), 500
@config_bp.route('/estimate', methods=['POST'])
@dev_mode_bypass
async def estimate_processing_cost():
    """
    Estimate processing cost for given models and file size

    Expects:
        {
            "modelConfig": {
                "primaryModels": ["model1", "model2"],
                "consolidationModel": "model3"
            },
            "fileSizeBytes": 12345,
            "estimatedTokens": 10000
        }

    Returns:
        Cost breakdown by model and total estimated cost.
        400 when the body is missing or not an object, when ``fileSizeBytes``
        or ``estimatedTokens`` is not a non-negative number, or when the model
        configuration cannot be parsed; 500 on any other failure.
    """
    try:
        data = await request.get_json()
        if not data or not isinstance(data, dict):
            return jsonify({
                'error': 'invalid_request',
                'message': 'Request body required'
            }), 400

        model_config_data = data.get('modelConfig', {})
        file_size = data.get('fileSizeBytes') or 0
        estimated_tokens = data.get('estimatedTokens')

        # Reject non-numeric or negative sizes up front; previously these
        # either raised (reported as a 500) or produced negative estimates.
        for label, value in (('fileSizeBytes', file_size), ('estimatedTokens', estimated_tokens)):
            if value is None:
                continue
            is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
            if not is_number or value < 0:
                return jsonify({
                    'error': 'invalid_request',
                    'message': f'{label} must be a non-negative number'
                }), 400

        if estimated_tokens:
            estimated_tokens = int(estimated_tokens)
        else:
            # No usable token estimate provided: derive one from the file size.
            # Rough heuristic: 4 characters per token, with document structure
            # overhead (hence the divisor of 3), capped at 100k tokens.
            estimated_tokens = min(int(file_size) // 3, 100000)

        # Parse model configuration
        try:
            model_config = ModelConfiguration.from_dict(model_config_data)
        except Exception as e:
            return jsonify({
                'error': 'invalid_model_config',
                'message': f'Invalid model configuration: {e}'
            }), 400

        # Get all models to estimate
        all_models = model_config.primary_models + [model_config.consolidation_model]

        # Estimate cost using provider manager (JobManager is imported at module level)
        job_manager = JobManager.get_instance()
        cost_breakdown = job_manager.provider_manager.estimate_total_cost(
            model_keys=all_models,
            estimated_input_tokens=estimated_tokens,
            estimated_output_tokens=estimated_tokens // 2  # Assume 50% of input as output
        )

        # Separate primary and consolidation costs
        primary_cost = sum(
            cost_breakdown.get(model, 0) for model in model_config.primary_models
        )
        consolidation_cost = cost_breakdown.get(model_config.consolidation_model, 0)

        return jsonify({
            'estimatedTokens': estimated_tokens,
            'costBreakdown': {
                'primaryModels': {
                    model: cost_breakdown.get(model, 0)
                    for model in model_config.primary_models
                },
                'consolidationModel': {
                    model_config.consolidation_model: consolidation_cost
                },
                'primaryTotal': primary_cost,
                'consolidationTotal': consolidation_cost,
                'grandTotal': cost_breakdown.get('total', 0)
            }
        })
    except Exception as e:
        logger.error(f"Cost estimation error: {e}")
        return jsonify({
            'error': 'estimation_error',
            'message': 'Failed to estimate processing cost'
        }), 500
@config_bp.route('/validate', methods=['POST'])
@dev_mode_bypass
async def validate_model_config():
    """
    Check a model configuration against the available models.

    Expects:
        {
            "modelConfig": {
                "primaryModels": ["model1", "model2"],
                "consolidationModel": "model3",
                "minimumSuccessThreshold": 1
            }
        }

    Returns:
        ``valid`` (true when there are no errors), the ``errors`` and
        ``warnings`` lists, and a ``modelCount`` summary. 400 when the body is
        missing or the configuration cannot be parsed; 500 on other failures.
    """
    try:
        data = await request.get_json()
        if not data:
            missing_body = {
                'error': 'invalid_request',
                'message': 'Request body required',
            }
            return jsonify(missing_body), 400

        try:
            model_config = ModelConfiguration.from_dict(data.get('modelConfig', {}))
        except Exception as e:
            return jsonify({
                'valid': False,
                'error': f'Invalid model configuration: {e}'
            }), 400

        # Keys of every model the job manager knows about.
        available_models = [entry.key for entry in JobManager.get_available_models()]

        # Errors: any referenced model that is not available.
        errors = [
            f"Primary model '{model}' is not available"
            for model in model_config.primary_models
            if model not in available_models
        ]
        if model_config.consolidation_model not in available_models:
            errors.append(f"Consolidation model '{model_config.consolidation_model}' is not available")

        # Warnings: suspicious but not invalid setups.
        warnings = []
        if model_config.minimum_success_threshold > len(model_config.primary_models):
            warnings.append(
                f"Minimum success threshold ({model_config.minimum_success_threshold}) "
                f"is higher than number of primary models ({len(model_config.primary_models)})"
            )
        has_duplicates = len(set(model_config.primary_models)) != len(model_config.primary_models)
        if has_duplicates:
            warnings.append("Duplicate models detected in primary models list")
        if model_config.consolidation_model in model_config.primary_models:
            warnings.append("Consolidation model is also used as a primary model")

        distinct_models = set(model_config.primary_models + [model_config.consolidation_model])
        model_count = {
            'primary': len(model_config.primary_models),
            'consolidation': 1,
            'total': len(distinct_models),
        }
        return jsonify({
            'valid': len(errors) == 0,
            'errors': errors,
            'warnings': warnings,
            'modelCount': model_count,
        })
    except Exception as e:
        logger.error(f"Model config validation error: {e}")
        failure = {
            'error': 'validation_error',
            'message': 'Failed to validate model configuration',
        }
        return jsonify(failure), 500
@config_bp.route('/system', methods=['GET'])
@dev_mode_bypass
async def get_system_info():
    """
    Report server configuration values and current job-queue counters.

    Returns:
        JSON with a ``system`` section (values read from ``server_config``)
        and a ``queue`` section (pending and active counts from the job
        manager); a 500 ``system_error`` payload if anything raises.
    """
    try:
        from ..config_runtime import server_config
        from ..jobs.manager import JobManager

        manager = JobManager.get_instance()

        # Get system stats
        pending = await manager.get_queue_size()
        running = await manager.get_active_jobs_count()

        system_section = {
            'devMode': server_config.DEV_MODE,
            'maxConcurrentJobs': server_config.MAX_CONCURRENT_JOBS,
            'maxUploadSizeMB': server_config.MAX_UPLOAD_SIZE_MB,
            'fileRetentionHours': server_config.FILE_RETENTION_HOURS,
            'allowedExtensions': list(server_config.ALLOWED_EXTENSIONS),
        }
        queue_section = {
            'pending': pending,
            'active': running,
            'maxConcurrent': server_config.MAX_CONCURRENT_JOBS,
        }
        return jsonify({'system': system_section, 'queue': queue_section})
    except Exception as e:
        logger.error(f"Failed to get system info: {e}")
        failure = {
            'error': 'system_error',
            'message': 'Failed to retrieve system information',
        }
        return jsonify(failure), 500

View file

@ -0,0 +1,271 @@
"""
Dropdown data API — category / media type hierarchy.
Data stored in PostgreSQL. Seeded from embedded SEED_CATEGORIES if DB is empty.
"""
import logging
from quart import Blueprint, jsonify, request
from ..db.pool import get_pool
logger = logging.getLogger(__name__)
dropdowns_bp = Blueprint('dropdowns', __name__, url_prefix='/api/dropdowns')
# Seed data embedded as a fallback (taken from "Excel Grid (1).xlsx").
# Each entry is {"name": str, "status": str, "mediaTypes": [str, ...]}.
SEED_CATEGORIES = [
{"name": "3D", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "A/B Testing", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Admin", "status": "Active", "mediaTypes": ["Management"]},
{"name": "Amazon page", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Animation", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "App Design", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
{"name": "Artworking (Print)", "status": "Active", "mediaTypes": ["Literature", "Catalogue", "Press - Magazine", "Press - Newspaper", "POS - Print", "POS - Digital", "OOH - Print", "Direct mail - Email", "Direct mail - Print"]},
{"name": "Audio", "status": "Active", "mediaTypes": ["Broadcast - Radio"]},
{"name": "Augmented Reality", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Branday Adaptation", "status": "Active", "mediaTypes": ["Online advertising - Rich media"]},
{"name": "Branding", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "CMS", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
{"name": "Campaign Print Complex", "status": "Active", "mediaTypes": ["Press - Newspaper"]},
{"name": "Campaign Print Simple", "status": "Active", "mediaTypes": ["Press - Magazine"]},
{"name": "Cinema", "status": "Active", "mediaTypes": ["Broadcast - TV", "Broadcast - Cinema", "Broadcast - Radio"]},
{"name": "Cinema Adaptation", "status": "Active", "mediaTypes": ["Broadcast - Cinema"]},
{"name": "Community Management", "status": "Active", "mediaTypes": ["Community management"]},
{"name": "Concept (Video)", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Copywriting", "status": "Active", "mediaTypes": ["Literature", "Transcreation", "Copywriting"]},
{"name": "Copywriting Newsletter", "status": "Active", "mediaTypes": ["Direct mail - Email", "Direct mail - Print"]},
{"name": "Copywriting Social", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Creative Development", "status": "Active", "mediaTypes": ["Literature", "Creative development"]},
{"name": "Creative Development Big Campaign", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Creative Development Small Campaign", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Creative Direction", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Creative Packaging Box", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "DM", "status": "Active", "mediaTypes": ["Direct mail - Print"]},
{"name": "Digital Display (.com)", "status": "Active", "mediaTypes": ["Online advertising - Banner", "Online advertising - Static Image"]},
{"name": "Digital Display (Animation)", "status": "Active", "mediaTypes": ["POS - Digital", "Online advertising - Banner", "Online advertising - Rich media", "Online advertising - Push notifications", "Online advertising - .com"]},
{"name": "Digital Display (POS)", "status": "Active", "mediaTypes": ["Online advertising - Banner", "Online advertising - Static Image"]},
{"name": "Digital Display (Push Notification)", "status": "Active", "mediaTypes": ["Online advertising - Banner", "Online advertising - Static Image"]},
{"name": "Digital Display (Rich Media)", "status": "Active", "mediaTypes": ["Online advertising - Static Image"]},
{"name": "Digital Display (Static)", "status": "Active", "mediaTypes": ["Online advertising - Static Image"]},
{"name": "Display Static Adaptation Standard formats", "status": "Active", "mediaTypes": ["Online advertising - Static Image"]},
{"name": "Display Static Master Standard formats", "status": "Active", "mediaTypes": ["Online advertising - Static Image"]},
{"name": "E-commerce site", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
{"name": "Email", "status": "Active", "mediaTypes": ["Direct mail - Email"]},
{"name": "Event", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Event Management", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Illustration", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Image Adaptation Social", "status": "Active", "mediaTypes": ["Social - Static Image"]},
{"name": "Image Animation", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Infographics", "status": "Active", "mediaTypes": ["Literature", "Online advertising - Banner", "Online advertising - Rich media", "Online advertising - Landing page", "Online advertising - Push notifications"]},
{"name": "Internal Comms", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Key Visual", "status": "Active", "mediaTypes": ["Literature", "Social - Static Image"]},
{"name": "Key Visual Adaptation", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Key Visual Design", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Logo creation", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Management", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Mechandise", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Non-Project Time", "status": "Active", "mediaTypes": ["Management"]},
{"name": "OOH (Digital)", "status": "Active", "mediaTypes": ["OOH - Digital"]},
{"name": "OOH (Print)", "status": "Active", "mediaTypes": ["OOH - Print"]},
{"name": "OOH Complex (Digital)", "status": "Active", "mediaTypes": ["OOH - Digital"]},
{"name": "OOH Complex (Print)", "status": "Active", "mediaTypes": ["OOH - Print"]},
{"name": "OOH Simple (Digital)", "status": "Active", "mediaTypes": ["OOH - Digital"]},
{"name": "OOH Simple (Print)", "status": "Active", "mediaTypes": ["OOH - Print"]},
{"name": "POS", "status": "Active", "mediaTypes": ["POS - Print", "POS - Digital"]},
{"name": "POS Complex", "status": "Active", "mediaTypes": ["POS - Print"]},
{"name": "POS Merchandising Complex (up to 10)", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "POS Merchandising Simple (up to 5)", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "POS Simple", "status": "Active", "mediaTypes": ["POS - Print"]},
{"name": "Packaging", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "Packaging Box", "status": "Active", "mediaTypes": ["Packaging - Print"]},
{"name": "Paid Media", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photography Shooting (10-20)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photography Shooting (20-40)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photography Shooting (up to 10)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photography Shooting Still Life", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Photoshoot", "status": "Active", "mediaTypes": ["Literature", "Photography"]},
{"name": "Presentations", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Presentations Template", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Print Design", "status": "Active", "mediaTypes": ["Literature", "Catalogue", "Press - Magazine", "Press - Newspaper", "POS - Print", "OOH - Print", "Direct mail - Print"]},
{"name": "Production", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Production (Post)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Production (Pre)", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Programmatic", "status": "Active", "mediaTypes": ["Online advertising - Rich media"]},
{"name": "Project Management", "status": "Active", "mediaTypes": ["Management"]},
{"name": "Retouching", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Retouching Complex", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Retouching Simple", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "SEM", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "SEO", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Scoping", "status": "Active", "mediaTypes": ["Management"]},
{"name": "Seedtag Banner Adaptation", "status": "Active", "mediaTypes": ["Online advertising - Rich media"]},
{"name": "Sell Sheet", "status": "Active", "mediaTypes": ["Literature", "Catalogue", "Direct mail - Print"]},
{"name": "Signage", "status": "Active", "mediaTypes": ["POS - Print"]},
{"name": "Single Website Page Design", "status": "Active", "mediaTypes": ["Online advertising - Landing page"]},
{"name": "Skin Adaptation", "status": "Active", "mediaTypes": ["Online advertising - Rich media"]},
{"name": "Social (Animation)", "status": "Active", "mediaTypes": ["Social - Gif"]},
{"name": "Social (Static)", "status": "Active", "mediaTypes": ["Social - Static Image"]},
{"name": "Social (Video)", "status": "Active", "mediaTypes": ["Social - Video"]},
{"name": "Social Carousel (up to 5 images)", "status": "Active", "mediaTypes": ["Social - Static Image"]},
{"name": "Social Reporting", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Social Twitter Thread", "status": "Active", "mediaTypes": ["Social - Static Image"]},
{"name": "Sound", "status": "Active", "mediaTypes": ["Broadcast - Radio"]},
{"name": "Sound Editing", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Storyboarding", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Strategy", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Subtitling", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "TVC", "status": "Active", "mediaTypes": ["Broadcast - TV"]},
{"name": "Transcreation", "status": "Active", "mediaTypes": ["Transcreation"]},
{"name": "Typography", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Video (Edit)", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video (Shoot)", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 10s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 15s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 20s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 30s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 5s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Adaptation 60s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing 15s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing 1m", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing 20s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing 45s", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing Event", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Editing Stock Images", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Video Recording", "status": "Active", "mediaTypes": ["Online advertising - Video"]},
{"name": "Virtual Reality", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Voice Over", "status": "Active", "mediaTypes": ["Broadcast - Radio"]},
{"name": "Web", "status": "Active", "mediaTypes": ["Online advertising - Landing page"]},
{"name": "Web Analytics", "status": "Active", "mediaTypes": ["Literature"]},
{"name": "Web UI & UX", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
{"name": "Website Design", "status": "Active", "mediaTypes": ["Online advertising - .com"]},
]
async def _load_dropdowns(client_id: str = None) -> list:
    """
    Fetch dropdown categories from the database, ordered by name.

    When ``client_id`` is truthy and that client has rows of its own, those
    rows are returned. Otherwise the global rows (``client_id IS NULL``) are
    returned.
    """
    async with get_pool().acquire() as conn:
        client_rows = None
        if client_id:
            client_rows = await conn.fetch(
                'SELECT name, status, media_types FROM dropdown_categories WHERE client_id = $1 ORDER BY name',
                client_id
            )
        if client_rows:
            return list(map(_row_to_cat, client_rows))

        # Global (client_id IS NULL)
        global_rows = await conn.fetch(
            'SELECT name, status, media_types FROM dropdown_categories WHERE client_id IS NULL ORDER BY name'
        )
        return list(map(_row_to_cat, global_rows))
async def save_dropdowns(categories: list, client_id: str = None):
    """Replace every category in one scope inside a single transaction.

    The scope is the given client when ``client_id`` is truthy, otherwise the
    global rows (``client_id IS NULL``). Existing rows in the scope are
    deleted, then one row is inserted per entry of ``categories``; ``status``
    defaults to 'Active' and ``mediaTypes`` to an empty list.
    """
    if client_id:
        purge = ('DELETE FROM dropdown_categories WHERE client_id = $1', client_id)
    else:
        purge = ('DELETE FROM dropdown_categories WHERE client_id IS NULL',)

    insert_sql = '''
        INSERT INTO dropdown_categories (client_id, name, status, media_types)
        VALUES ($1, $2, $3, $4)
    '''
    async with get_pool().acquire() as conn:
        async with conn.transaction():
            await conn.execute(*purge)
            for entry in categories:
                await conn.execute(
                    insert_sql,
                    client_id,
                    entry['name'],
                    entry.get('status', 'Active'),
                    entry.get('mediaTypes', []),
                )
def _row_to_cat(row) -> dict:
return {
'name': row['name'],
'status': row['status'],
'mediaTypes': row['media_types'] if row['media_types'] else [],
}
# ── Sync helpers (file parsing — no DB involved) ────────────────────────────
def _excel_find_column(headers: list, keywords: list, default: int) -> int:
    """Return the index of the first header containing any keyword, else ``default``."""
    for index, header in enumerate(headers):
        lowered = header.lower()
        if any(keyword in lowered for keyword in keywords):
            return index
    return default


def _excel_row_to_category(row, name_col: int, status_col: int, media_col: int):
    """Convert one worksheet row (a tuple of cell values) into a category dict.

    Returns None when the row has no value in the name column. A missing
    status cell means 'Active'; any status text that does not read as active
    (including "Inactive") becomes 'Archived'. The media cell is split on
    commas.
    """
    if len(row) <= name_col or not row[name_col]:
        return None
    name = str(row[name_col]).strip()

    status_raw = str(row[status_col]).strip() if len(row) > status_col and row[status_col] else 'Active'
    status_lower = status_raw.lower()
    # "inactive" contains "active", so it must be excluded explicitly.
    is_active = 'active' in status_lower and 'inactive' not in status_lower
    status = 'Active' if is_active else 'Archived'

    media_raw = str(row[media_col]).strip() if len(row) > media_col and row[media_col] else ''
    media_types = [m.strip() for m in media_raw.split(',') if m.strip()]
    return {'name': name, 'status': status, 'mediaTypes': media_types}


def detect_excel_mapping(file_bytes: bytes) -> dict:
    """
    Read the first row of an .xlsx and auto-detect column mapping.

    Columns are matched by keyword against the lower-cased header text; when
    no header matches, the defaults are column 0 (name), 4 (status) and
    6 (media types).

    Returns: {headers, mapping: {name_col, status_col, media_col}, sample}
    where ``sample`` holds the parsed categories from rows 2-6.
    """
    import openpyxl
    from io import BytesIO

    wb = openpyxl.load_workbook(BytesIO(file_bytes))
    ws = wb.active
    # Default to an empty header row instead of raising StopIteration.
    first_row = next(ws.iter_rows(min_row=1, max_row=1), ())
    header_row = [str(c.value or '').strip() for c in first_row]

    # The default is applied only when nothing matched; a match at column 0
    # is a valid result and must not be replaced by the default.
    name_col = _excel_find_column(header_row, ['category', 'name', 'task', 'deliverable'], 0)
    status_col = _excel_find_column(header_row, ['status', 'active', 'archived'], 4)
    media_col = _excel_find_column(header_row, ['media', 'type', 'channel'], 6)
    mapping = {'name_col': name_col, 'status_col': status_col, 'media_col': media_col}

    sample = []
    for row in ws.iter_rows(min_row=2, max_row=6, values_only=True):
        category = _excel_row_to_category(row, name_col, status_col, media_col)
        if category is not None:
            sample.append(category)
    return {'headers': header_row, 'mapping': mapping, 'sample': sample}


def parse_excel_dropdowns(file_bytes: bytes, mapping: dict = None) -> list:
    """Parse an .xlsx into [{name, status, mediaTypes}].

    ``mapping`` may supply ``name_col``, ``status_col`` and ``media_col``
    (0-based column indexes); any that are absent default to 0, 4 and 6.
    Rows without a value in the name column are skipped.
    """
    import openpyxl
    from io import BytesIO

    wb = openpyxl.load_workbook(BytesIO(file_bytes))
    ws = wb.active

    columns = mapping or {}
    name_col = columns.get('name_col', 0)
    status_col = columns.get('status_col', 4)
    media_col = columns.get('media_col', 6)

    categories = []
    for row in ws.iter_rows(min_row=2, values_only=True):
        category = _excel_row_to_category(row, name_col, status_col, media_col)
        if category is not None:
            categories.append(category)
    return categories
# ── Routes ───────────────────────────────────────────────────────────────────
@dropdowns_bp.route('/categories', methods=['GET'])
async def get_categories():
    """Return dropdown categories, optionally scoped to ``client_id``.

    Only categories whose status is 'Active' are returned unless the
    ``active`` query parameter is set to something other than 'true'
    (case-insensitive).
    """
    client_id = request.args.get('client_id') or None
    categories = await _load_dropdowns(client_id)
    include_archived = request.args.get('active', 'true').lower() != 'true'
    if include_archived:
        return jsonify({'categories': categories})
    active = [entry for entry in categories if entry.get('status') == 'Active']
    return jsonify({'categories': active})
@dropdowns_bp.route('/all', methods=['GET'])
async def get_all():
    """Return every category for the scope, archived ones included (admin preview)."""
    scope_client = request.args.get('client_id') or None
    categories = await _load_dropdowns(scope_client)
    return jsonify({'categories': categories})

View file

@ -0,0 +1,245 @@
"""
CSV export — Activation Calendar format.
Supports custom export templates: client > user > global > built-in default.
Template data stored in PostgreSQL export_templates table.
"""
import csv
import io
import logging
from quart import Blueprint, make_response, jsonify, request
from ..auth.middleware import auth_required, get_user_id
from ..sheets.manager import load_sheet_data, get_sheet_client_id
from ..db.pool import get_pool
logger = logging.getLogger(__name__)
export_bp = Blueprint('export', __name__, url_prefix='/api/sheets')
# Internal field names as they appear in sheet row data.
# Returned to clients as the `fields` list by the user template endpoints.
INTERNAL_FIELDS = [
'Number', 'Title', 'Status', 'Category', 'Media', 'Sub-media',
'Format', 'Supply date', 'Live date', 'Language', 'Country',
'Quantity', 'Destination', 'End date', 'Special instructions',
]
# Fields that _build_csv writes as empty strings on export.
# NOTE(review): this comment used to say "unless template explicitly maps
# them", but _build_csv blanks a column whenever its field is in this set,
# mapped or not — confirm which behavior is intended.
_CLEAR_BY_DEFAULT = {'Number', 'Destination', 'End date', 'Special instructions'}
# Built-in column layout returned by load_export_template when no stored
# template matches. Each entry maps a CSV header to an internal field name.
_DEFAULT_TEMPLATE = [
{'header': 'Number', 'field': 'Number'},
{'header': 'Title', 'field': 'Title'},
{'header': 'Status', 'field': 'Status'},
{'header': 'Category', 'field': 'Category'},
{'header': 'Media', 'field': 'Media'},
{'header': 'Sub media', 'field': 'Sub-media'},
{'header': 'Destination', 'field': 'Destination'},
{'header': 'Format', 'field': 'Format'},
{'header': 'Supply date', 'field': 'Supply date'},
{'header': 'Live date', 'field': 'Live date'},
{'header': 'End date', 'field': 'End date'},
{'header': 'Special instructions', 'field': 'Special instructions'},
{'header': 'Language', 'field': 'Language'},
{'header': 'Country', 'field': 'Country'},
{'header': 'Quantity', 'field': 'Quantity'},
]
async def load_export_template(client_id: str = None, user_id: str = None) -> list:
    """
    Resolve which export template applies.

    Priority: client template -> user template -> global template -> built-in default.
    Client and user lookups are skipped when the corresponding id is falsy.
    """
    scoped_lookups = []
    if client_id:
        scoped_lookups.append(f'client:{client_id}')
    if user_id:
        scoped_lookups.append(f'user:{user_id}')

    async with get_pool().acquire() as conn:
        for scope in scoped_lookups:
            found = await conn.fetchrow(
                'SELECT columns FROM export_templates WHERE scope = $1', scope
            )
            if found:
                return found['columns']

        found = await conn.fetchrow(
            "SELECT columns FROM export_templates WHERE scope = 'global'"
        )
        if found:
            return found['columns']
        return _DEFAULT_TEMPLATE
def _template_scope(client_id: str = None, user_id: str = None) -> str:
    """Return the ``export_templates.scope`` key: client first, then user, else 'global'."""
    if client_id:
        return f'client:{client_id}'
    if user_id:
        return f'user:{user_id}'
    return 'global'


async def save_export_template(template: list, client_id: str = None, user_id: str = None):
    """Insert or replace the template stored for one scope.

    The scope is the client when ``client_id`` is truthy, else the user when
    ``user_id`` is truthy, else 'global'. An existing row for the scope has
    its ``columns`` replaced and ``updated_at`` set to NOW().
    """
    scope = _template_scope(client_id, user_id)
    pool = get_pool()
    async with pool.acquire() as conn:
        await conn.execute('''
            INSERT INTO export_templates (scope, columns)
            VALUES ($1, $2)
            ON CONFLICT (scope) DO UPDATE SET columns = $2, updated_at = NOW()
        ''', scope, template)


async def delete_export_template(client_id: str = None, user_id: str = None):
    """Delete the template stored for one scope (same scope rules as saving)."""
    scope = _template_scope(client_id, user_id)
    pool = get_pool()
    async with pool.acquire() as conn:
        await conn.execute('DELETE FROM export_templates WHERE scope = $1', scope)


async def has_export_template(client_id: str = None, user_id: str = None) -> bool:
    """Return True when a template row exists for exactly this scope (no fallback)."""
    scope = _template_scope(client_id, user_id)
    pool = get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow('SELECT 1 FROM export_templates WHERE scope = $1', scope)
    return row is not None
def detect_csv_template(file_bytes: bytes) -> dict:
    """Read CSV headers and auto-detect mapping to internal fields.

    The first CSV row is taken as the header row (blank cells are dropped).
    Each header is lower-cased, '-' and '_' are treated as spaces, and the
    header is mapped to the internal field of the longest keyword it
    contains; headers with no keyword map to None.

    Returns:
        {'headers': [...], 'template': [{'header': ..., 'field': ... or None}]}
    """
    text = file_bytes.decode('utf-8-sig', errors='replace')
    reader = csv.reader(io.StringIO(text))
    headers = [h.strip() for h in next(reader, []) if h.strip()]

    candidates = {
        'number': 'Number', 'job no': 'Number', 'job number': 'Number',
        'title': 'Title', 'job title': 'Title', 'name': 'Title', 'campaign': 'Title',
        'status': 'Status',
        'category': 'Category', 'task': 'Category', 'deliverable': 'Category',
        'media': 'Media', 'media type': 'Media', 'channel': 'Media',
        'sub media': 'Sub-media', 'submedia': 'Sub-media',
        'format': 'Format', 'size': 'Format', 'spec': 'Format',
        'supply': 'Supply date', 'supply date': 'Supply date', 'artwork': 'Supply date',
        'live': 'Live date', 'live date': 'Live date', 'go live': 'Live date',
        'end': 'End date', 'end date': 'End date', 'expiry': 'End date',
        'language': 'Language', 'lang': 'Language',
        'country': 'Country', 'market': 'Country', 'region': 'Country',
        'quantity': 'Quantity', 'qty': 'Quantity', 'units': 'Quantity',
        'destination': 'Destination',
        'special': 'Special instructions', 'instructions': 'Special instructions', 'notes': 'Special instructions',
    }
    # Try the most specific (longest) keywords first. With first-match-wins
    # in declaration order, "Sub media" matched 'media' and
    # "Special instructions" matched 'spec' (-> Format).
    ordered = sorted(candidates.items(), key=lambda item: len(item[0]), reverse=True)

    def _match(h: str):
        hl = ' '.join(h.lower().replace('-', ' ').replace('_', ' ').split())
        for key, field in ordered:
            if key in hl:
                return field
        return None

    template = [{'header': h, 'field': _match(h)} for h in headers]
    return {'headers': headers, 'template': template}
def _build_csv(data: list, template: list) -> str:
headers = [col['header'] for col in template]
output = io.StringIO()
writer = csv.DictWriter(output, fieldnames=headers, extrasaction='ignore')
writer.writeheader()
for row in data:
csv_row = {}
for col in template:
field = col.get('field')
header = col['header']
if not field or field in _CLEAR_BY_DEFAULT:
csv_row[header] = ''
elif field == 'Quantity':
csv_row[header] = '1.00'
else:
csv_row[header] = row.get(field, '')
writer.writerow(csv_row)
return output.getvalue()
# ── Export endpoint ────────────────────────────────────────────────────────────
@export_bp.route('/<sheet_id>/export', methods=['GET'])
@auth_required
async def export_csv(sheet_id: str):
    """Download one of the caller's sheets as a CSV attachment.

    Responds 404 ``not_found`` when ``load_sheet_data`` returns None for the
    caller and sheet id. The export template is resolved from the sheet's
    client id and the caller's user id.
    """
    user_id = get_user_id()
    rows = await load_sheet_data(user_id, sheet_id)
    if rows is None:
        return {'error': 'not_found'}, 404

    sheet_client = await get_sheet_client_id(user_id, sheet_id)
    template = await load_export_template(client_id=sheet_client, user_id=user_id)

    response = await make_response(_build_csv(rows, template))
    disposition = f'attachment; filename="activation_calendar_{sheet_id}.csv"'
    response.headers['Content-Type'] = 'text/csv'
    response.headers['Content-Disposition'] = disposition
    return response
# ── User export template endpoints (any logged-in user) ───────────────────────
user_export_bp = Blueprint('user_export', __name__, url_prefix='/api/export')
@user_export_bp.route('/template', methods=['GET'])
@auth_required
async def get_user_template():
    """Return the caller's export template, a custom-template flag, and the field list."""
    caller = get_user_id()
    is_custom = await has_export_template(user_id=caller)
    columns = await load_export_template(user_id=caller)
    payload = {'template': columns, 'hasCustom': is_custom, 'fields': INTERNAL_FIELDS}
    return jsonify(payload)
@user_export_bp.route('/template/detect', methods=['POST'])
@auth_required
async def detect_user_template():
    """Inspect an uploaded ``.csv`` file and return the detected template.

    The upload is read from the multipart field ``file``. The result of
    ``detect_csv_template`` is returned with an added ``fields`` entry.
    """
    uploads = await request.files
    upload = uploads.get('file')
    if not upload:
        return jsonify({'error': 'no_file'}), 400

    lowered_name = (upload.filename or '').lower()
    if not lowered_name.endswith('.csv'):
        return jsonify({'error': 'Only .csv files accepted'}), 400

    try:
        detected = detect_csv_template(upload.read())
        detected['fields'] = INTERNAL_FIELDS
        return jsonify(detected)
    except Exception as exc:
        # Any detection failure is reported to the client as a 400 with its message.
        return jsonify({'error': str(exc)}), 400
@user_export_bp.route('/template', methods=['POST'])
@auth_required
async def save_user_template():
    """Persist the caller's custom export template.

    Expects a JSON object ``{"template": [...]}`` in which every entry is an
    object carrying at least a ``header`` key (the CSV export indexes
    ``col['header']`` for each column). Malformed payloads are rejected with
    400 ``invalid_template`` rather than being stored and breaking later
    exports.

    Returns:
        ``{'success': True, 'columns': <count>}`` on success.
    """
    user_id = get_user_id()
    body = await request.get_json() or {}
    # A JSON array or scalar body has no .get(); treat it as an invalid template.
    template = body.get('template') if isinstance(body, dict) else None
    if not template or not isinstance(template, list):
        return jsonify({'error': 'invalid_template'}), 400
    if not all(isinstance(col, dict) and 'header' in col for col in template):
        return jsonify({'error': 'invalid_template'}), 400
    await save_export_template(template, user_id=user_id)
    return jsonify({'success': True, 'columns': len(template)})
@user_export_bp.route('/template', methods=['DELETE'])
@auth_required
async def delete_user_template():
    """Remove the caller's custom export template."""
    caller = get_user_id()
    await delete_export_template(user_id=caller)
    return jsonify({'success': True})

627
backend/server/api/jobs.py Executable file
View file

@ -0,0 +1,627 @@
"""
Jobs API endpoints for file upload and processing management
"""
import logging
import os
import zipfile
from datetime import datetime
from io import BytesIO
from quart import Blueprint, request, jsonify, send_file, g
import csv
from ..auth.middleware import dev_mode_bypass, auth_required, get_user_id
from ..jobs.models import Job, ModelConfiguration
from ..jobs.manager import JobManager
from ..ws.manager import WebSocketManager
logger = logging.getLogger(__name__)
jobs_bp = Blueprint('jobs', __name__, url_prefix='/api/jobs')
@jobs_bp.route('', methods=['POST'])
@dev_mode_bypass
async def create_jobs():
    """
    Create new processing jobs from uploaded files

    Accepts multipart/form-data with:
    - files: One or more files to process (several files may share one
      field name; every one of them is processed)
    - modelConfig (optional): JSON string with model configuration

    Returns:
        JSON with ``jobs`` (created job objects) and ``errors`` (per-file
        failure messages). 400 when no files are sent, when ``modelConfig``
        is invalid, or when every file failed; 500 on unexpected errors.
    """
    try:
        job_manager = JobManager.get_instance()
        ws_manager = WebSocketManager()
        user_id = get_user_id()

        # Get uploaded files
        files = await request.files
        if not files:
            return jsonify({
                'error': 'no_files',
                'message': 'No files provided for upload'
            }), 400

        # items() on a MultiDict yields only the first value per field name;
        # multi=True is required to see every file of a multi-file upload.
        uploads = list(files.items(multi=True))
        logger.info(f"Received {len(uploads)} files for upload")

        # Get model configuration from form data
        form_data = await request.form
        model_config_json = form_data.get('modelConfig')
        model_config = None
        if model_config_json:
            try:
                import json
                model_config_data = json.loads(model_config_json)
                model_config = ModelConfiguration.from_dict(model_config_data)
            except Exception as e:
                return jsonify({
                    'error': 'invalid_model_config',
                    'message': f'Invalid model configuration: {e}'
                }), 400

        created_jobs = []
        errors = []

        # Process each uploaded file
        for field_name, file_storage in uploads:
            try:
                if not file_storage or not file_storage.filename:
                    logger.warning(f"Skipping empty file field: {field_name}")
                    continue

                logger.info(f"Processing file: {file_storage.filename}")

                # Read file data
                file_data = file_storage.read()
                file_size = len(file_data)

                # Create job
                job = await job_manager.create_job(
                    file_name=file_storage.filename,
                    file_size=file_size,
                    file_data=file_data,
                    user_id=user_id,
                    model_config=model_config
                )
                created_jobs.append(job)
                logger.info(f"Created and queued job {job.id} for {file_storage.filename}")

                # Broadcast job creation
                await ws_manager.broadcast_to_user(user_id, {
                    'type': 'job.created',
                    'job': job.to_dict()
                })

                # Broadcast job accepted (when it enters the queue)
                await ws_manager.broadcast_to_user(user_id, {
                    'type': 'job.accepted',
                    'jobId': job.id
                })

                logger.info(f"Created job {job.id} for file {file_storage.filename} (user: {user_id})")
            except Exception as e:
                # getattr keeps the error path itself from raising on a bad entry.
                failed_name = getattr(file_storage, 'filename', None) or field_name
                error_msg = f"Failed to process file {failed_name}: {str(e)}"
                errors.append(error_msg)
                logger.error(error_msg)

        if not created_jobs and errors:
            return jsonify({
                'error': 'upload_failed',
                'message': 'Failed to process any files',
                'details': errors
            }), 400

        return jsonify({
            'jobs': [job.to_dict() for job in created_jobs],
            'errors': errors
        })
    except Exception as e:
        logger.error(f"Job creation failed: {e}", exc_info=True)
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to create jobs'
        }), 500
@jobs_bp.route('', methods=['GET'])
@dev_mode_bypass
async def list_jobs():
    """
    List jobs for the current user

    Query parameters:
    - limit: Maximum number of jobs to return (default: 50, max: 100)
    - offset: Number of jobs to skip (default: 0)
    - status: Filter by job status (optional)

    Returns:
        Paginated list of jobs. 400 when ``limit``/``offset`` are not
        integers; 500 on unexpected errors.
    """
    try:
        job_manager = JobManager.get_instance()
        user_id = get_user_id()

        # Parse query parameters; a malformed number is a client error, not a 500.
        try:
            limit = int(request.args.get('limit', 50))
            offset = int(request.args.get('offset', 0))
        except (TypeError, ValueError):
            return jsonify({
                'error': 'invalid_request',
                'message': 'limit and offset must be integers'
            }), 400
        # Clamp to the documented range so negative values never reach the manager.
        limit = max(0, min(limit, 100))
        offset = max(0, offset)
        status_filter = request.args.get('status')

        # Get user jobs
        jobs = await job_manager.get_user_jobs(user_id, limit, offset)

        # Apply status filter if provided (applied to the fetched page only)
        if status_filter:
            wanted = status_filter.lower()
            jobs = [job for job in jobs if job.phase.value.lower() == wanted]

        return jsonify({
            'jobs': [job.to_dict() for job in jobs],
            'pagination': {
                'limit': limit,
                'offset': offset,
                'count': len(jobs)
            }
        })
    except Exception as e:
        logger.error(f"Failed to list jobs: {e}")
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to retrieve jobs'
        }), 500
@jobs_bp.route('/<job_id>', methods=['GET'])
@dev_mode_bypass
async def get_job(job_id: str):
    """
    Fetch a single job.

    Returns:
        ``{'job': ...}`` for the serialized job; 404 if it does not exist,
        403 if it belongs to another user (ownership is not enforced in dev
        mode), 500 on unexpected errors.
    """
    try:
        manager = JobManager.get_instance()
        requester_id = get_user_id()

        found = await manager.get_job(job_id)
        if not found:
            return jsonify({'error': 'not_found', 'message': 'Job not found'}), 404

        # Ownership check is skipped entirely when DEV_MODE is on.
        from ..config_runtime import server_config
        is_foreign = found.user_id != requester_id
        if is_foreign and not server_config.DEV_MODE:
            return jsonify({'error': 'forbidden', 'message': 'Access denied'}), 403

        return jsonify({'job': found.to_dict()})
    except Exception as exc:
        logger.error(f"Failed to get job {job_id}: {exc}")
        return jsonify({'error': 'server_error', 'message': 'Failed to retrieve job'}), 500
@jobs_bp.route('/<job_id>/download', methods=['GET'])
@dev_mode_bypass
async def download_job_result(job_id: str):
    """
    Send the CSV output of a job as a file download.

    Returns:
        The CSV as an attachment named ``<upload-name>-results.csv``; 404 if
        the job is unknown, 403 if owned by someone else (outside dev mode),
        400 if no output file exists, 500 on unexpected errors.
    """
    try:
        manager = JobManager.get_instance()
        requester_id = get_user_id()

        job = await manager.get_job(job_id)
        if not job:
            return jsonify({'error': 'not_found', 'message': 'Job not found'}), 404

        # Ownership check is skipped entirely when DEV_MODE is on.
        from ..config_runtime import server_config
        is_foreign = job.user_id != requester_id
        if is_foreign and not server_config.DEV_MODE:
            return jsonify({'error': 'forbidden', 'message': 'Access denied'}), 403

        # The result must be recorded on the job and present on disk.
        result_path = job.output_path
        if not result_path or not os.path.exists(result_path):
            return jsonify({'error': 'not_ready', 'message': 'Job result not available'}), 400

        stem, _extension = os.path.splitext(job.file_name)
        return await send_file(
            job.output_path,
            as_attachment=True,
            attachment_filename=f"{stem}-results.csv",
            mimetype='text/csv'
        )
    except Exception as exc:
        logger.error(f"Download failed for job {job_id}: {exc}")
        return jsonify({'error': 'server_error', 'message': 'Failed to download result'}), 500
@jobs_bp.route('/<job_id>/logs', methods=['GET'])
@dev_mode_bypass
async def get_job_logs(job_id: str):
    """
    Get logs for a specific job

    Query parameters:
    - limit: Maximum number of log entries (default: 100, max: 1000)
    - level: Filter by log level (optional)

    Returns:
        The most recent ``limit`` log entries (after level filtering).
        400 when ``limit`` is not an integer, 404/403 for unknown or foreign
        jobs, 500 on unexpected errors.
    """
    try:
        job_manager = JobManager.get_instance()
        user_id = get_user_id()

        job = await job_manager.get_job(job_id)
        if not job:
            return jsonify({
                'error': 'not_found',
                'message': 'Job not found'
            }), 404

        # Check if user owns this job (skip check in dev mode)
        from ..config_runtime import server_config
        if not server_config.DEV_MODE and job.user_id != user_id:
            return jsonify({
                'error': 'forbidden',
                'message': 'Access denied'
            }), 403

        # Parse query parameters; a malformed number is a client error, not a 500.
        try:
            limit = int(request.args.get('limit', 100))
        except (TypeError, ValueError):
            return jsonify({
                'error': 'invalid_request',
                'message': 'limit must be an integer'
            }), 400
        limit = max(0, min(limit, 1000))
        level_filter = request.args.get('level')

        # Get logs
        logs = job.logs

        # Apply level filter if provided
        if level_filter:
            wanted = level_filter.lower()
            logs = [log for log in logs if log.level.lower() == wanted]

        # Keep the newest `limit` entries. logs[-0:] would be the WHOLE list,
        # so a zero limit is handled explicitly.
        if limit == 0:
            logs = []
        elif len(logs) > limit:
            logs = logs[-limit:]

        return jsonify({
            'logs': [log.to_dict() for log in logs],
            'count': len(logs)
        })
    except Exception as e:
        logger.error(f"Failed to get logs for job {job_id}: {e}")
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to retrieve job logs'
        }), 500
@jobs_bp.route('/<job_id>', methods=['DELETE'])
@dev_mode_bypass
async def delete_job(job_id: str):
    """
    Remove a job via the job manager and notify the owner's sockets.

    Returns:
        A success message; 404 if the job is unknown, 403 if owned by
        someone else (outside dev mode), 500 if deletion fails.
    """
    try:
        manager = JobManager.get_instance()
        sockets = WebSocketManager()
        requester_id = get_user_id()

        job = await manager.get_job(job_id)
        if not job:
            return jsonify({'error': 'not_found', 'message': 'Job not found'}), 404

        # Ownership check is skipped entirely when DEV_MODE is on.
        from ..config_runtime import server_config
        is_foreign = job.user_id != requester_id
        if is_foreign and not server_config.DEV_MODE:
            return jsonify({'error': 'forbidden', 'message': 'Access denied'}), 403

        deleted = await manager.delete_job(job_id)
        if not deleted:
            return jsonify({'error': 'deletion_failed', 'message': 'Failed to delete job'}), 500

        # Tell the user's connected clients that the job is gone.
        await sockets.broadcast_to_user(requester_id, {
            'type': 'job.deleted',
            'jobId': job_id
        })
        logger.info(f"Deleted job {job_id} (user: {requester_id})")
        return jsonify({'message': 'Job deleted successfully'})
    except Exception as exc:
        logger.error(f"Failed to delete job {job_id}: {exc}")
        return jsonify({'error': 'server_error', 'message': 'Failed to delete job'}), 500
@jobs_bp.route('/batch-download', methods=['POST'])
@dev_mode_bypass
async def batch_download():
    """
    Download multiple job results as a ZIP file

    Expects:
        {
            "jobIds": ["job1", "job2", "job3"]
        }

    Jobs that are unknown, owned by another user (outside dev mode), or
    without an output file are skipped with a warning. Duplicate ids are
    processed once.

    Returns:
        ZIP file containing CSV results; 400 when the body is not a JSON
        object with a non-empty ``jobIds`` list or when no job produced a
        file; 500 on unexpected errors.
    """
    try:
        import asyncio
        from ..config_runtime import server_config

        job_manager = JobManager.get_instance()
        user_id = get_user_id()

        # silent=True: a missing/invalid JSON body becomes None (-> 400 below)
        # instead of raising and surfacing as a 500.
        data = await request.get_json(silent=True)
        raw_ids = data.get('jobIds') if isinstance(data, dict) else None
        if not isinstance(raw_ids, list):
            raw_ids = []
        # Keep string ids only and drop duplicates while preserving order, so
        # the archive never receives two entries with the same name.
        job_ids = list(dict.fromkeys(j for j in raw_ids if isinstance(j, str) and j))

        if not job_ids:
            return jsonify({
                'error': 'invalid_request',
                'message': 'No job IDs provided'
            }), 400

        def _read_file(path):
            with open(path, 'rb') as csv_file:
                return csv_file.read()

        loop = asyncio.get_running_loop()

        # Create ZIP file in memory
        zip_buffer = BytesIO()
        csv_files = []

        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for job_id in job_ids:
                job = await job_manager.get_job(job_id)
                if not job:
                    logger.warning(f"Job {job_id} not found for batch download")
                    continue

                # Check if user owns this job (skip check in dev mode)
                if not server_config.DEV_MODE and job.user_id != user_id:
                    logger.warning(f"User {user_id} denied access to job {job_id}")
                    continue

                # Check if job has output
                if not job.output_path or not os.path.exists(job.output_path):
                    logger.warning(f"Job {job_id} has no output file")
                    continue

                base_name = os.path.splitext(job.file_name)[0]
                csv_filename = f"{base_name}-{job_id[:8]}.csv"

                # Read file in thread pool to avoid blocking the event loop
                csv_content = await loop.run_in_executor(None, _read_file, job.output_path)
                zip_file.writestr(csv_filename, csv_content)
                csv_files.append(csv_filename)

        if not csv_files:
            return jsonify({
                'error': 'no_results',
                'message': 'No completed jobs found for download'
            }), 400

        zip_buffer.seek(0)

        # Generate download filename
        timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"batch_results_{timestamp}.zip"

        return await send_file(
            zip_buffer,
            as_attachment=True,
            attachment_filename=zip_filename,
            mimetype='application/zip'
        )
    except Exception as e:
        logger.error(f"Batch download failed: {e}")
        return jsonify({
            'error': 'server_error',
            'message': 'Failed to create batch download'
        }), 500
@jobs_bp.route('/cleanup', methods=['POST'])
@dev_mode_bypass
async def cleanup_expired():
    """
    Run the job manager's expired-job cleanup on demand.
    (Admin/maintenance endpoint)

    NOTE(review): the route is only guarded by the regular auth decorator,
    not by an admin check — confirm whether that is intended.

    Returns:
        ``{'message': ..., 'itemsRemoved': <count>}``; 500 on failure.
    """
    try:
        removed = await JobManager.get_instance().cleanup_expired_jobs()
        logger.info(f"Manual cleanup completed: {removed} items removed")
        return jsonify({'message': 'Cleanup completed', 'itemsRemoved': removed})
    except Exception as exc:
        logger.error(f"Cleanup failed: {exc}")
        return jsonify({'error': 'server_error', 'message': 'Failed to perform cleanup'}), 500
@jobs_bp.route('/stats', methods=['GET'])
@dev_mode_bypass
async def get_job_stats():
    """
    Aggregate processing statistics over the caller's jobs.

    At most 1000 jobs are fetched from the job manager.

    Returns:
        ``{'stats': {...}}`` with job counts, success rate, total extracted
        assets and total cost (rounded to 4 decimals); 500 on failure.
    """
    try:
        manager = JobManager.get_instance()
        jobs = await manager.get_user_jobs(get_user_id(), limit=1000)

        completed = 0
        failed = 0
        assets = 0
        cost = 0
        for job in jobs:
            phase = job.phase.value
            if phase == 'COMPLETED':
                completed += 1
            elif phase == 'FAILED':
                failed += 1
            # Jobs without a summary contribute nothing to the totals.
            if job.summary:
                assets += job.summary.assets_extracted
                cost += job.summary.cost_usd_total

        total = len(jobs)
        # Everything that is neither completed nor failed counts as active.
        active = total - completed - failed
        success_rate = completed / total if total > 0 else 0

        return jsonify({
            'stats': {
                'totalJobs': total,
                'completedJobs': completed,
                'failedJobs': failed,
                'activeJobs': active,
                'successRate': success_rate,
                'totalAssetsExtracted': assets,
                'totalCostUsd': round(cost, 4)
            }
        })
    except Exception as exc:
        logger.error(f"Failed to get job stats: {exc}")
        return jsonify({'error': 'server_error', 'message': 'Failed to retrieve statistics'}), 500
# Two-letter codes treated as a language when the market value has no country part.
_DELIVERABLE_LANG_CODES = frozenset(
    {'EN', 'DE', 'FR', 'NL', 'ES', 'IT', 'PT', 'PL', 'RU', 'JA', 'ZH', 'AR', 'KO'}
)


def _split_market(market):
    """Split a ``language_country_market`` cell into ``(language, country)``.

    ``"en-GB"`` -> ``('EN', 'GB')``; a bare two-letter code is a language if
    it is a known language code, otherwise a country; any longer value
    (e.g. ``"English"``) is returned as the language. ``None``/blank gives
    ``('', '')``.
    """
    market = (market or '').strip()
    if '-' in market:
        lang, country = market.split('-', 1)
        return lang.strip().upper(), country.strip().upper()
    if len(market) == 2:
        code = market.upper()
        return (code, '') if code in _DELIVERABLE_LANG_CODES else ('', code)
    return market, ''


def _parse_quantity(raw) -> int:
    """Parse a CSV quantity cell, falling back to 1 for blank or unparsable values."""
    text = str(raw).strip() if raw is not None else ''
    if not text:
        return 1
    try:
        return int(text)
    except ValueError:
        pass
    try:
        # Accept decimal renderings such as "2.0".
        return int(float(text))
    except (ValueError, OverflowError):
        return 1


@jobs_bp.route('/<job_id>/deliverables', methods=['GET'])
@auth_required
async def get_job_deliverables(job_id: str):
    """
    Return extracted deliverables from a completed job as JSON rows
    ready for the Review Import flow.

    Reads the output CSV and maps columns to AC Deliverable schema.
    A malformed quantity or a short CSV row no longer fails the whole
    request: quantity falls back to 1 and a missing market to blank.

    Returns:
        ``{'deliverables': [...], 'count': n}``; 404 for an unknown job,
        403 for a foreign job (outside dev mode), 400 when no output file
        exists, 500 on unexpected errors.
    """
    try:
        job_manager = JobManager.get_instance()
        user_id = get_user_id()

        job = await job_manager.get_job(job_id)
        if not job:
            return jsonify({'error': 'not_found'}), 404

        from ..config_runtime import server_config
        if not server_config.DEV_MODE and job.user_id != user_id:
            return jsonify({'error': 'forbidden'}), 403

        if not job.output_path or not os.path.exists(job.output_path):
            return jsonify({'error': 'not_ready', 'message': 'Job not completed yet'}), 400

        deliverables = []
        with open(job.output_path, newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for row in reader:
                # Map brief-extractor CSV columns → AC Deliverable schema
                lang, country = _split_market(row.get('language_country_market'))
                deliverables.append({
                    'Number': '',
                    'Title': row.get('title', ''),
                    'Status': row.get('status', 'Booked') or 'Booked',
                    'Category': row.get('category', ''),
                    'Media': row.get('media', ''),
                    'Sub-media': row.get('asset_type', ''),
                    'Format': row.get('technical_specifications', ''),
                    'Supply date': row.get('review_date', ''),
                    'Live date': row.get('live_date', ''),
                    'Language': lang,
                    'Country': country,
                    'Quantity': _parse_quantity(row.get('quantity')),
                    # Extra brief fields kept for review UI
                    '_brief_title': row.get('title', ''),
                    '_brand_identifier': row.get('brand_identifier', ''),
                    '_priority': row.get('priority_level', ''),
                })

        return jsonify({'deliverables': deliverables, 'count': len(deliverables)})
    except Exception as e:
        logger.error(f"Failed to get deliverables for job {job_id}: {e}")
        return jsonify({'error': 'server_error'}), 500

View file

@ -0,0 +1,125 @@
"""
Sheet CRUD API — PostgreSQL-backed.
All routes scoped to the authenticated user.
"""
import logging
from quart import Blueprint, jsonify, request
from ..auth.middleware import auth_required, get_user_id
from ..sheets.manager import (
get_user_sheets, create_sheet, load_sheet_data,
update_sheet, delete_sheet, rename_sheet, duplicate_sheet,
generate_next_id, set_sheet_client_id,
)
logger = logging.getLogger(__name__)
sheets_bp = Blueprint('sheets', __name__, url_prefix='/api/sheets')
@sheets_bp.route('', methods=['GET'])
@auth_required
async def list_sheets():
    """Return the sheets belonging to the authenticated user."""
    owned = await get_user_sheets(get_user_id())
    return jsonify({'sheets': owned})
@sheets_bp.route('', methods=['POST'])
@auth_required
async def create_new_sheet():
    """Create a sheet from the JSON body (``name``, ``data``, ``client_id``); responds 201."""
    owner_id = get_user_id()
    payload = (await request.get_json()) or {}
    created = await create_sheet(
        owner_id,
        payload.get('name', ''),
        payload.get('data', []),
        payload.get('client_id', ''),
    )
    return jsonify({'sheet': created}), 201
@sheets_bp.route('/<sheet_id>/client', methods=['PATCH'])
@auth_required
async def update_sheet_client(sheet_id: str):
    """Assign the ``client_id`` from the JSON body to the given sheet."""
    owner_id = get_user_id()
    payload = (await request.get_json()) or {}
    new_client = payload.get('client_id', '')
    await set_sheet_client_id(owner_id, sheet_id, new_client)
    return jsonify({'success': True})
@sheets_bp.route('/<sheet_id>', methods=['GET'])
@auth_required
async def get_sheet(sheet_id: str):
    """Return a sheet's rows, or 404 when ``load_sheet_data`` yields ``None``."""
    rows = await load_sheet_data(get_user_id(), sheet_id)
    if rows is not None:
        return jsonify({'data': rows})
    return jsonify({'error': 'not_found'}), 404
@sheets_bp.route('/<sheet_id>', methods=['PUT'])
@auth_required
async def update_sheet_data(sheet_id: str):
    """Replace a sheet's rows with the ``data`` list from the JSON body."""
    owner_id = get_user_id()
    payload = (await request.get_json()) or {}
    rows = payload.get('data', [])
    await update_sheet(owner_id, sheet_id, rows)
    return jsonify({'success': True})
@sheets_bp.route('/<sheet_id>', methods=['DELETE'])
@auth_required
async def delete_sheet_route(sheet_id: str):
    """Delete the given sheet for the authenticated user."""
    owner_id = get_user_id()
    await delete_sheet(owner_id, sheet_id)
    return jsonify({'success': True})
@sheets_bp.route('/<sheet_id>', methods=['PATCH'])
@auth_required
async def rename_sheet_route(sheet_id: str):
    """Rename a sheet to the ``name`` in the JSON body; 404 when the rename reports failure."""
    owner_id = get_user_id()
    payload = (await request.get_json()) or {}
    renamed = await rename_sheet(owner_id, sheet_id, payload.get('name', ''))
    if renamed:
        return jsonify({'success': True})
    return jsonify({'error': 'not_found'}), 404
@sheets_bp.route('/<sheet_id>/duplicate', methods=['POST'])
@auth_required
async def duplicate_sheet_route(sheet_id: str):
    """Duplicate a sheet; responds 201 with the copy, or 404 when no copy is produced."""
    copy = await duplicate_sheet(get_user_id(), sheet_id)
    if copy is not None:
        return jsonify({'sheet': copy}), 201
    return jsonify({'error': 'not_found'}), 404
@sheets_bp.route('/<sheet_id>/import', methods=['POST'])
@auth_required
async def import_deliverables(sheet_id: str):
    """Append (or replace with) imported deliverables in a sheet.

    JSON body:
        deliverables: list of row objects to import.
        mode: ``'replace'`` discards the existing rows first; any other
            value (default ``'append'``) adds to them.

    Each imported row gets a fresh ``Number`` from ``generate_next_id``,
    default ``Status``/``Quantity`` values, and has every key starting with
    ``_`` (review-only metadata) removed.

    Returns:
        ``{'success': True, 'imported': n, 'total': m}``; 404 when the sheet
        is not found; 400 ``invalid_deliverables`` when the payload is not a
        list of objects (previously a 500).
    """
    user_id = get_user_id()
    body = await request.get_json() or {}
    if not isinstance(body, dict):
        body = {}
    incoming = body.get('deliverables', [])
    mode = body.get('mode', 'append')

    existing = await load_sheet_data(user_id, sheet_id)
    if existing is None:
        return jsonify({'error': 'not_found'}), 404

    if not isinstance(incoming, list) or not all(isinstance(r, dict) for r in incoming):
        return jsonify({'error': 'invalid_deliverables'}), 400

    base = [] if mode == 'replace' else list(existing)
    for source_row in incoming:
        # Build a cleaned copy rather than mutating the request payload.
        row = {k: v for k, v in source_row.items() if not k.startswith('_')}
        row['Number'] = generate_next_id(base)
        row.setdefault('Status', 'Booked')
        row.setdefault('Quantity', 1)
        base.append(row)

    await update_sheet(user_id, sheet_id, base)
    return jsonify({'success': True, 'imported': len(incoming), 'total': len(base)})

234
backend/server/app.py Normal file
View file

@ -0,0 +1,234 @@
"""
Main Quart application — AC Tool (AC Helper + Brief Extractor unified)
"""
import asyncio
import json
import logging
import os
import signal
from datetime import datetime
from typing import List
from quart import Quart, websocket, jsonify
from quart_cors import cors
import structlog
from .config_runtime import server_config
from .auth import msal_auth
from .jobs import JobManager
from .ws import ws_manager
from .runners.job_runner import start_background_workers, stop_background_workers
from .db import init_pool, close_pool
# API blueprints
from .api.auth import auth_bp
from .api.jobs import jobs_bp
from .api.config import config_bp
from .api.sheets import sheets_bp
from .api.export import export_bp, user_export_bp
from .api.ai_command import ai_bp
from .api.dropdowns import dropdowns_bp
from .api.admin import admin_bp
from .api.clients import clients_bp
# Configure structlog once at import time: records pass through the stdlib
# logging integration and are rendered as JSON with ISO timestamps.
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="ISO"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        # Final processor: serialize the event dict as a JSON string.
        structlog.processors.JSONRenderer(),
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)
logger = structlog.get_logger(__name__)
# Background tasks started in the app's startup hook and stopped in its shutdown hook.
background_workers: List[asyncio.Task] = []
def create_app() -> Quart:
    """Build and configure the Quart application.

    Sets up CORS, upload limits and the session secret, registers all API
    blueprints and the SPA fallback, and installs:

    - startup/shutdown hooks (DB pool, schema, dropdown seed, WebSocket
      manager tasks, job workers, periodic cleanup);
    - ``/health`` for queue and WebSocket statistics;
    - ``/ws`` for authenticated job updates;
    - JSON error handlers for 401/403/404/413/500.

    Returns:
        The configured ``Quart`` app.
    """
    app = Quart(__name__)

    cors_cfg = server_config.get_cors_config()
    cors(app, **cors_cfg)

    app.config.update({
        'MAX_CONTENT_LENGTH': server_config.MAX_CONTENT_LENGTH,
        'SECRET_KEY': server_config.SESSION_SECRET,
    })

    server_config.ensure_directories()
    job_manager = JobManager.get_instance()

    # Register blueprints
    for bp in [auth_bp, jobs_bp, config_bp, sheets_bp, export_bp, user_export_bp, ai_bp, dropdowns_bp, admin_bp, clients_bp]:
        app.register_blueprint(bp)

    # Serve React SPA static files (built by Vite into /app/frontend/dist)
    _register_spa(app)

    @app.before_serving
    async def startup():
        logger.info("Starting AC Tool server...")
        # Connect to PostgreSQL and apply schema
        await init_pool(server_config.DATABASE_URL)
        await _apply_schema()
        await _seed_dropdowns_if_needed()
        await ws_manager.start_background_tasks()
        global background_workers
        background_workers = await start_background_workers(
            job_manager, ws_manager, num_workers=server_config.MAX_CONCURRENT_JOBS
        )
        background_workers.append(asyncio.create_task(periodic_cleanup(job_manager)))
        logger.info("Server started", dev_mode=server_config.DEV_MODE)

    @app.after_serving
    async def shutdown():
        logger.info("Shutting down AC Tool server...")
        global background_workers
        if background_workers:
            await stop_background_workers(background_workers)
        await ws_manager.stop_background_tasks()
        await close_pool()

    @app.route('/health')
    async def health():
        queue_size = await job_manager.get_queue_size()
        active_jobs = await job_manager.get_active_jobs_count()
        ws_stats = await ws_manager.get_connection_stats()
        return jsonify({
            'status': 'healthy',
            'timestamp': datetime.utcnow().isoformat(),
            'queue': {'pending': queue_size, 'active': active_jobs},
            'websockets': ws_stats,
            'devMode': server_config.DEV_MODE,
        })

    @app.websocket('/ws')
    async def websocket_handler():
        client = None
        try:
            if server_config.DEV_MODE:
                user_id = server_config.DEV_USER_ID
            else:
                user_id = None
                # Token comes from ?token=... or an "Authorization: Bearer ..."
                # header; other header schemes are ignored rather than sliced blindly.
                token = websocket.args.get('token')
                if not token:
                    auth_header = websocket.headers.get('Authorization', '')
                    if auth_header.startswith('Bearer '):
                        token = auth_header[7:]
                if token:
                    from .auth.msal_auth import msal_auth as _msal
                    info = await _msal.validate_token(token)
                    if info:
                        user_id = info['oid']

            if not user_id:
                await websocket.send(json.dumps({'error': 'unauthorized'}))
                return

            client = await ws_manager.register_client(user_id)
            jobs_data = job_manager.serialize_all()
            await ws_manager.send_queue_snapshot(client, jobs_data)

            while True:
                try:
                    msg = await websocket.receive()
                except Exception:
                    # Transport-level failure: stop serving this connection.
                    break
                if not msg:
                    continue
                try:
                    data = json.loads(msg)
                except (TypeError, ValueError):
                    # A malformed frame is ignored instead of dropping the socket.
                    continue
                if isinstance(data, dict) and data.get('type') == 'ping':
                    try:
                        await client.send({'type': 'pong'})
                    except Exception:
                        break
        except Exception as e:
            logger.error(f"WebSocket error: {e}")
        finally:
            if client:
                await ws_manager.unregister_client(client.client_id)

    # Error handlers
    @app.errorhandler(401)
    async def unauthorized(e):
        return jsonify({'error': 'unauthorized'}), 401

    @app.errorhandler(403)
    async def forbidden(e):
        return jsonify({'error': 'forbidden'}), 403

    @app.errorhandler(404)
    async def not_found(e):
        return jsonify({'error': 'not_found'}), 404

    @app.errorhandler(413)
    async def too_large(e):
        return jsonify({'error': 'file_too_large', 'message': f'Max {server_config.MAX_UPLOAD_SIZE_MB}MB'}), 413

    @app.errorhandler(500)
    async def internal(e):
        return jsonify({'error': 'internal_error'}), 500

    return app
async def _apply_schema():
    """Create tables if they don't exist (idempotent).

    Reads ``db/schema.sql`` next to this module and executes it on a pooled
    connection.
    """
    from .db.pool import get_pool
    schema_path = os.path.join(os.path.dirname(__file__), 'db', 'schema.sql')
    # Explicit encoding: the platform default may not be UTF-8.
    with open(schema_path, 'r', encoding='utf-8') as f:
        sql = f.read()
    pool = get_pool()
    async with pool.acquire() as conn:
        await conn.execute(sql)
    logger.info("Database schema applied")
async def _seed_dropdowns_if_needed():
    """Insert the seed dropdown categories when no global (client-less) ones exist."""
    from .db.pool import get_pool

    async with get_pool().acquire() as connection:
        existing = await connection.fetchval(
            'SELECT COUNT(*) FROM dropdown_categories WHERE client_id IS NULL'
        )
    if existing != 0:
        return

    # Imported lazily, only when seeding is actually required.
    from .api.dropdowns import SEED_CATEGORIES, save_dropdowns
    await save_dropdowns(SEED_CATEGORIES)
    logger.info(f"Seeded {len(SEED_CATEGORIES)} global dropdown categories")
def _register_spa(app: Quart):
    """Serve the Vite-built React frontend for all non-API routes.

    The build directory is taken from ``FRONTEND_DIST`` or defaults to
    ``../frontend/dist`` relative to this package's parent directory. When
    it does not exist, no catch-all route is registered (API-only mode).
    """
    # `os` is already imported at module level; only the Quart helper that is
    # actually used is imported here.
    from quart import send_from_directory

    dist = os.environ.get('FRONTEND_DIST', os.path.join(os.path.dirname(os.path.dirname(__file__)), '..', 'frontend', 'dist'))
    dist = os.path.abspath(dist)
    if not os.path.isdir(dist):
        logger.warning(f"Frontend dist not found at {dist} — API-only mode")
        return

    @app.route('/', defaults={'path': ''})
    @app.route('/<path:path>')
    async def serve_spa(path):
        # Existing build artifacts are served directly; every other path gets
        # index.html.
        full = os.path.join(dist, path)
        if path and os.path.isfile(full):
            return await send_from_directory(dist, path)
        return await send_from_directory(dist, 'index.html')
async def periodic_cleanup(job_manager: JobManager):
    """Background loop: every 3600 seconds, run the job manager's expired-job cleanup.

    Exits when the task is cancelled; any other error is logged and the
    loop carries on with the next cycle.
    """
    interval_seconds = 3600
    while True:
        try:
            await asyncio.sleep(interval_seconds)
            removed = await job_manager.cleanup_expired_jobs()
            if removed:
                logger.info(f"Periodic cleanup: {removed} items removed")
        except asyncio.CancelledError:
            return
        except Exception as exc:
            logger.error(f"Cleanup error: {exc}")

View file

View file

@ -0,0 +1,140 @@
"""
Authentication middleware decorators for Quart routes.
Includes @auth_required, @admin_required, @dev_mode_bypass.
"""
import logging
from functools import wraps
from typing import Optional, Dict, Any, Callable
from quart import request, jsonify, g
from .msal_auth import msal_auth
from .user_store import upsert_user, get_user
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
def _check_emergency_token(token: str) -> Optional[Dict[str, Any]]:
    """Return a synthetic user when ``token`` equals the configured EMERGENCY_TOKEN.

    Args:
        token: Candidate token supplied by the client.

    Returns:
        A claims-like dict (``oid``, ``preferred_username``, ``name``) built
        from the EMERGENCY_USER_* settings, or ``None`` when the emergency
        token is not configured, no token was supplied, or they differ.
    """
    et = server_config.EMERGENCY_TOKEN
    if not et or not token:
        return None
    import hmac
    # compare_digest() accepts str only if both are ASCII and raises TypeError
    # otherwise; compare UTF-8 bytes so an arbitrary client-supplied token can
    # never turn into an unhandled exception.
    if hmac.compare_digest(token.encode('utf-8'), et.encode('utf-8')):
        email = server_config.EMERGENCY_USER_EMAIL
        return {
            'oid': f'emergency-{email}',
            'preferred_username': email,
            'name': server_config.EMERGENCY_USER_NAME,
        }
    return None
async def _extract_token_user() -> Optional[Dict[str, Any]]:
    """Find the request's token and resolve it to user claims.

    The token is taken from an ``Authorization: Bearer ...`` header when
    present, otherwise from the ``_token`` query parameter. The emergency
    token is checked first, then the token is validated through MSAL.
    Returns ``None`` when there is no token or it is not accepted.
    """
    header_value = request.headers.get('Authorization', '')
    prefix = 'Bearer '
    if header_value.startswith(prefix):
        candidate = header_value[len(prefix):]
    else:
        candidate = request.args.get('_token', '')

    if not candidate:
        return None

    emergency_user = _check_emergency_token(candidate)
    if emergency_user:
        return emergency_user
    return await msal_auth.validate_token(candidate)
async def _resolve_user(token_user: Dict) -> Dict:
    """
    Combine token claims with the stored user record.

    Upserts the user (keyed by ``oid``) with the email and name from the
    token, then returns the claims extended with ``role`` (default
    ``'user'``) and ``active`` (default ``True``) from the stored record.
    """
    record = await upsert_user(
        token_user['oid'],
        token_user.get('preferred_username', ''),
        token_user.get('name', ''),
    )
    merged = dict(token_user)
    merged['role'] = record.get('role', 'user')
    merged['active'] = record.get('active', True)
    return merged
def auth_required(f: Callable) -> Callable:
    """Decorator: require an authenticated, active user and set ``g.current_user``.

    In DEV_MODE the configured dev user is installed (and upserted) without
    any token check. Otherwise responds 401 when no valid token is present
    and 403 when the resolved account is deactivated.
    """
    @wraps(f)
    async def _guarded(*args, **kwargs):
        if not server_config.DEV_MODE:
            claims = await _extract_token_user()
            if not claims:
                return jsonify({'error': 'unauthorized', 'message': 'Authentication required'}), 401
            resolved = await _resolve_user(claims)
            if not resolved.get('active', True):
                return jsonify({'error': 'forbidden', 'message': 'Account deactivated'}), 403
            g.current_user = resolved
            return await f(*args, **kwargs)

        # DEV_MODE: impersonate the configured developer account.
        dev_role = server_config.DEV_USER_ROLE
        g.current_user = {
            'oid': server_config.DEV_USER_ID,
            'preferred_username': server_config.DEV_USER_EMAIL,
            'name': server_config.DEV_USER_NAME,
            'role': dev_role,
            'active': True,
        }
        await upsert_user(
            server_config.DEV_USER_ID,
            server_config.DEV_USER_EMAIL,
            server_config.DEV_USER_NAME,
            role=dev_role,
        )
        return await f(*args, **kwargs)

    return _guarded
# Keep old name for compatibility with brief-extractor blueprints
dev_mode_bypass = auth_required
def admin_required(f: Callable) -> Callable:
    """Decorator: require an authenticated, active user whose role is ``admin``.

    This decorator performs authentication itself by composing
    ``auth_required`` (which sets ``g.current_user``) with a role check, so
    it does not need to be stacked with ``@auth_required``. Sharing the
    authentication path keeps the two decorators from drifting apart.

    Responses: 401/403 from ``auth_required``; 403 ``Admin access required``
    when the role is not ``admin``.
    """
    @wraps(f)
    async def _admin_gate(*args, **kwargs):
        # Runs after auth_required has populated g.current_user.
        if g.current_user.get('role') != 'admin':
            return jsonify({'error': 'forbidden', 'message': 'Admin access required'}), 403
        return await f(*args, **kwargs)

    return auth_required(_admin_gate)
def get_user_id() -> str:
    """Return the current user's ``oid``, or ``'anonymous'`` when none is set on ``g``."""
    current = getattr(g, 'current_user', None)
    if not current:
        return 'anonymous'
    return current.get('oid', 'anonymous')
async def get_current_user() -> Optional[Dict[str, Any]]:
    """Return the user dict stored on ``g`` for this request, or ``None`` if absent."""
    current = getattr(g, 'current_user', None)
    return current

View file

@ -0,0 +1,107 @@
"""
MSAL / Azure AD token validator (SPA PKCE flow).
Backend only validates incoming Bearer JWTs — no server-side MSAL client needed.
Frontend sends the MSAL idToken (aud = clientId) for user identification.
"""
import logging
from typing import Optional, Dict, Any
import jwt
from jwt import PyJWKClient
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
# JWKS client caches keys after first fetch
_jwks_client: Optional[PyJWKClient] = None
def _get_jwks_client() -> PyJWKClient:
    """Return the module-wide ``PyJWKClient``, creating it on first use.

    The client points at the tenant's v2.0 key discovery endpoint and is
    created with ``cache_keys=True``.
    """
    global _jwks_client
    if _jwks_client is not None:
        return _jwks_client

    tenant = server_config.MSAL_TENANT_ID
    keys_url = f"https://login.microsoftonline.com/{tenant}/discovery/v2.0/keys"
    _jwks_client = PyJWKClient(keys_url, cache_keys=True)
    return _jwks_client
class MSALAuthenticator:
    """Validates Azure AD JWTs and exposes MSAL settings for the frontend.

    In DEV_MODE no token is checked and the configured dev user is returned.
    """

    def __init__(self):
        if server_config.DEV_MODE:
            logger.info("Running in DEV_MODE — MSAL authentication bypassed")

    async def validate_token(self, access_token: str) -> Optional[Dict[str, Any]]:
        """Validate a Bearer JWT and return its user claims.

        The token must be RS256-signed by a key from the tenant's JWKS, with
        audience equal to the configured client id and the tenant's v2.0
        issuer.

        Returns:
            ``{'oid', 'preferred_username', 'name'}`` on success, otherwise
            ``None`` (missing/expired/invalid token, missing ``oid`` claim,
            or any validation error).
        """
        if server_config.DEV_MODE:
            return {
                'oid': server_config.DEV_USER_ID,
                'preferred_username': server_config.DEV_USER_EMAIL,
                'name': server_config.DEV_USER_NAME,
            }
        if not access_token:
            return None
        try:
            jwks_client = _get_jwks_client()
            # NOTE(review): this lookup is a synchronous call inside a
            # coroutine; if it performs network I/O on a cache miss it will
            # block the event loop — confirm and offload if needed.
            signing_key = jwks_client.get_signing_key_from_jwt(access_token)
            claims = jwt.decode(
                access_token,
                signing_key.key,
                algorithms=["RS256"],
                audience=server_config.MSAL_CLIENT_ID,
                issuer=f"https://login.microsoftonline.com/{server_config.MSAL_TENANT_ID}/v2.0",
            )
            user_id = claims.get('oid')
            if not user_id:
                logger.warning("Token missing 'oid' claim")
                return None
            return {
                'oid': user_id,
                'preferred_username': claims.get('preferred_username') or claims.get('upn', ''),
                'name': claims.get('name', ''),
            }
        except jwt.ExpiredSignatureError:
            logger.warning("Token expired")
            return None
        except jwt.InvalidTokenError as e:
            logger.warning(f"Invalid JWT: {e}")
            return None
        except Exception as e:
            logger.error(f"Token validation error: {e}", exc_info=True)
            return None

    async def get_logout_url(self, post_logout_redirect_uri: Optional[str] = None) -> str:
        """Build the Azure AD logout URL.

        Args:
            post_logout_redirect_uri: Raw (unencoded) URL to return to after
                logout. It is URL-encoded here so that ``?``, ``&`` or ``#``
                inside it cannot corrupt the logout query string.

        Returns:
            In DEV_MODE the redirect URI itself (or ``http://localhost:5173``);
            otherwise the authority's ``/oauth2/v2.0/logout`` URL.
        """
        if server_config.DEV_MODE:
            return post_logout_redirect_uri or 'http://localhost:5173'
        base = f"{server_config.MSAL_AUTHORITY}/oauth2/v2.0/logout"
        if post_logout_redirect_uri:
            from urllib.parse import urlencode
            query = urlencode({'post_logout_redirect_uri': post_logout_redirect_uri})
            return f"{base}?{query}"
        return base

    def get_client_config(self) -> Dict[str, Any]:
        """Return the MSAL settings the SPA needs, plus the ``devMode`` flag."""
        return {
            'clientId': server_config.MSAL_CLIENT_ID,
            'authority': server_config.MSAL_AUTHORITY,
            'redirectUri': server_config.MSAL_REDIRECT_URI,
            'devMode': bool(server_config.DEV_MODE),
        }

    def is_dev_mode(self) -> bool:
        """Report whether the server runs with DEV_MODE enabled."""
        return server_config.DEV_MODE
msal_auth = MSALAuthenticator()

View file

@ -0,0 +1,86 @@
"""
User store — PostgreSQL-backed.
Keyed by Azure AD oid (object ID).
"""
import logging
from datetime import datetime, timezone
from typing import Dict, Optional
from ..config_runtime import server_config
from ..db.pool import get_pool
logger = logging.getLogger(__name__)
def _row_to_dict(row) -> Dict:
return {
'id': row['id'],
'email': row['email'],
'name': row['name'],
'role': row['role'],
'active': row['active'],
'created': row['created_at'].isoformat() if row['created_at'] else None,
'last_seen': row['last_seen_at'].isoformat() if row['last_seen_at'] else None,
}
async def get_user(user_id: str) -> Optional[Dict]:
    """Fetch one user by id.

    Returns:
        The user as a dict (see ``_row_to_dict``), or ``None`` if no row matches.
    """
    async with get_pool().acquire() as conn:
        record = await conn.fetchrow('SELECT * FROM users WHERE id = $1', user_id)
    if not record:
        return None
    return _row_to_dict(record)
async def upsert_user(user_id: str, email: str, name: str, role: Optional[str] = None) -> Dict:
    """
    Create or update a user in one atomic statement.

    On first creation the role is ``role`` if given, otherwise ``'admin'`` when
    ``email`` is listed in ``server_config.ADMIN_EMAILS`` and ``'user'``
    otherwise. For an existing user, empty ``email`` / ``name`` leave the
    stored values untouched, ``role=None`` keeps the stored role, and
    ``last_seen_at`` is refreshed.

    A single ``INSERT ... ON CONFLICT DO UPDATE`` replaces the former
    SELECT-then-INSERT/UPDATE sequence, which could raise a unique-violation
    when two requests for the same new user arrived concurrently.

    Args:
        user_id: Primary key of the user row.
        email: Email address; falsy means "keep existing" on update.
        name: Display name; falsy means "keep existing" on update.
        role: Explicit role to set, or ``None`` to leave the role alone.

    Returns:
        The stored user as a dict (see ``_row_to_dict``).
    """
    default_role = 'admin' if email and email.lower() in server_config.ADMIN_EMAILS else 'user'
    pool = get_pool()
    async with pool.acquire() as conn:
        # $4 is the role used only when the row is created; $5 is the explicit
        # role (NULL = keep the current one) used when the row already exists.
        row = await conn.fetchrow('''
            INSERT INTO users (id, email, name, role, active)
            VALUES ($1, $2, $3, $4, TRUE)
            ON CONFLICT (id) DO UPDATE
            SET email = CASE WHEN $2 <> '' THEN $2 ELSE users.email END,
                name = CASE WHEN $3 <> '' THEN $3 ELSE users.name END,
                role = COALESCE($5::text, users.role),
                last_seen_at = NOW()
            RETURNING *
        ''', user_id, email or '', name or '', role or default_role, role)
    return _row_to_dict(row)
async def list_users() -> list:
    """Return every user as a dict, ordered by ``last_seen_at`` descending."""
    async with get_pool().acquire() as conn:
        records = await conn.fetch('SELECT * FROM users ORDER BY last_seen_at DESC')
    return list(map(_row_to_dict, records))
async def set_role(user_id: str, role: str) -> Optional[Dict]:
    """Set a user's role.

    Returns:
        The updated user dict, or ``None`` when ``role`` is not ``'user'`` /
        ``'admin'`` or no user has that id.
    """
    allowed_roles = ('user', 'admin')
    if role not in allowed_roles:
        return None
    async with get_pool().acquire() as conn:
        record = await conn.fetchrow(
            'UPDATE users SET role = $2 WHERE id = $1 RETURNING *', user_id, role
        )
    if not record:
        return None
    return _row_to_dict(record)
async def set_active(user_id: str, active: bool) -> Optional[Dict]:
    """Set a user's ``active`` flag.

    Returns:
        The updated user dict, or ``None`` when no user has that id.
    """
    async with get_pool().acquire() as conn:
        record = await conn.fetchrow(
            'UPDATE users SET active = $2 WHERE id = $1 RETURNING *', user_id, active
        )
    if not record:
        return None
    return _row_to_dict(record)

116
backend/server/config_runtime.py Executable file
View file

@ -0,0 +1,116 @@
"""
Runtime configuration for AC Tool server
"""
import os
from typing import List
from dotenv import load_dotenv
load_dotenv()
class ServerConfig:
    """
    Server settings, read from environment variables once at class-definition
    time. Every attribute has a fallback used when its variable is unset.
    """

    # Server
    HOST: str = os.getenv('SERVER_HOST', '0.0.0.0')
    PORT: int = int(os.getenv('SERVER_PORT', '8000'))
    WORKERS: int = int(os.getenv('SERVER_WORKERS', '2'))
    DEBUG: bool = os.getenv('DEBUG', 'false').lower() == 'true'

    # Development Mode
    # NOTE(review): DEV_MODE defaults to 'true' and DEV_USER_ROLE to 'admin'.
    # Code in this file bypasses token validation when DEV_MODE is on, so a
    # deployment that forgets to set DEV_MODE=false runs unauthenticated.
    # Consider defaulting to 'false' — left unchanged here to avoid breaking
    # existing setups.
    DEV_MODE: bool = os.getenv('DEV_MODE', 'true').lower() == 'true'
    DEV_USER_ID: str = os.getenv('DEV_USER_ID', 'dev-user-id')
    DEV_USER_EMAIL: str = os.getenv('DEV_USER_EMAIL', 'dev@localhost')
    DEV_USER_NAME: str = os.getenv('DEV_USER_NAME', 'Dev User')
    DEV_USER_ROLE: str = os.getenv('DEV_USER_ROLE', 'admin')  # 'user' or 'admin'

    # CORS — comma-separated list; blank entries (e.g. from a trailing comma)
    # are dropped so an empty string is never treated as an allowed origin.
    ALLOWED_ORIGINS: List[str] = [
        origin.strip()
        for origin in os.getenv(
            'ALLOWED_ORIGINS',
            'http://localhost:3000,http://localhost:5173,https://ai-sandbox.oliver.solutions'
        ).split(',')
        if origin.strip()
    ]

    # Azure AD / MSAL (SPA PKCE flow — no client secret needed)
    MSAL_CLIENT_ID: str = os.getenv('MSAL_CLIENT_ID', '9079054c-9620-4757-a256-23413042f1ef')
    MSAL_TENANT_ID: str = os.getenv('MSAL_TENANT_ID', 'e519c2e6-bc6d-4fdf-8d9c-923c2f002385')
    MSAL_REDIRECT_URI: str = os.getenv('MSAL_REDIRECT_URI', 'https://ai-sandbox.oliver.solutions/ac-helper/')
    # Derived from MSAL_TENANT_ID so the tenant default lives in one place.
    MSAL_AUTHORITY: str = f'https://login.microsoftonline.com/{MSAL_TENANT_ID}'

    # Admin bootstrap — emails that always get admin role on first login.
    # ADMIN_EMAILS is normalised to lower case with blank entries removed.
    ADMIN_EMAIL: str = os.getenv('ADMIN_EMAIL', 'daveporter@oliver.agency')
    ADMIN_EMAILS: list = [
        e.strip().lower()
        for e in os.getenv('ADMIN_EMAILS', 'daveporter@oliver.agency,vadymsamoilenko@oliver.agency').split(',')
        if e.strip()
    ]

    # Emergency access — set EMERGENCY_TOKEN to a long random string to allow
    # token-based login bypassing SSO (useful when 2FA / Azure AD is unavailable).
    # Leave blank to disable this bypass entirely.
    EMERGENCY_TOKEN: str = os.getenv('EMERGENCY_TOKEN', '')
    EMERGENCY_USER_EMAIL: str = os.getenv('EMERGENCY_USER_EMAIL', 'daveporter@oliver.agency')
    EMERGENCY_USER_NAME: str = os.getenv('EMERGENCY_USER_NAME', 'Emergency Access')

    # Security
    # NOTE(review): the SESSION_SECRET fallback is a well-known placeholder;
    # production must override it.
    SESSION_SECRET: str = os.getenv('SESSION_SECRET', 'change-me-in-production')
    SECURE_COOKIES: bool = os.getenv('SECURE_COOKIES', 'false').lower() == 'true'
    HTTPS_ONLY: bool = os.getenv('HTTPS_ONLY', 'false').lower() == 'true'

    # File Upload
    MAX_UPLOAD_SIZE_MB: int = int(os.getenv('MAX_UPLOAD_SIZE_MB', '200'))
    MAX_CONTENT_LENGTH: int = MAX_UPLOAD_SIZE_MB * 1024 * 1024  # bytes
    ALLOWED_EXTENSIONS: set = {'.pdf', '.pptx', '.docx', '.xlsx', '.ppt', '.doc', '.xls'}

    # Job Management
    MAX_CONCURRENT_JOBS: int = int(os.getenv('MAX_CONCURRENT_JOBS', '2'))
    FILE_RETENTION_HOURS: int = int(os.getenv('FILE_RETENTION_HOURS', '24'))

    # WebSocket
    WS_PING_INTERVAL_SECONDS: int = int(os.getenv('WS_PING_INTERVAL_SECONDS', '30'))

    # AI
    GEMINI_API_KEY: str = os.getenv('GEMINI_API_KEY', '')
    GEMINI_MODEL: str = os.getenv('GEMINI_MODEL', 'gemini-3-flash-preview')

    # PostgreSQL
    DATABASE_URL: str = os.getenv('DATABASE_URL', 'postgresql://achelper:achelper@localhost:5432/achelper')

    # Data paths — mounted as Docker volume.
    # Default: a 'data' directory next to the package containing this file.
    DATA_DIR: str = os.getenv(
        'DATA_DIR',
        os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
    )
    UPLOAD_DIR: str = os.path.join(DATA_DIR, 'uploads')
    OUTPUT_DIR: str = os.path.join(DATA_DIR, 'outputs')
    SHEETS_DIR: str = os.path.join(DATA_DIR, 'sheets')
    USERS_FILE: str = os.path.join(DATA_DIR, 'users.json')
    DROPDOWNS_FILE: str = os.path.join(DATA_DIR, 'dropdowns.json')
    CLIENTS_FILE: str = os.path.join(DATA_DIR, 'clients.json')
    CLIENTS_DROPDOWNS_DIR: str = os.path.join(DATA_DIR, 'client_dropdowns')
    EXPORT_TEMPLATE_FILE: str = os.path.join(DATA_DIR, 'export_template.json')
    USER_EXPORT_TEMPLATES_DIR: str = os.path.join(DATA_DIR, 'user_export_templates')

    @classmethod
    def ensure_directories(cls):
        """Create the data directories if they do not exist yet."""
        for d in [cls.DATA_DIR, cls.UPLOAD_DIR, cls.OUTPUT_DIR, cls.SHEETS_DIR, cls.CLIENTS_DROPDOWNS_DIR, cls.USER_EXPORT_TEMPLATES_DIR]:
            os.makedirs(d, exist_ok=True)

    @classmethod
    def validate_auth_config(cls) -> bool:
        """Return True in dev mode, otherwise True only if both MSAL ids are set."""
        if cls.DEV_MODE:
            return True
        return bool(cls.MSAL_CLIENT_ID and cls.MSAL_TENANT_ID)

    @classmethod
    def get_cors_config(cls) -> dict:
        """Return the CORS settings (origins, methods, headers, credentials) as a dict."""
        return {
            'allow_origin': cls.ALLOWED_ORIGINS,
            'allow_methods': ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'],
            'allow_headers': ['Content-Type', 'Authorization', 'Accept'],
            'allow_credentials': True,
        }
server_config = ServerConfig()

View file

@ -0,0 +1,4 @@
# Database module — asyncpg PostgreSQL connection pool
from .pool import init_pool, close_pool, get_pool
__all__ = ['init_pool', 'close_pool', 'get_pool']

View file

@ -0,0 +1,193 @@
"""
One-time migration: import existing JSON file data into PostgreSQL.
Run inside the container:
python -m server.db.migrate_json
Or from the project root:
cd backend && python -m server.db.migrate_json
"""
import asyncio
import json
import logging
import os
import sys
from datetime import datetime, timezone
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _parse_dt(value) -> datetime | None:
"""Parse an ISO timestamp string (or None) into a timezone-aware datetime."""
if not value:
return None
try:
dt = datetime.fromisoformat(value)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt
except (ValueError, TypeError):
return None
def _load_json(path: str):
    """Parse the UTF-8 JSON document stored at ``path``."""
    with open(path, encoding='utf-8') as fh:
        return json.load(fh)


async def _migrate_users(conn, cfg) -> None:
    """Insert users from ``cfg.USERS_FILE``; ids that already exist are left untouched."""
    if not os.path.exists(cfg.USERS_FILE):
        return
    users = _load_json(cfg.USERS_FILE)
    now = datetime.now(timezone.utc)
    count = 0
    for uid, u in users.items():
        await conn.execute('''
            INSERT INTO users (id, email, name, role, active, created_at, last_seen_at)
            VALUES ($1, $2, $3, $4, $5, $6, $7)
            ON CONFLICT (id) DO NOTHING
        ''', uid, u.get('email', ''), u.get('name', ''),
            u.get('role', 'user'), u.get('active', True),
            _parse_dt(u.get('created')) or now,
            _parse_dt(u.get('last_seen')) or now)
        count += 1
    logger.info(f"Migrated {count} users")


async def _migrate_clients(conn, cfg) -> None:
    """Insert clients from ``cfg.CLIENTS_FILE``; ids that already exist are left untouched."""
    if not os.path.exists(cfg.CLIENTS_FILE):
        return
    clients = _load_json(cfg.CLIENTS_FILE)
    now = datetime.now(timezone.utc)
    for c in clients:
        await conn.execute('''
            INSERT INTO clients (id, name, has_custom_dropdowns, created_at)
            VALUES ($1, $2, $3, $4)
            ON CONFLICT (id) DO NOTHING
        ''', c['id'], c['name'], c.get('hasCustomDropdowns', False),
            _parse_dt(c.get('created')) or now)
    logger.info(f"Migrated {len(clients)} clients")


async def _replace_categories(conn, client_id, categories) -> None:
    """Delete and re-insert the dropdown categories of one client (``None`` = global)."""
    if client_id is None:
        # "= NULL" never matches in SQL, so the global set needs IS NULL.
        await conn.execute("DELETE FROM dropdown_categories WHERE client_id IS NULL")
    else:
        await conn.execute("DELETE FROM dropdown_categories WHERE client_id = $1", client_id)
    for cat in categories:
        await conn.execute('''
            INSERT INTO dropdown_categories (client_id, name, status, media_types)
            VALUES ($1, $2, $3, $4)
        ''', client_id, cat['name'], cat.get('status', 'Active'), cat.get('mediaTypes', []))


async def _migrate_dropdowns(conn, cfg) -> None:
    """Migrate the global dropdown file, then every per-client dropdown file."""
    if os.path.exists(cfg.DROPDOWNS_FILE):
        categories = _load_json(cfg.DROPDOWNS_FILE)
        await _replace_categories(conn, None, categories)
        logger.info(f"Migrated {len(categories)} global categories")

    client_dd_dir = cfg.CLIENTS_DROPDOWNS_DIR
    if not os.path.isdir(client_dd_dir):
        return
    for fname in sorted(os.listdir(client_dd_dir)):
        # "<client>_export.json" files are export templates, handled elsewhere.
        if not fname.endswith('.json') or '_export' in fname:
            continue
        client_id = fname[:-5]
        cats = _load_json(os.path.join(client_dd_dir, fname))
        await _replace_categories(conn, client_id, cats)
        logger.info(f"Migrated {len(cats)} categories for client {client_id}")


async def _upsert_template(conn, scope: str, columns) -> None:
    """Insert or overwrite the export template stored under ``scope``."""
    await conn.execute('''
        INSERT INTO export_templates (scope, columns) VALUES ($1, $2)
        ON CONFLICT (scope) DO UPDATE SET columns = $2, updated_at = NOW()
    ''', scope, columns)


async def _migrate_export_templates(conn, cfg) -> None:
    """Migrate the global, per-client and per-user export templates."""
    if os.path.exists(cfg.EXPORT_TEMPLATE_FILE):
        await _upsert_template(conn, 'global', _load_json(cfg.EXPORT_TEMPLATE_FILE))
        logger.info("Migrated global export template")

    client_dd_dir = cfg.CLIENTS_DROPDOWNS_DIR
    if os.path.isdir(client_dd_dir):
        suffix = '_export.json'
        for fname in sorted(os.listdir(client_dd_dir)):
            if not fname.endswith(suffix):
                continue
            client_id = fname[:-len(suffix)]
            tpl = _load_json(os.path.join(client_dd_dir, fname))
            await _upsert_template(conn, f'client:{client_id}', tpl)
            logger.info(f"Migrated export template for client {client_id}")

    user_tpl_dir = cfg.USER_EXPORT_TEMPLATES_DIR
    if os.path.isdir(user_tpl_dir):
        for fname in sorted(os.listdir(user_tpl_dir)):
            if not fname.endswith('.json'):
                continue
            user_id = fname[:-5]
            tpl = _load_json(os.path.join(user_tpl_dir, fname))
            await _upsert_template(conn, f'user:{user_id}', tpl)
            logger.info(f"Migrated export template for user {user_id}")


async def _migrate_sheets(conn, cfg) -> None:
    """Insert sheets listed in ``sheets_metadata.json``; ids that already exist are left untouched."""
    import re  # not imported at module level; needed only here

    metadata_file = os.path.join(cfg.DATA_DIR, 'sheets_metadata.json')
    if not os.path.exists(metadata_file):
        return
    meta = _load_json(metadata_file)
    total = 0
    for user_id, user_sheets in meta.items():
        # Build safe filename (mirror manager.py logic)
        safe_uid = re.sub(r'[^a-zA-Z0-9_\-]', '_', user_id)
        for sheet_meta in user_sheets:
            sid = sheet_meta['id']
            data_file = os.path.join(cfg.SHEETS_DIR, f"{safe_uid}_{sid}.json")
            # A sheet without a data file is migrated with empty data.
            data = _load_json(data_file) if os.path.exists(data_file) else []
            client_id = sheet_meta.get('client_id') or None
            now = datetime.now(timezone.utc)
            await conn.execute('''
                INSERT INTO sheets
                    (id, user_id, name, client_id, data, item_count, created_at, modified_at)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
                ON CONFLICT (id) DO NOTHING
            ''', sid, user_id, sheet_meta.get('name', 'Untitled'),
                client_id, data, len(data),
                _parse_dt(sheet_meta.get('created')) or now,
                _parse_dt(sheet_meta.get('modified')) or now)
            total += 1
    logger.info(f"Migrated {total} sheets")


async def migrate():
    """Import the legacy JSON data files into PostgreSQL.

    Steps run in dependency order (users, clients, dropdowns, export
    templates, sheets) inside ONE transaction, so a failure part-way through
    rolls everything back instead of leaving tables with deleted-but-not-yet-
    reinserted rows. The connection pool is always closed on exit.

    Exits the process with status 1 when ``DATABASE_URL`` is empty.
    """
    # Import here to avoid circular issues at module level
    from ..config_runtime import server_config
    from ..db.pool import init_pool, get_pool, close_pool

    dsn = server_config.DATABASE_URL
    if not dsn:
        logger.error("DATABASE_URL not set — nothing to migrate")
        sys.exit(1)

    await init_pool(dsn)
    try:
        async with get_pool().acquire() as conn:
            async with conn.transaction():
                await _migrate_users(conn, server_config)
                await _migrate_clients(conn, server_config)
                await _migrate_dropdowns(conn, server_config)
                await _migrate_export_templates(conn, server_config)
                await _migrate_sheets(conn, server_config)
        logger.info("Migration complete!")
    finally:
        await close_pool()
if __name__ == '__main__':
asyncio.run(migrate())

44
backend/server/db/pool.py Normal file
View file

@ -0,0 +1,44 @@
"""
asyncpg connection pool with JSONB codec registration.
Call init_pool() once at startup and close_pool() at shutdown.
"""
import json
import logging
import asyncpg
logger = logging.getLogger(__name__)
_pool: asyncpg.Pool | None = None
async def _init_conn(conn: asyncpg.Connection):
    """Per-connection setup: register JSON codecs for ``jsonb`` and ``json``.

    With the codecs in place, values for these column types are encoded with
    ``json.dumps`` and decoded with ``json.loads``.
    """
    for pg_type in ('jsonb', 'json'):
        await conn.set_type_codec(
            pg_type,
            encoder=json.dumps,
            decoder=json.loads,
            schema='pg_catalog',
        )
async def init_pool(dsn: str):
    """Create the module-wide asyncpg pool and store it in ``_pool``.

    Args:
        dsn: PostgreSQL connection string passed to ``asyncpg.create_pool``.
    """
    global _pool
    pool_options = {
        'min_size': 2,
        'max_size': 10,
        'command_timeout': 30,
        'init': _init_conn,  # runs on every new connection
    }
    _pool = await asyncpg.create_pool(dsn, **pool_options)
    logger.info("PostgreSQL pool initialized")
async def close_pool():
    """Close the module-wide pool, if one exists, and reset ``_pool`` to ``None``."""
    global _pool
    if not _pool:
        return
    await _pool.close()
    _pool = None
    logger.info("PostgreSQL pool closed")
def get_pool() -> asyncpg.Pool:
    """Return the module-wide pool.

    Raises:
        RuntimeError: if ``init_pool()`` has not been called yet.
    """
    if _pool is not None:
        return _pool
    raise RuntimeError("Database pool not initialized — call init_pool() first")

View file

@ -0,0 +1,48 @@
-- AC Tool PostgreSQL schema
-- Run this once (idempotent — safe to re-run).
-- Every statement uses IF NOT EXISTS, so existing tables and indexes are left untouched.

-- Application users. id is the Azure AD object id (oid) per the user store module.
CREATE TABLE IF NOT EXISTS users (
id TEXT PRIMARY KEY,
email TEXT NOT NULL DEFAULT '',
name TEXT NOT NULL DEFAULT '',
role TEXT NOT NULL DEFAULT 'user',
active BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Clients; has_custom_dropdowns flags whether the client overrides the global dropdowns.
CREATE TABLE IF NOT EXISTS clients (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
has_custom_dropdowns BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- NULL client_id = global hierarchy; non-null = per-client override
-- Deleting a client removes its override rows (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS dropdown_categories (
id SERIAL PRIMARY KEY,
client_id TEXT REFERENCES clients(id) ON DELETE CASCADE,
name TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'Active',
media_types JSONB NOT NULL DEFAULT '[]'
);
CREATE INDEX IF NOT EXISTS idx_dropdown_cat_client ON dropdown_categories(client_id);
-- scope: 'global' | 'client:<id>' | 'user:<id>'
-- One template row per scope; scope is the primary key.
CREATE TABLE IF NOT EXISTS export_templates (
scope TEXT PRIMARY KEY,
columns JSONB NOT NULL,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Saved sheets. user_id carries no foreign key to users.
-- Deleting a client keeps its sheets and clears their client_id (ON DELETE SET NULL).
CREATE TABLE IF NOT EXISTS sheets (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
name TEXT NOT NULL,
client_id TEXT REFERENCES clients(id) ON DELETE SET NULL,
data JSONB NOT NULL DEFAULT '[]',
item_count INTEGER NOT NULL DEFAULT 0,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
modified_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_sheets_user ON sheets(user_id);

18
backend/server/jobs/__init__.py Executable file
View file

@ -0,0 +1,18 @@
"""
Job management module for Brief Extractor GUI
"""
from .models import Job, JobPhase, ProviderUpdate, JobSummary, ModelConfiguration, ModelInfo
from .manager import JobManager
from .storage import StorageManager
__all__ = [
'Job',
'JobPhase',
'ProviderUpdate',
'JobSummary',
'ModelConfiguration',
'ModelInfo',
'JobManager',
'StorageManager'
]

338
backend/server/jobs/manager.py Executable file
View file

@ -0,0 +1,338 @@
"""
Job manager for handling job queue, registry, and lifecycle
"""
import asyncio
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from threading import RLock
from .models import Job, JobPhase, ModelConfiguration, ModelInfo
from .storage import StorageManager
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
class JobManager:
    """
    In-memory job registry plus FIFO queue.

    Only one instance exists per process: ``__new__`` always hands back the
    same object and ``__init__`` initialises it a single time.
    """
    _instance: Optional['JobManager'] = None
    _lock = RLock()

    def __new__(cls):
        # Check, take the class-level lock, then check again before creating.
        if cls._instance is not None:
            return cls._instance
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every JobManager() call; set up state only once.
        if hasattr(self, '_initialized'):
            return
        self._initialized = True
        self.jobs: Dict[str, Job] = {}
        self.queue: asyncio.Queue = asyncio.Queue()
        self.processing_semaphore = asyncio.Semaphore(server_config.MAX_CONCURRENT_JOBS)
        self.storage = StorageManager()
        # Instance-level asyncio lock guarding ``self.jobs``; it shadows the
        # class-level RLock on the instance only.
        self._lock = asyncio.Lock()
        logger.info(f"JobManager initialized with concurrency limit: {server_config.MAX_CONCURRENT_JOBS}")

    @classmethod
    def get_instance(cls) -> 'JobManager':
        """Return the shared instance."""
        return cls()

    async def create_job(
        self,
        file_name: str,
        file_size: int,
        file_data: bytes,
        user_id: str,
        model_config: Optional[ModelConfiguration] = None
    ) -> Job:
        """
        Validate an upload, store it, register a job for it and enqueue it.

        Args:
            file_name: Original filename
            file_size: Size in bytes
            file_data: Binary file content
            user_id: User identifier
            model_config: Model configuration for processing

        Returns:
            The newly created job.

        Raises:
            ValueError: if the storage manager rejects the file.
        """
        is_valid, error_msg = self.storage.validate_file(file_name, file_size)
        if not is_valid:
            raise ValueError(f"File validation failed: {error_msg}")

        # upload_path starts empty and is filled in once the file is saved.
        job = Job.create(
            file_name=file_name,
            file_size=file_size,
            user_id=user_id,
            upload_path="",
            model_config=model_config
        )
        try:
            job.upload_path = await self.storage.save_uploaded_file(
                file_data=file_data,
                filename=file_name,
                job_id=job.id
            )
            async with self._lock:
                self.jobs[job.id] = job
            await self.queue.put(job.id)
            logger.info(f"Created job {job.id} for file {file_name} (user: {user_id})")
            return job
        except Exception as e:
            logger.error(f"Failed to create job for {file_name}: {e}")
            # Remove the stored upload, if any, before re-raising.
            if job.upload_path:
                await self.storage.cleanup_job_files(job.upload_path, None)
            raise

    async def get_job(self, job_id: str) -> Optional[Job]:
        """Look up a job by id; ``None`` if unknown."""
        async with self._lock:
            return self.jobs.get(job_id)

    async def update_job(self, job_id: str, **updates) -> bool:
        """
        Set attributes on a job and bump its ``updated_at``.

        Args:
            job_id: Job identifier
            **updates: Attribute name/value pairs; names the job does not
                already have are ignored.

        Returns:
            True if the job exists, False otherwise.
        """
        async with self._lock:
            job = self.jobs.get(job_id)
            if not job:
                return False
            known = {attr: value for attr, value in updates.items() if hasattr(job, attr)}
            for attr, value in known.items():
                setattr(job, attr, value)
            job.updated_at = datetime.utcnow()
            return True

    async def get_user_jobs(
        self,
        user_id: str,
        limit: int = 100,
        offset: int = 0
    ) -> List[Job]:
        """
        Return one page of a user's jobs, newest first.

        Args:
            user_id: User identifier
            limit: Maximum number of jobs to return
            offset: Number of jobs to skip
        """
        async with self._lock:
            ordered = sorted(
                (job for job in self.jobs.values() if job.user_id == user_id),
                key=lambda j: j.created_at,
                reverse=True,
            )
        return ordered[offset:offset + limit]

    async def get_all_jobs(self, limit: int = 100, offset: int = 0) -> List[Job]:
        """
        Return one page of all jobs, newest first (admin function).

        Args:
            limit: Maximum number of jobs to return
            offset: Number of jobs to skip
        """
        async with self._lock:
            ordered = sorted(self.jobs.values(), key=lambda j: j.created_at, reverse=True)
        return ordered[offset:offset + limit]

    async def delete_job(self, job_id: str) -> bool:
        """
        Remove a job from the registry after cleaning up its files.

        Args:
            job_id: Job identifier

        Returns:
            True if the job existed and was deleted, False otherwise.
        """
        async with self._lock:
            job = self.jobs.get(job_id)
            if not job:
                return False
            # Files first, registry entry second.
            await self.storage.cleanup_job_files(job.upload_path, job.output_path)
            del self.jobs[job_id]
            logger.info(f"Deleted job {job_id}")
            return True

    async def get_queue_size(self) -> int:
        """Return the number of job ids waiting in the queue."""
        return self.queue.qsize()

    async def get_active_jobs_count(self) -> int:
        """Count jobs whose phase is one of the four in-progress phases."""
        in_progress = [
            JobPhase.EXTRACT_CONTENT, JobPhase.LLM_ANALYSIS,
            JobPhase.CONSOLIDATION, JobPhase.CSV_GENERATION,
        ]
        async with self._lock:
            return sum(1 for job in self.jobs.values() if job.phase in in_progress)

    def serialize_all(self) -> List[Dict]:
        """Return ``to_dict()`` of every registered job (for WebSocket broadcast)."""
        return [job.to_dict() for job in self.jobs.values()]

    async def cleanup_expired_jobs(self) -> int:
        """
        Delete completed/failed jobs older than the retention period, then let
        the storage manager remove expired files.

        Returns:
            Deleted jobs plus files removed by the storage manager.
        """
        cutoff_time = datetime.utcnow() - timedelta(hours=server_config.FILE_RETENTION_HOURS)
        finished = [JobPhase.COMPLETED, JobPhase.FAILED]

        # Collect ids under the lock; delete afterwards because delete_job
        # takes the same lock itself.
        async with self._lock:
            jobs_to_cleanup = [
                job_id
                for job_id, job in list(self.jobs.items())
                if job.phase in finished and job.updated_at < cutoff_time
            ]

        cleanup_count = 0
        for job_id in jobs_to_cleanup:
            if await self.delete_job(job_id):
                cleanup_count += 1

        orphaned_count = await self.storage.cleanup_expired_files()
        total_cleaned = cleanup_count + orphaned_count
        if total_cleaned > 0:
            logger.info(f"Cleaned up {cleanup_count} expired jobs and {orphaned_count} orphaned files")
        return total_cleaned

    @staticmethod
    def get_available_models() -> List[ModelInfo]:
        """
        Return the ``ModelInfo`` of every model key in
        ``core_config.MODEL_MAPPINGS`` that has an entry in the catalogue
        below, in ``MODEL_MAPPINGS`` order.
        """
        # Import here to avoid circular imports
        from core.config import config as core_config

        catalogue = [
            ModelInfo(
                key='openai-gpt51',
                name='GPT-5.1',
                provider='OpenAI',
                description='Latest OpenAI model with advanced reasoning capabilities',
                cost_per_1m_input=1.25,
                cost_per_1m_output=10.00,
                can_be_primary=True,
                can_be_consolidation=True
            ),
            ModelInfo(
                key='anthropic-opus45',
                name='Claude Opus 4.5',
                provider='Anthropic',
                description='Highest quality model for complex analysis',
                cost_per_1m_input=5.00,
                cost_per_1m_output=25.00,
                can_be_primary=True,
                can_be_consolidation=True
            ),
            ModelInfo(
                key='anthropic-sonnet45',
                name='Claude Sonnet 4.5',
                provider='Anthropic',
                description='Balanced performance and cost',
                cost_per_1m_input=3.00,
                cost_per_1m_output=15.00,
                can_be_primary=True,
                can_be_consolidation=True
            ),
            ModelInfo(
                key='google-gemini31',
                name='Gemini 3.1 Pro',
                provider='Google',
                description='Cost-effective model with high context limit',
                cost_per_1m_input=1.25,
                cost_per_1m_output=5.00,
                can_be_primary=True,
                can_be_consolidation=True
            ),
        ]
        model_info_map = {info.key: info for info in catalogue}
        return [
            model_info_map[model_key]
            for model_key in core_config.MODEL_MAPPINGS.keys()
            if model_key in model_info_map
        ]
def get_default_model_config() -> ModelConfiguration:
    """Build a ``ModelConfiguration`` from the defaults in ``core.config``."""
    # Imported lazily, as elsewhere in this module.
    from core.config import config as core_config

    defaults = {
        'primary_models': core_config.get_default_primary_models(),
        'consolidation_model': core_config.DEFAULT_CONSOLIDATION_MODEL,
        'minimum_success_threshold': core_config.MINIMUM_SUCCESS_THRESHOLD,
    }
    return ModelConfiguration(**defaults)
# Global instance
job_manager = JobManager.get_instance()

282
backend/server/jobs/models.py Executable file
View file

@ -0,0 +1,282 @@
"""
Data models for job management and processing
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Any
import uuid
def _default_primary_models() -> List[str]:
try:
from core.config import config
return config.get_default_primary_models()
except Exception:
return ['anthropic-sonnet45', 'google-gemini31']
def _default_consolidation_model() -> str:
try:
from core.config import config
return config.DEFAULT_CONSOLIDATION_MODEL
except Exception:
return 'anthropic-sonnet45'
class JobPhase(Enum):
    """Phases a job can be in; each member's value equals its name."""
    # Waiting
    QUEUED = "QUEUED"
    # In progress
    EXTRACT_CONTENT = "EXTRACT_CONTENT"
    LLM_ANALYSIS = "LLM_ANALYSIS"
    CONSOLIDATION = "CONSOLIDATION"
    CSV_GENERATION = "CSV_GENERATION"
    # Finished
    COMPLETED = "COMPLETED"
    FAILED = "FAILED"
@dataclass
class ProviderUpdate:
    """Status record for one LLM provider call within a job."""
    provider: str  # 'openai', 'anthropic', 'google'
    model: str  # e.g., "gpt-5.1", "claude-sonnet-4-5", "gemini-3.1-pro"
    status: str  # 'started', 'success', 'error'
    started_at: Optional[str] = None
    completed_at: Optional[str] = None
    latency_ms: Optional[float] = None
    tokens_in: Optional[int] = None
    tokens_out: Optional[int] = None
    tokens_cached: Optional[int] = None
    cost_usd: Optional[float] = None
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a dict with camelCase keys."""
        # (output key, attribute name) in output order
        layout = (
            ('provider', 'provider'),
            ('model', 'model'),
            ('status', 'status'),
            ('startedAt', 'started_at'),
            ('completedAt', 'completed_at'),
            ('latencyMs', 'latency_ms'),
            ('tokensIn', 'tokens_in'),
            ('tokensOut', 'tokens_out'),
            ('tokensCached', 'tokens_cached'),
            ('costUsd', 'cost_usd'),
            ('error', 'error'),
        )
        return {key: getattr(self, attr) for key, attr in layout}
@dataclass
class LogEntry:
    """One log line attached to a job."""
    timestamp: str
    level: str  # 'DEBUG', 'INFO', 'WARNING', 'ERROR'
    message: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict."""
        return dict(
            timestamp=self.timestamp,
            level=self.level,
            message=self.message,
        )
@dataclass
class JobSummary:
    """Result summary stored on a completed job."""
    doc_type: str
    assets_extracted: int
    confidence_score: float
    notes: List[str]
    cost_usd_total: float
    tokens_total: int
    primary_models: List[str]
    consolidation_model: str
    processing_time_seconds: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the summary as a dict with camelCase keys."""
        # (output key, attribute name) in output order
        layout = (
            ('docType', 'doc_type'),
            ('assetsExtracted', 'assets_extracted'),
            ('confidenceScore', 'confidence_score'),
            ('notes', 'notes'),
            ('costUsdTotal', 'cost_usd_total'),
            ('tokensTotal', 'tokens_total'),
            ('primaryModels', 'primary_models'),
            ('consolidationModel', 'consolidation_model'),
            ('processingTimeSeconds', 'processing_time_seconds'),
        )
        return {key: getattr(self, attr) for key, attr in layout}
@dataclass
class ModelInfo:
    """Descriptive record for one selectable LLM model."""
    key: str
    name: str
    provider: str
    description: str
    cost_per_1m_input: float
    cost_per_1m_output: float
    can_be_primary: bool = True
    can_be_consolidation: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a dict with camelCase keys."""
        # (output key, attribute name) in output order
        layout = (
            ('key', 'key'),
            ('name', 'name'),
            ('provider', 'provider'),
            ('description', 'description'),
            ('costPer1mInput', 'cost_per_1m_input'),
            ('costPer1mOutput', 'cost_per_1m_output'),
            ('canBePrimary', 'can_be_primary'),
            ('canBeConsolidation', 'can_be_consolidation'),
        )
        return {key: getattr(self, attr) for key, attr in layout}
@dataclass
class ModelConfiguration:
    """Model selection for one job: primary models, consolidation model and
    the minimum number of successful primary runs."""
    primary_models: List[str] = field(default_factory=_default_primary_models)
    consolidation_model: str = field(default_factory=_default_consolidation_model)
    minimum_success_threshold: int = 1

    def to_dict(self) -> Dict[str, Any]:
        """Return the configuration as a dict with camelCase keys."""
        return {
            'primaryModels': self.primary_models,
            'consolidationModel': self.consolidation_model,
            'minimumSuccessThreshold': self.minimum_success_threshold
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ModelConfiguration':
        """Build a configuration from a camelCase dict (inverse of ``to_dict``).

        Keys that are missing or ``None`` fall back to the same defaults the
        dataclass itself uses, so ``from_dict({})`` equals
        ``ModelConfiguration()``. Previously a missing ``primaryModels``
        produced an empty model list and a missing ``consolidationModel``
        produced a hard-coded ``'openai-gpt51'``, regardless of configuration.
        An explicitly supplied value — including an empty list — is kept.
        """
        primary = data.get('primaryModels')
        consolidation = data.get('consolidationModel')
        return cls(
            primary_models=primary if primary is not None else _default_primary_models(),
            consolidation_model=(
                consolidation if consolidation is not None else _default_consolidation_model()
            ),
            minimum_success_threshold=data.get('minimumSuccessThreshold', 1)
        )
@dataclass
class Job:
    """A document-processing job: identity, progress, per-provider status,
    logs, file paths and the model configuration it runs with."""
    id: str
    file_name: str
    file_size: int
    created_at: datetime
    updated_at: datetime
    user_id: str
    phase: JobPhase
    progress_pct: int  # 0-100
    step_label: str
    provider_updates: Dict[str, ProviderUpdate] = field(default_factory=dict)
    error: Optional[str] = None
    result_csv_url: Optional[str] = None
    summary: Optional[JobSummary] = None
    logs: List[LogEntry] = field(default_factory=list)
    upload_path: Optional[str] = None
    output_path: Optional[str] = None
    model_config: ModelConfiguration = field(default_factory=ModelConfiguration)

    @classmethod
    def create(
        cls,
        file_name: str,
        file_size: int,
        user_id: str,
        upload_path: str,
        model_config: Optional[ModelConfiguration] = None
    ) -> 'Job':
        """Build a fresh QUEUED job with a random UUID id and 0% progress.

        A falsy ``model_config`` is replaced by a default ``ModelConfiguration``.
        """
        timestamp = datetime.utcnow()
        initial = dict(
            id=str(uuid.uuid4()),
            file_name=file_name,
            file_size=file_size,
            created_at=timestamp,
            updated_at=timestamp,
            user_id=user_id,
            phase=JobPhase.QUEUED,
            progress_pct=0,
            step_label='Queued for processing',
            upload_path=upload_path,
            model_config=model_config or ModelConfiguration(),
        )
        return cls(**initial)

    def update_progress(
        self,
        phase: JobPhase,
        progress_pct: int,
        step_label: str = ""
    ):
        """Set phase and progress (clamped to 0..100) and refresh ``updated_at``.

        An empty ``step_label`` is replaced by the phase's standard label.
        """
        self.phase = phase
        self.progress_pct = min(100, max(0, progress_pct))  # Clamp to [0, 100]
        self.updated_at = datetime.utcnow()
        if step_label:
            self.step_label = step_label
            return
        standard_labels = {
            JobPhase.QUEUED: 'Queued for processing',
            JobPhase.EXTRACT_CONTENT: 'Extracting document content',
            JobPhase.LLM_ANALYSIS: 'Parallel LLM analysis',
            JobPhase.CONSOLIDATION: 'Consolidating results',
            JobPhase.CSV_GENERATION: 'Generating CSV output',
            JobPhase.COMPLETED: 'Processing completed',
            JobPhase.FAILED: 'Processing failed'
        }
        self.step_label = standard_labels.get(phase, 'Processing')

    def add_log(self, level: str, message: str):
        """Append a timestamped log entry and refresh ``updated_at``."""
        self.logs.append(
            LogEntry(
                timestamp=datetime.utcnow().isoformat(),
                level=level,
                message=message
            )
        )
        self.updated_at = datetime.utcnow()

    def update_provider(self, model_key: str, update: ProviderUpdate):
        """Store ``update`` under ``model_key`` and refresh ``updated_at``."""
        self.provider_updates[model_key] = update
        self.updated_at = datetime.utcnow()

    def mark_completed(
        self,
        result_csv_url: str,
        summary: JobSummary,
        output_path: str
    ):
        """Record the results and move the job to COMPLETED at 100%."""
        self.phase = JobPhase.COMPLETED
        self.progress_pct = 100
        self.step_label = 'Processing completed'
        self.result_csv_url = result_csv_url
        self.summary = summary
        self.output_path = output_path
        self.updated_at = datetime.utcnow()

    def mark_failed(self, error: str):
        """Record ``error`` and move the job to FAILED."""
        self.phase = JobPhase.FAILED
        self.error = error
        self.step_label = 'Processing failed'
        self.updated_at = datetime.utcnow()

    def to_dict(self) -> Dict[str, Any]:
        """Return the job as a dict with camelCase keys.

        ``upload_path`` and ``output_path`` are not included.
        """
        # phase may have been set to a plain string instead of the enum
        if isinstance(self.phase, JobPhase):
            phase_value = self.phase.value
        else:
            phase_value = self.phase
        providers = {key: update.to_dict() for key, update in self.provider_updates.items()}
        summary = self.summary.to_dict() if self.summary else None
        log_entries = [entry.to_dict() for entry in self.logs]
        return {
            'id': self.id,
            'fileName': self.file_name,
            'fileSize': self.file_size,
            'createdAt': self.created_at.isoformat(),
            'updatedAt': self.updated_at.isoformat(),
            'userId': self.user_id,
            'phase': phase_value,
            'progressPct': self.progress_pct,
            'stepLabel': self.step_label,
            'providerUpdates': providers,
            'error': self.error,
            'resultCsvUrl': self.result_csv_url,
            'summary': summary,
            'logs': log_entries,
            'modelConfig': self.model_config.to_dict()
        }

231
backend/server/jobs/storage.py Executable file
View file

@ -0,0 +1,231 @@
"""
File storage management for uploads and outputs
"""
import os
import hashlib
import asyncio
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, List
import logging
import uuid
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
class StorageManager:
    """Manages file storage, cleanup, and safe file operations"""

    def __init__(self):
        """Resolve the upload/output directories from server_config and create them if missing."""
        self.upload_dir = Path(server_config.UPLOAD_DIR)
        self.output_dir = Path(server_config.OUTPUT_DIR)
        # Ensure directories exist
        self.upload_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate_safe_filename(self, original_filename: str, job_id: str) -> str:
        """
        Generate a safe filename for uploaded files

        Args:
            original_filename: Original filename from upload
            job_id: Unique job identifier

        Returns:
            Safe filename with job ID prefix: "<job_id>_<sanitized stem><ext>"
        """
        # Extract extension (kept verbatim; only the stem is sanitized)
        name, ext = os.path.splitext(original_filename)
        # Keep only alphanumerics, spaces, hyphens and underscores in the stem
        safe_name = "".join(c for c in name if c.isalnum() or c in (' ', '-', '_')).strip()
        safe_name = safe_name[:50]  # Limit length
        # The job ID prefix makes the name unique per job
        return f"{job_id}_{safe_name}{ext}"

    def get_upload_path(self, filename: str) -> str:
        """Get full path for uploaded file"""
        return str(self.upload_dir / filename)

    def get_output_path(self, job_id: str, original_filename: str) -> str:
        """
        Generate output CSV path for a job

        Args:
            job_id: Job identifier (currently not part of the generated name)
            original_filename: Original uploaded filename

        Returns:
            Path for output CSV file: "<sanitized stem>-<UTC timestamp>.csv"
        """
        # NOTE(review): job_id is unused, so two jobs with the same base name
        # finishing within the same second map to the same path — confirm
        # whether the download name may be changed before adding it.
        timestamp = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        # Extract base name without extension
        base_name = os.path.splitext(original_filename)[0]
        safe_base = "".join(c for c in base_name if c.isalnum() or c in (' ', '-', '_')).strip()
        safe_base = safe_base[:30]  # Limit length
        # Generate output filename
        output_filename = f"{safe_base}-{timestamp}.csv"
        return str(self.output_dir / output_filename)

    async def save_uploaded_file(self, file_data: bytes, filename: str, job_id: str) -> str:
        """
        Save uploaded file data to disk

        Args:
            file_data: Binary file data
            filename: Original filename
            job_id: Job identifier

        Returns:
            Path to saved file

        Raises:
            Exception: Any error from the write is logged and re-raised.
        """
        safe_filename = self.generate_safe_filename(filename, job_id)
        file_path = self.get_upload_path(safe_filename)
        try:
            # The blocking write runs in the default executor so the event
            # loop is not stalled while the file is written.
            def _write_file():
                """Blocking file write operation for thread pool"""
                with open(file_path, 'wb') as f:
                    f.write(file_data)

            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, _write_file)
            logger.info(f"Saved uploaded file: {file_path}")
            return file_path
        except Exception as e:
            logger.error(f"Failed to save uploaded file {filename}: {e}")
            raise

    def validate_file(self, filename: str, file_size: int) -> tuple[bool, Optional[str]]:
        """
        Validate uploaded file

        Args:
            filename: Original filename
            file_size: File size in bytes

        Returns:
            Tuple of (is_valid, error_message); error_message is None when valid
        """
        # Check file extension (compared in lower case)
        _, ext = os.path.splitext(filename.lower())
        if ext not in server_config.ALLOWED_EXTENSIONS:
            allowed = ', '.join(server_config.ALLOWED_EXTENSIONS)
            return False, f"File type {ext} not allowed. Allowed types: {allowed}"
        # Check file size
        max_size = server_config.MAX_CONTENT_LENGTH
        if file_size > max_size:
            max_mb = max_size / (1024 * 1024)
            actual_mb = file_size / (1024 * 1024)
            return False, f"File size {actual_mb:.1f}MB exceeds limit of {max_mb:.1f}MB"
        # Check filename length
        if len(filename) > 255:
            return False, "Filename too long (max 255 characters)"
        return True, None

    async def cleanup_job_files(self, upload_path: Optional[str], output_path: Optional[str]):
        """
        Clean up files associated with a job

        Missing paths are skipped; a failed delete is logged as a warning and
        does not stop the remaining deletes.

        Args:
            upload_path: Path to uploaded file
            output_path: Path to output CSV file
        """
        for file_path in [upload_path, output_path]:
            if file_path and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    logger.info(f"Cleaned up file: {file_path}")
                except Exception as e:
                    logger.warning(f"Failed to clean up file {file_path}: {e}")

    async def cleanup_expired_files(self) -> int:
        """
        Clean up files older than the retention period

        Returns:
            Number of files cleaned up
        """
        # Naive UTC cutoff; _cleanup_directory compares it against UTC mtimes.
        cutoff_time = datetime.utcnow() - timedelta(hours=server_config.FILE_RETENTION_HOURS)
        cleanup_count = 0
        # Clean upload directory
        cleanup_count += await self._cleanup_directory(self.upload_dir, cutoff_time)
        # Clean output directory
        cleanup_count += await self._cleanup_directory(self.output_dir, cutoff_time)
        if cleanup_count > 0:
            logger.info(f"Cleaned up {cleanup_count} expired files")
        return cleanup_count

    async def _cleanup_directory(self, directory: Path, cutoff_time: datetime) -> int:
        """
        Delete regular files in `directory` whose modification time is before the cutoff.

        Args:
            directory: Directory to scan (not recursive)
            cutoff_time: Naive datetime expressed in UTC

        Returns:
            Number of files deleted
        """
        from datetime import timezone

        cleanup_count = 0
        try:
            for file_path in directory.iterdir():
                if file_path.is_file():
                    # Convert the mtime to naive UTC so it is comparable with
                    # the utcnow()-based cutoff. Using the local-time default of
                    # fromtimestamp() skews retention by the server's UTC offset.
                    mtime = datetime.fromtimestamp(
                        file_path.stat().st_mtime, tz=timezone.utc
                    ).replace(tzinfo=None)
                    if mtime < cutoff_time:
                        try:
                            file_path.unlink()
                            cleanup_count += 1
                            logger.debug(f"Cleaned up expired file: {file_path}")
                        except Exception as e:
                            logger.warning(f"Failed to delete expired file {file_path}: {e}")
        except Exception as e:
            logger.error(f"Error during directory cleanup {directory}: {e}")
        return cleanup_count

    def get_file_info(self, file_path: str) -> Optional[dict]:
        """
        Get information about a file

        Args:
            file_path: Path to file

        Returns:
            Dictionary with 'path', 'size', 'created' and 'modified' keys, or
            None if the file doesn't exist or cannot be stat'ed
        """
        if not os.path.exists(file_path):
            return None
        try:
            stat = os.stat(file_path)
            return {
                'path': file_path,
                'size': stat.st_size,
                'created': datetime.fromtimestamp(stat.st_ctime).isoformat(),
                'modified': datetime.fromtimestamp(stat.st_mtime).isoformat()
            }
        except Exception as e:
            logger.error(f"Failed to get file info for {file_path}: {e}")
            return None

    def ensure_directories(self):
        """Ensure all required directories exist"""
        self.upload_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Storage directories ready: {self.upload_dir}, {self.output_dir}")
# Global instance shared by importers of this module. Constructing it runs
# StorageManager.__init__, which creates the upload and output directories.
storage_manager = StorageManager()

View file

@ -0,0 +1,16 @@
"""
Job runners module for processing document analysis jobs
"""
from .progress import ProgressReporter, JobLogHandler, create_job_logger
from .job_runner import run_job, process_job_queue, start_background_workers, stop_background_workers
# Public names of this package, re-exported from .progress and .job_runner.
__all__ = [
'ProgressReporter',
'JobLogHandler',
'create_job_logger',
'run_job',
'process_job_queue',
'start_background_workers',
'stop_background_workers'
]

View file

@ -0,0 +1,368 @@
"""
Enhanced DocumentAnalyzer with progress reporting for GUI integration
Extends the existing analyzer with progress hooks and WebSocket updates
"""
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
from typing import Optional, List, Dict, Any
import logging
from core.process_brief_enhanced import DocumentAnalyzer, ProcessingResult
from ..jobs.models import JobPhase, ModelConfiguration, JobSummary
from .progress import ProgressReporter
logger = logging.getLogger(__name__)
class EnhancedDocumentAnalyzer(DocumentAnalyzer):
    """
    Enhanced DocumentAnalyzer with progress reporting capabilities

    Runs the extract -> parallel analysis -> consolidation pipeline and, when a
    ProgressReporter is supplied, publishes phase/percentage updates and log
    lines for every stage. Without a reporter the same stages run silently.
    """

    def __init__(
        self,
        model_config: ModelConfiguration,
        progress_reporter: Optional[ProgressReporter] = None
    ):
        """
        Args:
            model_config: Supplies the primary model keys, the consolidation
                model key and the minimum success threshold
            progress_reporter: Receiver of progress events; None disables reporting
        """
        # Initialize base analyzer with model configuration
        primary_models = model_config.primary_models
        consolidation_model = model_config.consolidation_model
        super().__init__(primary_models, consolidation_model)
        self.progress = progress_reporter
        self.model_config = model_config

    async def process_document_with_progress(self, filepath: str) -> ProcessingResult:
        """
        Process document with progress reporting integration

        Failures in any stage are reported through the progress reporter (when
        present) and returned as an empty ProcessingResult carrying the error
        message in its notes; this method does not raise.

        Args:
            filepath: Path to document file

        Returns:
            ProcessingResult with extracted data
        """
        try:
            if self.progress:
                await self.progress.emit(
                    JobPhase.EXTRACT_CONTENT,
                    10,
                    f"Starting analysis of {os.path.basename(filepath)}"
                )

            # Stage 1: Extract document content
            if self.progress:
                await self.progress.emit_log('INFO', "=== STAGE 1: Document Content Extraction ===")
            try:
                document_content = self._extract_document_content(filepath)
                if self.progress:
                    await self.progress.emit(
                        JobPhase.EXTRACT_CONTENT,
                        25,
                        "Document content extracted successfully"
                    )
                    await self.progress.emit_log('INFO', f"Extracted {len(document_content)} characters of content")
            except Exception as e:
                error_msg = f"Content extraction failed: {e}"
                if self.progress:
                    await self.progress.emit_failure(error_msg)
                return ProcessingResult([], {}, 0.0, [error_msg], self.token_usage)

            # Stage 2: Parallel multi-model analysis
            if self.progress:
                await self.progress.emit(
                    JobPhase.LLM_ANALYSIS,
                    30,
                    "Starting parallel multi-model analysis"
                )
                await self.progress.emit_log('INFO', "=== STAGE 2: Parallel Multi-Model Analysis ===")
                await self.progress.emit_log('INFO', f"Using models: {', '.join(self.primary_models)}")
            doc_type = self.classify_document(filepath)
            try:
                analysis_responses, analysis_metadata = await self._perform_parallel_analysis_with_progress(
                    document_content, doc_type
                )
                if self.progress:
                    await self.progress.emit(
                        JobPhase.LLM_ANALYSIS,
                        75,
                        f"Parallel analysis completed - {len(analysis_responses)} successful models"
                    )
                    await self.progress.emit_log('INFO',
                        f"Analysis complete: {len(analysis_responses)}/{len(self.primary_models)} models succeeded"
                    )
            except Exception as e:
                error_msg = f"Parallel analysis failed: {e}"
                if self.progress:
                    await self.progress.emit_failure(error_msg)
                return ProcessingResult([], {}, 0.0, [error_msg], self.token_usage)

            # Stage 3: Consolidation
            if self.progress:
                await self.progress.emit(
                    JobPhase.CONSOLIDATION,
                    80,
                    "Starting result consolidation"
                )
                await self.progress.emit_log('INFO', "=== STAGE 3: Result Consolidation ===")
                await self.progress.emit_log('INFO', f"Using consolidation model: {self.consolidation_model}")
            try:
                consolidation_result = await self.consolidation_processor.consolidate_results(
                    analysis_responses, self.consolidation_model, document_content
                )
                if self.progress:
                    await self.progress.emit(
                        JobPhase.CONSOLIDATION,
                        90,
                        f"Consolidation completed: {len(consolidation_result.expanded_assets)} final assets"
                    )
                    await self.progress.emit_log('INFO',
                        f"Consolidation complete: {len(consolidation_result.expanded_assets)} final deliverables"
                    )
            except Exception as e:
                error_msg = f"Consolidation failed: {e}"
                if self.progress:
                    await self.progress.emit_failure(error_msg)
                return ProcessingResult([], {}, 0.0, [error_msg], self.token_usage)

            # Stage 4: Prepare results
            if self.progress:
                await self.progress.emit(
                    JobPhase.CSV_GENERATION,
                    95,
                    "Preparing results for output"
                )
            # Convert expanded assets to dict format for compatibility
            extracted_data = [asset.model_dump() for asset in consolidation_result.expanded_assets]
            # Aggregate token usage from all models
            total_token_usage = self.provider_manager.get_aggregated_token_usage(analysis_responses)
            # Combine processing notes
            successful_count = analysis_metadata.get('successful_models', len(analysis_responses))
            total_count = analysis_metadata.get('total_models_attempted', len(self.primary_models))
            processing_notes = [f"Parallel analysis: {successful_count}/{total_count} models"]
            processing_notes.extend(consolidation_result.warnings)
            # Merge metadata
            combined_metadata = {
                'doc_type': doc_type.value,
                'primary_models_used': self.primary_models,
                'consolidation_model': self.consolidation_model,
                'analysis_metadata': analysis_metadata,
                'consolidation_metadata': consolidation_result.consolidation_metadata
            }
            result = ProcessingResult(
                raw_data=extracted_data,
                metadata=combined_metadata,
                confidence_score=0.9,  # Higher confidence due to multi-model consensus
                processing_notes=processing_notes,
                token_usage=total_token_usage
            )
            if self.progress:
                await self.progress.emit(
                    JobPhase.CSV_GENERATION,
                    100,
                    "Analysis completed successfully"
                )
                await self.progress.emit_log('INFO', "=== PROCESSING COMPLETED SUCCESSFULLY ===")
            return result
        except Exception as e:
            # Safety net for anything not handled by the per-stage handlers.
            error_msg = f"Unexpected error during processing: {e}"
            logger.error(error_msg, exc_info=True)
            if self.progress:
                await self.progress.emit_failure(error_msg)
            return ProcessingResult([], {}, 0.0, [error_msg], self.token_usage)

    async def _perform_parallel_analysis_with_progress(
        self,
        document_content: str,
        doc_type
    ) -> tuple:
        """
        Perform parallel analysis with progress reporting

        Args:
            document_content: Extracted document text
            doc_type: Document type classification (its `.value` is formatted
                into the analysis prompt)

        Returns:
            Tuple of (successful_responses, metadata)
        """
        # Load prompt from external file
        multi_perspective_prompt_template = self._load_prompt('multi_perspective_analysis')
        multi_perspective_prompt = multi_perspective_prompt_template.format(doc_type=doc_type.value)
        # Load system message from external file
        system_message = self._load_prompt('system_multi_perspective')
        # Prepare combined prompt
        combined_prompt = f"{multi_perspective_prompt}\n\nDocument Content:\n{document_content}"
        # Prepare messages for all providers
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": combined_prompt}
        ]
        # Get schema for structured output
        from core.process_brief_enhanced import UNIVERSAL_BASE_DELIVERABLE_SCHEMA
        # Create progress callback for provider updates
        progress_callback = None
        if self.progress:
            progress_callback = self._create_provider_progress_callback()
        # Execute parallel analysis with progress reporting
        successful_responses, metadata = await self.provider_manager.execute_parallel_analysis(
            model_keys=self.primary_models,
            messages=messages,
            schema=UNIVERSAL_BASE_DELIVERABLE_SCHEMA,
            minimum_success_threshold=self.model_config.minimum_success_threshold,
            on_model_event=progress_callback
        )
        return successful_responses, metadata

    def _create_provider_progress_callback(self):
        """
        Create callback function for provider progress updates

        Returns:
            Async callback `on_model_event(model_key, stage, data)`. It handles
            the 'start' and 'end' stages and never raises: errors inside the
            callback are logged and swallowed.
        """
        async def on_model_event(model_key: str, stage: str, data: Any):
            if not self.progress:
                return
            try:
                # The event payload may be missing; treat that as an empty
                # mapping so the lookups below cannot fail with a TypeError.
                payload = data or {}
                if stage == 'start':
                    await self.progress.emit_provider_update(model_key, {
                        'provider': self._get_provider_name(model_key),
                        'model': self._get_model_display_name(model_key),
                        'status': 'started',
                        'startedAt': payload.get('timestamp')
                    })
                    await self.progress.emit_log('INFO', f"Starting analysis with {model_key}")
                elif stage == 'end':
                    if 'error' in payload:
                        await self.progress.emit_provider_update(model_key, {
                            'provider': self._get_provider_name(model_key),
                            'model': self._get_model_display_name(model_key),
                            'status': 'error',
                            'error': str(payload['error']),
                            'completedAt': payload.get('timestamp')
                        })
                        await self.progress.emit_log('ERROR', f"Analysis failed for {model_key}: {payload['error']}")
                    else:
                        response = payload.get('response')
                        cost = payload.get('cost', 0)
                        if response:
                            await self.progress.emit_provider_update(model_key, {
                                'provider': self._get_provider_name(model_key),
                                'model': self._get_model_display_name(model_key),
                                'status': 'success',
                                'completedAt': payload.get('timestamp'),
                                'latencyMs': response.processing_time * 1000 if response.processing_time else None,
                                'tokensIn': response.token_usage.input_tokens,
                                'tokensOut': response.token_usage.output_tokens,
                                'tokensCached': response.token_usage.cached_input_tokens,
                                'costUsd': cost
                            })
                            await self.progress.emit_log('INFO', f"Analysis completed for {model_key} "
                                f"({response.token_usage.input_tokens + response.token_usage.output_tokens} tokens, ${cost:.4f})")
                    # Update overall progress
                    completed_count = len([
                        p for p in self.progress.job.provider_updates.values()
                        if p.status in ['success', 'error']
                    ])
                    total_count = len(self.primary_models)
                    # Calculate progress: 25% (extraction done) + (completed/total * 50%) for analysis
                    analysis_progress = await self.progress.calculate_analysis_progress(
                        base_progress=25,
                        completed_providers=completed_count,
                        total_providers=total_count,
                        analysis_weight=50
                    )
                    await self.progress.emit(
                        JobPhase.LLM_ANALYSIS,
                        analysis_progress,
                        f"Analysis progress: {completed_count}/{total_count} models complete"
                    )
            except Exception as e:
                logger.error(f"Error in provider progress callback: {e}")
        return on_model_event

    def _get_provider_name(self, model_key: str) -> str:
        """
        Get provider name from model key

        Falls back to the text before the first '-' in the key (or 'unknown'
        when the key has no '-') if the configuration lookup fails.
        """
        from core.config import config
        try:
            provider_name, _ = config.get_model_info(model_key)
            return provider_name
        except Exception:
            # Only ordinary errors trigger the fallback; a bare `except` would
            # also swallow KeyboardInterrupt / SystemExit.
            return model_key.split('-')[0] if '-' in model_key else 'unknown'

    def _get_model_display_name(self, model_key: str) -> str:
        """Get display name for model; unknown keys are returned unchanged"""
        display_names = {
            'openai-gpt51': 'GPT-5.1',
            'anthropic-opus45': 'Claude Opus 4.5',
            'anthropic-sonnet45': 'Claude Sonnet 4.5',
            'google-gemini31': 'Gemini 3.1 Pro'
        }
        return display_names.get(model_key, model_key)

    def create_job_summary(self, result: ProcessingResult) -> JobSummary:
        """
        Create job summary from processing result

        Cost and token totals are read from the result's
        'consolidation_metadata' entry and default to 0 when absent.

        Args:
            result: Processing result

        Returns:
            JobSummary object
        """
        # Extract cost information
        consolidation_metadata = result.metadata.get('consolidation_metadata', {})
        cost_breakdown = consolidation_metadata.get('cost_breakdown', {})
        token_usage = consolidation_metadata.get('token_usage', {})
        return JobSummary(
            doc_type=result.metadata.get('doc_type', 'unknown'),
            assets_extracted=len(result.raw_data),
            confidence_score=result.confidence_score,
            notes=result.processing_notes,
            cost_usd_total=cost_breakdown.get('total_cost', 0),
            tokens_total=token_usage.get('grand_total', 0),
            primary_models=result.metadata.get('primary_models_used', []),
            consolidation_model=result.metadata.get('consolidation_model', ''),
            processing_time_seconds=None  # Will be set by job runner
        )

View file

@ -0,0 +1,258 @@
"""
Job runner that orchestrates document processing with progress reporting
"""
import asyncio
import logging
import os
import time
from datetime import datetime
from typing import Dict, Any
from ..jobs.models import Job, JobPhase, JobSummary
from ..jobs.storage import StorageManager
from ..ws.manager import WebSocketManager
from .progress import ProgressReporter, create_job_logger
from core.process_brief_enhanced import DocumentAnalyzer
logger = logging.getLogger(__name__)
async def run_job(job: Job, ws_manager: WebSocketManager) -> bool:
    """
    Execute a document processing job with progress reporting

    The job's uploaded file is analysed, the extracted records are written to
    a CSV file, and the job is marked completed. Every failure is reported
    through the progress reporter and turned into a False return value; this
    coroutine does not raise for ordinary errors.

    Args:
        job: Job to process
        ws_manager: WebSocket manager for real-time updates

    Returns:
        True if job completed successfully, False otherwise
    """
    start_time = time.time()
    # The returned logger is not used here; the call is kept for whatever
    # setup create_job_logger performs (presumably attaching the WebSocket
    # log handler — confirm).
    create_job_logger(job.id, ws_manager)
    try:
        # Create progress reporter
        progress = ProgressReporter(job, ws_manager)
        # Create analyzer with model configuration
        analyzer = DocumentAnalyzer(
            primary_models=job.model_config.primary_models,
            consolidation_model=job.model_config.consolidation_model
        )
        # Mark as GUI mode to suppress legacy print statements
        analyzer._is_gui_mode = True
        await progress.emit_log('INFO', f"Starting processing of {job.file_name}")
        await progress.emit_log('INFO', f"File size: {job.file_size:,} bytes")
        await progress.emit_log('INFO', f"Selected models: {', '.join(job.model_config.primary_models)}")
        await progress.emit_log('INFO', f"Consolidation model: {job.model_config.consolidation_model}")

        # Validate upload path exists
        if not job.upload_path or not os.path.exists(job.upload_path):
            error_msg = f"Upload file not found: {job.upload_path}"
            await progress.emit_failure(error_msg)
            return False

        # Process document
        result = await analyzer.process_document_multi_model(job.upload_path, progress)
        if not result.raw_data:
            notes = "; ".join(result.processing_notes) if result.processing_notes else ""
            error_msg = (
                "No marketing deliverables found in this document. "
                "Please ensure you upload a marketing brief (PDF, PPTX, DOCX, or XLSX) "
                "containing campaign assets or deliverables."
            )
            if notes:
                error_msg += f" (Details: {notes})"
            await progress.emit_failure(error_msg)
            return False

        # Generate output CSV
        await progress.emit(JobPhase.CSV_GENERATION, 95, "Generating CSV output")
        storage = StorageManager()
        output_path = storage.get_output_path(job.id, job.file_name)

        import csv

        def _write_csv():
            """Blocking CSV write operation for thread pool"""
            with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
                if result.raw_data:
                    # Column order comes from the first record; keys that only
                    # appear in later records are ignored by the writer.
                    headers = list(result.raw_data[0].keys())
                    writer = csv.DictWriter(csvfile, fieldnames=headers, extrasaction='ignore')
                    writer.writeheader()
                    writer.writerows(result.raw_data)

        # Run CSV writing in thread pool to avoid blocking event loop
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, _write_csv)

        # Create job summary
        processing_time = time.time() - start_time
        summary = create_job_summary(result, processing_time)
        # Generate CSV download URL
        result_csv_url = f"/api/jobs/{job.id}/download"
        # Mark job as completed
        job.mark_completed(result_csv_url, summary, output_path)
        # Emit completion event
        await progress.emit_completion(result_csv_url, summary.to_dict())
        await progress.emit_log('INFO', f"Processing completed in {processing_time:.1f} seconds")
        await progress.emit_log('INFO', f"Extracted {len(result.raw_data)} marketing assets")
        await progress.emit_log('INFO', f"Total cost: ${summary.cost_usd_total:.4f}")
        await progress.emit_log('INFO', f"Total tokens: {summary.tokens_total:,}")
        logger.info(f"Job {job.id} completed successfully: {len(result.raw_data)} assets, "
                    f"${summary.cost_usd_total:.4f}, {processing_time:.1f}s")
        return True
    except Exception as e:
        error_msg = f"Job processing failed: {str(e)}"
        logger.error(f"Job {job.id} failed: {error_msg}", exc_info=True)
        try:
            # A fresh reporter is built because the failure may have happened
            # before `progress` was assigned.
            progress = ProgressReporter(job, ws_manager)
            await progress.emit_failure(error_msg)
        except Exception:
            # Fallback if progress reporter fails. `except Exception` (not a
            # bare except) lets task cancellation propagate instead of being
            # silently swallowed here.
            job.mark_failed(error_msg)
        return False
async def process_job_queue(job_manager, ws_manager: WebSocketManager):
    """
    Background worker that processes jobs from the queue

    Loops forever: waits for a job id, looks the job up, runs it under the
    manager's processing semaphore, and acknowledges the queue item. The loop
    ends only when the task is cancelled. `task_done()` is called exactly once
    for every item that was actually dequeued, and never when `get()` itself
    failed, so the queue's unfinished-task accounting stays correct.

    Args:
        job_manager: JobManager instance (uses its `queue`, `get_job` and
            `processing_semaphore`)
        ws_manager: WebSocket manager for updates
    """
    logger.info("Starting job queue processor")
    while True:
        queue = job_manager.queue

        # Step 1: wait for work. Nothing has been dequeued yet, so a failure
        # here must not be answered with task_done().
        try:
            # Get next job from queue (blocks until available)
            job_id = await queue.get()
        except asyncio.CancelledError:
            logger.info("Job queue processor cancelled")
            break
        except Exception as e:
            logger.error(f"Error in job queue processor: {e}", exc_info=True)
            continue

        # Step 2: process the dequeued item; the `finally` acknowledges it on
        # every exit path (success, missing job, error, cancellation).
        try:
            # Get job details
            job = await job_manager.get_job(job_id)
            if not job:
                logger.warning(f"Job {job_id} not found in registry")
                continue
            logger.info(f"Processing job {job_id}: {job.file_name}")
            # Check queue size for debugging
            queue_size = queue.qsize()
            logger.info(f"Queue size before processing: {queue_size}")
            # Acquire semaphore for concurrency control
            async with job_manager.processing_semaphore:
                # Process the job
                success = await run_job(job, ws_manager)
            if success:
                logger.info(f"Job {job_id} completed successfully")
            else:
                logger.error(f"Job {job_id} failed")
        except asyncio.CancelledError:
            logger.info("Job queue processor cancelled")
            break
        except Exception as e:
            # Continue processing other jobs
            logger.error(f"Error in job queue processor: {e}", exc_info=True)
        else:
            # Check queue size after processing
            remaining_queue_size = queue.qsize()
            logger.info(f"Queue size after processing: {remaining_queue_size}")
        finally:
            # Mark task as done
            queue.task_done()
async def start_background_workers(job_manager, ws_manager: WebSocketManager, num_workers: int = 1):
    """
    Launch the queue-processing worker tasks.

    Each worker is an asyncio task running process_job_queue and is named
    "job-worker-<index>".

    Args:
        job_manager: JobManager instance handed to every worker
        ws_manager: WebSocket manager handed to every worker
        num_workers: How many worker tasks to create

    Returns:
        List of the created worker tasks, in creation order
    """
    def _spawn(index: int):
        # Create one named worker task and announce it.
        task = asyncio.create_task(
            process_job_queue(job_manager, ws_manager),
            name=f"job-worker-{index}"
        )
        logger.info(f"Started job worker {index}")
        return task

    return [_spawn(index) for index in range(num_workers)]
async def stop_background_workers(workers):
    """
    Cancel the given worker tasks and wait until all of them have finished.

    Exceptions raised by individual workers (including their cancellation)
    are collected rather than propagated; an error from the wait itself is
    logged as a warning.

    Args:
        workers: List of worker tasks to stop
    """
    logger.info("Stopping background workers...")

    # Request cancellation of every worker first, then wait for all of them.
    for task in workers:
        task.cancel()

    try:
        await asyncio.gather(*workers, return_exceptions=True)
    except Exception as exc:
        logger.warning(f"Error stopping workers: {exc}")

    logger.info("Background workers stopped")
def create_job_summary(result, processing_time: float) -> JobSummary:
    """
    Build the JobSummary for a finished job.

    Cost and token totals are taken from the 'consolidation_metadata' entry of
    the result's metadata; every missing piece falls back to a neutral default
    (0, 'unknown', '' or an empty list).

    Args:
        result: ProcessingResult from DocumentAnalyzer
        processing_time: Total processing time in seconds

    Returns:
        JobSummary object
    """
    metadata = result.metadata
    consolidation = metadata.get('consolidation_metadata', {})
    costs = consolidation.get('cost_breakdown', {})
    tokens = consolidation.get('token_usage', {})

    summary_fields = dict(
        doc_type=metadata.get('doc_type', 'unknown'),
        assets_extracted=len(result.raw_data),
        confidence_score=result.confidence_score,
        notes=result.processing_notes,
        cost_usd_total=costs.get('total_cost', 0),
        tokens_total=tokens.get('grand_total', 0),
        primary_models=metadata.get('primary_models_used', []),
        consolidation_model=metadata.get('consolidation_model', ''),
        processing_time_seconds=processing_time,
    )
    return JobSummary(**summary_fields)

View file

@ -0,0 +1,304 @@
"""
Progress reporting for job processing with WebSocket integration
"""
import logging
from datetime import datetime
from typing import Dict, Any, Optional
from ..jobs.models import Job, JobPhase, ProviderUpdate
from ..ws.manager import WebSocketManager
logger = logging.getLogger(__name__)
class ProgressReporter:
"""
Reports progress updates for job processing with WebSocket broadcasting
"""
def __init__(self, job: Job, ws_manager: WebSocketManager):
self.job = job
self.ws_manager = ws_manager
self.logger = logging.getLogger(f"{__name__}.{job.id}")
async def emit(
self,
phase: JobPhase,
progress_pct: int,
message: str = "",
step_label: str = ""
):
"""
Emit progress update for job
Args:
phase: Current processing phase
progress_pct: Progress percentage (0-100)
message: Optional progress message
step_label: Optional custom step label
"""
try:
# Update job progress
self.job.update_progress(phase, progress_pct, step_label)
# Add log entry
if message:
self.job.add_log('INFO', message)
self.logger.info(message)
# Broadcast progress update — include full job so frontend can updateJob()
await self.ws_manager.broadcast_job_update(self.job.id, {
'type': 'job.progress',
'jobId': self.job.id,
'phase': phase.value if hasattr(phase, 'value') else phase,
'progressPct': progress_pct,
'message': message,
'stepLabel': self.job.step_label,
'providerUpdates': {k: v.to_dict() for k, v in self.job.provider_updates.items()},
'job': self.job.to_dict(),
})
self.logger.debug(f"Progress update: {phase.value if hasattr(phase, 'value') else phase} {progress_pct}% - {message}")
except Exception as e:
self.logger.error(f"Failed to emit progress update: {e}")
# Don't re-raise to avoid breaking the processing pipeline
async def emit_provider_update(
self,
model_key: str,
update_data: Dict[str, Any]
):
"""
Emit provider-specific update
Args:
model_key: Model identifier (e.g., 'openai-gpt51')
update_data: Provider update information
"""
try:
# Create provider update object
provider_update = ProviderUpdate(
provider=update_data.get('provider', ''),
model=update_data.get('model', ''),
status=update_data.get('status', ''),
started_at=update_data.get('startedAt'),
completed_at=update_data.get('completedAt'),
latency_ms=update_data.get('latencyMs'),
tokens_in=update_data.get('tokensIn'),
tokens_out=update_data.get('tokensOut'),
tokens_cached=update_data.get('tokensCached'),
cost_usd=update_data.get('costUsd'),
error=update_data.get('error')
)
# Update job
self.job.update_provider(model_key, provider_update)
# Log provider update
status_msg = f"Provider {model_key}: {provider_update.status}"
if provider_update.error:
status_msg += f" - {provider_update.error}"
self.job.add_log('ERROR', status_msg)
self.logger.error(status_msg)
else:
self.job.add_log('INFO', status_msg)
self.logger.info(status_msg)
# Broadcast provider update
await self.ws_manager.broadcast_job_update(self.job.id, {
'type': 'job.provider_update',
'jobId': self.job.id,
'modelKey': model_key,
'update': provider_update.to_dict()
})
self.logger.debug(f"Provider update: {model_key} - {provider_update.status}")
except Exception as e:
self.logger.error(f"Failed to emit provider update for {model_key}: {e}")
async def emit_log(self, level: str, message: str):
"""
Emit log message with WebSocket streaming
Args:
level: Log level (DEBUG, INFO, WARNING, ERROR)
message: Log message
"""
try:
# Add to job logs
self.job.add_log(level, message)
# Log to system logger
getattr(self.logger, level.lower(), self.logger.info)(message)
# Broadcast log entry
await self.ws_manager.broadcast_job_update(self.job.id, {
'type': 'job.log',
'jobId': self.job.id,
'logEntry': {
'timestamp': datetime.utcnow().isoformat(),
'level': level,
'message': message
}
})
except Exception as e:
self.logger.error(f"Failed to emit log message: {e}")
async def calculate_analysis_progress(
self,
base_progress: int,
completed_providers: int,
total_providers: int,
analysis_weight: int = 50
) -> int:
"""
Calculate progress percentage for LLM analysis phase
Args:
base_progress: Starting progress percentage (usually 25)
completed_providers: Number of completed providers
total_providers: Total number of providers
analysis_weight: Weight of analysis phase in total progress
Returns:
Updated progress percentage
"""
if total_providers == 0:
return base_progress
analysis_progress = (completed_providers / total_providers) * analysis_weight
return min(100, base_progress + int(analysis_progress))
async def emit_completion(
self,
result_csv_url: str,
summary_data: Dict[str, Any]
):
"""
Emit job completion event
Args:
result_csv_url: URL to download CSV result
summary_data: Job summary information
"""
try:
self.job.add_log('INFO', 'Processing completed successfully')
# Broadcast completion — include full job so frontend can updateJob()
await self.ws_manager.broadcast_job_update(self.job.id, {
'type': 'job.completed',
'jobId': self.job.id,
'resultCsvUrl': result_csv_url,
'summary': summary_data,
'job': self.job.to_dict(),
})
self.logger.info(f"Job {self.job.id} completed successfully")
except Exception as e:
self.logger.error(f"Failed to emit completion event: {e}")
async def emit_failure(self, error: str):
    """
    Announce that the job failed.

    Args:
        error: Human-readable error message

    Marks the job as failed, adds an ERROR entry to the job log, broadcasts
    a ``job.failed`` message and writes an error line to the logger.
    Failures inside this method are reported through ``self.logger.error``
    and not re-raised.
    """
    try:
        self.job.mark_failed(error)
        self.job.add_log('ERROR', f'Processing failed: {error}')

        # The full job is included so the frontend can updateJob()
        job_id = self.job.id
        failure_event = {
            'type': 'job.failed',
            'jobId': self.job.id,
            'error': error,
            'job': self.job.to_dict(),
        }
        await self.ws_manager.broadcast_job_update(job_id, failure_event)

        self.logger.error(f"Job {self.job.id} failed: {error}")
    except Exception as e:
        self.logger.error(f"Failed to emit failure event: {e}")
class JobLogHandler(logging.Handler):
    """
    Logging handler that forwards formatted records for one job to WebSocket
    clients as ``job.log`` messages.

    Delivery is best-effort: a record emitted while no asyncio event loop is
    running in the calling thread is formatted and then dropped.
    """

    def __init__(self, job_id: str, ws_manager: "WebSocketManager"):
        """
        Args:
            job_id: Identifier placed in every outgoing message
            ws_manager: Object exposing an async
                ``broadcast_job_update(job_id, message)`` method
        """
        super().__init__()
        self.job_id = job_id
        self.ws_manager = ws_manager
        # Strong references to in-flight broadcast tasks. The event loop only
        # holds weak references to tasks, so an otherwise unreferenced task
        # can be garbage-collected before it completes.
        self._pending_tasks = set()
        # Set up formatter for log messages
        self.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        ))

    def emit(self, record):
        """
        Format a log record and schedule its broadcast via WebSocket.

        Args:
            record: LogRecord to process

        Never raises: errors are printed so that logging cannot break the
        application.
        """
        try:
            message = self.format(record)
            log_entry = {
                'timestamp': datetime.utcnow().isoformat(),
                'level': record.levelname,
                'message': message,
                'logger': record.name
            }

            # Send via WebSocket (non-blocking)
            import asyncio
            try:
                # get_running_loop() raises RuntimeError unless a loop is
                # actually running in this thread. get_event_loop() could hand
                # back (or create) an idle loop, leaving the task queued forever.
                loop = asyncio.get_running_loop()
                task = loop.create_task(self.ws_manager.broadcast_job_update(self.job_id, {
                    'type': 'job.log',
                    'jobId': self.job_id,
                    'logEntry': log_entry
                }))
            except RuntimeError:
                # No running event loop, skip WebSocket update
                return
            self._pending_tasks.add(task)
            task.add_done_callback(self._pending_tasks.discard)
        except Exception as e:
            # Don't let logging errors break the application
            print(f"JobLogHandler error: {e}")
def create_job_logger(job_id: str, ws_manager: WebSocketManager) -> logging.Logger:
    """
    Build the logger named ``job.<job_id>`` and wire it to WebSocket streaming.

    Args:
        job_id: Job identifier
        ws_manager: WebSocket manager instance

    Returns:
        The logger, at INFO level, carrying exactly one JobLogHandler
        (handlers attached earlier are dropped to avoid duplicates).
    """
    job_logger = logging.getLogger(f"job.{job_id}")
    job_logger.handlers.clear()

    ws_handler = JobLogHandler(job_id, ws_manager)
    ws_handler.setLevel(logging.INFO)
    job_logger.addHandler(ws_handler)

    job_logger.setLevel(logging.INFO)
    return job_logger

View file

View file

@ -0,0 +1,143 @@
"""
Sheet management, backed by PostgreSQL.
All functions are async.
"""
import logging
import re
from datetime import datetime, timezone
from typing import List, Optional, Dict
import time
import random
from ..db.pool import get_pool
logger = logging.getLogger(__name__)
async def get_user_sheets(user_id: str) -> List[Dict]:
    """Return metadata dicts for every sheet owned by ``user_id``, most recently modified first."""
    query = (
        'SELECT id, name, client_id, item_count, created_at, modified_at '
        'FROM sheets WHERE user_id = $1 ORDER BY modified_at DESC'
    )
    async with get_pool().acquire() as conn:
        records = await conn.fetch(query, user_id)

    sheets: List[Dict] = []
    for record in records:
        sheets.append(_row_to_meta(record, user_id))
    return sheets
async def create_sheet(user_id: str, name: str, data: List[dict] = None, client_id: str = '') -> Dict:
    """
    Insert a new sheet for ``user_id`` and return its metadata dict.

    Args:
        user_id: Owner of the sheet
        name: Sheet name; a falsy value yields "Untitled Sheet — <local date time>"
        data: Initial rows; ``None`` is treated as an empty list
        client_id: Associated client; a falsy value is stored as NULL
    """
    rows = [] if data is None else data

    # NOTE(review): the id is epoch seconds followed by three random digits, so
    # two sheets created within the same second can collide — confirm how the
    # sheets.id constraint / callers deal with that.
    sheet_id = f"{int(time.time())}{random.randint(100, 999)}"

    if name:
        sheet_name = name
    else:
        sheet_name = f"Untitled Sheet — {datetime.now().strftime('%Y-%m-%d %H:%M')}"

    stored_client = client_id if client_id else None

    async with get_pool().acquire() as conn:
        record = await conn.fetchrow('''
            INSERT INTO sheets (id, user_id, name, client_id, data, item_count)
            VALUES ($1, $2, $3, $4, $5, $6)
            RETURNING id, name, client_id, item_count, created_at, modified_at
        ''', sheet_id, user_id, sheet_name, stored_client, rows, len(rows))
    return _row_to_meta(record, user_id)
async def load_sheet_data(user_id: str, sheet_id: str) -> Optional[List[dict]]:
    """
    Fetch the ``data`` column of one sheet owned by ``user_id``.

    Returns ``None`` when no such sheet exists and ``[]`` when the stored
    value is NULL.
    """
    async with get_pool().acquire() as conn:
        record = await conn.fetchrow(
            'SELECT data FROM sheets WHERE id = $1 AND user_id = $2',
            sheet_id, user_id
        )

    if record is None:
        return None
    stored = record['data']
    return [] if stored is None else stored
async def update_sheet(user_id: str, sheet_id: str, data: List[dict]) -> bool:
    """
    Replace a sheet's rows, refresh ``item_count`` and bump ``modified_at``.

    Returns ``False`` when the statement reports ``UPDATE 0`` (no row matched
    this id and owner), ``True`` otherwise.
    """
    async with get_pool().acquire() as conn:
        status = await conn.execute('''
            UPDATE sheets SET data = $3, item_count = $4, modified_at = NOW()
            WHERE id = $1 AND user_id = $2
        ''', sheet_id, user_id, data, len(data))
    nothing_updated = status == 'UPDATE 0'
    return not nothing_updated
async def delete_sheet(user_id: str, sheet_id: str):
    """Delete the sheet with ``sheet_id`` if it belongs to ``user_id``; returns nothing."""
    statement = 'DELETE FROM sheets WHERE id = $1 AND user_id = $2'
    async with get_pool().acquire() as conn:
        await conn.execute(statement, sheet_id, user_id)
async def rename_sheet(user_id: str, sheet_id: str, new_name: str) -> bool:
    """
    Set a new name on a sheet owned by ``user_id`` and bump ``modified_at``.

    Returns ``False`` when the statement reports ``UPDATE 0`` (no row matched
    this id and owner), ``True`` otherwise.
    """
    async with get_pool().acquire() as conn:
        status = await conn.execute('''
            UPDATE sheets SET name = $3, modified_at = NOW()
            WHERE id = $1 AND user_id = $2
        ''', sheet_id, user_id, new_name)
    nothing_updated = status == 'UPDATE 0'
    return not nothing_updated
async def duplicate_sheet(user_id: str, sheet_id: str) -> Optional[Dict]:
    """
    Copy a sheet's name and rows into a new sheet called "Copy of <name>".

    Returns the new sheet's metadata, or ``None`` when the source sheet does
    not exist for this user. Only ``name`` and ``data`` are read from the
    source row.
    """
    async with get_pool().acquire() as conn:
        source = await conn.fetchrow(
            'SELECT name, data FROM sheets WHERE id = $1 AND user_id = $2',
            sheet_id, user_id
        )

    if source is None:
        return None

    copy_name = f"Copy of {source['name']}"
    return await create_sheet(user_id, copy_name, source['data'])
async def get_sheet_client_id(user_id: str, sheet_id: str) -> Optional[str]:
    """Return the ``client_id`` stored on the sheet, or ``None`` when the sheet is not found for this user."""
    async with get_pool().acquire() as conn:
        record = await conn.fetchrow(
            'SELECT client_id FROM sheets WHERE id = $1 AND user_id = $2',
            sheet_id, user_id
        )
    return None if record is None else record['client_id']
async def set_sheet_client_id(user_id: str, sheet_id: str, client_id: str):
    """Store ``client_id`` on the sheet (a falsy value is written as NULL) and bump ``modified_at``."""
    stored_client = client_id if client_id else None
    async with get_pool().acquire() as conn:
        await conn.execute('''
            UPDATE sheets SET client_id = $3, modified_at = NOW()
            WHERE id = $1 AND user_id = $2
        ''', sheet_id, user_id, stored_client)
def generate_next_id(data: List[dict]) -> str:
    """Generate the next DEL-NNN id. Remains sync — operates on in-memory data.

    Args:
        data: Sheet rows; each row's ``Number`` is expected to look like ``DEL-007``.

    Returns:
        ``DEL-`` followed by the highest existing number plus one, zero-padded
        to at least three digits (``DEL-001`` for an empty sheet).

    Rows whose ``Number`` is missing, not a string, or not numeric once the
    prefix is removed are ignored.
    """
    max_id = 0
    for row in data:
        try:
            # The lookup and .replace() must sit inside the try: a None or
            # non-string Number raises AttributeError here, not in int().
            n = int(row.get('Number', '').replace('DEL-', ''))
        except (ValueError, AttributeError):
            continue
        if n > max_id:
            max_id = n
    return f"DEL-{str(max_id + 1).zfill(3)}"
def _row_to_meta(row, user_id: str) -> Dict:
return {
'id': row['id'],
'name': row['name'],
'client_id': row['client_id'],
'itemCount': row['item_count'],
'user': user_id,
'created': row['created_at'].isoformat() if row['created_at'] else None,
'modified': row['modified_at'].isoformat() if row['modified_at'] else None,
}

View file

@ -0,0 +1,73 @@
"""
Pydantic models for sheets and deliverables.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import List, Optional
from pydantic import BaseModel, Field
class Deliverable(BaseModel):
    """
    One deliverable row of a sheet.

    Three fields carry aliases matching the column headers used in the raw
    row dicts ("Sub-media", "Supply date", "Live date"); ``populate_by_name``
    lets them be set through either the alias or the attribute name.
    """
    Number: str = ""
    Title: str = ""
    Status: str = "Booked"
    Category: str = ""
    Media: str = ""
    SubMedia: str = Field(default="", alias="Sub-media")
    Format: str = ""
    SupplyDate: str = Field(default="", alias="Supply date")
    LiveDate: str = Field(default="", alias="Live date")
    Language: str = ""
    Country: str = ""
    Quantity: int = 1

    class Config:
        populate_by_name = True

    def to_dict(self) -> dict:
        """Return the row as a plain dict keyed by column header (aliases, not attribute names)."""
        return {
            "Number": self.Number,
            "Title": self.Title,
            "Status": self.Status,
            "Category": self.Category,
            "Media": self.Media,
            "Sub-media": self.SubMedia,
            "Format": self.Format,
            "Supply date": self.SupplyDate,
            "Live date": self.LiveDate,
            "Language": self.Language,
            "Country": self.Country,
            "Quantity": self.Quantity,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Deliverable":
        """
        Build a Deliverable from a raw row dict keyed by column header.

        Missing keys fall back to the field defaults. A ``Quantity`` that is
        ``None`` or a blank string is treated as the default of 1 instead of
        raising from ``int()``; any other non-numeric value still raises.
        """
        quantity = d.get("Quantity", 1)
        if quantity is None or (isinstance(quantity, str) and not quantity.strip()):
            quantity = 1

        return cls(
            Number=d.get("Number", ""),
            Title=d.get("Title", ""),
            Status=d.get("Status", "Booked"),
            Category=d.get("Category", ""),
            Media=d.get("Media", ""),
            **{"Sub-media": d.get("Sub-media", "")},
            Format=d.get("Format", ""),
            **{"Supply date": d.get("Supply date", "")},
            **{"Live date": d.get("Live date", "")},
            Language=d.get("Language", ""),
            Country=d.get("Country", ""),
            Quantity=int(quantity),
        )
class SheetMeta(BaseModel):
    """Descriptive metadata of a sheet, without its rows."""
    id: str
    name: str
    created: str
    modified: str
    itemCount: int
    user: str
class Sheet(BaseModel):
    """A sheet: its metadata plus the row data."""
    meta: SheetMeta
    # Rows are kept as raw dicts for speed; validated on write
    data: List[dict]

13
backend/server/ws/__init__.py Executable file
View file

@ -0,0 +1,13 @@
"""
WebSocket module for real-time communication
"""
from .manager import WebSocketManager
# Shared manager instance, re-exported at package level
ws_manager = WebSocketManager()

__all__ = ['WebSocketManager', 'ws_manager']

300
backend/server/ws/manager.py Executable file
View file

@ -0,0 +1,300 @@
"""
WebSocket connection and message management
"""
import asyncio
import json
import logging
from datetime import datetime
from typing import Dict, Set, Any, Optional
import uuid
from weakref import WeakSet
from quart import websocket
from ..config_runtime import server_config
logger = logging.getLogger(__name__)
class WebSocketClient:
    """One connected WebSocket peer together with the identity of its owner."""

    def __init__(self, client_id: str, user_id: Optional[str] = None):
        """
        Capture the WebSocket of the current request context.

        Args:
            client_id: Identifier assigned to this connection
            user_id: Owner of the connection; stored as 'anonymous' when falsy
        """
        self.client_id = client_id
        self.user_id = user_id if user_id else 'anonymous'
        self.connected_at = datetime.utcnow()
        self.last_ping = datetime.utcnow()
        # Unwrap the context-local proxy so the socket stays usable later
        self.websocket = websocket._get_current_object()

    async def send(self, message: Dict[str, Any]):
        """Serialise ``message`` to JSON and send it; a failure is logged and re-raised."""
        try:
            encoded = json.dumps(message)
            await self.websocket.send(encoded)
        except Exception as e:
            logger.warning(f"Failed to send message to client {self.client_id}: {e}")
            raise

    async def ping(self):
        """Send a ``ping`` message and record the send time in ``last_ping``; a failure is logged and re-raised."""
        try:
            ping_message = {'type': 'ping', 'timestamp': datetime.utcnow().isoformat()}
            await self.send(ping_message)
            self.last_ping = datetime.utcnow()
        except Exception as e:
            logger.warning(f"Failed to ping client {self.client_id}: {e}")
            raise
class WebSocketManager:
"""
Manages WebSocket connections and broadcasts
Singleton for coordinating real-time updates
"""
_instance: Optional['WebSocketManager'] = None
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
return cls._instance
def __init__(self):
if hasattr(self, '_initialized'):
return
self._initialized = True
self.clients: Dict[str, WebSocketClient] = {}
self._lock = asyncio.Lock()
# Start background tasks
self.ping_task = None
self.cleanup_task = None
logger.info("WebSocketManager initialized")
async def start_background_tasks(self):
"""Start background maintenance tasks"""
if not self.ping_task:
self.ping_task = asyncio.create_task(self._ping_clients_loop())
if not self.cleanup_task:
self.cleanup_task = asyncio.create_task(self._cleanup_disconnected_loop())
async def stop_background_tasks(self):
"""Stop background maintenance tasks"""
if self.ping_task:
self.ping_task.cancel()
try:
await self.ping_task
except asyncio.CancelledError:
pass
if self.cleanup_task:
self.cleanup_task.cancel()
try:
await self.cleanup_task
except asyncio.CancelledError:
pass
async def register_client(self, user_id: Optional[str] = None) -> WebSocketClient:
"""
Register a new WebSocket client
Args:
user_id: User identifier (optional for dev mode)
Returns:
WebSocketClient instance
"""
client_id = str(uuid.uuid4())
client = WebSocketClient(client_id, user_id)
async with self._lock:
self.clients[client_id] = client
logger.info(f"Registered WebSocket client {client_id} for user {user_id}")
# Send initial connection acknowledgment
await client.send({
'type': 'connection.established',
'clientId': client_id,
'userId': user_id,
'connectedAt': client.connected_at.isoformat()
})
return client
async def unregister_client(self, client_id: str):
"""
Unregister a WebSocket client
Args:
client_id: Client identifier
"""
async with self._lock:
if client_id in self.clients:
client = self.clients.pop(client_id)
logger.info(f"Unregistered WebSocket client {client_id} for user {client.user_id}")
async def broadcast_to_all(self, message: Dict[str, Any]):
"""
Broadcast message to all connected clients
Args:
message: Message to broadcast
"""
if not self.clients:
return
# Add timestamp to message
message['timestamp'] = datetime.utcnow().isoformat()
async with self._lock:
clients_to_remove = []
for client_id, client in self.clients.items():
try:
await client.send(message)
except Exception as e:
logger.warning(f"Failed to send to client {client_id}: {e}")
clients_to_remove.append(client_id)
# Remove failed clients
for client_id in clients_to_remove:
self.clients.pop(client_id, None)
async def broadcast_to_user(self, user_id: str, message: Dict[str, Any]):
"""
Broadcast message to all connections for a specific user
Args:
user_id: User identifier
message: Message to broadcast
"""
if not self.clients:
return
# Add timestamp to message
message['timestamp'] = datetime.utcnow().isoformat()
async with self._lock:
clients_to_remove = []
sent_count = 0
for client_id, client in self.clients.items():
if client.user_id == user_id:
try:
await client.send(message)
sent_count += 1
except Exception as e:
logger.warning(f"Failed to send to client {client_id}: {e}")
clients_to_remove.append(client_id)
# Remove failed clients
for client_id in clients_to_remove:
self.clients.pop(client_id, None)
if sent_count > 0:
logger.debug(f"Broadcast message to {sent_count} clients for user {user_id}")
async def broadcast_job_update(self, job_id: str, message: Dict[str, Any]):
"""
Broadcast job-specific update
Args:
job_id: Job identifier
message: Message to broadcast
"""
# For now, broadcast to all clients
# In the future, we could implement job-specific subscriptions
message['jobId'] = job_id
await self.broadcast_to_all(message)
async def send_queue_snapshot(self, client: WebSocketClient, jobs_data: list):
"""
Send initial queue snapshot to a client
Args:
client: WebSocket client
jobs_data: Serialized jobs data
"""
try:
await client.send({
'type': 'queue.snapshot',
'jobs': jobs_data
})
logger.debug(f"Sent queue snapshot to client {client.client_id}")
except Exception as e:
logger.error(f"Failed to send queue snapshot to {client.client_id}: {e}")
raise
async def get_connection_stats(self) -> Dict[str, Any]:
"""
Get WebSocket connection statistics
Returns:
Statistics dictionary
"""
async with self._lock:
user_counts = {}
for client in self.clients.values():
user_counts[client.user_id] = user_counts.get(client.user_id, 0) + 1
return {
'total_connections': len(self.clients),
'unique_users': len(user_counts),
'connections_per_user': user_counts,
'uptime_seconds': (datetime.utcnow() -
min((c.connected_at for c in self.clients.values()),
default=datetime.utcnow())).total_seconds()
}
async def _ping_clients_loop(self):
"""Background task to ping clients periodically"""
while True:
try:
await asyncio.sleep(server_config.WS_PING_INTERVAL_SECONDS)
async with self._lock:
clients_to_remove = []
for client_id, client in self.clients.items():
try:
await client.ping()
except Exception:
clients_to_remove.append(client_id)
# Remove failed clients
for client_id in clients_to_remove:
self.clients.pop(client_id, None)
logger.debug(f"Removed unresponsive client {client_id}")
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"Error in ping loop: {e}")
async def _cleanup_disconnected_loop(self):
"""Background task to clean up disconnected clients"""
while True:
try:
await asyncio.sleep(60) # Check every minute
async with self._lock:
# Clean up clients that haven't been pinged recently
cutoff = datetime.utcnow().timestamp() - (server_config.WS_PING_INTERVAL_SECONDS * 3)
clients_to_remove = []
for client_id, client in self.clients.items():
if client.last_ping.timestamp() < cutoff:
clients_to_remove.append(client_id)
for client_id in clients_to_remove:
self.clients.pop(client_id, None)
logger.debug(f"Cleaned up stale client {client_id}")
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"Error in cleanup loop: {e}")
# Module-level shared instance. WebSocketManager.__new__ hands back the same
# object on every construction, so this is that single manager.
ws_manager = WebSocketManager()

View file

@ -1,8 +0,0 @@
<?php
// API Configuration
$GEMINI_API_KEY = 'AIzaSyC2DrDCeNIhI531JcXCF9uolTMU_KBcjDY';
// User Configuration (hardcoded for now, will be SSO later)
$CURRENT_USER = 'daveporter@oliver.agency';
$SSO_ENABLED = false; // Will be true when SSO is implemented
?>

235
deploy.sh Executable file
View file

@ -0,0 +1,235 @@
#!/usr/bin/env bash
# deploy.sh — idempotent deployment script for ac-tool
# Usage: sudo bash /opt/ac-tool/deploy.sh
# Abort on the first failing command (-e), on use of an unset variable (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# ── Config ────────────────────────────────────────────────────────────────────
# Directory containing this script; every other path is derived from it.
APP_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Where the built frontend is published, and the account that will own it.
WEB_DIR="/var/www/html/ac-helper"
WEB_USER="www-data"
ENV_FILE="$APP_DIR/.env"
ENV_EXAMPLE="$APP_DIR/.env.example"
# Container names used below for `docker exec`, `docker ps` and log hints.
CONTAINER_NAME="ac-tool"
DB_CONTAINER_NAME="ac-tool-db"
# Temporary image tag for the frontend build stage; the image is removed on exit.
FE_BUILD_TAG="ac-tool-fe-extract"
# ── Colours ───────────────────────────────────────────────────────────────────
# Console helpers. The colour codes live in the printf format string and the
# message is passed as a %s argument, so backslashes inside a message (for
# example in a path) are printed literally instead of being interpreted.
log()  { printf '\033[1;34m▶\033[0m %s\n' "$*"; }                 # step banner (blue)
ok()   { printf '\033[1;32m✔\033[0m %s\n' "$*"; }                 # success (green)
warn() { printf '\033[1;33m⚠\033[0m %s\n' "$*"; }                 # warning (yellow)
die()  { printf '\033[1;31m✖\033[0m %s\n' "$*" >&2; exit 1; }     # error to stderr, then exit 1
hr()   { echo "────────────────────────────────────────────────────────────"; }  # horizontal rule
# Banner: timestamp and the directory being deployed.
hr
echo " AC Tool — deployment"
echo " $(date '+%Y-%m-%d %H:%M:%S') | dir: $APP_DIR"
hr
# ── 1. Root check ─────────────────────────────────────────────────────────────
# Later steps write to $WEB_DIR and chown files, so root is required.
if [[ $EUID -ne 0 ]]; then
die "Run as root: sudo bash $0"
fi
# ── 2. Prerequisites ──────────────────────────────────────────────────────────
log "Checking prerequisites..."
command -v docker >/dev/null 2>&1 || die "Docker not installed"
command -v git >/dev/null 2>&1 || die "Git not installed"
docker compose version >/dev/null 2>&1 || die "Docker Compose plugin not found (need 'docker compose', not 'docker-compose')"
ok "Prerequisites OK"
# ── 3. .env setup ─────────────────────────────────────────────────────────────
log "Checking .env..."
# First run: seed .env from the example and stop (exit 0) so the operator can
# fill it in before anything is built or restarted.
if [[ ! -f "$ENV_FILE" ]]; then
[[ ! -f "$ENV_EXAMPLE" ]] && die ".env.example not found in $APP_DIR"
cp "$ENV_EXAMPLE" "$ENV_FILE"
warn ".env created from .env.example"
warn "Fill in the required values and re-run:"
warn " nano $ENV_FILE"
exit 0
fi
# Load .env into the current shell so we can read APP_PORT etc.
# allexport marks every variable assigned while sourcing as exported.
set -o allexport
# shellcheck disable=SC1090
source "$ENV_FILE"
set +o allexport
# Defaults keep `set -u` from aborting when the keys are absent from .env.
APP_PORT="${APP_PORT:-8100}"
POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-}"
# Warn about blank required keys (a warning only — deployment continues).
MISSING=""
for KEY in GEMINI_API_KEY ADMIN_EMAIL SESSION_SECRET POSTGRES_PASSWORD; do
# ${!KEY} is indirect expansion: the value of the variable named by $KEY.
VAL="${!KEY:-}"
[[ -z "$VAL" ]] && MISSING="$MISSING $KEY"
done
[[ -n "$MISSING" ]] && warn "These keys are empty in .env:$MISSING"
ok ".env loaded (APP_PORT=$APP_PORT)"
# ── 4. Port check ─────────────────────────────────────────────────────────────
log "Checking port $APP_PORT..."
# Capture command output first and grep the captured text. With `set -o pipefail`
# a `producer | grep -q` pipeline can report failure even on a match: grep -q
# exits at the first hit and the producer may then be killed by SIGPIPE.
# `|| true` keeps the previous behaviour of treating a failing `ss`/`docker ps`
# as "nothing found" rather than aborting under `set -e`.
LISTENING_SOCKETS="$(ss -tlnp || true)"
if grep -q ":${APP_PORT} " <<<"$LISTENING_SOCKETS"; then
    RUNNING_CONTAINERS="$(docker ps --format '{{.Names}} {{.Ports}}' || true)"
    if grep -q "${CONTAINER_NAME}.*${APP_PORT}" <<<"$RUNNING_CONTAINERS"; then
        # The listener is our own container; it is restarted further down.
        warn "Port $APP_PORT already used by our container (will be restarted)"
    else
        die "Port $APP_PORT is occupied by another process. Change APP_PORT in .env or free the port."
    fi
else
    ok "Port $APP_PORT is free"
fi
# ── 5. Git pull ───────────────────────────────────────────────────────────────
# Run git as the invoking user (not root) so SSH keys work
# SUDO_USER is set by sudo to the original account; fall back to the current user.
GIT_USER="${SUDO_USER:-$(whoami)}"
log "Pulling latest code (as $GIT_USER)..."
cd "$APP_DIR"
sudo -u "$GIT_USER" git fetch origin
# Compare the local commit with the upstream of the current branch ('@{u}').
# NOTE(review): '@{u}' fails when the branch has no upstream configured, which
# aborts the script under `set -e` — confirm every deploy checkout tracks a remote branch.
LOCAL=$(git rev-parse HEAD)
REMOTE=$(git rev-parse '@{u}')
if [[ "$LOCAL" == "$REMOTE" ]]; then
ok "Already up to date ($(git rev-parse --short HEAD))"
else
# --ff-only refuses to create a merge commit; any failure stops the deployment.
sudo -u "$GIT_USER" git pull --ff-only || die "git pull failed — local changes detected. Stash or reset them first."
ok "Updated to $(git rev-parse --short HEAD)"
fi
# ── 6. Docker build ───────────────────────────────────────────────────────────
log "Building Docker image..."
# --pull asks for newer versions of the base images before building.
docker compose build --pull
ok "Docker image built"
# ── 7. Extract frontend from Docker build stage ───────────────────────────────
log "Building and extracting frontend..."
# Use a script-private variable name. TMPDIR is the conventional environment
# variable that mktemp and other tools consult for their scratch location, so
# reassigning it (when it is exported) could steer unrelated temp files into
# the directory that is published below.
FE_TMP_DIR=$(mktemp -d)
# On any exit: remove the scratch directory and the temporary build image.
trap 'rm -rf "$FE_TMP_DIR"; docker rmi "$FE_BUILD_TAG" >/dev/null 2>&1 || true' EXIT
docker build --target frontend-builder --tag "$FE_BUILD_TAG" "$APP_DIR"
# A created (never started) container is enough to copy files out of the image.
EXTRACT_CONTAINER=$(docker create "$FE_BUILD_TAG")
docker cp "$EXTRACT_CONTAINER:/app/frontend/dist/." "$FE_TMP_DIR/"
docker rm "$EXTRACT_CONTAINER" >/dev/null
FILE_COUNT=$(find "$FE_TMP_DIR" -type f | wc -l)
# Refuse to continue with an unusable build: the next step wipes $WEB_DIR, and
# the web server config falls back to index.html for every route.
[[ -f "$FE_TMP_DIR/index.html" ]] || die "Frontend build produced no index.html ($FILE_COUNT files extracted) — $WEB_DIR left untouched"
ok "Frontend built: $FILE_COUNT files"
# ── 8. Deploy frontend static files ───────────────────────────────────────────
log "Deploying frontend to $WEB_DIR..."
mkdir -p "$WEB_DIR"
# Empty the web root (keeping the directory itself) so stale assets disappear.
find "${WEB_DIR}" -mindepth 1 -delete
cp -r "$FE_TMP_DIR/." "$WEB_DIR/"
chown -R "$WEB_USER:$WEB_USER" "$WEB_DIR"
# Capital X grants execute/search only to directories (and files that already
# have an execute bit): directories end up 755, plain static files 644.
chmod -R u=rwX,go=rX "$WEB_DIR"
ok "Frontend deployed to $WEB_DIR"
# ── 9. Restart containers ─────────────────────────────────────────────────────
log "Restarting containers (app + postgres)..."
# `down` is best-effort (errors ignored, e.g. nothing running yet); `up -d` must succeed.
docker compose down --remove-orphans 2>/dev/null || true
docker compose up -d
ok "Containers started"
# ── 10. Wait for PostgreSQL ────────────────────────────────────────────────────
log "Waiting for PostgreSQL to be ready..."
# Poll up to 30 times, sleeping 2 seconds between attempts.
for i in $(seq 1 30); do
if docker exec "$DB_CONTAINER_NAME" pg_isready -U achelper -d achelper >/dev/null 2>&1; then
ok "PostgreSQL is ready"
break
fi
if [[ $i -eq 30 ]]; then
die "PostgreSQL did not become ready after 60s. Check logs: docker logs $DB_CONTAINER_NAME"
fi
sleep 2
done
# ── 11. Health check ──────────────────────────────────────────────────────────
log "Waiting for application to be healthy..."
HEALTH_URL="http://localhost:${APP_PORT}/health"
# Same polling scheme as above; curl -f turns HTTP error statuses into a non-zero exit.
for i in $(seq 1 30); do
if curl -sf "$HEALTH_URL" >/dev/null 2>&1; then
ok "Application is healthy"
break
fi
if [[ $i -eq 30 ]]; then
die "Health check failed after 60s. Check logs: docker logs $CONTAINER_NAME"
fi
sleep 2
done
# ── 12. Data directory ────────────────────────────────────────────────────────
# NOTE(review): this runs after `docker compose up -d` above — if ./data is
# bind-mounted by the compose file, confirm it is acceptable for the directory
# to be first created by Docker rather than here.
DATA_DIR="$APP_DIR/data"
mkdir -p "$DATA_DIR/uploads" "$DATA_DIR/outputs"
ok "Data directories ready at $DATA_DIR"
# ── 13. JSON → PostgreSQL migration (first deploy after adding Postgres) ───────
#
# If old JSON data files exist, offer to run the one-time migration script
# that imports users, clients, dropdowns, sheets, and export templates into DB.
# The marker file records that the migration has run, so the prompt is shown
# only until it exists.
#
JSON_MIGRATION_MARKER="$DATA_DIR/.pg_migrated"
if [[ ! -f "$JSON_MIGRATION_MARKER" ]]; then
    HAS_JSON=false
    for f in "$DATA_DIR/users.json" "$DATA_DIR/sheets_metadata.json" "$DATA_DIR/clients.json"; do
        if [[ -f "$f" ]]; then
            HAS_JSON=true
            break
        fi
    done
    if [[ "$HAS_JSON" == "true" ]]; then
        echo ""
        warn "Old JSON data files detected. Run the one-time migration to import them into PostgreSQL?"
        warn " yes — migrate now (recommended)"
        warn " no — skip (data already in DB or you'll migrate manually)"
        # Only prompt when a terminal can actually be opened. Without this probe
        # the `</dev/tty` redirection fails in non-interactive runs (cron, CI)
        # and `set -e` aborts the script after the deployment already happened.
        MIGRATE_ANSWER="no"
        if ( : </dev/tty ) 2>/dev/null; then
            read -r -p " Migrate now? [yes/no]: " MIGRATE_ANSWER </dev/tty || MIGRATE_ANSWER="no"
        else
            warn "No terminal available — not prompting."
        fi
        if [[ "${MIGRATE_ANSWER,,}" == "yes" ]]; then
            log "Running JSON → PostgreSQL migration..."
            # The marker is written only when the migration command succeeds.
            if docker exec "$CONTAINER_NAME" python -m server.db.migrate_json; then
                touch "$JSON_MIGRATION_MARKER"
                ok "Migration complete. Marker written to $JSON_MIGRATION_MARKER"
            else
                warn "Migration reported errors — check logs above. Re-run manually with: docker exec $CONTAINER_NAME python -m server.db.migrate_json"
            fi
        else
            warn "Skipped. To migrate manually: docker exec $CONTAINER_NAME python -m server.db.migrate_json"
            warn "To suppress this prompt in future deployments, create: $JSON_MIGRATION_MARKER"
        fi
    fi
fi
# ── 14. Apache config reminder ────────────────────────────────────────────────
# Print a ready-to-paste snippet when no enabled Apache site mentions "ac-helper".
# Nothing is written to the Apache configuration by this script.
if ! grep -rq "ac-helper" /etc/apache2/sites-enabled/ 2>/dev/null; then
echo ""
hr
echo " Apache config not detected — add this inside your VirtualHost block:"
hr
# Unquoted heredoc delimiter: ${APP_PORT} below is expanded to the configured port.
cat <<APACHE
# Required: a2enmod proxy proxy_http proxy_wstunnel
# Proxy API to Docker
<Location /ac-helper/api/>
ProxyPass http://localhost:${APP_PORT}/api/
ProxyPassReverse http://localhost:${APP_PORT}/api/
</Location>
# Proxy WebSocket
ProxyPass /ac-helper/ws ws://localhost:${APP_PORT}/ws
ProxyPassReverse /ac-helper/ws ws://localhost:${APP_PORT}/ws
# Serve frontend static files
Alias /ac-helper/ /var/www/html/ac-helper/
<Directory /var/www/html/ac-helper>
Options -Indexes
AllowOverride None
Require all granted
FallbackResource /ac-helper/index.html
</Directory>
APACHE
hr
fi
# ── Summary ───────────────────────────────────────────────────────────────────
# Recap of where to look next: log commands, health URL, published paths, deployed commit.
echo ""
ok "Deployment complete!"
echo " App container: docker logs -f $CONTAINER_NAME"
echo " DB container: docker logs -f $DB_CONTAINER_NAME"
echo " Health: $HEALTH_URL"
echo " Frontend: $WEB_DIR ($FILE_COUNT files)"
echo " Data: $DATA_DIR"
echo " Commit: $(git -C "$APP_DIR" rev-parse --short HEAD)"
echo ""

139
docker-compose.yml Normal file
View file

@ -0,0 +1,139 @@
# Apache reverse proxy config (add inside your VirtualHost block):
#
# # Required modules: a2enmod proxy proxy_http proxy_wstunnel
#
# # Proxy API requests to the Docker container
# <Location /ac-helper/api/>
# ProxyPass http://localhost:8100/api/
# ProxyPassReverse http://localhost:8100/api/
# </Location>
#
# # Proxy WebSocket
# ProxyPass /ac-helper/ws ws://localhost:8100/ws
# ProxyPassReverse /ac-helper/ws ws://localhost:8100/ws
#
# # Serve frontend static files directly from disk
# Alias /ac-helper/ /var/www/html/ac-helper/
# <Directory /var/www/html/ac-helper>
# Options -Indexes
# AllowOverride None
# Require all granted
# FallbackResource /ac-helper/index.html
# </Directory>
#
# Apache serves static files; Docker handles /api and /ws only.
# APP_PORT in .env controls the host port (default: 8100).
version: '3.9'
services:
postgres:
image: postgres:16-alpine
container_name: ac-tool-db
restart: unless-stopped
environment:
POSTGRES_DB: achelper
POSTGRES_USER: achelper
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-achelper_secret}
volumes:
- postgres_data:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready -U achelper -d achelper"]
interval: 10s
timeout: 5s
retries: 5
app:
build: .
container_name: ac-tool
restart: unless-stopped
depends_on:
postgres:
condition: service_healthy
ports:
- "${APP_PORT:-8100}:8000"
volumes:
- ./data:/app/data
environment:
DATABASE_URL: postgresql://achelper:${POSTGRES_PASSWORD:-achelper_secret}@postgres:5432/achelper
# Auth — AZURE_* names in .env, MSAL_* names read by server/config_runtime.py
AZURE_TENANT_ID: ${AZURE_TENANT_ID:-e519c2e6-bc6d-4fdf-8d9c-923c2f002385}
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-9079054c-9620-4757-a256-23413042f1ef}
AZURE_REDIRECT_URI: ${AZURE_REDIRECT_URI:-https://ai-sandbox.oliver.solutions/ac-helper/}
MSAL_TENANT_ID: ${AZURE_TENANT_ID:-e519c2e6-bc6d-4fdf-8d9c-923c2f002385}
MSAL_CLIENT_ID: ${AZURE_CLIENT_ID:-9079054c-9620-4757-a256-23413042f1ef}
MSAL_REDIRECT_URI: ${AZURE_REDIRECT_URI:-https://ai-sandbox.oliver.solutions/ac-helper/}
SESSION_SECRET: ${SESSION_SECRET:-change-me-in-production}
# Dev mode (set to false in production)
DEV_MODE: ${DEV_MODE:-false}
DEV_USER_ID: ${DEV_USER_ID:-dev-user-001}
DEV_USER_ROLE: ${DEV_USER_ROLE:-admin}
# Admin bootstrap
ADMIN_EMAIL: ${ADMIN_EMAIL:-daveporter@oliver.agency}
ADMIN_EMAILS: ${ADMIN_EMAILS:-daveporter@oliver.agency,vadymsamoilenko@oliver.agency}
# Emergency access (bypass SSO) — set a long random string to enable
EMERGENCY_TOKEN: ${EMERGENCY_TOKEN:-}
EMERGENCY_USER_EMAIL: ${EMERGENCY_USER_EMAIL:-daveporter@oliver.agency}
EMERGENCY_USER_NAME: ${EMERGENCY_USER_NAME:-Emergency Access}
# OpenAI
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
OPENAI_MODEL: ${OPENAI_MODEL:-gpt-4.1}
OPENAI_REASONING_EFFORT: ${OPENAI_REASONING_EFFORT:-medium}
OPENAI_TIMEOUT: ${OPENAI_TIMEOUT:-3600}
OPENAI_MAX_RETRIES: ${OPENAI_MAX_RETRIES:-2}
# Google Gemini
GEMINI_API_KEY: ${GEMINI_API_KEY:-}
GEMINI_MODEL: ${GEMINI_MODEL:-gemini-3-flash-preview}
GOOGLE_MODEL: ${GOOGLE_MODEL:-gemini-3.1-pro-preview}
GOOGLE_TEMPERATURE: ${GOOGLE_TEMPERATURE:-0.7}
GOOGLE_MAX_OUTPUT_TOKENS: ${GOOGLE_MAX_OUTPUT_TOKENS:-100000}
GOOGLE_THINKING_BUDGET: ${GOOGLE_THINKING_BUDGET:-12000}
GOOGLE_TIMEOUT: ${GOOGLE_TIMEOUT:-3600}
# Anthropic
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
ANTHROPIC_MODEL_OPUS: ${ANTHROPIC_MODEL_OPUS:-claude-opus-4-5-20251101}
ANTHROPIC_MODEL_SONNET: ${ANTHROPIC_MODEL_SONNET:-claude-sonnet-4-5-20250929}
ANTHROPIC_TEMPERATURE: ${ANTHROPIC_TEMPERATURE:-1}
ANTHROPIC_MAX_TOKENS: ${ANTHROPIC_MAX_TOKENS:-32000}
ANTHROPIC_THINKING_BUDGET: ${ANTHROPIC_THINKING_BUDGET:-12000}
ANTHROPIC_TIMEOUT: ${ANTHROPIC_TIMEOUT:-300}
# LlamaCloud
LLAMA_CLOUD_API_KEY: ${LLAMA_CLOUD_API_KEY:-}
# Brief extraction
DEFAULT_PRIMARY_MODELS: ${DEFAULT_PRIMARY_MODELS:-anthropic-sonnet45,google-gemini20}
DEFAULT_CONSOLIDATION_MODEL: ${DEFAULT_CONSOLIDATION_MODEL:-anthropic-sonnet45}
MINIMUM_SUCCESS_THRESHOLD: ${MINIMUM_SUCCESS_THRESHOLD:-1}
ENABLE_COST_ESTIMATION: ${ENABLE_COST_ESTIMATION:-true}
MAX_PROCESSING_COST_USD: ${MAX_PROCESSING_COST_USD:-10.00}
MAX_CONCURRENT_JOBS: ${MAX_CONCURRENT_JOBS:-5}
# File upload
MAX_UPLOAD_SIZE_MB: ${MAX_UPLOAD_SIZE_MB:-200}
FILE_RETENTION_HOURS: ${FILE_RETENTION_HOURS:-24}
WS_PING_INTERVAL_SECONDS: ${WS_PING_INTERVAL_SECONDS:-30}
# Paths
DATA_DIR: /app/data
UPLOADS_DIR: /app/data/uploads
OUTPUTS_DIR: /app/data/outputs
SHEETS_DIR: /app/data/sheets
USERS_FILE: /app/data/users.json
DROPDOWNS_FILE: /app/data/dropdowns.json
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
volumes:
postgres_data:

24
frontend/.gitignore vendored Normal file
View file

@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

73
frontend/README.md Normal file
View file

@ -0,0 +1,73 @@
# React + TypeScript + Vite
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
Currently, two official plugins are available:
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Oxc](https://oxc.rs)
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/)
## React Compiler
The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
## Expanding the ESLint configuration
If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
```js
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
// Other configs...
// Remove tseslint.configs.recommended and replace with this
tseslint.configs.recommendedTypeChecked,
// Alternatively, use this for stricter rules
tseslint.configs.strictTypeChecked,
// Optionally, add this for stylistic rules
tseslint.configs.stylisticTypeChecked,
// Other configs...
],
languageOptions: {
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
// other options...
},
},
])
```
You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
```js
// eslint.config.js
import reactX from 'eslint-plugin-react-x'
import reactDom from 'eslint-plugin-react-dom'
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
// Other configs...
// Enable lint rules for React
reactX.configs['recommended-typescript'],
// Enable lint rules for React DOM
reactDom.configs.recommended,
],
languageOptions: {
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
// other options...
},
},
])
```

23
frontend/eslint.config.js Normal file
View file

@ -0,0 +1,23 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
import { defineConfig, globalIgnores } from 'eslint/config'
// Flat ESLint configuration for the frontend.
export default defineConfig([
// Build output is never linted.
globalIgnores(['dist']),
{
// Everything in this entry applies to TypeScript sources only.
files: ['**/*.{ts,tsx}'],
// Rule sets: core JS, typescript-eslint, React hooks, React Refresh (Vite preset).
extends: [
js.configs.recommended,
tseslint.configs.recommended,
reactHooks.configs.flat.recommended,
reactRefresh.configs.vite,
],
languageOptions: {
ecmaVersion: 2020,
// Make browser globals known to the linter.
globals: globals.browser,
},
},
])

16
frontend/index.html Normal file
View file

@ -0,0 +1,16 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/ac-helper/favicon.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AC Tool — Oliver Agency</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@400;500;600;700&display=swap" rel="stylesheet">
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

3925
frontend/package-lock.json generated Normal file

File diff suppressed because it is too large Load diff

44
frontend/package.json Normal file
View file

@ -0,0 +1,44 @@
{
"name": "frontend",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"@azure/msal-browser": "^4.30.0",
"@azure/msal-react": "^3.0.29",
"@handsontable/react": "^16.2.0",
"@tailwindcss/vite": "^4.2.2",
"@types/react-router-dom": "^5.3.3",
"autoprefixer": "^10.4.27",
"axios": "^1.13.6",
"handsontable": "^17.0.0",
"postcss": "^8.5.8",
"react": "^19.2.4",
"react-dom": "^19.2.4",
"react-dropzone": "^15.0.0",
"react-hot-toast": "^2.6.0",
"react-router-dom": "^7.13.1",
"tailwindcss": "^4.2.2",
"zustand": "^5.0.12"
},
"devDependencies": {
"@eslint/js": "^9.39.4",
"@types/node": "^24.12.0",
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"@vitejs/plugin-react": "^6.0.1",
"eslint": "^9.39.4",
"eslint-plugin-react-hooks": "^7.0.1",
"eslint-plugin-react-refresh": "^0.5.2",
"globals": "^17.4.0",
"typescript": "~5.9.3",
"typescript-eslint": "^8.57.0",
"vite": "^8.0.1"
}
}

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 9.3 KiB

24
frontend/public/icons.svg Normal file
View file

@ -0,0 +1,24 @@
<svg xmlns="http://www.w3.org/2000/svg">
<symbol id="bluesky-icon" viewBox="0 0 16 17">
<g clip-path="url(#bluesky-clip)"><path fill="#08060d" d="M7.75 7.735c-.693-1.348-2.58-3.86-4.334-5.097-1.68-1.187-2.32-.981-2.74-.79C.188 2.065.1 2.812.1 3.251s.241 3.602.398 4.13c.52 1.744 2.367 2.333 4.07 2.145-2.495.37-4.71 1.278-1.805 4.512 3.196 3.309 4.38-.71 4.987-2.746.608 2.036 1.307 5.91 4.93 2.746 2.72-2.746.747-4.143-1.747-4.512 1.702.189 3.55-.4 4.07-2.145.156-.528.397-3.691.397-4.13s-.088-1.186-.575-1.406c-.42-.19-1.06-.395-2.741.79-1.755 1.24-3.64 3.752-4.334 5.099"/></g>
<defs><clipPath id="bluesky-clip"><path fill="#fff" d="M.1.85h15.3v15.3H.1z"/></clipPath></defs>
</symbol>
<symbol id="discord-icon" viewBox="0 0 20 19">
<path fill="#08060d" d="M16.224 3.768a14.5 14.5 0 0 0-3.67-1.153c-.158.286-.343.67-.47.976a13.5 13.5 0 0 0-4.067 0c-.128-.306-.317-.69-.476-.976A14.4 14.4 0 0 0 3.868 3.77C1.546 7.28.916 10.703 1.231 14.077a14.7 14.7 0 0 0 4.5 2.306q.545-.748.965-1.587a9.5 9.5 0 0 1-1.518-.74q.191-.14.372-.293c2.927 1.369 6.107 1.369 8.999 0q.183.152.372.294-.723.437-1.52.74.418.838.963 1.588a14.6 14.6 0 0 0 4.504-2.308c.37-3.911-.63-7.302-2.644-10.309m-9.13 8.234c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.894 0 1.614.82 1.599 1.82.001 1-.705 1.82-1.6 1.82m5.91 0c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.893 0 1.614.82 1.599 1.82 0 1-.706 1.82-1.6 1.82"/>
</symbol>
<symbol id="documentation-icon" viewBox="0 0 21 20">
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="m15.5 13.333 1.533 1.322c.645.555.967.833.967 1.178s-.322.623-.967 1.179L15.5 18.333m-3.333-5-1.534 1.322c-.644.555-.966.833-.966 1.178s.322.623.966 1.179l1.534 1.321"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M17.167 10.836v-4.32c0-1.41 0-2.117-.224-2.68-.359-.906-1.118-1.621-2.08-1.96-.599-.21-1.349-.21-2.848-.21-2.623 0-3.935 0-4.983.369-1.684.591-3.013 1.842-3.641 3.428C3 6.449 3 7.684 3 10.154v2.122c0 2.558 0 3.838.706 4.726q.306.383.713.671c.76.536 1.79.64 3.581.66"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M3 10a2.78 2.78 0 0 1 2.778-2.778c.555 0 1.209.097 1.748-.047.48-.129.854-.503.982-.982.145-.54.048-1.194.048-1.749a2.78 2.78 0 0 1 2.777-2.777"/>
</symbol>
<symbol id="github-icon" viewBox="0 0 19 19">
<path fill="#08060d" fill-rule="evenodd" d="M9.356 1.85C5.05 1.85 1.57 5.356 1.57 9.694a7.84 7.84 0 0 0 5.324 7.44c.387.079.528-.168.528-.376 0-.182-.013-.805-.013-1.454-2.165.467-2.616-.935-2.616-.935-.349-.91-.864-1.143-.864-1.143-.71-.48.051-.48.051-.48.787.051 1.2.805 1.2.805.695 1.194 1.817.857 2.268.649.064-.507.27-.857.49-1.052-1.728-.182-3.545-.857-3.545-3.87 0-.857.31-1.558.8-2.104-.078-.195-.349-1 .077-2.078 0 0 .657-.208 2.14.805a7.5 7.5 0 0 1 1.946-.26c.657 0 1.328.092 1.946.26 1.483-1.013 2.14-.805 2.14-.805.426 1.078.155 1.883.078 2.078.502.546.799 1.247.799 2.104 0 3.013-1.818 3.675-3.558 3.87.284.247.528.714.528 1.454 0 1.052-.012 1.896-.012 2.156 0 .208.142.455.528.377a7.84 7.84 0 0 0 5.324-7.441c.013-4.338-3.48-7.844-7.773-7.844" clip-rule="evenodd"/>
</symbol>
<symbol id="social-icon" viewBox="0 0 20 20">
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M12.5 6.667a4.167 4.167 0 1 0-8.334 0 4.167 4.167 0 0 0 8.334 0"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M2.5 16.667a5.833 5.833 0 0 1 8.75-5.053m3.837.474.513 1.035c.07.144.257.282.414.309l.93.155c.596.1.736.536.307.965l-.723.73a.64.64 0 0 0-.152.531l.207.903c.164.715-.213.991-.84.618l-.872-.52a.63.63 0 0 0-.577 0l-.872.52c-.624.373-1.003.094-.84-.618l.207-.903a.64.64 0 0 0-.152-.532l-.723-.729c-.426-.43-.289-.864.306-.964l.93-.156a.64.64 0 0 0 .412-.31l.513-1.034c.28-.562.735-.562 1.012 0"/>
</symbol>
<symbol id="x-icon" viewBox="0 0 19 19">
<path fill="#08060d" fill-rule="evenodd" d="M1.893 1.98c.052.072 1.245 1.769 2.653 3.77l2.892 4.114c.183.261.333.48.333.486s-.068.089-.152.183l-.522.593-.765.867-3.597 4.087c-.375.426-.734.834-.798.905a1 1 0 0 0-.118.148c0 .01.236.017.664.017h.663l.729-.83c.4-.457.796-.906.879-.999a692 692 0 0 0 1.794-2.038c.034-.037.301-.34.594-.675l.551-.624.345-.392a7 7 0 0 1 .34-.374c.006 0 .93 1.306 2.052 2.903l2.084 2.965.045.063h2.275c1.87 0 2.273-.003 2.266-.021-.008-.02-1.098-1.572-3.894-5.547-2.013-2.862-2.28-3.246-2.273-3.266.008-.019.282-.332 2.085-2.38l2-2.274 1.567-1.782c.022-.028-.016-.03-.65-.03h-.674l-.3.342a871 871 0 0 1-1.782 2.025c-.067.075-.405.458-.75.852a100 100 0 0 1-.803.91c-.148.172-.299.344-.99 1.127-.304.343-.32.358-.345.327-.015-.019-.904-1.282-1.976-2.808L6.365 1.85H1.8zm1.782.91 8.078 11.294c.772 1.08 1.413 1.973 1.425 1.984.016.017.241.02 1.05.017l1.03-.004-2.694-3.766L7.796 5.75 5.722 2.852l-1.039-.004-1.039-.004z" clip-rule="evenodd"/>
</symbol>
</svg>

After

Width:  |  Height:  |  Size: 4.9 KiB

184
frontend/src/App.css Normal file
View file

@ -0,0 +1,184 @@
/* Accent-coloured element. The 2px border is transparent by default and
takes the accent border colour on hover; keyboard focus draws an offset
outline instead. */
.counter {
font-size: 16px;
padding: 5px 10px;
border-radius: 5px;
color: var(--accent);
background: var(--accent-bg);
border: 2px solid transparent;
transition: border-color 0.3s;
margin-bottom: 24px;
&:hover {
border-color: var(--accent-border);
}
&:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
}
/* Positioning context for three layered children: .base stays in flow,
while .framework and .vite are absolutely positioned over it and
rotated/scaled with 3D transforms. */
.hero {
position: relative;
.base,
.framework,
.vite {
/* Horizontally centre each layer inside .hero. */
inset-inline: 0;
margin: 0 auto;
}
.base {
width: 170px;
position: relative;
z-index: 0;
}
.framework,
.vite {
position: absolute;
}
.framework {
z-index: 1;
top: 34px;
height: 28px;
transform: perspective(2000px) rotateZ(300deg) rotateX(44deg) rotateY(39deg)
scale(1.4);
}
.vite {
z-index: 0;
top: 107px;
height: 26px;
width: auto;
transform: perspective(2000px) rotateZ(300deg) rotateX(40deg) rotateY(39deg)
scale(0.8);
}
}
/* Vertical flex column that centres its content and grows to fill the
available space; spacing tightens at viewport widths of 1024px or less. */
#center {
display: flex;
flex-direction: column;
gap: 25px;
place-content: center;
place-items: center;
flex-grow: 1;
@media (max-width: 1024px) {
padding: 32px 20px 24px;
gap: 18px;
}
}
/* Row of equal-width panels separated from the content above by a top
border. At 1024px or less the panels stack vertically and centre their text. */
#next-steps {
display: flex;
border-top: 1px solid var(--border);
text-align: left;
& > div {
/* Equal share of the row for each direct child panel. */
flex: 1 1 0;
padding: 32px;
@media (max-width: 1024px) {
padding: 24px 20px;
}
}
.icon {
margin-bottom: 16px;
width: 22px;
height: 22px;
}
@media (max-width: 1024px) {
flex-direction: column;
text-align: center;
}
}
/* Divider on the right edge; at 1024px or less the divider moves to the
bottom edge instead. */
#docs {
border-right: 1px solid var(--border);
@media (max-width: 1024px) {
border-right: none;
border-bottom: 1px solid var(--border);
}
}
/* Unstyled horizontal list of link "chips" inside #next-steps. Each link is
a small flex row that gains a shadow on hover. At 1024px or less the list
wraps and each item takes roughly half the row. */
#next-steps ul {
list-style: none;
padding: 0;
display: flex;
gap: 8px;
margin: 32px 0 0;
.logo {
height: 18px;
}
a {
color: var(--text-h);
font-size: 16px;
border-radius: 6px;
background: var(--social-bg);
display: flex;
padding: 6px 12px;
align-items: center;
gap: 8px;
text-decoration: none;
transition: box-shadow 0.3s;
&:hover {
box-shadow: var(--shadow);
}
.button-icon {
height: 18px;
width: 18px;
}
}
@media (max-width: 1024px) {
margin-top: 20px;
flex-wrap: wrap;
justify-content: center;
li {
/* Half the row minus the 8px gap. */
flex: 1 1 calc(50% - 8px);
}
a {
width: 100%;
justify-content: center;
/* Keep the padding inside the 100% width. */
box-sizing: border-box;
}
}
}
/* Fixed-height spacer with a top border; shorter at 1024px or less. */
#spacer {
height: 88px;
border-top: 1px solid var(--border);
@media (max-width: 1024px) {
height: 48px;
}
}
/* Full-width element with a small triangle at each end, drawn with the
border trick: an empty pseudo-element whose borders are transparent except
for one coloured side. */
.ticks {
position: relative;
width: 100%;
&::before,
&::after {
content: '';
position: absolute;
top: -4.5px;
border: 5px solid transparent;
}
&::before {
left: 0;
border-left-color: var(--border);
}
&::after {
right: 0;
border-right-color: var(--border);
}
}

121
frontend/src/App.tsx Normal file
View file

@ -0,0 +1,121 @@
import { useEffect } from 'react'
import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom'
import ErrorBoundary from './components/ErrorBoundary'
import { useMsal } from '@azure/msal-react'
import { InteractionStatus } from '@azure/msal-browser'
import { Toaster } from 'react-hot-toast'
import { useAuthStore } from './stores/useAuthStore'
import { registerTokenRefresher } from './api/client'
import AppShell from './components/layout/AppShell'
import DashboardPage from './pages/DashboardPage'
import SheetPage from './pages/SheetPage'
import BriefUploadPage from './pages/BriefUploadPage'
import BriefReviewPage from './pages/BriefReviewPage'
import AdminUsersPage from './pages/admin/AdminUsersPage'
import AdminDropdownsPage from './pages/admin/AdminDropdownsPage'
import AdminClientsPage from './pages/admin/AdminClientsPage'
import LoginPage from './pages/LoginPage'
import HelpPage from './pages/HelpPage'
/**
 * Renders `children` only once an authenticated user is present in the auth
 * store; otherwise shows a loading placeholder or the login page.
 *
 * In dev mode (`import.meta.env.DEV`) MSAL is bypassed: the token refresher
 * simply returns whatever token is already in sessionStorage and the user is
 * fetched directly. Otherwise the component registers an MSAL-backed token
 * refresher for the axios client and, once MSAL is idle and an account
 * exists, silently acquires an ID token and loads the current user.
 *
 * @param children - The application tree to render once a user is available.
 */
function AuthGate({ children }: { children: React.ReactNode }) {
  const { instance, inProgress, accounts } = useMsal()
  const { user, loading, fetchMe, setToken } = useAuthStore()

  useEffect(() => {
    if (import.meta.env.DEV) {
      // Register no-op refresher in dev mode
      registerTokenRefresher(async () => sessionStorage.getItem('ac_access_token') ?? '')
      if (!user) fetchMe()
      return
    }

    // Register MSAL token refresher so axios interceptor can silently refresh
    if (accounts.length > 0) {
      registerTokenRefresher(async () => {
        const result = await instance.acquireTokenSilent({
          account: accounts[0],
          scopes: ['openid', 'profile', 'email'],
          forceRefresh: false,
        })
        return result.idToken
      })
    }

    // MSAL is still handling a redirect/interaction; wait for the next run.
    if (inProgress !== InteractionStatus.None) return

    // Only try silent token acquisition if there's already an active account.
    // Do NOT auto-redirect — let LoginPage handle the explicit sign-in action.
    if (accounts.length === 0) {
      // Stop the loading spinner so LoginPage is shown
      useAuthStore.getState().setLoading(false)
      return
    }

    const acquire = async () => {
      try {
        const result = await instance.acquireTokenSilent({
          account: accounts[0],
          scopes: ['openid', 'profile', 'email'],
        })
        setToken(result.idToken)
        if (!user) fetchMe()
      } catch {
        // Silent refresh failed. Clear the loading flag explicitly: without
        // this, `loading && accounts.length > 0` below stays true and the
        // spinner is shown forever instead of falling through to LoginPage.
        useAuthStore.getState().setLoading(false)
      }
    }
    acquire()
    // NOTE(review): `user`, `instance`, `fetchMe` and `setToken` are read but
    // not listed as dependencies; the effect only re-runs on MSAL status or
    // account-count changes. Left unchanged to preserve existing behaviour.
  }, [inProgress, accounts.length])

  // Show spinner only while MSAL is processing a redirect or we're fetching user
  const msalBusy = inProgress !== InteractionStatus.None
  if (msalBusy || (loading && accounts.length > 0)) {
    return (
      <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100vh', background: '#000' }}>
        <div style={{ color: 'var(--text-muted)' }}>Loading</div>
      </div>
    )
  }

  if (!user) {
    return <LoginPage />
  }

  return <>{children}</>
}
/**
 * Route guard: renders `children` when the current user's role is `admin`,
 * otherwise redirects to the root route (replacing the history entry).
 */
function AdminRoute({ children }: { children: React.ReactNode }) {
  const role = useAuthStore().user?.role
  const isAdmin = role === 'admin'
  return isAdmin ? <>{children}</> : <Navigate to="/" replace />
}
/**
 * Application root: error boundary, router (mounted under `/ac-helper/`),
 * toast host, authentication gate, shell layout and the route table.
 * Unknown paths redirect to the dashboard at `/`.
 */
export default function App() {
  const toastOptions = {
    style: { background: '#1a1a1a', color: '#fff', border: '1px solid #2a2a2a' },
  }
  // Wraps a page element so that only admins can reach it.
  const adminOnly = (page: React.ReactNode) => <AdminRoute>{page}</AdminRoute>

  return (
    <ErrorBoundary>
      <BrowserRouter basename="/ac-helper/">
        <Toaster position="bottom-right" toastOptions={toastOptions} />
        <AuthGate>
          <AppShell>
            <Routes>
              <Route path="/" element={<DashboardPage />} />
              <Route path="/sheet/:sheetId" element={<SheetPage />} />
              <Route path="/brief/upload" element={<BriefUploadPage />} />
              <Route path="/brief/review/:jobId" element={<BriefReviewPage />} />
              <Route path="/admin/users" element={adminOnly(<AdminUsersPage />)} />
              <Route path="/admin/dropdowns" element={adminOnly(<AdminDropdownsPage />)} />
              <Route path="/admin/clients" element={adminOnly(<AdminClientsPage />)} />
              <Route path="/help" element={<HelpPage />} />
              <Route path="*" element={<Navigate to="/" replace />} />
            </Routes>
          </AppShell>
        </AuthGate>
      </BrowserRouter>
    </ErrorBoundary>
  )
}

122
frontend/src/api/admin.ts Normal file
View file

@ -0,0 +1,122 @@
import api from './client'
import type { User, CategoryData } from '../types'
/**
 * Column positions of the three dropdown fields in an uploaded file.
 * The upload/preview helpers in this module send each value as a stringified
 * form field with the same name.
 * NOTE(review): values appear to be zero-based indices into
 * `DetectMappingResult.headers` — confirm against the backend.
 */
export interface ColumnMapping {
name_col: number
status_col: number
media_col: number
}
/**
 * Response of the `detect-mapping` endpoints: the headers found in the file,
 * a proposed column mapping, and sample category rows.
 * NOTE(review): how the sample rows are selected is decided server-side.
 */
export interface DetectMappingResult {
headers: string[]
mapping: ColumnMapping
sample: CategoryData[]
}
/** GET `/admin/users` and resolve with the `users` array from the response. */
export const listUsers = async () => {
  const { data } = await api.get<{ users: User[] }>('/admin/users')
  return data.users
}
/**
 * PATCH `/admin/users/:id` with a partial change (role and/or active flag)
 * and resolve with the updated user from the response.
 */
export const updateUser = async (id: string, patch: { role?: User['role']; active?: boolean }) => {
  const response = await api.patch<{ success: boolean; user: User }>(`/admin/users/${id}`, patch)
  return response.data.user
}
/**
 * Builds the multipart body shared by all dropdown endpoints: the file under
 * `file`, plus — when a mapping is given — the three column fields as strings.
 */
const buildDropdownForm = (file: File, mapping?: ColumnMapping): FormData => {
  const form = new FormData()
  form.append('file', file)
  if (mapping) {
    form.append('name_col', String(mapping.name_col))
    form.append('status_col', String(mapping.status_col))
    form.append('media_col', String(mapping.media_col))
  }
  return form
}

/** Uploads a file to the global detect-mapping endpoint and resolves with the detection result. */
export const detectDropdownMapping = (file: File) =>
  api.post<DetectMappingResult>('/admin/dropdowns/detect-mapping', buildDropdownForm(file)).then(r => r.data)

/** Uploads the global dropdown file (optionally with an explicit column mapping). */
export const uploadDropdowns = (file: File, mapping?: ColumnMapping) =>
  api
    .post<{ success: boolean; total: number; active: number }>('/admin/dropdowns/upload', buildDropdownForm(file, mapping))
    .then(r => r.data)

/** Posts the file to the global preview endpoint and resolves with the returned categories. */
export const previewDropdowns = (file: File, mapping?: ColumnMapping) =>
  api
    .post<{ categories: CategoryData[] }>('/admin/dropdowns/preview', buildDropdownForm(file, mapping))
    .then(r => r.data.categories)

/** Client-scoped variant of `detectDropdownMapping`. */
export const detectClientDropdownMapping = (clientId: string, file: File) =>
  api
    .post<DetectMappingResult>(`/admin/clients/${clientId}/dropdowns/detect-mapping`, buildDropdownForm(file))
    .then(r => r.data)

/** Client-scoped variant of `uploadDropdowns`. */
export const uploadClientDropdowns = (clientId: string, file: File, mapping?: ColumnMapping) =>
  api
    .post<{ success: boolean; total: number; active: number }>(
      `/admin/clients/${clientId}/dropdowns/upload`,
      buildDropdownForm(file, mapping),
    )
    .then(r => r.data)

/** Client-scoped variant of `previewDropdowns`. */
export const previewClientDropdowns = (clientId: string, file: File, mapping?: ColumnMapping) =>
  api
    .post<{ categories: CategoryData[] }>(
      `/admin/clients/${clientId}/dropdowns/preview`,
      buildDropdownForm(file, mapping),
    )
    .then(r => r.data.categories)
/** DELETE `/admin/clients/:clientId/dropdowns`; resolves with the raw axios response. */
export const deleteClientDropdowns = (clientId: string) => {
  const url = `/admin/clients/${clientId}/dropdowns`
  return api.delete(url)
}
// ── Export templates ──────────────────────────────────────────────────────────
/**
 * One column of an export template: the header text and the field bound to it.
 * NOTE(review): `field` is presumably null when the column is not mapped to
 * any field — confirm against the backend.
 */
export interface ExportTemplateColumn {
header: string
field: string | null
}
/**
 * Response of the export-template `detect` endpoints.
 * NOTE(review): `fields` presumably lists the field names a column may be
 * mapped to — confirm against the backend.
 */
export interface DetectTemplateResult {
headers: string[]
template: ExportTemplateColumn[]
fields: string[]
}
/** GET `/admin/export-template` and resolve with the response body. */
export const getGlobalExportTemplate = async () => {
  const { data } = await api.get<{ template: ExportTemplateColumn[]; fields: string[] }>('/admin/export-template')
  return data
}
/** POST the file as multipart `file` to `/admin/export-template/detect` and resolve with the body. */
export const detectGlobalExportTemplate = async (file: File) => {
  const body = new FormData()
  body.append('file', file)
  const { data } = await api.post<DetectTemplateResult>('/admin/export-template/detect', body)
  return data
}
/** POST `{ template }` to `/admin/export-template` and resolve with the response body. */
export const saveGlobalExportTemplate = async (template: ExportTemplateColumn[]) => {
  const response = await api.post('/admin/export-template', { template })
  return response.data
}
/** DELETE `/admin/export-template`; resolves with the raw axios response. */
export const deleteGlobalExportTemplate = () => {
  return api.delete('/admin/export-template')
}
/** GET `/admin/clients/:clientId/export-template` and resolve with the response body. */
export const getClientExportTemplate = async (clientId: string) => {
  type Body = { template: ExportTemplateColumn[]; hasCustomTemplate: boolean; fields: string[] }
  const { data } = await api.get<Body>(`/admin/clients/${clientId}/export-template`)
  return data
}
/** POST the file as multipart `file` to the client's export-template `detect` endpoint. */
export const detectClientExportTemplate = async (clientId: string, file: File) => {
  const body = new FormData()
  body.append('file', file)
  const { data } = await api.post<DetectTemplateResult>(`/admin/clients/${clientId}/export-template/detect`, body)
  return data
}
/** POST `{ template }` to the client's export-template endpoint and resolve with the body. */
export const saveClientExportTemplate = async (clientId: string, template: ExportTemplateColumn[]) => {
  const response = await api.post(`/admin/clients/${clientId}/export-template`, { template })
  return response.data
}
/** DELETE the client's export template; resolves with the raw axios response. */
export const deleteClientExportTemplate = (clientId: string) => {
  const url = `/admin/clients/${clientId}/export-template`
  return api.delete(url)
}

17
frontend/src/api/ai.ts Normal file
View file

@ -0,0 +1,17 @@
import api from './client'
import type { Deliverable } from '../types'
/**
 * Response body returned by `sendCommand`. Only `success` is always present;
 * every other field is optional.
 * NOTE(review): which optional fields accompany each `operation` value is
 * decided by the backend — confirm there before relying on a combination.
 */
export interface CommandResult {
success: boolean
operation?: 'create' | 'update' | 'batch_update' | 'question'
count?: number
question?: string
data?: Deliverable[]
error?: string
}
/**
 * POST a command for a sheet to `/sheets/:sheetId/command`.
 * The body carries `command`, `yolo_mode` and `history`; resolves with the
 * response body.
 */
export const sendCommand = async (sheetId: string, command: string, yoloMode: boolean, history: string): Promise<CommandResult> => {
  const payload = { command, yolo_mode: yoloMode, history }
  const { data } = await api.post<CommandResult>(`/sheets/${sheetId}/command`, payload)
  return data
}
/** PATCH `/sheets/:sheetId/client` with `{ client_id }`; resolves with the raw axios response. */
export const updateSheetClient = (sheetId: string, clientId: string) => {
  const body = { client_id: clientId }
  return api.patch(`/sheets/${sheetId}/client`, body)
}

View file

@ -0,0 +1,73 @@
import axios from 'axios'
const api = axios.create({
baseURL: `${import.meta.env.BASE_URL}api`,
})
// Module-level token refresher — registered by AuthGate on mount
/** Async callback that resolves with a fresh bearer token string. */
type TokenRefresher = () => Promise<string>

// Currently registered refresher; null until one is registered.
let _refreshToken: TokenRefresher | null = null

/**
 * Installs the callback the interceptors in this module use to obtain a
 * fresh token. A later call replaces the earlier callback.
 */
export function registerTokenRefresher(fn: TokenRefresher): void {
  _refreshToken = fn
}
/**
 * Reports whether a JWT is expired or will expire within the next 60 seconds.
 *
 * @param token - The stored access token. Anything that is not a
 *   three-segment JWT (e.g. the emergency token) is treated as never expiring.
 * @returns `true` when the payload's `exp` claim is less than 60s in the
 *   future; `false` when there is no `exp`, the token is not a JWT, or the
 *   payload cannot be decoded.
 */
function isTokenExpired(token: string): boolean {
  try {
    const parts = token.split('.')
    if (parts.length !== 3) return false // not a JWT (e.g. emergency token) — never expires
    // JWT segments are base64url-encoded, but atob() only accepts the standard
    // base64 alphabet: translate '-'/'_' back to '+'/'/' and restore padding,
    // otherwise atob throws and the token would be reported as never expiring.
    const base64 = parts[1].replace(/-/g, '+').replace(/_/g, '/')
    const padded = base64.padEnd(Math.ceil(base64.length / 4) * 4, '=')
    const payload = JSON.parse(atob(padded))
    if (!payload.exp) return false
    // Refresh 60s before actual expiry
    return payload.exp * 1000 < Date.now() + 60_000
  } catch {
    // Undecodable payload — treat as non-expiring rather than failing the request.
    return false
  }
}
// Proactively refresh before request if token is expired/about to expire
// Request interceptor: attach the stored token as a Bearer header, refreshing
// it first when it is a JWT that has expired or is about to.
api.interceptors.request.use(async (config) => {
  const stored = sessionStorage.getItem('ac_access_token')
  if (!stored) return config

  let bearer = stored
  if (isTokenExpired(stored) && _refreshToken) {
    try {
      bearer = await _refreshToken()
      sessionStorage.setItem('ac_access_token', bearer)
    } catch {
      // Refresh failed — let request go with expired token so 401 surfaces to user
      bearer = stored
    }
  }

  config.headers.Authorization = `Bearer ${bearer}`
  return config
})
// On 401 — try once more with a fresh token (JWT only), then reload
// Response interceptor: on a 401 for a JWT session, refresh the token and
// replay the request once; if the refresh itself fails, drop the token and
// reload the page. Every other error is passed through unchanged.
api.interceptors.response.use(
  (response) => response,
  async (error) => {
    const request = error.config
    const stored = sessionStorage.getItem('ac_access_token') ?? ''
    const looksLikeJwt = stored.split('.').length === 3

    if (error.response?.status !== 401) return Promise.reject(error)
    if (request._retry || !_refreshToken || !looksLikeJwt) return Promise.reject(error)

    // Mark the request so a second 401 on the replay is not retried again.
    request._retry = true
    try {
      const renewed = await _refreshToken()
      sessionStorage.setItem('ac_access_token', renewed)
      request.headers.Authorization = `Bearer ${renewed}`
      return api(request)
    } catch {
      // Token refresh failed — force re-login
      sessionStorage.removeItem('ac_access_token')
      window.location.reload()
      return Promise.reject(error)
    }
  }
)
export default api

View file

@ -0,0 +1,14 @@
import api from './client'
import type { Client } from '../types'
/** GET `/clients` and resolve with the `clients` array from the response. */
export const listClients = async () => {
  const { data } = await api.get<{ clients: Client[] }>('/clients')
  return data.clients
}
/** POST `{ name }` to `/clients` and resolve with the `client` from the response. */
export const createClient = async (name: string) => {
  const { data } = await api.post<{ client: Client }>('/clients', { name })
  return data.client
}
/** DELETE `/clients/:id`; resolves with the raw axios response. */
export const deleteClient = (id: string) => {
  const url = `/clients/${id}`
  return api.delete(url)
}
/** PATCH `/clients/:id` with the given patch and resolve with the `client` from the response. */
export const updateClient = async (id: string, patch: { name: string }) => {
  const { data } = await api.patch<{ client: Client }>(`/clients/${id}`, patch)
  return data.client
}

View file

@ -0,0 +1,8 @@
import api from './client'
import type { CategoryData } from '../types'
/**
 * GET `/dropdowns/categories` and resolve with the `categories` array.
 *
 * @param activeOnly - Sent as the `active` query parameter (default `true`).
 * @param clientId - When truthy, sent as the `client_id` query parameter.
 */
export const getCategories = async (activeOnly = true, clientId?: string | null) => {
  const active = String(activeOnly)
  const query = new URLSearchParams(clientId ? { active, client_id: clientId } : { active })
  const { data } = await api.get<{ categories: CategoryData[] }>(`/dropdowns/categories?${query}`)
  return data.categories
}

View file

@ -0,0 +1,17 @@
import api from './client'
import type { ExportTemplateColumn, DetectTemplateResult } from './admin'
/** GET `/export/template` and resolve with the response body. */
export const getUserExportTemplate = async () => {
  type Body = { template: ExportTemplateColumn[]; hasCustom: boolean; fields: string[] }
  const { data } = await api.get<Body>('/export/template')
  return data
}
/** POST the file as multipart `file` to `/export/template/detect` and resolve with the body. */
export const detectUserExportTemplate = async (file: File) => {
  const body = new FormData()
  body.append('file', file)
  const { data } = await api.post<DetectTemplateResult>('/export/template/detect', body)
  return data
}
/** POST `{ template }` to `/export/template` and resolve with the response body. */
export const saveUserExportTemplate = async (template: ExportTemplateColumn[]) => {
  const response = await api.post('/export/template', { template })
  return response.data
}
/** DELETE `/export/template`; resolves with the raw axios response. */
export const deleteUserExportTemplate = () => {
  return api.delete('/export/template')
}

26
frontend/src/api/jobs.ts Normal file
View file

@ -0,0 +1,26 @@
import api from './client'
import type { Job, ModelConfiguration, Deliverable } from '../types'
/** GET `/jobs?limit=<limit>` (default 50) and resolve with the `jobs` array. */
export const listJobs = async (limit = 50) => {
  const { data } = await api.get<{ jobs: Job[] }>(`/jobs?limit=${limit}`)
  return data.jobs
}
/** GET `/jobs/:id` and resolve with the `job` from the response. */
export const getJob = async (id: string) => {
  const { data } = await api.get<{ job: Job }>(`/jobs/${id}`)
  return data.job
}
/**
 * POST a multipart form to `/jobs` and resolve with the `jobs` array.
 *
 * Each file is attached as `file_<index>`; `modelConfig` (JSON-encoded) and
 * `clientId` are attached only when provided.
 */
export const createJob = async (files: File[], modelConfig?: ModelConfiguration, clientId?: string) => {
  const body = new FormData()
  for (const [index, file] of files.entries()) {
    body.append(`file_${index}`, file)
  }
  if (modelConfig) {
    body.append('modelConfig', JSON.stringify(modelConfig))
  }
  if (clientId) {
    body.append('clientId', clientId)
  }
  // Do NOT set Content-Type manually — axios sets it automatically with the
  // correct multipart boundary when given a FormData instance.
  const { data } = await api.post<{ jobs: Job[] }>('/jobs', body)
  return data.jobs
}
/** DELETE `/jobs/:id`; resolves with the raw axios response. */
export const deleteJob = (id: string) => {
  const url = `/jobs/${id}`
  return api.delete(url)
}
/** GET `/jobs/:id/deliverables` and resolve with the body (`deliverables` and `count`). */
export const getJobDeliverables = async (id: string) => {
  const { data } = await api.get<{ deliverables: Deliverable[]; count: number }>(`/jobs/${id}/deliverables`)
  return data
}
/** GET `/jobs/stats` and resolve with the `stats` property of the (untyped) response body. */
export const getJobStats = async () => {
  const response = await api.get('/jobs/stats')
  return response.data.stats
}

View file

@ -0,0 +1,30 @@
import api from './client'
import type { SheetMeta, Deliverable } from '../types'
/** GET `/sheets` and resolve with the `sheets` array from the response. */
export const listSheets = async () => {
  const { data } = await api.get<{ sheets: SheetMeta[] }>('/sheets')
  return data.sheets
}
/** POST `{ name, data }` to `/sheets` (data defaults to an empty list) and resolve with the new `sheet`. */
export const createSheet = async (name: string, data: Deliverable[] = []) => {
  const response = await api.post<{ sheet: SheetMeta }>('/sheets', { name, data })
  return response.data.sheet
}
/** GET `/sheets/:id` and resolve with the `data` array (the sheet's deliverables) from the body. */
export const loadSheet = async (id: string) => {
  const response = await api.get<{ data: Deliverable[] }>(`/sheets/${id}`)
  return response.data.data
}
/** PUT `{ data }` to `/sheets/:id`; resolves with the raw axios response. */
export const updateSheet = (id: string, data: Deliverable[]) => {
  const body = { data }
  return api.put(`/sheets/${id}`, body)
}
/** DELETE `/sheets/:id`; resolves with the raw axios response. */
export const deleteSheet = (id: string) => {
  const url = `/sheets/${id}`
  return api.delete(url)
}
/** PATCH `{ name }` to `/sheets/:id`; resolves with the raw axios response. */
export const renameSheet = (id: string, name: string) => {
  const body = { name }
  return api.patch(`/sheets/${id}`, body)
}
/** POST to `/sheets/:id/duplicate` (no body) and resolve with the `sheet` from the response. */
export const duplicateSheet = async (id: string) => {
  const { data } = await api.post<{ sheet: SheetMeta }>(`/sheets/${id}/duplicate`)
  return data.sheet
}
/**
 * POST `{ deliverables, mode }` to `/sheets/:sheetId/import` and resolve with
 * the response body. `mode` defaults to `'append'`.
 */
export const importDeliverables = async (sheetId: string, deliverables: Deliverable[], mode: 'append' | 'replace' = 'append') => {
  const response = await api.post(`/sheets/${sheetId}/import`, { deliverables, mode })
  return response.data
}
/**
 * Opens the sheet's export URL in a new browser tab.
 *
 * The request is a plain navigation, so it bypasses the axios interceptors;
 * the stored token (if any) is passed as the `_token` query parameter instead.
 *
 * @param id - Identifier of the sheet to export.
 */
export const exportSheet = (id: string) => {
  const token = sessionStorage.getItem('ac_access_token')
  // Encode the token: it is not guaranteed to be URL-safe (the emergency token
  // is an arbitrary string), and characters such as '&', '#' or '+' would
  // otherwise corrupt the query string.
  // NOTE(review): a token in the URL can end up in browser history and server
  // logs, and is not refreshed if expired — consider a short-lived download
  // ticket or a blob download through the axios client.
  const query = token ? `?_token=${encodeURIComponent(token)}` : ''
  window.open(`${import.meta.env.BASE_URL}api/sheets/${id}/export${query}`, '_blank')
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 
13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 
44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>

After

Width:  |  Height:  |  Size: 4 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 8.5 KiB

View file

@ -0,0 +1,36 @@
import { Component, type ReactNode } from 'react'
interface Props { children: ReactNode }
interface State { error: Error | null }
/**
 * React error boundary. When a descendant throws during rendering, the error
 * is captured into state and a full-screen fallback with the error message
 * and a "Reload" button is shown in place of the children.
 */
export default class ErrorBoundary extends Component<Props, State> {
  state: State = { error: null }

  /** Stores the thrown error so the next render shows the fallback. */
  static getDerivedStateFromError(error: Error): State {
    return { error }
  }

  render() {
    const { error } = this.state

    // Nothing has failed: render the wrapped tree untouched.
    if (!error) {
      return this.props.children
    }

    const reloadPage = () => window.location.reload()

    return (
      <div
        style={{
          display: 'flex', alignItems: 'center', justifyContent: 'center',
          height: '100vh', background: '#000', flexDirection: 'column', gap: 16,
        }}
      >
        <div style={{ fontSize: 32 }}></div>
        <div style={{ color: 'var(--text-primary)', fontWeight: 600 }}>Something went wrong</div>
        <div style={{ color: 'var(--text-muted)', fontSize: 12, maxWidth: 400, textAlign: 'center' }}>
          {error.message}
        </div>
        <button
          onClick={reloadPage}
          style={{ background: 'var(--accent)', color: '#000', border: 'none', borderRadius: 6, padding: '8px 20px', cursor: 'pointer', fontSize: 13 }}
        >
          Reload
        </button>
      </div>
    )
  }
}

View file

@ -0,0 +1,102 @@
import { useState } from 'react'
import type { ColumnMapping, DetectMappingResult } from '../../api/admin'
/** Props for the column-mapping confirmation step. */
interface Props {
// Detection result supplying the headers, the initial mapping and the sample rows.
detection: DetectMappingResult
// Callback that receives a column mapping.
// NOTE(review): presumably invoked with the edited mapping on confirm — the call site is outside this view.
onConfirm: (mapping: ColumnMapping) => void
// Callback taking no arguments.
// NOTE(review): presumably invoked when the user cancels — the call site is outside this view.
onCancel: () => void
}
// Human-readable label for each ColumnMapping field; the key order also
// determines the order in which the selectors are rendered.
const FIELD_LABELS: Record<keyof ColumnMapping, string> = {
name_col: 'Category name',
status_col: 'Status (Active / Archived)',
media_col: 'Media types',
}
/**
 * Confirmation step for a detected column mapping.
 *
 * Renders one column selector per ColumnMapping field (pre-filled from
 * `detection.mapping`), a preview table of `detection.sample`, and
 * Cancel / Confirm buttons.
 *
 * @param detection  Detection result; `headers`, `mapping` and `sample` are read.
 * @param onConfirm  Called with the (possibly edited) mapping when Confirm is clicked.
 * @param onCancel   Called when Cancel is clicked.
 */
export default function ColumnMappingStep({ detection, onConfirm, onCancel }: Props) {
  // Start from a shallow copy so edits never mutate the detection object passed in.
  const [mapping, setMapping] = useState<ColumnMapping>({ ...detection.mapping })

  const setCol = (field: keyof ColumnMapping, idx: number) =>
    setMapping(m => ({ ...m, [field]: idx }))

  return (
    <div className="rounded-xl overflow-hidden" style={{ border: '1px solid var(--accent)', background: 'var(--bg-card)' }}>
      <div className="px-4 py-3" style={{ borderBottom: '1px solid var(--border)', background: 'rgba(255,196,7,0.06)' }}>
        <div className="text-sm font-semibold" style={{ color: 'var(--accent)' }}>Confirm column mapping</div>
        <div className="text-xs mt-0.5" style={{ color: 'var(--text-muted)' }}>
          Verify which columns contain the data fields, then click Confirm.
        </div>
      </div>

      <div className="p-4 space-y-3">
        {(Object.keys(FIELD_LABELS) as (keyof ColumnMapping)[]).map(field => (
          <div key={field} className="flex items-center gap-3">
            <span className="text-xs w-44 flex-shrink-0" style={{ color: 'var(--text-secondary)' }}>
              {FIELD_LABELS[field]}
            </span>
            <select
              // The visible caption is a plain <span>, so name the control explicitly.
              aria-label={FIELD_LABELS[field]}
              value={mapping[field]}
              onChange={e => setCol(field, Number(e.target.value))}
              className="flex-1 px-2 py-1.5 rounded text-xs outline-none"
              style={{
                background: 'var(--bg-elevated, #1a1a1a)',
                border: '1px solid var(--border)',
                color: 'var(--text-primary)',
              }}
            >
              {detection.headers.map((h, i) => (
                // Separator keeps the column number from running into the header
                // text (previously rendered as e.g. "Col 1Name").
                <option key={i} value={i}>{h ? `Col ${i + 1} — ${h}` : `Col ${i + 1}`}</option>
              ))}
            </select>
          </div>
        ))}
      </div>

      {/* Sample preview: shows detection.sample as received; not recomputed when selectors change */}
      {detection.sample.length > 0 && (
        <div className="px-4 pb-3">
          <div className="text-xs font-semibold uppercase tracking-wider mb-2" style={{ color: 'var(--text-muted)' }}>
            Sample (first {detection.sample.length} rows)
          </div>
          <div className="rounded overflow-hidden" style={{ border: '1px solid var(--border)' }}>
            <table className="w-full text-xs border-collapse">
              <thead style={{ background: '#1a1a1a' }}>
                <tr>
                  <th className="px-3 py-1.5 text-left" style={{ color: 'var(--accent)' }}>Category</th>
                  <th className="px-3 py-1.5 text-left" style={{ color: 'var(--accent)' }}>Status</th>
                  <th className="px-3 py-1.5 text-left" style={{ color: 'var(--accent)' }}>Media Types</th>
                </tr>
              </thead>
              <tbody>
                {detection.sample.map((row, i) => (
                  <tr key={i} style={{ borderTop: '1px solid var(--border)' }}>
                    <td className="px-3 py-1.5" style={{ color: 'var(--text-primary)' }}>{row.name}</td>
                    <td className="px-3 py-1.5" style={{ color: row.status === 'Active' ? 'var(--accent)' : 'var(--text-muted)' }}>{row.status}</td>
                    <td className="px-3 py-1.5" style={{ color: 'var(--text-muted)' }}>{row.mediaTypes.join(', ') || '—'}</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </div>
      )}

      <div className="flex justify-end gap-2 px-4 pb-4">
        <button
          onClick={onCancel}
          className="px-3 py-1.5 rounded text-xs"
          style={{ border: '1px solid var(--border)', color: 'var(--text-muted)' }}
        >
          Cancel
        </button>
        <button
          onClick={() => onConfirm(mapping)}
          className="px-4 py-1.5 rounded text-xs font-medium"
          style={{ background: 'var(--accent)', color: '#000' }}
        >
          Confirm mapping
        </button>
      </div>
    </div>
  )
}

View file

@ -0,0 +1,193 @@
import { useState, useCallback } from 'react'
import { useDropzone } from 'react-dropzone'
import type { ExportTemplateColumn, DetectTemplateResult } from '../../api/admin'
import toast from 'react-hot-toast'
/** Props for ExportTemplateEditor. */
interface Props {
/** Current saved template (null = using fallback) */
template: ExportTemplateColumn[] | null
/** When true, the "custom template active" status and the Edit / Remove buttons are shown. */
hasCustom: boolean
/** Field names offered as options in each row's field selector. */
internalFields: string[]
/** Called with the dropped file; the `template` of its result is loaded into the editor. */
onDetect: (file: File) => Promise<DetectTemplateResult>
/** Called with the edited columns when "Save template" is clicked. */
onSave: (template: ExportTemplateColumn[]) => Promise<unknown>
/** Called after the user confirms removal of the custom template. */
onDelete: () => Promise<unknown>
}
/**
 * Editor for the CSV export template.
 *
 * Two stages:
 *  - 'idle': status line, optional Edit / Remove buttons, and a drop zone that
 *    passes a dropped .csv to `onDetect` and opens the editor with the result.
 *  - 'edit': a list of rows (CSV header text + internal field selector) that can
 *    be added, removed, and saved through `onSave`.
 *
 * Failures of `onDetect`, `onSave` and `onDelete` are reported with toasts.
 *
 * @param template        Saved template, or null when none is saved.
 * @param hasCustom       Whether a custom template is active.
 * @param internalFields  Field names selectable for each column.
 * @param onDetect        Derives a template from a dropped CSV file.
 * @param onSave          Persists the edited template.
 * @param onDelete        Removes the custom template.
 */
export default function ExportTemplateEditor({ template, hasCustom, internalFields, onDetect, onSave, onDelete }: Props) {
  const [stage, setStage] = useState<'idle' | 'edit'>('idle')
  const [editing, setEditing] = useState<ExportTemplateColumn[]>([])
  const [busy, setBusy] = useState(false)

  // Copy each column so edits do not mutate the caller's template objects.
  const openEdit = (cols: ExportTemplateColumn[]) => {
    setEditing(cols.map(c => ({ ...c })))
    setStage('edit')
  }

  const onDrop = useCallback(async (files: File[]) => {
    // Empty when every dropped file was rejected; onDropRejected reports that case.
    if (!files[0]) return
    setBusy(true)
    try {
      const result = await onDetect(files[0])
      openEdit(result.template)
    } catch {
      toast.error('Failed to read CSV file')
    } finally {
      setBusy(false)
    }
  }, [onDetect])

  const { getRootProps, getInputProps, isDragActive } = useDropzone({
    onDrop,
    // Without this, a non-CSV drop or a multi-file drop is ignored with no feedback.
    onDropRejected: () => {
      toast.error('Please drop a single .csv file')
    },
    accept: { 'text/csv': ['.csv'] },
    maxFiles: 1,
    disabled: busy || stage === 'edit',
  })

  const handleSave = async () => {
    setBusy(true)
    try {
      await onSave(editing)
      setStage('idle')
      toast.success(`Export template saved — ${editing.length} columns`)
    } catch {
      toast.error('Failed to save template')
    } finally {
      setBusy(false)
    }
  }

  const handleDelete = async () => {
    if (!confirm('Remove custom export template? The default format will be used.')) return
    setBusy(true)
    try {
      await onDelete()
      setStage('idle')
      toast.success('Export template removed')
    } catch {
      toast.error('Failed to remove template')
    } finally {
      setBusy(false)
    }
  }

  // Row-level editing helpers; each returns a new array so state stays immutable.
  const addRow = () => setEditing(e => [...e, { header: '', field: null }])
  const removeRow = (i: number) => setEditing(e => e.filter((_, idx) => idx !== i))
  const updateHeader = (i: number, val: string) =>
    setEditing(e => e.map((c, idx) => idx === i ? { ...c, header: val } : c))
  // An empty selection is stored as null rather than ''.
  const updateField = (i: number, val: string) =>
    setEditing(e => e.map((c, idx) => idx === i ? { ...c, field: val || null } : c))

  return (
    <div className="space-y-3">
      {/* Status bar */}
      <div className="flex items-center justify-between">
        <div className="text-xs" style={{ color: 'var(--text-muted)' }}>
          {hasCustom
            ? <span style={{ color: 'var(--accent)' }}>Custom template active {template?.length} columns</span>
            : 'Using default format (AC columns)'}
        </div>
        <div className="flex gap-2">
          {hasCustom && stage === 'idle' && (
            <>
              <button
                onClick={() => template && openEdit(template)}
                // Do not open the editor while a save/delete/detect is still in flight.
                disabled={busy}
                className="text-xs px-2 py-1 rounded hover:opacity-70 disabled:opacity-40"
                style={{ border: '1px solid var(--border)', color: 'var(--text-secondary)' }}
              >
                Edit
              </button>
              <button
                onClick={handleDelete}
                disabled={busy}
                className="text-xs px-2 py-1 rounded hover:opacity-70 disabled:opacity-40"
                style={{ border: '1px solid rgba(255,50,50,0.3)', color: 'var(--danger)' }}
              >
                Remove
              </button>
            </>
          )}
        </div>
      </div>

      {/* Drop zone */}
      {stage === 'idle' && (
        <div
          {...getRootProps()}
          className="rounded-lg p-3 text-center cursor-pointer transition-colors"
          style={{
            border: `1px dashed ${isDragActive ? 'var(--accent)' : 'var(--border)'}`,
            background: isDragActive ? 'rgba(255,196,7,0.05)' : 'transparent',
            opacity: busy ? 0.6 : 1,
          }}
        >
          <input {...getInputProps()} />
          <p className="text-xs" style={{ color: 'var(--text-muted)' }}>
            {busy ? 'Reading…' : isDragActive
              ? 'Drop CSV here'
              : 'Drop a sample .csv file to set export format (headers only needed)'}
          </p>
        </div>
      )}

      {/* Column mapping editor */}
      {stage === 'edit' && (
        <div className="rounded-xl overflow-hidden" style={{ border: '1px solid var(--accent)' }}>
          <div className="px-3 py-2.5 flex items-center justify-between" style={{ background: 'rgba(255,196,7,0.06)', borderBottom: '1px solid var(--border)' }}>
            <div>
              <span className="text-xs font-semibold" style={{ color: 'var(--accent)' }}>Map columns internal fields</span>
              <span className="text-xs ml-2" style={{ color: 'var(--text-muted)' }}>Left = your CSV header, Right = data field to fill</span>
            </div>
            <button onClick={addRow} className="text-xs px-2 py-1 rounded" style={{ border: '1px solid var(--border)', color: 'var(--text-muted)' }}>
              + Add column
            </button>
          </div>

          <div className="p-3 space-y-1.5 max-h-72 overflow-y-auto">
            {editing.map((col, i) => (
              <div key={i} className="flex items-center gap-2">
                <input
                  value={col.header}
                  onChange={e => updateHeader(i, e.target.value)}
                  placeholder="Column name in CSV…"
                  className="flex-1 px-2 py-1.5 rounded text-xs outline-none"
                  style={{ background: 'var(--bg-elevated, #111)', border: '1px solid var(--border)', color: 'var(--text-primary)' }}
                />
                <span className="text-xs" style={{ color: 'var(--text-muted)' }}></span>
                <select
                  value={col.field ?? ''}
                  onChange={e => updateField(i, e.target.value)}
                  className="flex-1 px-2 py-1.5 rounded text-xs outline-none"
                  style={{ background: 'var(--bg-elevated, #111)', border: '1px solid var(--border)', color: 'var(--text-primary)' }}
                >
                  <option value=""> empty </option>
                  {internalFields.map(f => (
                    <option key={f} value={f}>{f}</option>
                  ))}
                </select>
                {/* Icon-only control: give it a visible glyph and an accessible name. */}
                <button
                  onClick={() => removeRow(i)}
                  aria-label="Remove column"
                  title="Remove column"
                  className="text-xs px-1.5 py-1 rounded hover:opacity-70"
                  style={{ color: 'var(--danger)', border: '1px solid rgba(255,50,50,0.2)' }}
                >
                  ×
                </button>
              </div>
            ))}
          </div>

          <div className="flex justify-end gap-2 px-3 pb-3">
            <button
              onClick={() => setStage('idle')}
              className="px-3 py-1.5 rounded text-xs"
              style={{ border: '1px solid var(--border)', color: 'var(--text-muted)' }}
            >
              Cancel
            </button>
            <button
              onClick={handleSave}
              disabled={busy || editing.length === 0}
              className="px-4 py-1.5 rounded text-xs font-medium disabled:opacity-40"
              style={{ background: 'var(--accent)', color: '#000' }}
            >
              {busy ? 'Saving…' : 'Save template'}
            </button>
          </div>
        </div>
      )}
    </div>
  )
}

View file

@ -0,0 +1,52 @@
import { useCallback } from 'react'
import { useDropzone } from 'react-dropzone'
/** Props for FileDropzone. */
interface Props {
/** Called with the accepted files when at least one file was accepted. */
onFiles: (files: File[]) => void
/** While true the dropzone and the browse button are disabled and the area is dimmed. */
loading: boolean
}
/** MIME type to file-extension map passed as the dropzone's `accept` option. */
const ACCEPTED = {
'application/pdf': ['.pdf'],
'application/vnd.openxmlformats-officedocument.presentationml.presentation': ['.pptx'],
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
}
/**
 * Drag-and-drop upload area for brief documents.
 *
 * Accepts the types listed in ACCEPTED up to 200 MB each and forwards the
 * accepted files to `onFiles`. While `loading` is true the dropzone is
 * disabled and rendered at half opacity.
 */
export default function FileDropzone({ onFiles, loading }: Props) {
  // Forward only non-empty batches to the parent.
  const handleDrop = useCallback(
    (droppedFiles: File[]) => {
      if (droppedFiles.length === 0) return
      onFiles(droppedFiles)
    },
    [onFiles],
  )

  const dropzone = useDropzone({
    onDrop: handleDrop,
    accept: ACCEPTED,
    disabled: loading,
    maxSize: 200 * 1024 * 1024,
  })
  const { getRootProps, getInputProps, isDragActive } = dropzone

  // Highlight the border and background while a file is dragged over the area.
  const borderColor = isDragActive ? 'var(--accent)' : 'var(--border)'
  const containerStyle = {
    border: `2px dashed ${borderColor}`,
    background: isDragActive ? 'rgba(255,196,7,0.05)' : 'var(--bg-card)',
    opacity: loading ? 0.5 : 1,
  }
  const headline = isDragActive ? 'Drop brief here' : 'Drag & drop your brief'

  return (
    <div
      {...getRootProps()}
      className="rounded-xl p-12 text-center cursor-pointer transition-colors"
      style={containerStyle}
    >
      <input {...getInputProps()} />
      <div className="text-4xl mb-3">📄</div>
      <p className="font-medium mb-1" style={{ color: 'var(--text-primary)' }}>
        {headline}
      </p>
      <p className="text-sm" style={{ color: 'var(--text-muted)' }}>
        PDF, PPTX, DOCX, XLSX up to 200 MB
      </p>
      <button
        className="mt-4 px-4 py-2 rounded text-sm font-medium"
        style={{ background: 'var(--accent)', color: '#000' }}
        disabled={loading}
      >
        Browse files
      </button>
    </div>
  )
}

View file

@ -0,0 +1,103 @@
import type { Job } from '../../types'
import { useNavigate } from 'react-router-dom'
/**
 * Display label for each known job phase.
 * A phase missing from this map is shown as its raw phase string.
 */
const PHASE_LABELS: Record<string, string> = {
QUEUED: 'Queued',
EXTRACT_CONTENT: 'Extracting Content',
LLM_ANALYSIS: 'AI Analysis',
CONSOLIDATION: 'Consolidating',
CSV_GENERATION: 'Generating CSV',
COMPLETED: 'Completed',
FAILED: 'Failed',
}
/** Props for JobProgressCard; the delete button is rendered only when `onDelete` is provided. */
interface Props { job: Job; onDelete?: (id: string) => void }
/**
 * Card summarising one processing job: file name and size, phase badge,
 * progress bar, per-provider status chips, and (depending on the phase) a
 * "Review Results" button, the error text, or a one-line result summary.
 *
 * @param job       Job to display.
 * @param onDelete  Optional; when provided a delete button is shown and calls it with `job.id`.
 */
export default function JobProgressCard({ job, onDelete }: Props) {
  const navigate = useNavigate()
  const isDone = job.phase === 'COMPLETED'
  const isFailed = job.phase === 'FAILED'

  // Clamp so a missing or out-of-range percentage cannot overflow the track.
  const progressPct = Math.min(100, Math.max(0, job.progressPct ?? 0))

  // Computed once, with the null guard applied to both the check and the render.
  const providerEntries = Object.entries(job.providerUpdates || {})

  const summary = job.summary
  // Cost and time are optional; show a dash rather than a bare "$" or "s".
  const costLabel = summary?.costUsdTotal != null ? `$${summary.costUsdTotal.toFixed(4)}` : '—'
  const timeLabel = summary?.processingTimeSeconds != null ? `${summary.processingTimeSeconds.toFixed(1)}s` : '—'

  return (
    <div className="rounded-lg p-4" style={{ background: 'var(--bg-card)', border: '1px solid var(--border)' }}>
      <div className="flex items-start justify-between mb-2">
        <div>
          <div className="font-medium text-sm truncate max-w-xs" style={{ color: 'var(--text-primary)' }}>
            {job.fileName}
          </div>
          <div className="text-xs mt-0.5" style={{ color: 'var(--text-muted)' }}>
            {job.fileSize ? (job.fileSize / 1024 / 1024).toFixed(1) + ' MB' : '—'}
          </div>
        </div>
        <div className="flex items-center gap-2">
          <span
            className="text-xs px-2 py-1 rounded font-medium"
            style={{
              background: isDone ? 'rgba(34,197,94,0.15)' : isFailed ? 'rgba(239,68,68,0.15)' : 'rgba(255,196,7,0.15)',
              color: isDone ? 'var(--success)' : isFailed ? 'var(--danger)' : 'var(--accent)',
            }}
          >
            {PHASE_LABELS[job.phase] || job.phase}
          </span>
          {onDelete && (
            // Icon-only control: give it a visible glyph and an accessible name.
            <button
              onClick={() => onDelete(job.id)}
              aria-label="Delete job"
              title="Delete job"
              className="text-xs"
              style={{ color: 'var(--text-muted)' }}
            >
              ×
            </button>
          )}
        </div>
      </div>

      {/* Progress bar */}
      {!isFailed && (
        <div className="h-1 rounded-full mb-2" style={{ background: 'var(--border)' }}>
          <div
            className="h-full rounded-full transition-all duration-500"
            style={{ width: `${progressPct}%`, background: isDone ? 'var(--success)' : 'var(--accent)' }}
          />
        </div>
      )}

      {job.stepLabel && (
        <div className="text-xs mb-2" style={{ color: 'var(--text-muted)' }}>{job.stepLabel}</div>
      )}

      {/* Provider updates */}
      {providerEntries.length > 0 && (
        <div className="flex gap-2 flex-wrap mb-2">
          {providerEntries.map(([key, pu]) => (
            <span key={key} className="text-xs px-2 py-0.5 rounded" style={{
              background: pu.status === 'success' ? 'rgba(34,197,94,0.1)' : pu.status === 'error' ? 'rgba(239,68,68,0.1)' : 'rgba(255,255,255,0.05)',
              color: pu.status === 'success' ? 'var(--success)' : pu.status === 'error' ? 'var(--danger)' : 'var(--text-muted)',
              border: '1px solid var(--border)',
            }}>
              {pu.provider} {pu.status === 'success' ? '✓' : pu.status === 'error' ? '✗' : '…'}
            </span>
          ))}
        </div>
      )}

      {/* Action buttons */}
      {isDone && (
        <div className="flex gap-2 mt-2">
          <button
            onClick={() => navigate(`/brief/review/${job.id}`)}
            className="px-3 py-1.5 rounded text-xs font-medium"
            style={{ background: 'var(--accent)', color: '#000' }}
          >
            Review Results
          </button>
        </div>
      )}

      {isFailed && job.error && (
        <div className="text-xs mt-1 p-2 rounded" style={{ background: 'rgba(239,68,68,0.1)', color: 'var(--danger)' }}>
          {job.error}
        </div>
      )}

      {isDone && summary && (
        <div className="text-xs mt-2" style={{ color: 'var(--text-muted)' }}>
          {summary.assetsExtracted} assets · {costLabel} · {timeLabel}
        </div>
      )}
    </div>
  )
}

View file

@ -0,0 +1,29 @@
import React, { useEffect } from 'react'
import Sidebar from './Sidebar'
import TopBar from './TopBar'
import { useSheetStore } from '../../stores/useSheetStore'
import { useWebSocket } from '../../hooks/useWebSocket'
/** Props for AppShell. */
interface Props {
/** Content rendered inside the shell's scrollable <main> area. */
children: React.ReactNode
}
/**
 * Application frame: Sidebar on the left, TopBar above a scrollable main
 * area that renders `children`. On mount it triggers a sheet fetch, and it
 * hosts the WebSocket hook for the whole app.
 */
export default function AppShell({ children }: Props) {
  const loadSheets = useSheetStore(store => store.fetchSheets)

  // Fetch the sheet list once, when the shell first mounts.
  useEffect(() => {
    loadSheets()
  }, [])

  // WebSocket mounted here so job updates persist across all pages
  useWebSocket()

  const content = (
    <div className="flex-1 flex flex-col overflow-hidden">
      <TopBar />
      <main className="flex-1 overflow-auto p-4">
        {children}
      </main>
    </div>
  )

  return (
    <div className="flex h-screen overflow-hidden" style={{ background: 'var(--bg-color)' }}>
      <Sidebar />
      {content}
    </div>
  )
}

Some files were not shown because too many files have changed in this diff Show more