hm_ai_qc_report_tool/app.py
nickviljoen 6a2945275a Reporting: filesystem-back the search-result cache
The previous in-memory dict only worked with a single gunicorn worker.
With workers=2 in gunicorn_config.py, the async-search worker stored
the result in its own process memory while the dashboard request
landed on the other worker ~50% of the time — cache miss → fell
through to a synchronous Box fetch → exceeded the GCP load
balancer's 30s timeout, returning "stream timeout" to the user even
though the search itself succeeded.

Now stores cache entries as pickled files at storage/cache/<key>.pkl,
shared across workers via the existing volume mount. Atomic writes
via tempfile + os.replace. TTL still 30 minutes. Public API
(cache_set/get/delete/cleanup) is unchanged so call sites in
reporting/routes.py continue to work.
2026-05-09 17:46:42 +02:00

271 lines
9 KiB
Python

"""
Flask Application Factory for Unified HM QC Platform.
This application merges multiple QC tools into a single platform with:
- HM QC (PDF/image quality control)
- Video QC (video quality control)
- Video Master Adot Detection (video matching)
- Reporting (consolidated reports from Box.com and QC modules)
"""
import logging
import os
from flask import Flask, render_template, request, redirect, url_for, g, jsonify
# Import configuration
import config as app_config
# Import core modules
from core.auth.middleware import AuthMiddleware
from core.models.database import init_db, db
from core.services.box_client import BoxReportClient
# Root logging setup: INFO level, each record carries timestamp, logger
# name and severity ahead of the message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
def _import_legacy_global_pricing(app):
"""Import storage/reference/global_pricing.json as a PricingReference row
if it exists. Runs once — only when the pricing_references table is empty.
"""
import json
from datetime import datetime
from core.models.database import db
from core.models.pricing_reference import PricingReference
legacy_json = os.path.join('storage', 'reference', 'global_pricing.json')
legacy_pdf = os.path.join('storage', 'reference', 'global_pricing.pdf')
if not os.path.exists(legacy_json):
return
try:
with open(legacy_json, 'r', encoding='utf-8') as f:
data = json.load(f)
except (json.JSONDecodeError, OSError):
return
# Create a storage dir and copy the PDF alongside (if present) so the
# legacy row has a coherent file location.
import shutil
ref = PricingReference(
name='Default (legacy global)',
pdf_filename=os.path.basename(legacy_pdf) if os.path.exists(legacy_pdf) else 'global_pricing.json',
pdf_path='',
parsed_data_json=json.dumps(data, ensure_ascii=False),
parsed_at=datetime.utcnow(),
status='ready',
created_by='system (legacy import)'
)
db.session.add(ref)
db.session.flush()
target_dir = os.path.join('storage', 'pricing_references', str(ref.id))
os.makedirs(target_dir, exist_ok=True)
if os.path.exists(legacy_pdf):
target_pdf = os.path.join(target_dir, os.path.basename(legacy_pdf))
shutil.copy2(legacy_pdf, target_pdf)
ref.pdf_path = target_pdf
else:
# Persist the JSON itself so the row has a valid file_path
target_json = os.path.join(target_dir, 'global_pricing.json')
shutil.copy2(legacy_json, target_json)
ref.pdf_path = target_json
db.session.commit()
logger.info(f"Imported legacy global pricing as PricingReference id={ref.id}")
def create_app(config_class=app_config.Config):
    """
    Build and wire up the Flask application.

    Args:
        config_class: Configuration object loaded via app.config.from_object

    Returns:
        Configured Flask application instance
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    # Working directories (relative to the current working directory)
    for directory in (
        'database',
        'uploads/hm_qc',
        'uploads/video_qc',
        'uploads/video_master',
        'storage/reports/hm_qc',
        'storage/reports/consolidated',
        'storage/campaigns',
        'storage/reference',
        'storage/pricing_references',
        'storage/cache',
    ):
        os.makedirs(directory, exist_ok=True)

    # Database
    init_db(app)
    logger.info("Database initialized")

    # One-time migration: when no PricingReference rows exist yet, try to
    # import the legacy global pricing JSON as a default row so existing
    # installs keep working after the move to per-run references.
    # Any failure here is logged and does not stop startup.
    try:
        with app.app_context():
            from core.models.pricing_reference import PricingReference
            if PricingReference.query.count() == 0:
                _import_legacy_global_pricing(app)
    except Exception as exc:
        logger.warning(f"Legacy pricing import skipped: {exc}")

    # Azure AD JWT authentication middleware, also exposed as app.auth
    auth_middleware = AuthMiddleware(app)
    logger.info("Authentication initialized (Azure AD)")
    app.auth = auth_middleware

    # Azure identifiers for every template (login.html + base.html)
    @app.context_processor
    def inject_azure_config():
        return dict(
            azure_tenant_id=auth_middleware.tenant_id,
            azure_client_id=auth_middleware.client_id,
        )

    # Box client is created lazily on first request for it
    app._box_client = None

    def get_box_client():
        """Return the cached Box client, building it on first use."""
        if app._box_client is not None:
            return app._box_client
        try:
            app._box_client = BoxReportClient(
                config_path=app.config['BOX_CONFIG_PATH'],
                report_folder_id=app.config['BOX_REPORT_FOLDER_ID'],
            )
            logger.info("Box client initialized successfully")
        except Exception as exc:
            logger.error(f"Failed to initialize Box client: {exc}")
            raise
        return app._box_client

    # Views reach the client through app.get_box_client()
    app.get_box_client = get_box_client

    # --- Blueprints -------------------------------------------------------

    # Health (must be unauthenticated for deploy smoke-tests)
    from core.health import health_bp
    app.register_blueprint(health_bp)
    logger.info("Health blueprint registered at /health")

    # Auth (login/logout endpoints)
    from core.auth.routes import auth_bp
    app.register_blueprint(auth_bp)
    logger.info("Auth blueprint registered at /auth")

    # Every route except auth, health and static needs the Azure AD JWT cookie
    @app.before_request
    def require_login():
        if request.path.startswith(('/auth/', '/static/', '/health')):
            return None

        outcome = auth_middleware.is_authenticated()
        if outcome['authenticated']:
            # Expose user to downstream views and templates
            g.user = outcome['user']
            g.token_payload = outcome['payload']
            return None

        # JSON / fetch callers get a 401 body; page loads go to the login page
        if (
            request.is_json
            or 'application/json' in request.headers.get('Accept', '')
            or request.headers.get('X-Requested-With') == 'XMLHttpRequest'
        ):
            body = {
                'error': 'Authentication required',
                'message': outcome.get('error'),
                'authenticated': False
            }
            return jsonify(body), 401
        return redirect(url_for('auth.login_page'))

    # Reporting (Task #3, completed)
    from modules.reporting import reporting_bp
    app.register_blueprint(reporting_bp)
    logger.info("Reporting blueprint registered at /reporting")

    # HM QC (Task #4, completed)
    from modules.hm_qc import hm_qc_bp
    app.register_blueprint(hm_qc_bp)
    logger.info("HM QC blueprint registered at /hm-qc")

    # Video QC (Task #5, beta)
    from modules.video_qc import video_qc_bp
    app.register_blueprint(video_qc_bp)
    logger.info("Video QC blueprint (BETA) registered at /video-qc")

    # Video Master (Task #6, beta)
    from modules.video_master import video_master_bp
    app.register_blueprint(video_master_bp)
    logger.info("Video Master blueprint (BETA) registered at /video-master")

    # Usage dashboard
    from modules.usage import usage_bp
    app.register_blueprint(usage_bp)
    logger.info("Usage dashboard registered at /usage")

    # Campaign management
    from modules.campaigns import campaigns_bp
    app.register_blueprint(campaigns_bp)
    logger.info("Campaigns blueprint registered at /campaigns")

    # Printer check
    from modules.printer_check import printer_check_bp
    app.register_blueprint(printer_check_bp)
    logger.info("Printer Check blueprint registered at /printer-check")

    @app.route('/')
    def root():
        """Serve the reporting index page at the site root."""
        return render_template('reporting/index.html', active_tab='reporting')

    # Error pages
    register_error_handlers(app)

    logger.info("Application initialized successfully")
    return app
def register_error_handlers(app):
    """
    Attach the 404 and 500 error pages to the application.

    Args:
        app: Flask application
    """
    def not_found(error):
        """Render the 404 page."""
        return render_template('404.html'), 404

    def internal_error(error):
        """Log the error, then render the 500 page."""
        logger.error(f"Internal server error: {error}")
        return render_template('500.html'), 500

    # Same registration the @app.errorhandler decorator performs
    app.register_error_handler(404, not_found)
    app.register_error_handler(500, internal_error)
# Create application instance
app = create_app()

if __name__ == '__main__':
    # Direct-run entry point (python app.py).
    # The debug flag comes from the loaded configuration instead of being
    # hard-coded to True: debug mode enables the interactive debugger, which
    # allows arbitrary code execution and must not be reachable on a
    # network-facing HOST. Set DEBUG = True in the config for local work.
    app.run(
        host=app.config['HOST'],
        port=app.config['PORT'],
        debug=bool(app.config.get('DEBUG', False)),
    )