From e7720951585b9f74dfbcb3d0ec563cd91f97d253 Mon Sep 17 00:00:00 2001
From: nickviljoen
Date: Sat, 9 May 2026 14:08:06 +0200
Subject: [PATCH] Phase 2: deploy machinery for Dev/Prod cutover
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- deploy.sh dev|prod <tag> with --dry-run, auto-rollback if /health fails
  within 60s; checkpoint saved to .last_deploy_rollback before reset
- deploy/rollback.sh last|<commit> with the same Docker compose dance
- deploy/health-check.sh — curl wrapper for monitoring/oncall
- deploy/apache-{dev,prod}.conf — Location blocks proxying /hm-aiqc/ to
  gunicorn on 127.0.0.1:5050 with X-Script-Name set so wsgi.py's
  ReverseProxied middleware emits prefixed URLs
- deploy/.env.{dev,prod}.example — starter envs with Azure SSO config
---
 deploy.sh                | 184 +++++++++++++++++++++++++++++++++++++++
 deploy/.env.dev.example  |  31 +++++++
 deploy/.env.prod.example |  31 +++++++
 deploy/apache-dev.conf   |  28 ++++++
 deploy/apache-prod.conf  |  28 ++++++
 deploy/health-check.sh   |  10 +++
 deploy/rollback.sh       |  75 ++++++++++++++++
 7 files changed, 387 insertions(+)
 create mode 100755 deploy.sh
 create mode 100644 deploy/.env.dev.example
 create mode 100644 deploy/.env.prod.example
 create mode 100644 deploy/apache-dev.conf
 create mode 100644 deploy/apache-prod.conf
 create mode 100755 deploy/health-check.sh
 create mode 100755 deploy/rollback.sh

diff --git a/deploy.sh b/deploy.sh
new file mode 100755
index 0000000..f0f0504
--- /dev/null
+++ b/deploy.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# HM AI QC deploy script (Docker Compose).
+#
+# Usage:
+#   deploy.sh dev                   Deploy origin/develop HEAD to this server
+#   deploy.sh prod <tag>            Deploy a specific tag to this server
+#   deploy.sh dev --dry-run         Show what would change, make no changes
+#   deploy.sh prod <tag> --dry-run
+#
+# Runs on the target server (optical-dev / optical-prod), not your laptop.
+# Saves a rollback checkpoint to .last_deploy_rollback before changing
+# anything, and auto-rolls back if the post-deploy /health probe fails.
+#
+# Differences from the AI QC sibling script (intentional):
+#   * Docker Compose, not systemd. `docker compose up -d` replaces
+#     systemctl restart; `docker compose build` replaces pip install.
+#   * `flask db upgrade` runs as a one-shot container before bringing up
+#     the web service, so schema changes apply atomically with the deploy.
+#   * No "delete frontend / build frontend / copy to /var/www/html" steps
+#     from the IT spec — HM QC ships Flask templates, not an SPA bundle.
+
+set -euo pipefail
+
+APP_DIR=${APP_DIR:-/opt/hm-aiqc}
+HEALTH_URL=${HEALTH_URL:-http://127.0.0.1:5050/health}
+ROLLBACK_FILE="$APP_DIR/.last_deploy_rollback"
+
+MODE=${1:-}
+shift || true
+
+DRY_RUN=false
+TARGET_TAG=""
+
+case "$MODE" in
+    dev)
+        for arg in "$@"; do
+            [[ "$arg" == "--dry-run" ]] && DRY_RUN=true
+        done
+        ;;
+    prod)
+        TARGET_TAG=${1:-}
+        shift || true
+        for arg in "$@"; do
+            [[ "$arg" == "--dry-run" ]] && DRY_RUN=true
+        done
+        if [[ -z "$TARGET_TAG" || "$TARGET_TAG" == -* ]]; then  # "prod --dry-run" has no tag
+            echo "Usage: $0 prod <tag> [--dry-run]" >&2
+            exit 1
+        fi
+        ;;
+    ""|-h|--help)
+        cat <<EOF
+Usage:
+  $(basename "$0") dev [--dry-run]          Deploy origin/develop HEAD to this server
+  $(basename "$0") prod <tag> [--dry-run]   Deploy a specific tag to this server
+
+Run on the target server. Requires permission to talk to docker.
+EOF
+        exit 0
+        ;;
+    *)
+        echo "Unknown mode: $MODE" >&2
+        echo "Try: $(basename "$0") --help" >&2
+        exit 1
+        ;;
+esac
+
+cd "$APP_DIR"
+if [[ ! -d .git ]]; then
+    echo "ERROR: $APP_DIR is not a git repo" >&2
+    exit 1
+fi
+
+if [[ ! -f .env ]]; then
+    echo "ERROR: $APP_DIR/.env not found. Copy from deploy/.env.${MODE}.example and fill in." >&2
+    exit 1
+fi
+
+CURRENT_REV=$(git rev-parse HEAD)
+CURRENT_SHORT=$(git rev-parse --short HEAD)
+
+echo "============================================"
+echo " HM AI QC deploy ($MODE)"
+echo "============================================"
+echo "Server: $(hostname)"
+echo "Current: $CURRENT_SHORT $(git log -1 --format='%s' HEAD)"
+echo ""
+echo "Fetching latest refs..."
+git fetch --tags --prune --quiet
+
+if [[ "$MODE" == "dev" ]]; then
+    TARGET_REF="origin/develop"
+else
+    if ! git rev-parse --verify --quiet "refs/tags/$TARGET_TAG^{commit}" > /dev/null; then
+        echo "ERROR: Tag '$TARGET_TAG' not found after fetch" >&2
+        exit 1
+    fi
+    TARGET_REF="refs/tags/$TARGET_TAG"
+fi
+
+TARGET_REV=$(git rev-parse "$TARGET_REF")
+TARGET_SHORT=$(git rev-parse --short "$TARGET_REF")
+
+if [[ "$CURRENT_REV" == "$TARGET_REV" ]]; then
+    echo "Already at $TARGET_SHORT — nothing to do."
+    exit 0
+fi
+
+echo "Target: $TARGET_SHORT $(git log -1 --format='%s' "$TARGET_REF")"
+echo ""
+echo "Commits to apply:"
+git log --oneline -n 20 "$CURRENT_REV..$TARGET_REV"  # -n, not "| head": SIGPIPE + pipefail aborts the deploy
+CHANGE_COUNT=$(git rev-list --count "$CURRENT_REV..$TARGET_REV")
+if (( CHANGE_COUNT > 20 )); then
+    echo " ... and $((CHANGE_COUNT - 20)) more"
+fi
+echo ""
+
+if git diff --name-only "$CURRENT_REV" "$TARGET_REV" | grep -E "(^|/)migrations/versions/" > /dev/null; then  # no -q: early exit would SIGPIPE git under pipefail
+    echo "Note: Alembic migrations changed — flask db upgrade will run."
+    echo ""
+fi
+
+if [[ "$DRY_RUN" == "true" ]]; then
+    echo "Dry run — no changes made."
+    exit 0
+fi
+
+read -r -p "Proceed with deploy? (y/N): " confirm
+if [[ ! $confirm =~ ^[Yy]$ ]]; then
+    echo "Cancelled."
+    exit 0
+fi
+
+echo "$CURRENT_REV" > "$ROLLBACK_FILE"
+
+# Poll $HEALTH_URL every 2s until it answers 2xx, or timeout.
+# 60s window allows for migration time on first boot of a major release.
+# --max-time bounds each probe so a hung socket cannot stall the loop.
+wait_for_health() {
+    local i max_attempts=30  # 30 × 2s = 60s window
+    for ((i=1; i<=max_attempts; i++)); do
+        sleep 2
+        if curl -sf --max-time 5 -o /dev/null "$HEALTH_URL"; then
+            echo " healthy after ${i}x2s"
+            return 0
+        fi
+    done
+    return 1
+}
+
+# Check out $1, rebuild, restart and smoke test. Only ever called as an
+# `if` condition, where set -e is suspended — hence the explicit returns.
+roll_to() {
+    git reset --hard "$1" || return 1
+    echo "Building images..."
+    docker compose build || return 1
+    echo "Starting services (entrypoint runs flask db upgrade first)..."
+    docker compose up -d || return 1
+    echo "Smoke testing $HEALTH_URL..."
+    wait_for_health
+}
+
+echo "Applying changes..."
+if roll_to "$TARGET_REV"; then
+    NEW_SHORT=$(git rev-parse --short HEAD)
+    echo ""
+    echo "Deploy OK. Now at $NEW_SHORT."
+    echo "Rollback target saved: $CURRENT_SHORT (run deploy/rollback.sh last to revert)"
+    exit 0
+fi
+
+echo ""
+echo "Deploy failed (build, start or smoke test) — rolling back to $CURRENT_SHORT..."
+if roll_to "$CURRENT_REV"; then
+    echo "Rolled back successfully. Service healthy at $CURRENT_SHORT."
+    echo "Investigate: docker compose logs --tail=200 web"
+    exit 1
+fi
+
+echo "ROLLBACK ALSO FAILED. Service is in a broken state."
+echo "docker compose ps"
+echo "docker compose logs --tail=200 web"
+exit 2
diff --git a/deploy/.env.dev.example b/deploy/.env.dev.example
new file mode 100644
index 0000000..3fbb0e2
--- /dev/null
+++ b/deploy/.env.dev.example
@@ -0,0 +1,31 @@
+# HM AI QC — Dev environment starter
+# Copy to /opt/hm-aiqc/.env and fill in real secrets.
+
+# Azure AD authentication (shared with AI QC sibling project)
+AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
+AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
+
+ENVIRONMENT=development
+
+# Box Configuration
+BOX_CONFIG_PATH=config/box_config.json
+BOX_REPORT_FOLDER_ID=133295752718
+BOX_CAMPAIGNS_FOLDER_ID=156182880490
+
+# Flask
+FLASK_APP=app:app
+FLASK_ENV=production
+# Generate with: python -c 'import secrets; print(secrets.token_urlsafe(48))'
+SECRET_KEY=replace-me-with-a-long-random-string
+
+# Server
+HOST=0.0.0.0
+PORT=5000
+
+# Database — absolute path inside the container, mapped to ./database on host
+DATABASE_URI=sqlite:////app/database/qc_platform.db
+
+# LLM Provider Keys (NO HARDCODED KEYS — set real values below)
+OPENAI_API_KEY=
+GOOGLE_API_KEY=
+ANTHROPIC_API_KEY=
diff --git a/deploy/.env.prod.example b/deploy/.env.prod.example
new file mode 100644
index 0000000..bbc36f6
--- /dev/null
+++ b/deploy/.env.prod.example
@@ -0,0 +1,31 @@
+# HM AI QC — Prod environment starter
+# Copy to /opt/hm-aiqc/.env and fill in real secrets.
+
+# Azure AD authentication (shared with AI QC sibling project)
+AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
+AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
+
+ENVIRONMENT=production
+
+# Box Configuration
+BOX_CONFIG_PATH=config/box_config.json
+BOX_REPORT_FOLDER_ID=133295752718
+BOX_CAMPAIGNS_FOLDER_ID=156182880490
+
+# Flask
+FLASK_APP=app:app
+FLASK_ENV=production
+# Generate with: python -c 'import secrets; print(secrets.token_urlsafe(48))'
+SECRET_KEY=replace-me-with-a-long-random-string
+
+# Server
+HOST=0.0.0.0
+PORT=5000
+
+# Database — absolute path inside the container, mapped to ./database on host
+DATABASE_URI=sqlite:////app/database/qc_platform.db
+
+# LLM Provider Keys (NO HARDCODED KEYS — set real values below)
+OPENAI_API_KEY=
+GOOGLE_API_KEY=
+ANTHROPIC_API_KEY=
diff --git a/deploy/apache-dev.conf b/deploy/apache-dev.conf
new file mode 100644
index 0000000..00b8e1f
--- /dev/null
+++ b/deploy/apache-dev.conf
@@ -0,0 +1,28 @@
+# Apache Location block for HM AI QC on optical-dev.
+#
+# Insert inside the existing <VirtualHost> for
+#   ServerName optical-dev.oliver.solutions
+# and reload:
+#   sudo systemctl reload apache2
+#
+# Required modules (enable once if not already):
+#   sudo a2enmod proxy proxy_http headers
+#
+# The X-Script-Name header is read by wsgi.py's ReverseProxied middleware
+# so url_for() generates URLs prefixed with /hm-aiqc.
+
+ProxyPass /hm-aiqc/ http://127.0.0.1:5050/
+ProxyPassReverse /hm-aiqc/ http://127.0.0.1:5050/
+
+# Bare /hm-aiqc (no trailing slash) → redirect with slash so the prefix matches
+RewriteEngine On
+RewriteRule ^/hm-aiqc$ /hm-aiqc/ [R=301,L]
+
+<Location /hm-aiqc/>
+    RequestHeader set X-Script-Name "/hm-aiqc"
+    RequestHeader set X-Forwarded-Proto "https"
+    ProxyPreserveHost On
+
+    # Long-running endpoints (QC execution, Box searches, video analysis)
+    ProxyTimeout 600
+</Location>
diff --git a/deploy/apache-prod.conf b/deploy/apache-prod.conf
new file mode 100644
index 0000000..d6565c0
--- /dev/null
+++ b/deploy/apache-prod.conf
@@ -0,0 +1,28 @@
+# Apache Location block for HM AI QC on optical-prod.
+#
+# Insert inside the existing <VirtualHost> for
+#   ServerName optical-prod.oliver.solutions
+# and reload:
+#   sudo systemctl reload apache2
+#
+# Required modules (enable once if not already):
+#   sudo a2enmod proxy proxy_http headers
+#
+# The X-Script-Name header is read by wsgi.py's ReverseProxied middleware
+# so url_for() generates URLs prefixed with /hm-aiqc.
+
+ProxyPass /hm-aiqc/ http://127.0.0.1:5050/
+ProxyPassReverse /hm-aiqc/ http://127.0.0.1:5050/
+
+# Bare /hm-aiqc (no trailing slash) → redirect with slash so the prefix matches
+RewriteEngine On
+RewriteRule ^/hm-aiqc$ /hm-aiqc/ [R=301,L]
+
+<Location /hm-aiqc/>
+    RequestHeader set X-Script-Name "/hm-aiqc"
+    RequestHeader set X-Forwarded-Proto "https"
+    ProxyPreserveHost On
+
+    # Long-running endpoints (QC execution, Box searches, video analysis)
+    ProxyTimeout 600
+</Location>
diff --git a/deploy/health-check.sh b/deploy/health-check.sh
new file mode 100755
index 0000000..3b7f0e2
--- /dev/null
+++ b/deploy/health-check.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Quick "is the app alive?" check. Prints status and exits 0 (healthy) or 1 (not).
+HEALTH_URL=${HEALTH_URL:-http://127.0.0.1:5050/health}
+
+if output=$(curl -sf --max-time 5 "$HEALTH_URL" 2>&1); then  # bounded: a hung app must report DOWN, not block
+    echo "OK $output"
+    exit 0
+fi
+echo "DOWN no response from $HEALTH_URL"
+exit 1
diff --git a/deploy/rollback.sh b/deploy/rollback.sh
new file mode 100755
index 0000000..f88de90
--- /dev/null
+++ b/deploy/rollback.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Emergency rollback for HM AI QC.
+#
+# Usage:
+#   rollback.sh last        Roll back to the checkpoint saved by deploy.sh
+#   rollback.sh <commit>    Roll back to an explicit commit
+#
+# Note: Alembic downgrade is intentionally NOT run here — schema rollbacks
+# are risky on data-bearing tables. If the bad deploy added a column the
+# rolled-back code doesn't know about, that's almost always fine. If it
+# dropped or renamed a column, restore from the daily DB backup.
+
+set -euo pipefail
+
+APP_DIR=${APP_DIR:-/opt/hm-aiqc}
+HEALTH_URL=${HEALTH_URL:-http://127.0.0.1:5050/health}
+ROLLBACK_FILE="$APP_DIR/.last_deploy_rollback"
+
+TARGET=${1:-}
+
+if [[ -z "$TARGET" || "$TARGET" == "last" ]]; then
+    if [[ ! -f "$ROLLBACK_FILE" ]]; then
+        echo "No .last_deploy_rollback file. Specify a commit hash explicitly." >&2
+        echo "Usage: $(basename "$0") last | <commit>" >&2
+        exit 1
+    fi
+    TARGET=$(<"$ROLLBACK_FILE")
+fi
+
+cd "$APP_DIR"
+
+if ! git rev-parse --verify --quiet "$TARGET^{commit}" > /dev/null; then
+    echo "ERROR: Commit '$TARGET' not found" >&2
+    exit 1
+fi
+
+CURRENT_REV=$(git rev-parse HEAD)
+CURRENT_SHORT=$(git rev-parse --short HEAD)
+TARGET_REV=$(git rev-parse "$TARGET")
+TARGET_SHORT=$(git rev-parse --short "$TARGET")
+
+if [[ "$CURRENT_REV" == "$TARGET_REV" ]]; then
+    echo "Already at $TARGET_SHORT — nothing to do."
+    exit 0
+fi
+
+echo "============================================"
+echo " HM AI QC rollback"
+echo "============================================"
+echo "Current: $CURRENT_SHORT $(git log -1 --format='%s' HEAD)"
+echo "Target: $TARGET_SHORT $(git log -1 --format='%s' "$TARGET")"
+echo ""
+
+read -r -p "Proceed? (y/N): " confirm
+if [[ ! $confirm =~ ^[Yy]$ ]]; then
+    echo "Cancelled."
+    exit 0
+fi
+
+git reset --hard "$TARGET_REV"
+docker compose build
+docker compose up -d
+
+# 60s window — same as deploy.sh; --max-time keeps a hung probe from stretching it
+for i in {1..30}; do
+    sleep 2
+    if curl -sf --max-time 5 -o /dev/null "$HEALTH_URL"; then
+        echo "Rollback OK. Now at $TARGET_SHORT."
+        exit 0
+    fi
+done
+
+echo "Service unhealthy after rollback." >&2
+echo "docker compose logs --tail=200 web" >&2
+exit 1