Phase 2: deploy machinery for Dev/Prod cutover
- deploy.sh dev|prod with --dry-run, auto-rollback if /health fails
within 60s; checkpoint saved to .last_deploy_rollback before reset
- deploy/rollback.sh last|<sha> with the same Docker compose dance
- deploy/health-check.sh — curl wrapper for monitoring/oncall
- deploy/apache-{dev,prod}.conf — Location blocks proxying /hm-aiqc/
to gunicorn on 127.0.0.1:5050 with X-Script-Name set so wsgi.py's
ReverseProxied middleware emits prefixed URLs
- deploy/.env.{dev,prod}.example — starter envs with Azure SSO config
This commit is contained in:
parent
84326352b2
commit
e772095158
7 changed files with 387 additions and 0 deletions
184
deploy.sh
Executable file
184
deploy.sh
Executable file
|
|
@ -0,0 +1,184 @@
|
|||
#!/bin/bash
|
||||
# HM AI QC deploy script (Docker Compose).
|
||||
#
|
||||
# Usage:
|
||||
# deploy.sh dev Deploy origin/develop HEAD to this server
|
||||
# deploy.sh prod <tag> Deploy a specific tag to this server
|
||||
# deploy.sh dev --dry-run Show what would change, make no changes
|
||||
# deploy.sh prod <tag> --dry-run
|
||||
#
|
||||
# Runs on the target server (optical-dev / optical-prod), not your laptop.
|
||||
# Saves a rollback checkpoint to .last_deploy_rollback before changing
|
||||
# anything, and auto-rolls back if the post-deploy /health probe fails.
|
||||
#
|
||||
# Differences from the AI QC sibling script (intentional):
|
||||
# * Docker Compose, not systemd. `docker compose up -d` replaces
|
||||
# systemctl restart; `docker compose build` replaces pip install.
|
||||
# * `flask db upgrade` is run by the container entrypoint when
|
||||
# `docker compose up -d` starts the web service; this script has no separate migration step.
|
||||
# * No "delete frontend / build frontend / copy to /var/www/html" steps
|
||||
# from the IT spec — HM QC ships Flask templates, not an SPA bundle.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
APP_DIR=${APP_DIR:-/opt/hm-aiqc}
|
||||
HEALTH_URL=${HEALTH_URL:-http://127.0.0.1:5050/health}
|
||||
ROLLBACK_FILE="$APP_DIR/.last_deploy_rollback"
|
||||
|
||||
MODE=${1:-}
|
||||
shift || true
|
||||
|
||||
DRY_RUN=false
|
||||
TARGET_TAG=""
|
||||
|
||||
case "$MODE" in
|
||||
dev)
|
||||
for arg in "$@"; do
|
||||
[[ "$arg" == "--dry-run" ]] && DRY_RUN=true
|
||||
done
|
||||
;;
|
||||
prod)
|
||||
TARGET_TAG=${1:-}
|
||||
shift || true
|
||||
for arg in "$@"; do
|
||||
[[ "$arg" == "--dry-run" ]] && DRY_RUN=true
|
||||
done
|
||||
if [[ -z "$TARGET_TAG" ]]; then
|
||||
echo "Usage: $0 prod <tag> [--dry-run]"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
""|-h|--help)
|
||||
cat <<EOF
|
||||
Usage:
|
||||
$(basename "$0") dev [--dry-run] Deploy latest develop to this server
|
||||
$(basename "$0") prod <tag> [--dry-run] Deploy a specific tag to this server
|
||||
|
||||
Run on the target server. Requires permission to talk to docker.
|
||||
EOF
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown mode: $MODE"
|
||||
echo "Try: $(basename "$0") --help"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
cd "$APP_DIR"
|
||||
if [[ ! -d .git ]]; then
|
||||
echo "ERROR: $APP_DIR is not a git repo"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ! -f .env ]]; then
|
||||
echo "ERROR: $APP_DIR/.env not found. Copy from deploy/.env.${MODE}.example and fill in."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CURRENT_REV=$(git rev-parse HEAD)
|
||||
CURRENT_SHORT=$(git rev-parse --short HEAD)
|
||||
|
||||
echo "============================================"
|
||||
echo " HM AI QC deploy ($MODE)"
|
||||
echo "============================================"
|
||||
echo "Server: $(hostname)"
|
||||
echo "Current: $CURRENT_SHORT $(git log -1 --format='%s' HEAD)"
|
||||
echo ""
|
||||
echo "Fetching latest refs..."
|
||||
git fetch --tags --prune --quiet
|
||||
|
||||
if [[ "$MODE" == "dev" ]]; then
|
||||
TARGET_REF="origin/develop"
|
||||
else
|
||||
if ! git rev-parse --verify --quiet "refs/tags/$TARGET_TAG^{commit}" > /dev/null; then
|
||||
echo "ERROR: Tag '$TARGET_TAG' not found after fetch"
|
||||
exit 1
|
||||
fi
|
||||
TARGET_REF="refs/tags/$TARGET_TAG"
|
||||
fi
|
||||
|
||||
TARGET_REV=$(git rev-parse "$TARGET_REF")
|
||||
TARGET_SHORT=$(git rev-parse --short "$TARGET_REF")
|
||||
|
||||
if [[ "$CURRENT_REV" == "$TARGET_REV" ]]; then
|
||||
echo "Already at $TARGET_SHORT — nothing to do."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Target: $TARGET_SHORT $(git log -1 --format='%s' "$TARGET_REF")"
|
||||
echo ""
|
||||
echo "Commits to apply:"
|
||||
git log --oneline "$CURRENT_REV..$TARGET_REV" | head -20
|
||||
CHANGE_COUNT=$(git log --oneline "$CURRENT_REV..$TARGET_REV" | wc -l | tr -d ' ')
|
||||
if [[ $CHANGE_COUNT -gt 20 ]]; then
|
||||
echo " ... and $((CHANGE_COUNT - 20)) more"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
if git diff --name-only "$CURRENT_REV" "$TARGET_REV" | grep -qE "(^|/)migrations/versions/"; then
|
||||
echo "Note: Alembic migrations changed — flask db upgrade will run."
|
||||
echo ""
|
||||
fi
|
||||
|
||||
if [[ "$DRY_RUN" == "true" ]]; then
|
||||
echo "Dry run — no changes made."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
read -r -p "Proceed with deploy? (y/N): " confirm
|
||||
if [[ ! $confirm =~ ^[Yy]$ ]]; then
|
||||
echo "Cancelled."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Save the rollback checkpoint BEFORE touching the checkout so that
# deploy/rollback.sh last can always find the revision we started from.
echo "$CURRENT_REV" > "$ROLLBACK_FILE"

echo "Applying changes..."
git reset --hard "$TARGET_REV"

echo "Building images..."
# A failed build must not leave the checkout at the target revision: the
# containers would still be running the old code, yet the next deploy.sh run
# would compare HEAD to the target and report "nothing to do". Restore the
# checkout and stop; the running containers have not been touched yet.
if ! docker compose build; then
  echo "ERROR: image build failed — restoring checkout to $CURRENT_SHORT (running containers untouched)." >&2
  git reset --hard "$CURRENT_REV"
  exit 1
fi

echo "Starting services (entrypoint runs flask db upgrade first)..."
docker compose up -d
|
||||
|
||||
# Poll $HEALTH_URL every 2s until it answers 2xx, or timeout.
|
||||
# 60s window allows for migration time on first boot of a major release.
|
||||
#######################################
# Poll the health endpoint until it answers with a 2xx status or the
# attempt budget is used up.
# Globals:   HEALTH_URL (read)
# Arguments: none
# Outputs:   one progress line on stdout when the service becomes healthy
# Returns:   0 as soon as a probe succeeds, 1 if all 30 probes fail
#######################################
wait_for_health() {
  local -r max_attempts=30  # 30 probes, 2s apart = nominal 60s window
  local -r probe_timeout=5  # seconds; upper bound for a single probe
  local i                   # keep the loop counter out of the global scope

  for ((i = 1; i <= max_attempts; i++)); do
    sleep 2
    # --max-time bounds each probe: without it a backend that accepts the
    # connection but never responds would hang curl, and the deploy with it.
    if curl -sf -o /dev/null --max-time "$probe_timeout" "$HEALTH_URL"; then
      echo " healthy after ${i}x2s"
      return 0
    fi
  done
  return 1
}
|
||||
|
||||
echo "Smoke testing $HEALTH_URL..."
|
||||
if wait_for_health; then
|
||||
NEW_SHORT=$(git rev-parse --short HEAD)
|
||||
echo ""
|
||||
echo "Deploy OK. Now at $NEW_SHORT."
|
||||
echo "Rollback target saved: $CURRENT_SHORT (run deploy/rollback.sh last to revert)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo ""
echo "Smoke test failed after 60s — rolling back to $CURRENT_SHORT..."

# Every rollback step runs inside the `if` condition on purpose: under
# `set -e` a bare failing `git reset` / `docker compose` would abort the
# script right here and the operator would never see the
# "ROLLBACK ALSO FAILED" diagnostics below.
if git reset --hard "$CURRENT_REV" \
  && docker compose build \
  && docker compose up -d \
  && wait_for_health; then
  echo "Rolled back successfully. Service healthy at $CURRENT_SHORT."
  echo "Investigate: docker compose logs --tail=200 web"
  # Still a failed deploy: exit non-zero so callers/CI notice.
  exit 1
fi

# Exit code 2 distinguishes "deploy failed AND rollback failed" from a
# deploy that failed but was rolled back cleanly (exit 1).
echo "ROLLBACK ALSO FAILED. Service is in a broken state."
echo "docker compose ps"
echo "docker compose logs --tail=200 web"
exit 2
|
||||
31
deploy/.env.dev.example
Normal file
31
deploy/.env.dev.example
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# HM AI QC — Dev environment starter
|
||||
# Copy to /opt/hm-aiqc/.env and fill in real secrets.
|
||||
|
||||
# Azure AD authentication (shared with AI QC sibling project)
|
||||
AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
|
||||
AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
|
||||
|
||||
ENVIRONMENT=development
|
||||
|
||||
# Box Configuration
|
||||
BOX_CONFIG_PATH=config/box_config.json
|
||||
BOX_REPORT_FOLDER_ID=133295752718
|
||||
BOX_CAMPAIGNS_FOLDER_ID=156182880490
|
||||
|
||||
# Flask
|
||||
FLASK_APP=app:app
|
||||
FLASK_ENV=production
|
||||
# Generate with: python -c 'import secrets; print(secrets.token_urlsafe(48))'
|
||||
SECRET_KEY=replace-me-with-a-long-random-string
|
||||
|
||||
# Server
|
||||
HOST=0.0.0.0
|
||||
PORT=5000
|
||||
|
||||
# Database — absolute path inside the container, mapped to ./database on host
|
||||
DATABASE_URI=sqlite:////app/database/qc_platform.db
|
||||
|
||||
# LLM Provider Keys (NO HARDCODED KEYS — set real values below)
|
||||
OPENAI_API_KEY=
|
||||
GOOGLE_API_KEY=
|
||||
ANTHROPIC_API_KEY=
|
||||
31
deploy/.env.prod.example
Normal file
31
deploy/.env.prod.example
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# HM AI QC — Prod environment starter
|
||||
# Copy to /opt/hm-aiqc/.env and fill in real secrets.
|
||||
|
||||
# Azure AD authentication (shared with AI QC sibling project)
|
||||
AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
|
||||
AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
|
||||
|
||||
ENVIRONMENT=production
|
||||
|
||||
# Box Configuration
|
||||
BOX_CONFIG_PATH=config/box_config.json
|
||||
BOX_REPORT_FOLDER_ID=133295752718
|
||||
BOX_CAMPAIGNS_FOLDER_ID=156182880490
|
||||
|
||||
# Flask
|
||||
FLASK_APP=app:app
|
||||
FLASK_ENV=production
|
||||
# Generate with: python -c 'import secrets; print(secrets.token_urlsafe(48))'
|
||||
SECRET_KEY=replace-me-with-a-long-random-string
|
||||
|
||||
# Server
|
||||
HOST=0.0.0.0
|
||||
PORT=5000
|
||||
|
||||
# Database — absolute path inside the container, mapped to ./database on host
|
||||
DATABASE_URI=sqlite:////app/database/qc_platform.db
|
||||
|
||||
# LLM Provider Keys (NO HARDCODED KEYS — set real values below)
|
||||
OPENAI_API_KEY=
|
||||
GOOGLE_API_KEY=
|
||||
ANTHROPIC_API_KEY=
|
||||
28
deploy/apache-dev.conf
Normal file
28
deploy/apache-dev.conf
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# Apache Location block for HM AI QC on optical-dev.
|
||||
#
|
||||
# Insert inside the existing <VirtualHost *:443> for
|
||||
# ServerName optical-dev.oliver.solutions
|
||||
# and reload:
|
||||
# sudo systemctl reload apache2
|
||||
#
|
||||
# Required modules (enable once if not already):
|
||||
# sudo a2enmod proxy proxy_http headers rewrite
|
||||
#
|
||||
# The X-Script-Name header is read by wsgi.py's ReverseProxied middleware
|
||||
# so url_for() generates URLs prefixed with /hm-aiqc.
|
||||
|
||||
# timeout=600 covers the long-running endpoints (QC execution, Box searches,
# video analysis). It is set per-backend on ProxyPass because ProxyTimeout is
# only valid at server-config / virtual-host scope: inside <Location> Apache
# rejects it, and at vhost level it would also change every other proxied
# app in this VirtualHost.
ProxyPass /hm-aiqc/ http://127.0.0.1:5050/ timeout=600
ProxyPassReverse /hm-aiqc/ http://127.0.0.1:5050/

# Bare /hm-aiqc (no trailing slash) → redirect with slash so the prefix matches
RewriteEngine On
RewriteRule ^/hm-aiqc$ /hm-aiqc/ [R=301,L]

<Location /hm-aiqc/>
    # Tells the app which URL prefix it is mounted under.
    RequestHeader set X-Script-Name "/hm-aiqc"
    # TLS terminates at Apache; the backend hop is plain HTTP.
    RequestHeader set X-Forwarded-Proto "https"
    ProxyPreserveHost On
</Location>
|
||||
28
deploy/apache-prod.conf
Normal file
28
deploy/apache-prod.conf
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# Apache Location block for HM AI QC on optical-prod.
|
||||
#
|
||||
# Insert inside the existing <VirtualHost *:443> for
|
||||
# ServerName optical-prod.oliver.solutions
|
||||
# and reload:
|
||||
# sudo systemctl reload apache2
|
||||
#
|
||||
# Required modules (enable once if not already):
|
||||
# sudo a2enmod proxy proxy_http headers rewrite
|
||||
#
|
||||
# The X-Script-Name header is read by wsgi.py's ReverseProxied middleware
|
||||
# so url_for() generates URLs prefixed with /hm-aiqc.
|
||||
|
||||
# timeout=600 covers the long-running endpoints (QC execution, Box searches,
# video analysis). It is set per-backend on ProxyPass because ProxyTimeout is
# only valid at server-config / virtual-host scope: inside <Location> Apache
# rejects it, and at vhost level it would also change every other proxied
# app in this VirtualHost.
ProxyPass /hm-aiqc/ http://127.0.0.1:5050/ timeout=600
ProxyPassReverse /hm-aiqc/ http://127.0.0.1:5050/

# Bare /hm-aiqc (no trailing slash) → redirect with slash so the prefix matches
RewriteEngine On
RewriteRule ^/hm-aiqc$ /hm-aiqc/ [R=301,L]

<Location /hm-aiqc/>
    # Tells the app which URL prefix it is mounted under.
    RequestHeader set X-Script-Name "/hm-aiqc"
    # TLS terminates at Apache; the backend hop is plain HTTP.
    RequestHeader set X-Forwarded-Proto "https"
    ProxyPreserveHost On
</Location>
|
||||
10
deploy/health-check.sh
Executable file
10
deploy/health-check.sh
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash
# Quick "is the app alive?" check. Prints status and exits 0 (healthy) or 1 (not).
#
# Environment:
#   HEALTH_URL      endpoint to probe (default: http://127.0.0.1:5050/health)
#   HEALTH_TIMEOUT  max seconds to wait for the probe (default: 5)
HEALTH_URL=${HEALTH_URL:-http://127.0.0.1:5050/health}
HEALTH_TIMEOUT=${HEALTH_TIMEOUT:-5}

# -s hides the progress meter, -S keeps curl's one-line error message so the
# 2>&1 capture actually contains the failure reason; -f turns HTTP >= 400
# into a non-zero exit. --max-time keeps a hung backend from blocking the
# monitoring job that calls this script.
if output=$(curl -sS -f --max-time "$HEALTH_TIMEOUT" "$HEALTH_URL" 2>&1); then
  echo "OK $output"
  exit 0
fi

# The stdout line is unchanged for anything that parses it; the reason curl
# gave (connection refused, HTTP 5xx, timeout, ...) goes to stderr.
echo "DOWN no response from $HEALTH_URL"
if [[ -n "$output" ]]; then
  echo "  $output" >&2
fi
exit 1
|
||||
75
deploy/rollback.sh
Executable file
75
deploy/rollback.sh
Executable file
|
|
@ -0,0 +1,75 @@
|
|||
#!/bin/bash
|
||||
# Emergency rollback for HM AI QC.
|
||||
#
|
||||
# Usage:
|
||||
# rollback.sh last Roll back to the checkpoint saved by deploy.sh
|
||||
# rollback.sh <commit-hash> Roll back to an explicit commit
|
||||
#
|
||||
# Note: Alembic downgrade is intentionally NOT run here — schema rollbacks
|
||||
# are risky on data-bearing tables. If the bad deploy added a column the
|
||||
# rolled-back code doesn't know about, that's almost always fine. If it
|
||||
# dropped or renamed a column, restore from the daily DB backup.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
APP_DIR=${APP_DIR:-/opt/hm-aiqc}
|
||||
HEALTH_URL=${HEALTH_URL:-http://127.0.0.1:5050/health}
|
||||
ROLLBACK_FILE="$APP_DIR/.last_deploy_rollback"
|
||||
|
||||
TARGET=${1:-}
|
||||
|
||||
if [[ -z "$TARGET" || "$TARGET" == "last" ]]; then
|
||||
if [[ ! -f "$ROLLBACK_FILE" ]]; then
|
||||
echo "No .last_deploy_rollback file. Specify a commit hash explicitly."
|
||||
echo "Usage: $(basename "$0") last | <commit-hash>"
|
||||
exit 1
|
||||
fi
|
||||
TARGET=$(cat "$ROLLBACK_FILE")
|
||||
fi
|
||||
|
||||
cd "$APP_DIR"
|
||||
|
||||
if ! git rev-parse --verify --quiet "$TARGET^{commit}" > /dev/null; then
|
||||
echo "ERROR: Commit '$TARGET' not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CURRENT_REV=$(git rev-parse HEAD)
|
||||
CURRENT_SHORT=$(git rev-parse --short HEAD)
|
||||
TARGET_REV=$(git rev-parse "$TARGET")
|
||||
TARGET_SHORT=$(git rev-parse --short "$TARGET")
|
||||
|
||||
if [[ "$CURRENT_REV" == "$TARGET_REV" ]]; then
|
||||
echo "Already at $TARGET_SHORT — nothing to do."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "============================================"
|
||||
echo " HM AI QC rollback"
|
||||
echo "============================================"
|
||||
echo "Current: $CURRENT_SHORT $(git log -1 --format='%s' HEAD)"
|
||||
echo "Target: $TARGET_SHORT $(git log -1 --format='%s' "$TARGET")"
|
||||
echo ""
|
||||
|
||||
read -r -p "Proceed? (y/N): " confirm
|
||||
if [[ ! $confirm =~ ^[Yy]$ ]]; then
|
||||
echo "Cancelled."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git reset --hard "$TARGET_REV"
|
||||
docker compose build
|
||||
docker compose up -d
|
||||
|
||||
# 60s window — same as deploy.sh (30 probes, 2s apart). --max-time bounds
# each probe so a backend that accepts the connection but never answers
# cannot stall the rollback indefinitely.
for _ in {1..30}; do
  sleep 2
  if curl -sf -o /dev/null --max-time 5 "$HEALTH_URL"; then
    echo "Rollback OK. Now at $TARGET_SHORT."
    exit 0
  fi
done
|
||||
|
||||
echo "Service unhealthy after rollback."
|
||||
echo "docker compose logs --tail=200 web"
|
||||
exit 1
|
||||
Loading…
Add table
Reference in a new issue