#!/bin/bash
# AI QC deploy script.
#
# Usage:
#   deploy.sh dev                 Pull latest develop → restart service
#   deploy.sh prod <tag>          Check out a specific tag → restart service
#   deploy.sh dev --dry-run       Show what would change, make no changes
#
# Runs on the target server (not your laptop). Needs sudo for systemctl.
# Saves a rollback checkpoint to .last_deploy_rollback before changing anything,
# and auto-rolls back if the post-deploy smoke test fails.

set -euo pipefail

APP_DIR=/opt/ai_qc
SERVICE=ai-qc.service
HEALTH_URL=http://127.0.0.1:7183/health
ROLLBACK_FILE="$APP_DIR/.last_deploy_rollback"

# First positional argument selects the mode; `shift` fails harmlessly when
# there are no arguments at all, hence the `|| true`.
MODE=${1:-}
shift || true

DRY_RUN=false
TARGET_TAG=""

case "$MODE" in
  dev)
    for arg in "$@"; do
      if [[ "$arg" == "--dry-run" ]]; then
        DRY_RUN=true
      fi
    done
    ;;
  prod)
    TARGET_TAG=${1:-}
    shift || true
    for arg in "$@"; do
      if [[ "$arg" == "--dry-run" ]]; then
        DRY_RUN=true
      fi
    done
    # A missing tag, or an option in the tag position (e.g. `prod --dry-run`),
    # is a usage error rather than a confusing "tag not found" later on.
    if [[ -z "$TARGET_TAG" || "$TARGET_TAG" == -* ]]; then
      echo "Usage: $0 prod <tag> [--dry-run]" >&2
      exit 1
    fi
    ;;
  "" | -h | --help)
    cat <<EOF
Usage:
  $(basename "$0") dev [--dry-run]          Pull latest develop and restart the service
  $(basename "$0") prod <tag> [--dry-run]   Deploy a specific tag to this server

Run on the target server. Requires sudo for systemctl restart.
EOF
    exit 0
    ;;
  *)
    echo "Unknown mode: $MODE" >&2
    echo "Try: $(basename "$0") --help" >&2
    exit 1
    ;;
esac

cd "$APP_DIR"

if [[ ! -d .git ]]; then
  echo "ERROR: $APP_DIR is not a git repo" >&2
  exit 1
fi

# Remember where we are now; this is the rollback target.
CURRENT_REV=$(git rev-parse HEAD)
CURRENT_SHORT=$(git rev-parse --short HEAD)

echo "============================================"
echo " AI QC deploy ($MODE)"
echo "============================================"
echo "Server: $(hostname)"
echo "Current: $CURRENT_SHORT $(git log -1 --format='%s' HEAD)"
echo ""

echo "Fetching latest refs..."
git fetch --tags --prune --quiet

# dev tracks origin/develop; prod deploys exactly the requested tag.
if [[ "$MODE" == "dev" ]]; then
  TARGET_REF="origin/develop"
else
  if ! git rev-parse --verify --quiet "refs/tags/$TARGET_TAG^{commit}" > /dev/null; then
    echo "ERROR: Tag '$TARGET_TAG' not found after fetch" >&2
    exit 1
  fi
  TARGET_REF="refs/tags/$TARGET_TAG"
fi

TARGET_REV=$(git rev-parse "$TARGET_REF")
TARGET_SHORT=$(git rev-parse --short "$TARGET_REF")

if [[ "$CURRENT_REV" == "$TARGET_REV" ]]; then
  echo "Already at $TARGET_SHORT — nothing to do."
  exit 0
fi

echo "Target: $TARGET_SHORT $(git log -1 --format='%s' "$TARGET_REF")"
echo ""

echo "Commits to apply:"
# Use git's own line limit (`-n 20`) rather than `| head -20`: piping to head
# closes the pipe after 20 lines and makes git log exit with SIGPIPE (141),
# which `set -o pipefail` propagates and `set -e` then uses to kill the
# script silently. Only bites when the deploy batch is >20 commits — i.e.
# real prod releases. First hit observed on the v1.3.0 prod deploy.
git log --oneline -n 20 "$CURRENT_REV..$TARGET_REV"
CHANGE_COUNT=$(git rev-list --count "$CURRENT_REV..$TARGET_REV")
if [[ $CHANGE_COUNT -gt 20 ]]; then
  echo " ... and $((CHANGE_COUNT - 20)) more"
fi
echo ""

# Capture the changed-file list before grepping it. Piping `git diff` straight
# into `grep -q` has the same SIGPIPE/pipefail hazard as `| head`: grep exits
# on the first match, git can die with 141 on a large list, and the `if` would
# then read as "requirements.txt did not change" and skip pip install.
REQS_CHANGED=false
CHANGED_FILES=$(git diff --name-only "$CURRENT_REV" "$TARGET_REV")
if grep -qE '(^|/)requirements\.txt$' <<< "$CHANGED_FILES"; then
  REQS_CHANGED=true
  echo "Note: requirements.txt changed — pip install will run."
  echo ""
fi

if [[ "$DRY_RUN" == "true" ]]; then
  echo "Dry run — no changes made."
  exit 0
fi

# `read` returns non-zero at EOF (e.g. stdin is not a terminal); without the
# guard `set -e` would abort silently. An unanswered prompt counts as "no".
confirm=""
read -r -p "Proceed with deploy? (y/N): " confirm || true
if [[ ! $confirm =~ ^[Yy]$ ]]; then
  echo "Cancelled."
  exit 0
fi

# Checkpoint first, then move the working tree.
echo "$CURRENT_REV" > "$ROLLBACK_FILE"

echo "Applying changes..."
git reset --hard "$TARGET_REV"

if [[ "$REQS_CHANGED" == "true" ]]; then
  echo "Installing updated dependencies..."
  if ! "$APP_DIR/venv/bin/pip" install -q -r "$APP_DIR/requirements.txt"; then
    # The service has not been restarted yet, so put the tree back to the
    # revision it is still running instead of leaving new code on disk.
    echo "ERROR: pip install failed — restoring working tree to $CURRENT_SHORT." >&2
    git reset --hard "$CURRENT_REV"
    # Best effort: a partial install may have touched the venv.
    "$APP_DIR/venv/bin/pip" install -q -r "$APP_DIR/requirements.txt" \
      || echo "WARNING: could not reinstall previous dependencies; venv may be inconsistent." >&2
    echo "Service was not restarted." >&2
    exit 1
  fi
fi

# Poll $HEALTH_URL every 2s until it answers 2xx, or timeout. Returns 0 on success.
# Startup takes ~4s on dev (75 QC modules + 14 profiles load on import).
wait_for_health() {
  # Globals: HEALTH_URL (read). Outputs a one-line note to stdout on success.
  # Returns: 0 once curl gets a successful response, 1 if every attempt fails.
  local max_attempts=15 # 15 × 2s = 30s window
  local attempt         # kept local so the counter does not leak into the script
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    sleep 2
    # --max-time bounds each probe: without it, a service that accepts the
    # connection but never answers would hang the deploy indefinitely. A hung
    # endpoint can therefore stretch the nominal 30s window (up to ~2s extra
    # per attempt); a refused connection still fails immediately.
    if curl -sf --max-time 2 -o /dev/null "$HEALTH_URL"; then
      echo " healthy after ${attempt}x2s"
      return 0
    fi
  done
  return 1
}

echo "Restarting $SERVICE..."
# Run the restart inside `if` so a failing systemctl does not trip `set -e`
# and exit before the rollback below gets a chance to run.
deploy_ok=false
failure_reason="Smoke test failed after 30s"
if sudo systemctl restart "$SERVICE"; then
  echo "Smoke testing $HEALTH_URL..."
  if wait_for_health; then
    deploy_ok=true
  fi
else
  failure_reason="Restart of $SERVICE failed"
fi

if [[ "$deploy_ok" == "true" ]]; then
  NEW_SHORT=$(git rev-parse --short HEAD)
  echo ""
  echo "Deploy OK. Now at $NEW_SHORT."
  echo "Rollback target saved: $CURRENT_SHORT (run rollback.sh last to revert)"
  exit 0
fi

echo ""
echo "$failure_reason — rolling back to $CURRENT_SHORT..."
if ! git reset --hard "$CURRENT_REV"; then
  echo "ROLLBACK ALSO FAILED. Could not reset the working tree to $CURRENT_SHORT." >&2
  exit 2
fi

# If the deploy installed new requirements, the venv no longer matches the
# restored code; reinstall from the restored requirements.txt (best effort).
if [[ "${REQS_CHANGED:-false}" == "true" ]]; then
  echo "Reinstalling previous dependencies..."
  "$APP_DIR/venv/bin/pip" install -q -r "$APP_DIR/requirements.txt" \
    || echo "WARNING: could not reinstall previous dependencies; venv may be inconsistent." >&2
fi

# Restart and health check share one condition so that a failed restart falls
# through to the "rollback also failed" report instead of exiting via `set -e`.
if sudo systemctl restart "$SERVICE" && wait_for_health; then
  echo "Rolled back successfully. Service healthy at $CURRENT_SHORT."
  echo "Investigate: sudo journalctl -u $SERVICE -n 100"
  exit 1
fi

echo "ROLLBACK ALSO FAILED. Service is in a broken state."
echo "sudo systemctl status $SERVICE"
echo "sudo journalctl -u $SERVICE -n 100"
exit 2