ai_qc/backend/scripts/deploy.sh
nickviljoen a3b3f45f01 fix(deploy): use git's own -n limit instead of | head -20
When the deploy batch has more than 20 commits, the `git log ... | head -20`
pipeline closes the pipe after 20 lines. git log gets SIGPIPE (exit 141),
which `set -o pipefail` propagates, and `set -e` then exits the script
silently — no prompt shown, no error message.

Only bites for release-sized batches (>20 commits). First seen on the
v1.3.0 prod deploy: 20 commits displayed, then the script returned to
the shell without prompting. dev deploys never hit this because they
typically only have 1-3 commits ahead.

Fix: tell git to limit its own output via `-n 20`. Same display, no
broken pipe. Also swap the count-by-wc-l for `git rev-list --count`
which is more idiomatic and avoids any further pipe shenanigans.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 15:25:38 +02:00

179 lines
5 KiB
Bash
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# AI QC deploy script.
#
# Usage:
# deploy.sh dev Pull latest develop → restart service
# deploy.sh prod <tag> Check out a specific tag → restart service
# deploy.sh dev --dry-run Show what would change, make no changes
#
# Runs on the target server (not your laptop). Needs sudo for systemctl.
# Saves a rollback checkpoint to .last_deploy_rollback before changing anything,
# and auto-rolls back if the post-deploy smoke test fails.
# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Fixed settings for the server this script runs on.
APP_DIR=/opt/ai_qc
SERVICE=ai-qc.service
HEALTH_URL=http://127.0.0.1:7183/health
# The pre-deploy commit hash is written here before anything is changed.
ROLLBACK_FILE="$APP_DIR/.last_deploy_rollback"

# Print the usage summary to stdout.
usage() {
  cat <<EOF
Usage:
$(basename "$0") dev [--dry-run] Deploy latest develop to this server
$(basename "$0") prod <tag> [--dry-run] Deploy a specific tag to this server
Run on the target server. Requires sudo for systemctl restart.
EOF
}

# First positional argument selects the mode; everything after it is parsed
# below. `shift` fails when there are no arguments, hence the `|| true`.
MODE=${1:-}
shift || true
DRY_RUN=false
TARGET_TAG=""

case "$MODE" in
  dev | prod)
    # Accept --dry-run in any position. Anything else is either the tag
    # (prod only, at most one) or a mistake. Mistakes are rejected instead of
    # being ignored, so a typo such as `--dryrun` cannot turn an intended dry
    # run into a real deploy prompt, and `prod --dry-run` is no longer
    # mistaken for a tag named "--dry-run".
    for arg in "$@"; do
      case "$arg" in
        --dry-run)
          DRY_RUN=true
          ;;
        -*)
          echo "Unknown option: $arg"
          echo "Try: $(basename "$0") --help"
          exit 1
          ;;
        *)
          if [[ "$MODE" == "prod" && -z "$TARGET_TAG" ]]; then
            TARGET_TAG=$arg
          else
            echo "Unexpected argument: $arg"
            echo "Try: $(basename "$0") --help"
            exit 1
          fi
          ;;
      esac
    done
    if [[ "$MODE" == "prod" && -z "$TARGET_TAG" ]]; then
      echo "Usage: $0 prod <tag> [--dry-run]"
      exit 1
    fi
    ;;
  "" | -h | --help)
    usage
    exit 0
    ;;
  *)
    echo "Unknown mode: $MODE"
    echo "Try: $(basename "$0") --help"
    exit 1
    ;;
esac
# Pre-flight: work out what is deployed now, what would be deployed, and show
# the difference. Nothing on disk is modified in this section apart from the
# refs updated by `git fetch`.
cd "$APP_DIR"
if [[ ! -d .git ]]; then
  echo "ERROR: $APP_DIR is not a git repo"
  exit 1
fi

# The currently checked-out commit; later sections roll back to this.
CURRENT_REV=$(git rev-parse HEAD)
CURRENT_SHORT=$(git rev-parse --short HEAD)

echo "============================================"
echo " AI QC deploy ($MODE)"
echo "============================================"
echo "Server: $(hostname)"
echo "Current: $CURRENT_SHORT $(git log -1 --format='%s' HEAD)"
echo ""

echo "Fetching latest refs..."
git fetch --tags --prune --quiet

if [[ "$MODE" == "dev" ]]; then
  TARGET_REF="origin/develop"
else
  # The tag must exist and must resolve to a commit.
  if ! git rev-parse --verify --quiet "refs/tags/$TARGET_TAG^{commit}" > /dev/null; then
    echo "ERROR: Tag '$TARGET_TAG' not found after fetch"
    exit 1
  fi
  TARGET_REF="refs/tags/$TARGET_TAG"
fi

# Peel the ref to the commit it points at. For an annotated tag a plain
# `git rev-parse <tag>` prints the hash of the tag object, which never equals
# HEAD's commit hash, so the "already deployed" check below could not match
# and the short hash shown would not be a commit.
TARGET_REV=$(git rev-parse "$TARGET_REF^{commit}")
TARGET_SHORT=$(git rev-parse --short "$TARGET_REF^{commit}")

if [[ "$CURRENT_REV" == "$TARGET_REV" ]]; then
  echo "Already at $TARGET_SHORT — nothing to do."
  exit 0
fi

echo "Target: $TARGET_SHORT $(git log -1 --format='%s' "$TARGET_REF")"
echo ""
echo "Commits to apply:"
# Use git's own line limit (`-n 20`) rather than `| head -20`: piping to head
# closes the pipe after 20 lines and makes git log exit with SIGPIPE (141),
# which `set -o pipefail` propagates and `set -e` then uses to kill the
# script silently. Only bites when the deploy batch is >20 commits, i.e.
# real prod releases. First hit observed on the v1.3.0 prod deploy.
git log --oneline -n 20 "$CURRENT_REV..$TARGET_REV"
CHANGE_COUNT=$(git rev-list --count "$CURRENT_REV..$TARGET_REV")
if (( CHANGE_COUNT > 20 )); then
  echo " ... and $((CHANGE_COUNT - 20)) more"
fi
echo ""

# Detect a changed requirements.txt (at any depth). The file list is captured
# first instead of piping `git diff` straight into `grep -q`: grep -q exits
# on the first match, git diff can then die with SIGPIPE, and under pipefail
# the whole `if` would read as "no match", silently skipping pip install.
# A failing `git diff` now aborts the script instead of counting as no change.
REQS_CHANGED=false
CHANGED_FILES=$(git diff --name-only "$CURRENT_REV" "$TARGET_REV")
if grep -qE '(^|/)requirements\.txt$' <<< "$CHANGED_FILES"; then
  REQS_CHANGED=true
  echo "Note: requirements.txt changed — pip install will run."
  echo ""
fi

if [[ "$DRY_RUN" == "true" ]]; then
  echo "Dry run — no changes made."
  exit 0
fi
# Ask for explicit confirmation; anything other than a single y/Y cancels.
# `read` returns non-zero at end-of-input (e.g. stdin closed or redirected
# from an empty source). Handle that here so the script says why it stopped
# instead of being killed silently by `set -e`; the exit status stays 1.
if ! read -r -p "Proceed with deploy? (y/N): " confirm; then
  echo ""
  echo "ERROR: could not read confirmation from stdin — nothing deployed."
  exit 1
fi
if [[ ! $confirm =~ ^[Yy]$ ]]; then
  echo "Cancelled."
  exit 0
fi

# Record the rollback checkpoint before touching the working tree.
echo "$CURRENT_REV" > "$ROLLBACK_FILE"

echo "Applying changes..."
git reset --hard "$TARGET_REV"

if [[ "$REQS_CHANGED" == "true" ]]; then
  echo "Installing updated dependencies..."
  # If pip fails, put the code back to the pre-deploy revision before exiting,
  # so the working tree is not left on the new revision with the install
  # unfinished. The virtualenv itself may be partially updated.
  if ! "$APP_DIR/venv/bin/pip" install -q -r "$APP_DIR/requirements.txt"; then
    echo "ERROR: pip install failed — restoring code to $CURRENT_SHORT."
    echo "The virtualenv may be partially updated; check it before the next deploy."
    git reset --hard "$CURRENT_REV"
    exit 1
  fi
fi
# Poll $HEALTH_URL every 2s until curl reports success, or give up.
# Startup takes ~4s on dev (75 QC modules + 14 profiles load on import).
#
# Globals:  HEALTH_URL (read)
# Outputs:  one " healthy after Nx2s" line on stdout when the check passes
# Returns:  0 once a request succeeds, 1 if all attempts fail
wait_for_health() {
  local max_attempts=15 # 15 × 2s of sleeping = 30s window
  # Cap each request so a service that accepts the connection but never
  # answers cannot hang the deploy; a timed-out request counts as a failed
  # attempt. In that case the total wait can exceed the 30s window.
  local request_timeout=5
  # Keep the loop counter out of the global scope.
  local i
  for ((i = 1; i <= max_attempts; i++)); do
    sleep 2
    # -s: no progress output; -f: HTTP error statuses count as failure.
    if curl -sf --max-time "$request_timeout" -o /dev/null "$HEALTH_URL"; then
      echo " healthy after ${i}x2s"
      return 0
    fi
  done
  return 1
}
# Restart onto the new code and verify it; on failure fall back to the
# pre-deploy revision.
#
# Exit status: 0 deploy healthy, 1 deploy failed but rollback is healthy,
#              2 rollback failed as well.
echo "Restarting $SERVICE..."
# systemctl runs as an `if` condition so that a failed restart leads to the
# rollback path below instead of aborting the script through `set -e`.
if sudo systemctl restart "$SERVICE"; then
  echo "Smoke testing $HEALTH_URL..."
  if wait_for_health; then
    NEW_SHORT=$(git rev-parse --short HEAD)
    echo ""
    echo "Deploy OK. Now at $NEW_SHORT."
    echo "Rollback target saved: $CURRENT_SHORT (run rollback.sh last to revert)"
    exit 0
  fi
  FAILURE="Smoke test failed after 30s"
else
  FAILURE="Restart of $SERVICE failed"
fi

echo ""
echo "$FAILURE — rolling back to $CURRENT_SHORT..."
git reset --hard "$CURRENT_REV"

# The deploy installed the new requirements when they changed; put the
# previous set back so the rolled-back code runs against matching packages.
# A failure here is reported but does not stop the rollback restart.
if [[ "$REQS_CHANGED" == "true" ]]; then
  echo "Reinstalling previous dependencies..."
  if ! "$APP_DIR/venv/bin/pip" install -q -r "$APP_DIR/requirements.txt"; then
    echo "WARNING: pip install of the previous requirements failed; restarting anyway."
  fi
fi

# Same guard as above: a failed restart during rollback must reach the
# "rollback also failed" report (exit 2) rather than exit early.
if sudo systemctl restart "$SERVICE" && wait_for_health; then
  echo "Rolled back successfully. Service healthy at $CURRENT_SHORT."
  echo "Investigate: sudo journalctl -u $SERVICE -n 100"
  exit 1
fi

echo "ROLLBACK ALSO FAILED. Service is in a broken state."
echo "sudo systemctl status $SERVICE"
echo "sudo journalctl -u $SERVICE -n 100"
exit 2