When the deploy batch has more than 20 commits, the `git log ... | head -20` pipeline closes the pipe after 20 lines. git log gets SIGPIPE (exit 141), which `set -o pipefail` propagates, and `set -e` then exits the script silently — no prompt shown, no error message. Only bites for release-sized batches (>20 commits). First seen on the v1.3.0 prod deploy: 20 commits displayed, then the script returned to the shell without prompting. dev deploys never hit this because they typically only have 1-3 commits ahead. Fix: tell git to limit its own output via `-n 20`. Same display, no broken pipe. Also swap the count-by-wc-l for `git rev-list --count` which is more idiomatic and avoids any further pipe shenanigans. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
179 lines
5 KiB
Bash
Executable file
179 lines
5 KiB
Bash
Executable file
#!/bin/bash
|
||
# AI QC deploy script.
|
||
#
|
||
# Usage:
|
||
# deploy.sh dev Pull latest develop → restart service
|
||
# deploy.sh prod <tag> Check out a specific tag → restart service
|
||
# deploy.sh dev --dry-run Show what would change, make no changes
|
||
#
|
||
# Runs on the target server (not your laptop). Needs sudo for systemctl.
|
||
# Saves a rollback checkpoint to .last_deploy_rollback before changing anything,
|
||
# and auto-rolls back if the post-deploy smoke test fails.
|
||
|
||
# Strict mode: abort on any unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Deployment constants for this server.
APP_DIR="/opt/ai_qc"
SERVICE="ai-qc.service"
HEALTH_URL="http://127.0.0.1:7183/health"
ROLLBACK_FILE="${APP_DIR}/.last_deploy_rollback"

# Option defaults; the argument parsing below may override them.
DRY_RUN="false"
TARGET_TAG=""

# The first positional argument selects the mode (empty when none was given).
# Consume it so "$@" holds only the mode-specific arguments.
MODE="${1:-}"
if (( $# > 0 )); then
  shift
fi
|
||
|
||
# Parse the mode-specific arguments left in "$@" after the mode was consumed.
#
#   dev  [--dry-run]         deploy latest develop
#   prod <tag> [--dry-run]   deploy a specific tag
#   "" | -h | --help         print usage and exit 0
#
# Sets TARGET_TAG (prod only) and DRY_RUN. Any argument other than --dry-run
# is rejected: silently ignoring a typo such as `--dryrun` would let an
# intended dry run continue as a live deploy. Usage errors go to stderr and
# exit 1.
case "$MODE" in
  dev | prod)
    if [[ "$MODE" == "prod" ]]; then
      TARGET_TAG=${1:-}
      shift || true
      # A leading dash means the tag was omitted and a flag took its place
      # (e.g. `prod --dry-run`).
      if [[ -z "$TARGET_TAG" || "$TARGET_TAG" == -* ]]; then
        echo "Usage: $0 prod <tag> [--dry-run]" >&2
        exit 1
      fi
    fi
    for arg in "$@"; do
      case "$arg" in
        --dry-run)
          DRY_RUN=true
          ;;
        *)
          echo "Unknown argument: $arg" >&2
          echo "Try: $(basename "$0") --help" >&2
          exit 1
          ;;
      esac
    done
    ;;
  "" | -h | --help)
    cat <<EOF
Usage:
$(basename "$0") dev [--dry-run] Deploy latest develop to this server
$(basename "$0") prod <tag> [--dry-run] Deploy a specific tag to this server

Run on the target server. Requires sudo for systemctl restart.
EOF
    exit 0
    ;;
  *)
    echo "Unknown mode: $MODE" >&2
    echo "Try: $(basename "$0") --help" >&2
    exit 1
    ;;
esac
|
||
|
||
# Work from the application checkout; it must be a git repository.
cd "$APP_DIR"
[[ -d .git ]] || {
  echo "ERROR: $APP_DIR is not a git repo"
  exit 1
}

# Record the currently deployed revision before anything changes; it is the
# rollback target later in the script.
CURRENT_REV="$(git rev-parse HEAD)"
CURRENT_SHORT="$(git rev-parse --short HEAD)"

# Banner: mode, server name, and the currently deployed commit.
printf '%s\n' "============================================"
printf '%s\n' " AI QC deploy ($MODE)"
printf '%s\n' "============================================"
printf '%s\n' "Server: $(hostname)"
printf '%s\n' "Current: $CURRENT_SHORT $(git log -1 --format='%s' HEAD)"
printf '%s\n' ""

# Refresh remote branches and tags so the target can be resolved.
printf '%s\n' "Fetching latest refs..."
git fetch --tags --prune --quiet
|
||
|
||
# Resolve the deploy target: origin/develop for dev, the requested tag for
# prod. Sets TARGET_REF (symbolic name), TARGET_REV (full commit SHA) and
# TARGET_SHORT (abbreviated commit SHA). Exits 0 early when the checkout is
# already at the target commit.
if [[ "$MODE" == "dev" ]]; then
  TARGET_REF="origin/develop"
else
  if ! git rev-parse --verify --quiet "refs/tags/$TARGET_TAG^{commit}" > /dev/null; then
    echo "ERROR: Tag '$TARGET_TAG' not found after fetch"
    exit 1
  fi
  TARGET_REF="refs/tags/$TARGET_TAG"
fi

# Peel to the underlying commit. For an annotated tag a plain
# `git rev-parse <tag>` prints the tag object's SHA, which never equals
# HEAD's commit SHA, so the "already deployed" check below would never match
# and the displayed short hash would be the tag object's.
TARGET_REV=$(git rev-parse --verify "$TARGET_REF^{commit}")
TARGET_SHORT=$(git rev-parse --short "$TARGET_REV")

if [[ "$CURRENT_REV" == "$TARGET_REV" ]]; then
  echo "Already at $TARGET_SHORT — nothing to do."
  exit 0
fi
|
||
|
||
# Summarize the target and list the commits that would be applied.
printf '%s\n' "Target: $TARGET_SHORT $(git log -1 --format='%s' "$TARGET_REF")"
printf '%s\n' ""
printf '%s\n' "Commits to apply:"

# Let git cap its own output instead of piping into `head`. With `head`, the
# pipe closes after the cap is reached, git log dies with SIGPIPE (141),
# pipefail propagates that status and errexit ends the script without a word.
# That only happens for batches larger than the cap, i.e. real prod releases;
# it was first observed on the v1.3.0 prod deploy.
max_listed=20
git log --oneline --max-count="$max_listed" "$CURRENT_REV..$TARGET_REV"

# The total comes from rev-list so that no pipeline is involved.
CHANGE_COUNT="$(git rev-list --count "$CURRENT_REV..$TARGET_REV")"
if (( CHANGE_COUNT > max_listed )); then
  printf '%s\n' " ... and $((CHANGE_COUNT - max_listed)) more"
fi
printf '%s\n' ""
|
||
|
||
# Decide whether dependencies must be reinstalled: REQS_CHANGED becomes true
# when any requirements.txt (top level or nested) differs between the
# current and target revisions.
#
# The file list is captured first rather than piped into `grep -q`. grep -q
# exits on the first match; if git is still writing at that point it is
# killed by SIGPIPE (141), pipefail turns the pipeline status non-zero, and
# the `if` would wrongly report "no change" — silently skipping pip install.
# Capturing also means a failing `git diff` aborts the script instead of
# being read as "nothing changed".
REQS_CHANGED=false
changed_paths=$(git diff --name-only "$CURRENT_REV" "$TARGET_REV")
if grep -qE '(^|/)requirements\.txt$' <<<"$changed_paths"; then
  REQS_CHANGED=true
  echo "Note: requirements.txt changed — pip install will run."
  echo ""
fi
|
||
|
||
# A dry run stops here: everything above only inspected the repository.
[[ "$DRY_RUN" != "true" ]] || {
  echo "Dry run — no changes made."
  exit 0
}

# Ask for explicit confirmation; only a single "y" or "Y" proceeds.
read -r -p "Proceed with deploy? (y/N): " confirm
case "$confirm" in
  [Yy])
    ;;
  *)
    echo "Cancelled."
    exit 0
    ;;
esac
|
||
|
||
# Save the rollback checkpoint (the revision deployed until now) before
# touching the working tree.
echo "$CURRENT_REV" > "$ROLLBACK_FILE"

echo "Applying changes..."
git reset --hard "$TARGET_REV"

# Reinstall dependencies only when a requirements.txt changed.
if [[ "$REQS_CHANGED" == "true" ]]; then
  echo "Installing updated dependencies..."
  if ! "$APP_DIR/venv/bin/pip" install -q -r "$APP_DIR/requirements.txt"; then
    # Without this branch errexit would stop the script right here, leaving
    # the new code on disk while the service still runs the old revision.
    # Put the tree back so disk and running service agree again.
    echo "Dependency install failed — restoring working tree to $CURRENT_SHORT..."
    git reset --hard "$CURRENT_REV"
    # Best effort: re-pin the previous requirements in case pip got part-way.
    "$APP_DIR/venv/bin/pip" install -q -r "$APP_DIR/requirements.txt" \
      || echo "WARNING: could not reinstall previous dependencies; check $APP_DIR/venv before retrying."
    echo "Deploy aborted. Service was not restarted."
    exit 1
  fi
fi
|
||
|
||
#######################################
# Poll $HEALTH_URL until it answers with a 2xx status, or give up.
# Startup takes ~4s on dev (75 QC modules + 14 profiles load on import).
# Globals:
#   HEALTH_URL (read)
# Arguments:
#   $1 - maximum number of attempts (optional, default 15)
#   $2 - seconds to sleep before each attempt (optional, default 2)
#        Defaults give a 15 x 2s = 30s sleep window.
# Outputs:
#   Writes a one-line "healthy after ..." note to stdout on success.
# Returns:
#   0 once the endpoint answers successfully, 1 if every attempt failed.
#######################################
wait_for_health() {
  local max_attempts=${1:-15}
  local interval=${2:-2}
  # Keep the loop counter local so it cannot clobber a caller's variable.
  local attempt

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    sleep "$interval"
    # --max-time bounds each probe: without it, an endpoint that accepts the
    # connection but never replies would hang the poll forever.
    if curl -sf -o /dev/null --max-time 5 "$HEALTH_URL"; then
      echo " healthy after ${attempt}x${interval}s"
      return 0
    fi
  done
  return 1
}
|
||
|
||
# Restart the service on the new revision and smoke-test it. If the restart
# or the smoke test fails, roll back to the previous revision.
#
# Exit status: 0 deploy healthy, 1 deploy failed but rollback is healthy,
# 2 rollback failed as well.
#
# Each step that can fail is tested explicitly. Under errexit a failing
# `systemctl restart` or `git reset` would otherwise end the script on the
# spot, skipping the rollback and the diagnostics below.
echo "Restarting $SERVICE..."
if sudo systemctl restart "$SERVICE"; then
  echo "Smoke testing $HEALTH_URL..."
  if wait_for_health; then
    NEW_SHORT=$(git rev-parse --short HEAD)
    echo ""
    echo "Deploy OK. Now at $NEW_SHORT."
    echo "Rollback target saved: $CURRENT_SHORT (run rollback.sh last to revert)"
    exit 0
  fi
  failure_reason="Smoke test failed after 30s"
else
  failure_reason="Restart of $SERVICE failed"
fi

echo ""
echo "$failure_reason — rolling back to $CURRENT_SHORT..."

rollback_ok=true
git reset --hard "$CURRENT_REV" || rollback_ok=false

# The deploy reinstalled dependencies when a requirements.txt changed, so the
# old code needs its old pins back. Best effort: the health check below is
# what decides whether the rollback worked.
if [[ "$rollback_ok" == "true" && "${REQS_CHANGED:-false}" == "true" ]]; then
  echo "Reinstalling previous dependencies..."
  "$APP_DIR/venv/bin/pip" install -q -r "$APP_DIR/requirements.txt" \
    || echo "WARNING: could not reinstall previous dependencies."
fi

if [[ "$rollback_ok" == "true" ]] \
  && sudo systemctl restart "$SERVICE" \
  && wait_for_health; then
  echo "Rolled back successfully. Service healthy at $CURRENT_SHORT."
  echo "Investigate: sudo journalctl -u $SERVICE -n 100"
  exit 1
fi

echo "ROLLBACK ALSO FAILED. Service is in a broken state."
echo "sudo systemctl status $SERVICE"
echo "sudo journalctl -u $SERVICE -n 100"
exit 2
|