marriott-box-image-video-ta.../deploy/deploy.sh
DJP 9e6a75feb6 Manual-only runs, DB-based skip check, backfill-from-Box
Previously a nightly APScheduler container fired the tagger on every
file in the configured Box folder. With ~5000 files coming, that's
~5000 Box HTTP calls every night just to ask "is this tagged?". Move
to manual-only mode and source the skip decision from the local DB.

- `db.is_file_already_tagged(conn, file_id)` — returns True iff the
  DB has a row with status IN ('success','backfilled'). Used by both
  image and video loops in main.py instead of the previous
  `check_existing_metadata(box_client, file_id)` Box round-trip.
- `fetch_existing_metadata(box_client, file_id)` (main.py) — returns
  the user-defined template fields as a flat dict by stripping the
  Box `$id`/`$type`/etc. attrs from the SDK response.
- `_run_backfill(run_id, db_conn)` (main.py) — walks the Box folder
  and inserts a `status='backfilled'` row for every file Box already
  has marriottUsa metadata for. Read-only against Box; safe to re-run.
  Use this after first deploy, or to repopulate the DB from Box.
- `POST /api/backfill` mirrors `POST /api/runs` (background thread,
  same live-state record).
- SPA: new "Backfill from Box" button next to "Run now" (with a
  confirm dialog and a yellow `.status-backfilled` event treatment).
- docker-compose.yml: removed the `tagger` (scheduler) service.
  Manual triggers via the SPA / `POST /api/runs` only. scheduler.py
  stays in the repo for archival / opt-back-in.
- deploy.sh: readiness now checks the `api` container instead of
  `tagger`; `--logs` tails api logs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 15:41:10 -04:00

320 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
# Marriott Box Tagger — deploy script.
#
# Idempotent. Safe to re-run on the dev server.
# Public URL: https://optical-dev.oliver.solutions/marriott-tagging/
#
# Server layout (mirrors /oliver-sales-ops-platform/, /adeo-maturity/, etc.):
# /opt/marriott-box-image-video-tagging/ — repo + docker-compose
# /var/www/html/marriott-tagging/ — built SPA, served by Apache
# docker volume marriott-tagging_pgdata — Postgres data (survives rebuilds)
#
# What it does:
# 1. Sanity (.env, box_config.json, docker, git, compose v2 on PATH).
# 2. Auto-pick free host ports:
# - prefers POSTGRES_HOST_PORT (default 5435, range 5435-5499)
# - prefers MARRIOTT_API_PORT (default 8004, range 8003-8099)
# Persisted to .env so re-deploys keep using them.
# 3. Render deploy/apache-marriott-tagging.conf from .tmpl with the picked
# API port.
# 4. git pull --ff-only origin main (--no-pull to skip).
# 5. docker compose build && up -d (--no-build to skip).
# 6. Build the Vite SPA in a one-shot node:20 container and rsync dist/
# into /var/www/html/marriott-tagging/ (--no-frontend to skip).
# 7. Poll /api/health for up to 60s; verify api container running.
# 8. Print URLs, ports, the Apache Include line, and reload reminder.
#
# Flags:
# --no-pull skip git pull
# --no-build skip docker rebuild
# --no-frontend skip Vite build + SPA sync
# --run-now also fire a tagging pass immediately (via the API)
# --logs tail api logs after deploy
set -euo pipefail
# NOTE: --help prints the header comment up to the first line that starts with
# "set" (the line above), so explanatory comments belong below it, not above.
#
# Strict mode: abort on unhandled command failures (-e), on unset variables
# (-u), and when any stage of a pipeline fails (pipefail).

# Directory containing this script, resolved to an absolute path.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Repository root: the parent directory of the script's directory.
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Used in log/report messages below; it is not passed to `docker compose` here.
# NOTE(review): presumably matches the project name in docker-compose.yml — confirm.
COMPOSE_PROJECT="marriott-tagging"
# URL path under the public host where the SPA is served.
URL_PATH="/marriott-tagging"
# Directory on disk that receives the built SPA.
WEB_ROOT="/var/www/html/marriott-tagging"
# Everything below uses paths relative to the repo root (.env, docker-compose.yml, …).
cd "$REPO_ROOT"
# Colored "[deploy]" status printers. Each joins its arguments with spaces and
# prints them on one line after the colored tag. Only err writes to stderr;
# log, ok and warn write to stdout.
#
# _deploy_say SGR MESSAGE... — shared printer; SGR is the ANSI color parameter.
_deploy_say() {
  local sgr=$1
  shift
  printf '\033[%sm[deploy]\033[0m %s\n' "$sgr" "$*"
}
log() { _deploy_say '1;36' "$@"; }      # bold cyan: progress
err() { _deploy_say '1;31' "$@" >&2; }  # bold red: errors (stderr)
ok() { _deploy_say '1;32' "$@"; }       # bold green: success
warn() { _deploy_say '1;33' "$@"; }     # bold yellow: warnings
# Flag defaults: every deploy step enabled, optional extras disabled.
DO_PULL=1 DO_BUILD=1 DO_FRONTEND=1
RUN_NOW=0 TAIL_LOGS=0

# Prints this script's header comment (line 2 through the first line that
# starts with "set"), keeping only "# "-prefixed lines and dropping the prefix.
print_usage() {
  sed -n '2,/^set/p' "$0" | grep -E '^# ' | sed 's/^# //'
}

# Each recognised flag flips one switch; anything else is a usage error (exit 2).
for arg; do
  case "$arg" in
    --no-pull) DO_PULL=0 ;;
    --no-build) DO_BUILD=0 ;;
    --no-frontend) DO_FRONTEND=0 ;;
    --run-now) RUN_NOW=1 ;;
    --logs) TAIL_LOGS=1 ;;
    -h | --help)
      print_usage
      exit 0
      ;;
    *)
      err "Unknown flag: $arg (try --help)"
      exit 2
      ;;
  esac
done
# ---------- 1. Sanity ----------
# Fail fast, before touching git or docker, when a required file or tool is
# missing. Relative paths resolve against the repo root (the script cd'ed there).
[[ -f docker-compose.yml ]] || { err "docker-compose.yml not found in $REPO_ROOT"; exit 1; }
if [[ ! -f .env ]]; then
  err ".env not found. Copy .env.example and fill it in:"
  err " cp .env.example .env && \$EDITOR .env"
  err "Required keys: GEMINI_API_KEY, POSTGRES_PASSWORD"
  err "MSAL keys: AZURE_TENANT_ID, AZURE_CLIENT_ID, VITE_AZURE_* (or set DEV_AUTH_BYPASS=true)"
  exit 1
fi
if [[ ! -f box_config.json ]]; then
  err "box_config.json not found in $REPO_ROOT"
  err "Drop the Box JWT config (from the Box Developer Console) in the repo"
  err "root before deploying. It's bind-mounted into the api container."
  exit 1
fi
# curl is needed by the readiness probe and by --run-now; without this check a
# missing curl would surface later as a misleading "API did not become healthy".
for required_tool in docker git curl; do
  command -v "$required_tool" >/dev/null 2>&1 || { err "$required_tool not on PATH"; exit 1; }
done
docker compose version >/dev/null 2>&1 || { err "docker compose v2 not available"; exit 1; }
# ---------- helpers ----------
#######################################
# Reports whether a TCP port has a listener on this host.
#
# The decision is based on the presence of a listening socket, not on reading
# the owning PID: PID details are only exposed for processes the caller is
# allowed to inspect, so a PID-based check reports ports held by other users'
# processes as free.
#
# Arguments: $1 - TCP port number
# Returns:   0 if something is listening on the port, 1 otherwise
#######################################
port_in_use() {
  local port=$1 listeners=""
  if command -v ss >/dev/null 2>&1; then
    # First output line is the column header; any further non-blank line is a
    # listening socket matching the filter.
    listeners=$( { ss -ltn "sport = :$port" 2>/dev/null || true; } | awk 'NR>1 && NF' )
  fi
  if [[ -z "$listeners" ]] && command -v lsof >/dev/null 2>&1; then
    # Fallback for hosts without ss.
    # NOTE(review): unprivileged lsof may not list other users' sockets — confirm.
    listeners=$( { lsof -nP -iTCP:"$port" -sTCP:LISTEN 2>/dev/null || true; } | awk 'NR>1 && NF' )
  fi
  [[ -n "$listeners" ]]
}
#######################################
# Picks a host port that port_in_use reports as free.
#
# Arguments: $1 - preferred port, tried first
#            $2 - first port of the fallback range
#            $3 - last port of the fallback range (inclusive)
# Outputs:   the chosen port on stdout, without a trailing newline
# Returns:   0 when a port was found, 1 when the preferred port and the
#            whole range are busy (nothing is printed in that case)
#######################################
find_free_port() {
  local preferred=$1 start=$2 end=$3
  local candidate

  # The preferred port wins whenever it is available.
  port_in_use "$preferred" || {
    printf '%s' "$preferred"
    return 0
  }

  # Otherwise walk the range upwards and take the first free port.
  candidate=$((start))
  while (( candidate <= end )); do
    port_in_use "$candidate" || {
      printf '%s' "$candidate"
      return 0
    }
    candidate=$((candidate + 1))
  done
  return 1
}
#######################################
# Sets KEY=VALUE in $REPO_ROOT/.env, replacing existing KEY= lines or
# appending a new line when the key is absent.
#
# Globals:   REPO_ROOT (read)
# Arguments: $1 - variable name
#            $2 - value, written verbatim (no quoting is added)
#######################################
set_env_var() {
  local key=$1 value=$2 file="${REPO_ROOT}/.env"
  local escaped
  if grep -q "^${key}=" "$file" 2>/dev/null; then
    # Escape the characters that are special in the sed replacement below:
    # backslash, "&" (whole match) and the "#" delimiter. Without this a value
    # containing any of them breaks the command or corrupts the line.
    escaped=$(printf '%s' "$value" | sed -e 's/[\\&#]/\\&/g')
    sed -i.bak "s#^${key}=.*#${key}=${escaped}#" "$file"
    rm -f "${file}.bak"
  else
    # If the file does not end with a newline, add one first so the new key
    # is not glued onto the last existing line.
    if [[ -s "$file" && -n "$(tail -c 1 "$file")" ]]; then
      printf '\n' >> "$file"
    fi
    printf '%s=%s\n' "$key" "$value" >> "$file"
  fi
}
#######################################
# Reads a variable from $REPO_ROOT/.env.
#
# Uses the first line starting with "NAME=", takes everything after the first
# "=", and removes every double-quote character from it.
#
# Globals:   REPO_ROOT (read)
# Arguments: $1 - variable name
# Outputs:   the value on stdout; nothing when the key or the file is missing
# Returns:   always 0
#######################################
get_env_var() {
  local env_file="${REPO_ROOT}/.env" entry
  # No match and unreadable file are both treated as "not set".
  entry=$(grep -m 1 -E "^${1}=" "$env_file" 2>/dev/null) || return 0
  entry=${entry#*=}
  printf '%s\n' "${entry//\"/}"
}
# ---------- 2. Pick host ports ----------
# Start from the ports recorded in .env (or the defaults). While the project
# has containers, keep them as they are; otherwise probe for free ports and
# persist the result to .env.
DEFAULT_DB_PORT=5435
DEFAULT_API_PORT=8004
DB_PORT=$(get_env_var POSTGRES_HOST_PORT); DB_PORT=${DB_PORT:-$DEFAULT_DB_PORT}
API_PORT=$(get_env_var MARRIOTT_API_PORT); API_PORT=${API_PORT:-$DEFAULT_API_PORT}
# Remembered so the final report can tell whether the API port moved.
PREV_API_PORT="$API_PORT"
log "Resolving host ports (preferred: db=$DB_PORT api=$API_PORT)…"
# Count this project's containers. A failing `docker compose ps` is treated as
# "none": under `set -e` + pipefail an unguarded failure here would abort the
# deploy with no message at all, because its stderr is discarded.
RUNNING=$( { docker compose ps -q 2>/dev/null || true; } | grep -c . || true)
RUNNING=${RUNNING:-0}
if [[ "$RUNNING" -gt 0 ]]; then
  ok "Project '$COMPOSE_PROJECT' already has $RUNNING containers — keeping current port assignment."
else
  NEW_DB_PORT=$(find_free_port "$DB_PORT" 5435 5499) || NEW_DB_PORT=""
  NEW_API_PORT=$(find_free_port "$API_PORT" 8003 8099) || NEW_API_PORT=""
  if [[ -z "$NEW_DB_PORT" || -z "$NEW_API_PORT" ]]; then
    err "Could not find a free port in the configured ranges."
    err " db desired=$DB_PORT scanned=5435-5499"
    err " api desired=$API_PORT scanned=8003-8099"
    exit 1
  fi
  if [[ "$NEW_DB_PORT" != "$DB_PORT" ]]; then
    warn "db port $DB_PORT busy → using $NEW_DB_PORT"
  fi
  if [[ "$NEW_API_PORT" != "$API_PORT" ]]; then
    warn "api port $API_PORT busy → using $NEW_API_PORT"
  fi
  DB_PORT=$NEW_DB_PORT
  API_PORT=$NEW_API_PORT
  set_env_var POSTGRES_HOST_PORT "$DB_PORT"
  set_env_var MARRIOTT_API_PORT "$API_PORT"
  ok "Ports: db=$DB_PORT api=$API_PORT (persisted to .env)"
fi
# ---------- 3. Render apache include ----------
# Produces deploy/apache-marriott-tagging.conf from its .tmpl sibling by
# replacing every __API_PORT__ placeholder with the selected API port.
# NOTE(review): this runs before the git pull in step 4, so a template changed
# by that pull is only rendered on the next deploy.
APACHE_CONF="$REPO_ROOT/deploy/apache-marriott-tagging.conf"
APACHE_TMPL="${APACHE_CONF}.tmpl"
if [[ ! -f "$APACHE_TMPL" ]]; then
  warn "apache-marriott-tagging.conf.tmpl missing — leaving deploy/apache-marriott-tagging.conf untouched."
else
  sed "s#__API_PORT__#${API_PORT}#g" "$APACHE_TMPL" > "$APACHE_CONF"
  ok "Rendered apache-marriott-tagging.conf with api port $API_PORT"
fi
# ---------- 4. git pull ----------
# Fast-forward only; skipped with --no-pull.
if [[ "$DO_PULL" -ne 0 ]]; then
  log "git pull --ff-only origin main"
  git pull --ff-only origin main
fi

# ---------- 5. Backend build + up ----------
# The image rebuild is skipped with --no-build; `up -d` always runs.
if [[ "$DO_BUILD" -ne 0 ]]; then
  log "docker compose build"
  docker compose build
fi
log "docker compose up -d (db + api)"
docker compose up -d
# ---------- 6. Frontend build + sync ----------
# Builds the Vite SPA in a throwaway node:20-alpine container, then mirrors
# frontend/dist/ into $WEB_ROOT. Skipped entirely with --no-frontend.

# Runs a command that modifies $WEB_ROOT: directly when the directory is
# writable by the current user, through sudo otherwise.
web_root_do() {
  if [[ -w "$WEB_ROOT" ]]; then
    "$@"
  else
    sudo "$@"
  fi
}

if (( DO_FRONTEND )); then
  if [[ ! -d "$REPO_ROOT/frontend" ]]; then
    warn "frontend/ directory missing — skipping SPA build."
  else
    # VITE_DEV_AUTH_BYPASS wins; DEV_AUTH_BYPASS is the fallback.
    BYPASS=$(get_env_var VITE_DEV_AUTH_BYPASS); BYPASS=${BYPASS:-$(get_env_var DEV_AUTH_BYPASS)}
    TENANT=$(get_env_var VITE_AZURE_TENANT_ID)
    CLIENT=$(get_env_var VITE_AZURE_CLIENT_ID)
    # VITE_PUBLIC_BASE in .env is for local `npm run dev` — on the server
    # we always build for the prod URL. The `base` baked into the bundle
    # determines where Apache looks for /<slug>/assets/... so getting this
    # wrong gives a 404 on the JS bundle and a blank page.
    PUBLIC_BASE="https://optical-dev.oliver.solutions${URL_PATH}"
    # Only whether tenant/client are set is logged, never their values.
    log "Building Vite SPA in node:20 (bypass=${BYPASS:-false} tenant=${TENANT:+set} client=${CLIENT:+set})…"
    docker run --rm \
      -v "$REPO_ROOT/frontend:/app" \
      -w /app \
      -e VITE_DEV_AUTH_BYPASS="${BYPASS:-false}" \
      -e VITE_AZURE_TENANT_ID="$TENANT" \
      -e VITE_AZURE_CLIENT_ID="$CLIENT" \
      -e VITE_PUBLIC_BASE="$PUBLIC_BASE" \
      node:20-alpine \
      sh -c "npm install --silent && npm run build"
    if [[ ! -d "$REPO_ROOT/frontend/dist" ]]; then
      err "Vite build did not produce frontend/dist — aborting frontend sync."
      exit 1
    fi
    log "Syncing frontend/dist/ → $WEB_ROOT/"
    if [[ ! -d "$WEB_ROOT" ]]; then
      if command -v sudo >/dev/null 2>&1; then
        sudo mkdir -p "$WEB_ROOT"
      else
        mkdir -p "$WEB_ROOT"
      fi
    fi
    if command -v rsync >/dev/null 2>&1; then
      web_root_do rsync -a --delete "$REPO_ROOT/frontend/dist/" "$WEB_ROOT/"
    else
      # No rsync: empty the web root, then copy. find is used instead of
      # `rm -rf "$WEB_ROOT"/*` so that dotfiles are removed too (matching
      # rsync --delete), the listing happens with the same privileges as the
      # delete, and ${WEB_ROOT:?} aborts rather than operating on "/".
      web_root_do find "${WEB_ROOT:?}" -mindepth 1 -maxdepth 1 -exec rm -rf -- {} +
      web_root_do cp -a "$REPO_ROOT/frontend/dist/." "$WEB_ROOT/"
    fi
    ok "SPA synced to $WEB_ROOT"
  fi
fi
# ---------- 7. Readiness ----------
# Probe the health endpoint up to 30 times, 2s apart, then confirm that the
# api container itself reports the "running" state.
log "Waiting for API /api/health on :$API_PORT (max 60s)…"
API_HEALTHY=0
for ((attempt = 1; attempt <= 30; attempt++)); do
  if curl -fsS "http://127.0.0.1:${API_PORT}/api/health" >/dev/null 2>&1; then
    API_HEALTHY=1
    break
  fi
  sleep 2
done
if (( ! API_HEALTHY )); then
  err "API did not become healthy within 60s. Recent logs:"
  docker compose logs api --tail 60 || true
  exit 1
fi
ok "API healthy"
# Guard the ps call: under `set -e` + pipefail an unguarded failure would end
# the script silently (stderr is discarded) instead of reaching the
# "state=unknown" message below.
API_STATE=$( { docker compose ps api --format '{{.State}}' 2>/dev/null || true; } | head -1 )
if [[ "$API_STATE" != "running" ]]; then
  err "API container is not running (state=${API_STATE:-unknown}). Recent logs:"
  docker compose logs api --tail 60 || true
  exit 1
fi
ok "API container running (manual-only mode — no scheduler container)"
# ---------- 8. Optional: trigger an immediate pass via the API ----------
if (( RUN_NOW )); then
  log "Firing a tagging pass via /api/runs…"
  # With DEV_AUTH_BYPASS=true the call works unauthenticated. Otherwise
  # a real token is required; we just print the curl command in that case.
  # An unset DEV_AUTH_BYPASS counts as "false", the same default the frontend
  # build uses, so a deploy without bypass is not sent a request that cannot
  # authenticate.
  BYPASS_NOW=$(get_env_var DEV_AUTH_BYPASS)
  BYPASS_NOW=$(printf '%s' "${BYPASS_NOW:-false}" | tr '[:upper:]' '[:lower:]')
  if [[ "$BYPASS_NOW" == "true" || "$BYPASS_NOW" == "1" || "$BYPASS_NOW" == "yes" ]]; then
    curl -fsS -X POST "http://127.0.0.1:${API_PORT}/api/runs" || \
      warn "POST /api/runs failed — see api logs."
    echo
  else
    warn "DEV_AUTH_BYPASS != true → can't auth from a shell. Run a pass from the SPA, or:"
    # This deploy only manages the api container; there is no tagger service
    # to exec into.
    # NOTE(review): assumes main.py is runnable inside the api container — confirm.
    warn " docker compose exec api python main.py"
  fi
fi
# ---------- 9. Report ----------
# Summarise where everything lives, remind about the Apache include/reload
# when needed, and optionally follow the api logs.
POSTGRES_USER_VAL=$(get_env_var POSTGRES_USER); POSTGRES_USER_VAL=${POSTGRES_USER_VAL:-marriott}
POSTGRES_DB_VAL=$(get_env_var POSTGRES_DB); POSTGRES_DB_VAL=${POSTGRES_DB_VAL:-marriott_tagging}
ok "Deploy complete."
echo
echo " Compose project: $COMPOSE_PROJECT"
echo " API (local): http://127.0.0.1:${API_PORT}/api/health"
echo " Public URL: https://optical-dev.oliver.solutions${URL_PATH}/"
echo " SPA on disk: $WEB_ROOT"
echo " Postgres (local): 127.0.0.1:${DB_PORT} (db=$POSTGRES_DB_VAL user=$POSTGRES_USER_VAL)"
# No scheduler container is deployed (see the readiness step), so report the
# manual trigger paths rather than a cron expression that nothing runs.
echo " Tagging runs: manual-only (SPA \"Run now\" or POST /api/runs)"
echo " Persisted volume: marriott-tagging_pgdata"
echo
echo " Apache include line for the merged vhost:"
echo " Include $REPO_ROOT/deploy/apache-marriott-tagging.conf"
# A reload is needed when the API port moved during this run, or when no
# enabled Apache site references the rendered include yet.
NEEDS_RELOAD=0
if [[ "$API_PORT" != "$PREV_API_PORT" ]]; then
  NEEDS_RELOAD=1
fi
if ! grep -qF "$REPO_ROOT/deploy/apache-marriott-tagging.conf" /etc/apache2/sites-enabled/*.conf 2>/dev/null; then
  NEEDS_RELOAD=1
fi
if (( NEEDS_RELOAD )); then
  echo
  warn "API port changed (or first deploy). After adding the Include line:"
  echo " sudo apachectl configtest && sudo systemctl reload apache2"
fi
echo
if (( TAIL_LOGS )); then
  log "Tailing api logs (Ctrl-C to stop)…"
  docker compose logs -f api
fi