Previously a nightly APScheduler container fired the tagger on every
file in the configured Box folder. With ~5000 files coming, that's
~5000 Box HTTP calls every night just to ask "is this tagged?". Move
to manual-only mode and source the skip decision from the local DB.
- `db.is_file_already_tagged(conn, file_id)` — returns True iff the
DB has a row with status IN ('success','backfilled'). Used by both
image and video loops in main.py instead of the previous
`check_existing_metadata(box_client, file_id)` Box round-trip.
- `fetch_existing_metadata(box_client, file_id)` (main.py) — returns
the user-defined template fields as a flat dict by stripping the
Box `$id`/`$type`/etc. attrs from the SDK response.
- `_run_backfill(run_id, db_conn)` (main.py) — walks the Box folder
and inserts a `status='backfilled'` row for every file Box already
has marriottUsa metadata for. Read-only against Box; safe to re-run.
Use this after first deploy, or to repopulate the DB from Box.
- `POST /api/backfill` mirrors `POST /api/runs` (background thread,
same live-state record).
- SPA: new "Backfill from Box" button next to "Run now" (with a
confirm dialog and a yellow `.status-backfilled` event treatment).
- docker-compose.yml: removed the `tagger` (scheduler) service.
Manual triggers via the SPA / `POST /api/runs` only. scheduler.py
stays in the repo for archival / opt-back-in.
- deploy.sh: readiness now checks the `api` container instead of
`tagger`; `--logs` tails api logs.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
320 lines
12 KiB
Bash
Executable file
320 lines
12 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
# Marriott Box Tagger — deploy script.
|
|
#
|
|
# Idempotent. Safe to re-run on the dev server.
|
|
# Public URL: https://optical-dev.oliver.solutions/marriott-tagging/
|
|
#
|
|
# Server layout (mirrors /oliver-sales-ops-platform/, /adeo-maturity/, etc.):
|
|
# /opt/marriott-box-image-video-tagging/ — repo + docker-compose
|
|
# /var/www/html/marriott-tagging/ — built SPA, served by Apache
|
|
# docker volume marriott-tagging_pgdata — Postgres data (survives rebuilds)
|
|
#
|
|
# What it does:
|
|
# 1. Sanity (.env, box_config.json, docker, git, compose v2 on PATH).
|
|
# 2. Auto-pick free host ports:
|
|
# - prefers POSTGRES_HOST_PORT (default 5435, range 5435-5499)
|
|
# - prefers MARRIOTT_API_PORT (default 8004, range 8003-8099)
|
|
# Persisted to .env so re-deploys keep using them.
|
|
# 3. Render deploy/apache-marriott-tagging.conf from .tmpl with the picked
|
|
# API port.
|
|
# 4. git pull --ff-only origin main (--no-pull to skip).
|
|
# 5. docker compose build && up -d (--no-build to skip).
|
|
# 6. Build the Vite SPA in a one-shot node:20 container and rsync dist/
|
|
# into /var/www/html/marriott-tagging/ (--no-frontend to skip).
|
|
# 7. Poll /api/health for up to 60s; verify the api container is running.
|
|
# 8. Print URLs, ports, the Apache Include line, and reload reminder.
|
|
#
|
|
# Flags:
|
|
# --no-pull skip git pull
|
|
# --no-build skip docker rebuild
|
|
# --no-frontend skip Vite build + SPA sync
|
|
# --run-now also fire a tagging pass immediately (via the API)
|
|
# --logs tail api logs after deploy
|
|
|
|
set -euo pipefail

# NOTE: keep `set -euo pipefail` as the first line of this section. The --help
# handler prints every "# " comment line from line 2 up to the first line that
# starts with "set", so nothing else may be placed above it.

# Fixed identifiers for this deployment.
COMPOSE_PROJECT="marriott-tagging"
URL_PATH="/marriott-tagging"
WEB_ROOT="/var/www/html/marriott-tagging"

# Absolute directory holding this script, and the repo root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(dirname "$SCRIPT_DIR")"

# Everything below uses paths relative to the repo root.
cd "$REPO_ROOT"
|
|
|
|
# Shared formatter for the helpers below.
#   $1  - ANSI colour number used for the bold "[deploy]" tag
#   $2+ - message words, joined with single spaces
_deploy_say() {
  local colour=$1
  shift
  printf '\033[1;%sm[deploy]\033[0m %s\n' "$colour" "$*"
}

# log  - cyan tag, stdout.
log() { _deploy_say 36 "$@"; }

# err  - red tag, stderr.
err() { _deploy_say 31 "$@" >&2; }

# ok   - green tag, stdout.
ok() { _deploy_say 32 "$@"; }

# warn - yellow tag, stdout.
warn() { _deploy_say 33 "$@"; }
|
|
|
|
# Step toggles. Every step is on by default; the optional extras are off.
DO_PULL=1 DO_BUILD=1 DO_FRONTEND=1
RUN_NOW=0 TAIL_LOGS=0

# Walk the command-line flags. Anything unrecognised is a usage error (exit 2).
for opt; do
  case "$opt" in
    --no-pull)
      DO_PULL=0
      ;;
    --no-build)
      DO_BUILD=0
      ;;
    --no-frontend)
      DO_FRONTEND=0
      ;;
    --run-now)
      RUN_NOW=1
      ;;
    --logs)
      TAIL_LOGS=1
      ;;
    -h|--help)
      # Print the "# " comment lines from the top of this script (line 2 up to
      # the first line starting with "set"), with the "# " prefix removed.
      sed -n '2,/^set/p' "$0" | grep -E '^# ' | sed 's/^# //'
      exit 0
      ;;
    *)
      err "Unknown flag: $opt (try --help)"
      exit 2
      ;;
  esac
done
|
|
|
|
# ---------- 1. Sanity ----------
|
|
|
|
# Fail fast, with an actionable message, when a required file or tool is
# missing. File paths are relative to the repo root (the current directory).

[[ -f docker-compose.yml ]] || { err "docker-compose.yml not found in $REPO_ROOT"; exit 1; }

if [[ ! -f .env ]]; then
  err ".env not found. Copy .env.example and fill it in:"
  err " cp .env.example .env && \$EDITOR .env"
  err "Required keys: GEMINI_API_KEY, POSTGRES_PASSWORD"
  err "MSAL keys: AZURE_TENANT_ID, AZURE_CLIENT_ID, VITE_AZURE_* (or set DEV_AUTH_BYPASS=true)"
  exit 1
fi

if [[ ! -f box_config.json ]]; then
  err "box_config.json not found in $REPO_ROOT"
  err "Drop the Box JWT config (from the Box Developer Console) in the repo"
  # The message names only the api container: this script runs in manual-only
  # mode with no scheduler ("tagger") container.
  err "root before deploying. It's bind-mounted into the api container."
  exit 1
fi

command -v docker >/dev/null 2>&1 || { err "docker not on PATH"; exit 1; }
command -v git >/dev/null 2>&1 || { err "git not on PATH"; exit 1; }
# curl drives the readiness poll and --run-now. Without this check a missing
# curl would look like an API that never becomes healthy.
command -v curl >/dev/null 2>&1 || { err "curl not on PATH"; exit 1; }
docker compose version >/dev/null 2>&1 || { err "docker compose v2 not available"; exit 1; }
|
|
|
|
# ---------- helpers ----------
|
|
|
|
#######################################
# Report whether something is listening on a local TCP port.
#
# lsof is consulted first (when installed). If it reports nothing, `ss -ltn`
# is used as a second opinion. The ss check looks for any listening-socket
# row instead of a PID, because PID/process information may be hidden from an
# unprivileged user for sockets owned by someone else, which would make a
# busy port look free.
#
# Arguments: $1 - TCP port number
# Returns:   0 if a listener was found, 1 otherwise (including when neither
#            tool is available)
#######################################
port_in_use() {
  local port=$1 hit=""
  if command -v lsof >/dev/null 2>&1; then
    # Skip the header row; column 2 is the PID of the listener.
    hit=$( { lsof -nP -iTCP:"$port" -sTCP:LISTEN 2>/dev/null || true; } | awk 'NR>1 {print $2}' | head -1 ) || true
  fi
  if [[ -z "$hit" ]] && command -v ss >/dev/null 2>&1; then
    # Any row after the header means the port has a listener.
    hit=$( { ss -ltn "sport = :$port" 2>/dev/null || true; } | awk 'NR>1' | head -1 ) || true
  fi
  [[ -n "$hit" ]]
}
|
|
|
|
#######################################
# Pick a free TCP port.
#
# The preferred port is tried first; if it is busy, the inclusive range
# start..end is scanned in ascending order.
#
# Arguments: $1 - preferred port
#            $2 - first port of the fallback range
#            $3 - last port of the fallback range
# Outputs:   the chosen port on stdout (no trailing newline)
# Returns:   0 when a port was found, 1 when every candidate is busy
#######################################
find_free_port() {
  local preferred=$1 start=$2 end=$3
  local candidate=$preferred
  local next=$(( start ))

  while :; do
    if ! port_in_use "$candidate"; then
      printf '%s' "$candidate"
      return 0
    fi
    # Current candidate is busy: move on to the next port of the range, or
    # give up once the range is exhausted.
    (( next <= end )) || return 1
    candidate=$next
    next=$(( next + 1 ))
  done
}
|
|
|
|
#######################################
# Create or update KEY=VALUE in ${REPO_ROOT}/.env.
#
# Every existing line starting with "KEY=" is rewritten in place; if there is
# none, the assignment is appended.
#
# Globals:   REPO_ROOT (read)
# Arguments: $1 - key (expected to be a plain identifier; it is used inside a
#                 regular expression)
#            $2 - value, written literally (single line)
#######################################
set_env_var() {
  local key=$1 value=$2 file="${REPO_ROOT}/.env"
  local escaped
  if grep -q "^${key}=" "$file" 2>/dev/null; then
    # Escape the characters that are special in the sed replacement below:
    # backslash, '&' (whole match) and '#' (the delimiter in use).
    escaped=$(printf '%s' "$value" | sed -e 's/[\\&#]/\\&/g')
    # -i.bak + rm is the in-place form accepted by both GNU and BSD sed.
    sed -i.bak "s#^${key}=.*#${key}=${escaped}#" "$file"
    rm -f "${file}.bak"
  else
    # If the file does not end in a newline, add one first so the new entry
    # is not glued onto the last existing line.
    if [[ -s "$file" && -n "$(tail -c 1 "$file")" ]]; then
      printf '\n' >> "$file"
    fi
    printf '%s=%s\n' "$key" "$value" >> "$file"
  fi
}
|
|
|
|
#######################################
# Print the value of a key from ${REPO_ROOT}/.env.
#
# Only the first "KEY=" line is used. Everything after the first '=' is the
# value, and all double-quote characters are removed from it. Nothing is
# printed when the key or the file is missing.
#
# Globals:   REPO_ROOT (read)
# Arguments: $1 - key name
# Outputs:   the value followed by a newline, or nothing
# Returns:   always 0
#######################################
get_env_var() {
  local wanted=$1 envfile="${REPO_ROOT}/.env" entry
  [[ -r "$envfile" ]] || return 0
  while IFS= read -r entry || [[ -n "$entry" ]]; do
    if [[ "$entry" == "${wanted}="* ]]; then
      entry=${entry#*=}
      printf '%s\n' "${entry//\"/}"
      return 0
    fi
  done < "$envfile"
  return 0
}
|
|
|
|
# ---------- 2. Pick host ports ----------
|
|
|
|
DEFAULT_DB_PORT=5435
DEFAULT_API_PORT=8004

# Preferred ports: whatever is recorded in .env, else the defaults above.
DB_PORT=$(get_env_var POSTGRES_HOST_PORT)
API_PORT=$(get_env_var MARRIOTT_API_PORT)
: "${DB_PORT:=$DEFAULT_DB_PORT}" "${API_PORT:=$DEFAULT_API_PORT}"
# Remembered so the final report can tell whether the API port moved.
PREV_API_PORT="$API_PORT"

log "Resolving host ports (preferred: db=$DB_PORT api=$API_PORT)…"

# Number of containers `docker compose ps -q` lists for this project.
RUNNING=$(docker compose ps -q 2>/dev/null | wc -l | tr -d ' ')

if (( RUNNING > 0 )); then
  # Containers already exist: do not re-probe, keep the ports as they are.
  ok "Project '$COMPOSE_PROJECT' already has $RUNNING containers — keeping current port assignment."
else
  NEW_DB_PORT=$(find_free_port "$DB_PORT" 5435 5499) || NEW_DB_PORT=""
  NEW_API_PORT=$(find_free_port "$API_PORT" 8003 8099) || NEW_API_PORT=""

  if [[ -z "$NEW_DB_PORT" || -z "$NEW_API_PORT" ]]; then
    err "Could not find a free port in the configured ranges."
    err " db desired=$DB_PORT scanned=5435-5499"
    err " api desired=$API_PORT scanned=8003-8099"
    exit 1
  fi

  # Tell the operator when a preferred port had to be abandoned.
  if [[ "$NEW_DB_PORT" != "$DB_PORT" ]]; then
    warn "db port $DB_PORT busy → using $NEW_DB_PORT"
  fi
  if [[ "$NEW_API_PORT" != "$API_PORT" ]]; then
    warn "api port $API_PORT busy → using $NEW_API_PORT"
  fi

  DB_PORT=$NEW_DB_PORT
  API_PORT=$NEW_API_PORT

  # Persist the choice so later deploys start from the same ports.
  set_env_var POSTGRES_HOST_PORT "$DB_PORT"
  set_env_var MARRIOTT_API_PORT "$API_PORT"
  ok "Ports: db=$DB_PORT api=$API_PORT (persisted to .env)"
fi
|
|
|
|
# ---------- 3. Render apache include ----------
|
|
|
|
# Render the Apache include from its template by substituting the API port
# for every __API_PORT__ placeholder. Without a template the existing conf
# file (if any) is left alone.
APACHE_TMPL="$REPO_ROOT/deploy/apache-marriott-tagging.conf.tmpl"
APACHE_CONF="$REPO_ROOT/deploy/apache-marriott-tagging.conf"

if [[ ! -f "$APACHE_TMPL" ]]; then
  warn "apache-marriott-tagging.conf.tmpl missing — leaving deploy/apache-marriott-tagging.conf untouched."
else
  sed -e "s#__API_PORT__#${API_PORT}#g" "$APACHE_TMPL" > "$APACHE_CONF"
  ok "Rendered apache-marriott-tagging.conf with api port $API_PORT"
fi
|
|
|
|
# ---------- 4. git pull ----------
|
|
|
|
# Fast-forward the checkout to origin/main unless --no-pull was given.
# --ff-only makes git refuse (and the script stop) instead of creating a merge.
if [[ "$DO_PULL" -ne 0 ]]; then
  log "git pull --ff-only origin main"
  git pull --ff-only origin main
fi
|
|
|
|
# ---------- 5. Backend build + up ----------
|
|
|
|
# Rebuild the images unless --no-build was given.
if [[ "$DO_BUILD" -ne 0 ]]; then
  log "docker compose build"
  docker compose build
fi

# Always (re)start the stack in the background, whether or not it was rebuilt.
log "docker compose up -d (db + api)"
docker compose up -d
|
|
|
|
# ---------- 6. Frontend build + sync ----------
|
|
|
|
# Build the Vite SPA inside a throw-away node container, then mirror
# frontend/dist/ into $WEB_ROOT. Skipped with --no-frontend or when the
# frontend/ directory does not exist.
if (( DO_FRONTEND )); then
  if [[ ! -d "$REPO_ROOT/frontend" ]]; then
    warn "frontend/ directory missing — skipping SPA build."
  else
    # Run a command directly when $WEB_ROOT is writable by the current user,
    # otherwise through sudo. Keeps the privilege decision in one place.
    web_root_do() {
      if [[ -w "$WEB_ROOT" ]]; then
        "$@"
      else
        sudo "$@"
      fi
    }

    # Build-time settings come from .env. The VITE_-prefixed bypass flag wins
    # over the plain DEV_AUTH_BYPASS one.
    BYPASS=$(get_env_var VITE_DEV_AUTH_BYPASS); BYPASS=${BYPASS:-$(get_env_var DEV_AUTH_BYPASS)}
    TENANT=$(get_env_var VITE_AZURE_TENANT_ID)
    CLIENT=$(get_env_var VITE_AZURE_CLIENT_ID)
    # VITE_PUBLIC_BASE in .env is for local `npm run dev` — on the server
    # we always build for the prod URL. The `base` baked into the bundle
    # determines where Apache looks for /<slug>/assets/... so getting this
    # wrong gives a 404 on the JS bundle and a blank page.
    PUBLIC_BASE="https://optical-dev.oliver.solutions${URL_PATH}"

    # Only whether tenant/client are set is logged, not their values.
    log "Building Vite SPA in node:20 (bypass=${BYPASS:-false} tenant=${TENANT:+set} client=${CLIENT:+set})…"
    docker run --rm \
      -v "$REPO_ROOT/frontend:/app" \
      -w /app \
      -e VITE_DEV_AUTH_BYPASS="${BYPASS:-false}" \
      -e VITE_AZURE_TENANT_ID="$TENANT" \
      -e VITE_AZURE_CLIENT_ID="$CLIENT" \
      -e VITE_PUBLIC_BASE="$PUBLIC_BASE" \
      node:20-alpine \
      sh -c "npm install --silent && npm run build"

    if [[ ! -d "$REPO_ROOT/frontend/dist" ]]; then
      err "Vite build did not produce frontend/dist — aborting frontend sync."
      exit 1
    fi

    log "Syncing frontend/dist/ → $WEB_ROOT/"
    if [[ ! -d "$WEB_ROOT" ]]; then
      if command -v sudo >/dev/null 2>&1; then sudo mkdir -p "$WEB_ROOT"
      else mkdir -p "$WEB_ROOT"; fi
    fi

    if command -v rsync >/dev/null 2>&1; then
      # --delete removes files from the web root that are no longer in dist/.
      web_root_do rsync -a --delete "$REPO_ROOT/frontend/dist/" "$WEB_ROOT/"
    else
      # No rsync: wipe and copy. ${WEB_ROOT:?} aborts if the variable is ever
      # empty, so this can never expand to `rm -rf /*`.
      web_root_do rm -rf "${WEB_ROOT:?}"/*
      web_root_do cp -a "$REPO_ROOT/frontend/dist/." "$WEB_ROOT/"
    fi
    ok "SPA synced to $WEB_ROOT"
  fi
fi
|
|
|
|
# ---------- 7. Readiness ----------
|
|
|
|
# Wait until the API answers /api/health, then confirm its container is in
# the "running" state. On failure the last 60 api log lines are shown and the
# script exits 1.
log "Waiting for API /api/health on :$API_PORT (max 60s)…"

# SECONDS is bash's built-in elapsed-time counter. A wall-clock deadline plus
# a per-request timeout keeps the wait bounded even if the API accepts the
# connection but never responds.
HEALTH_DEADLINE=$(( SECONDS + 60 ))
until curl -fsS --max-time 2 "http://127.0.0.1:${API_PORT}/api/health" >/dev/null 2>&1; do
  if (( SECONDS >= HEALTH_DEADLINE )); then
    err "API did not become healthy within 60s. Recent logs:"
    docker compose logs api --tail 60 || true
    exit 1
  fi
  sleep 2
done
ok "API healthy"

# `|| true`: with `set -e` + `pipefail` a failing `docker compose ps` would
# otherwise end the script right here, before the message below is printed.
API_STATE=$(docker compose ps api --format '{{.State}}' 2>/dev/null | head -1) || true
if [[ "$API_STATE" != "running" ]]; then
  err "API container is not running (state=${API_STATE:-unknown}). Recent logs:"
  docker compose logs api --tail 60 || true
  exit 1
fi
ok "API container running (manual-only mode — no scheduler container)"
|
|
|
|
# ---------- 8. Optional: trigger an immediate pass via the API ----------
|
|
|
|
# --run-now: ask the API to start a tagging pass right after the deploy.
if (( RUN_NOW )); then
  log "Firing a tagging pass via /api/runs…"
  # With DEV_AUTH_BYPASS=true the call works unauthenticated. Otherwise
  # a real token is required; we just print the curl command in that case.
  # An unset DEV_AUTH_BYPASS is treated as "true" here, so the POST is
  # attempted and any failure is only reported as a warning.
  BYPASS_NOW=$(get_env_var DEV_AUTH_BYPASS); BYPASS_NOW=$(echo "${BYPASS_NOW:-true}" | tr '[:upper:]' '[:lower:]')
  if [[ "$BYPASS_NOW" == "true" || "$BYPASS_NOW" == "1" || "$BYPASS_NOW" == "yes" ]]; then
    # Best effort: a failed trigger must not fail the whole deploy.
    curl -fsS -X POST "http://127.0.0.1:${API_PORT}/api/runs" || \
      warn "POST /api/runs failed — see api logs."
    echo
  else
    warn "DEV_AUTH_BYPASS != true → can't auth from a shell. Run a pass from the SPA, or:"
    # NOTE(review): assumes the API accepts the access token as a Bearer
    # Authorization header — confirm against the API's auth middleware.
    warn "  curl -X POST -H 'Authorization: Bearer <token>' http://127.0.0.1:${API_PORT}/api/runs"
  fi
fi
|
|
|
|
# ---------- 9. Report ----------
|
|
|
|
# Final summary for the operator: where things live and how to reach them.
# The Postgres user and database name fall back to the values below when
# they are not set in .env.
POSTGRES_USER_VAL=$(get_env_var POSTGRES_USER); POSTGRES_USER_VAL=${POSTGRES_USER_VAL:-marriott}
POSTGRES_DB_VAL=$(get_env_var POSTGRES_DB); POSTGRES_DB_VAL=${POSTGRES_DB_VAL:-marriott_tagging}

ok "Deploy complete."
echo
echo " Compose project: $COMPOSE_PROJECT"
echo " API (local): http://127.0.0.1:${API_PORT}/api/health"
echo " Public URL: https://optical-dev.oliver.solutions${URL_PATH}/"
echo " SPA on disk: $WEB_ROOT"
echo " Postgres (local): 127.0.0.1:${DB_PORT} (db=$POSTGRES_DB_VAL user=$POSTGRES_USER_VAL)"
# No cron schedule is reported: this deploy runs in manual-only mode with no
# scheduler container, so printing one would be misleading.
echo " Schedule: manual-only (no scheduler container; trigger from the SPA or POST /api/runs)"
echo " Persisted volume: marriott-tagging_pgdata"
echo
echo " Apache include line for the merged vhost:"
echo " Include $REPO_ROOT/deploy/apache-marriott-tagging.conf"
|
|
|
|
# Remind the operator to reload Apache when either
#   - the API port differs from the one this run started with, or
#   - no enabled site under /etc/apache2/sites-enabled mentions our include.
NEEDS_RELOAD=0
if [[ "$API_PORT" != "$PREV_API_PORT" ]] \
  || ! grep -qF "$REPO_ROOT/deploy/apache-marriott-tagging.conf" /etc/apache2/sites-enabled/*.conf 2>/dev/null; then
  NEEDS_RELOAD=1
fi

if [[ "$NEEDS_RELOAD" -ne 0 ]]; then
  echo
  warn "API port changed (or first deploy). After adding the Include line:"
  echo " sudo apachectl configtest && sudo systemctl reload apache2"
fi
echo
|
|
|
|
# --logs: follow the api container's log output until interrupted.
if [[ "$TAIL_LOGS" -ne 0 ]]; then
  log "Tailing api logs (Ctrl-C to stop)…"
  docker compose logs -f api
fi
|