Dockerize, add Postgres request log, FastAPI + React SPA
Run model: long-running scheduler container (APScheduler) replacing the
systemd timer in Docker deployments. Every Gemini-analysed file is also
persisted to a Postgres `tagging_events` table (run_id, prompt, raw
response, validated metadata, Box-write outcomes, status, error, timing)
for search and audit. Box is still updated exactly as before and remains
the source of truth for "already tagged" — `db.log_event` swallows DB
failures so an outage can't stop a tagging pass.
Backend:
- `db.py` + `schema.sql` — append-only `tagging_events` with indexes on
run_id, file_id, created_at.
- `scheduler.py` — APScheduler BlockingScheduler with `SCHEDULE_CRON`
(default daily 02:00), `RUN_AT_STARTUP`, SIGTERM handling.
- `api.py` (FastAPI) — `/api/health`, `/api/me`, `/api/events?q=…`
(single-input search across file_name, folder_path, description,
status, file_id, validated_metadata::text, raw_response::text,
scenes::text), `POST /api/runs` (fire-and-forget pass in a background
thread), `/api/runs`, `/api/runs/{id}/events`. Every event response
carries a synthesised `box_url`.
- `auth.py` — Azure AD bearer-token validation against the tenant JWKS
(signature + aud + iss). `DEV_AUTH_BYPASS=true` short-circuits to a
configurable dev user, mirrored on the frontend by
`VITE_DEV_AUTH_BYPASS`.
Frontend (Vite + React + TS):
- `frontend/` SPA, Montserrat + black/white/#FFC407 palette.
- @azure/msal-react with the bypass switch (auto-signin when bypass off).
- Search bar across all logged fields, results list with metadata tags,
status pills, and "Open in Box ↗" links.
- "Run now" button kicks off a tagging pass via `POST /api/runs` and
polls `/api/runs/{id}/events` every 2 s for live progress.
Docker / compose:
- `docker-compose.yml` pins `name: marriott-tagging`. Three services:
`db` (postgres:16, named volume, bound to 127.0.0.1 only), `tagger`
(scheduler.py), `api` (uvicorn). Same image, different `command`.
- `Dockerfile` — python:3.12-slim, non-root user.
Deploy (optical-dev.oliver.solutions):
- `deploy/deploy.sh` — idempotent. Auto-picks free host ports
(POSTGRES_HOST_PORT 5435-5499, MARRIOTT_API_PORT 8003-8099), renders
`apache-marriott-tagging.conf` from the .tmpl, builds the SPA in a
one-shot node:20-alpine container, rsyncs `dist/` to
`/var/www/html/marriott-tagging/`, polls `/api/health`, and prints the
shared-vhost Include line.
- `apache-marriott-tagging.conf.tmpl` — proxy `/marriott-tagging/api/`
to the API container, alias `/marriott-tagging` to the SPA web-root,
SPA fallback to `index.html`.
systemd unit files left in place for the existing Ubuntu deployment
path; do not run both on the same host (would double-fire the tagger).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
010a3955a8
commit
99e978b895
26 changed files with 4151 additions and 4 deletions
20
.dockerignore
Normal file
20
.dockerignore
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
.git
|
||||
.gitignore
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
env/
|
||||
venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.py[cod]
|
||||
.DS_Store
|
||||
README.md
|
||||
CLAUDE.md
|
||||
box_config.json
|
||||
marriott-tagger.service
|
||||
marriott-tagger.timer
|
||||
# Frontend has its own build pipeline (one-shot node container in deploy.sh);
|
||||
# don't bake it into the Python image.
|
||||
frontend/
|
||||
deploy/
|
||||
51
.env.example
Normal file
51
.env.example
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Copy to `.env` and fill in. `.env` is gitignored.
|
||||
|
||||
# ── Gemini ─────────────────────────────────────────────────────────────────────
|
||||
GEMINI_API_KEY=your_gemini_api_key_here
|
||||
|
||||
# ── Postgres ───────────────────────────────────────────────────────────────────
|
||||
POSTGRES_USER=marriott
|
||||
POSTGRES_PASSWORD=change_me
|
||||
POSTGRES_DB=marriott_tagging
|
||||
# Host port to publish Postgres on (bound to 127.0.0.1 only — never reachable
|
||||
# from outside the host). Change if 5432 is already taken locally. On the dev
|
||||
# server `deploy/deploy.sh` auto-picks a free port from 5435-5499 and persists
|
||||
# the choice back to .env.
|
||||
POSTGRES_HOST_PORT=5432
|
||||
|
||||
# ── FastAPI backend ────────────────────────────────────────────────────────────
|
||||
# Host port for the API (bound to 127.0.0.1; Apache reverse-proxies to it).
|
||||
# On the dev server `deploy/deploy.sh` auto-picks from 8003-8099 and persists.
|
||||
MARRIOTT_API_PORT=8004
|
||||
# Comma-separated origins for CORS — only needed when running Vite dev server
|
||||
# against a Dockerised API. Leave empty in production (Apache same-origin).
|
||||
# Example: http://localhost:5173
|
||||
CORS_ORIGINS=
|
||||
|
||||
# ── Auth (Azure AD / Entra ID) ────────────────────────────────────────────────
|
||||
# Master switch. true = skip MSAL entirely, every request is the DEV_AUTH_* user.
|
||||
# Set to false once tenant/client IDs are filled in to enforce SSO.
|
||||
DEV_AUTH_BYPASS=true
|
||||
DEV_AUTH_NAME=Dev User
|
||||
DEV_AUTH_EMAIL=dev@oliver.agency
|
||||
# Fill from an existing Oliver Azure AD app registration. The same client ID
|
||||
# goes in both the backend (token validation) and the frontend (MSAL login).
|
||||
AZURE_TENANT_ID=
|
||||
AZURE_CLIENT_ID=
|
||||
# Frontend mirrors — Vite reads VITE_* at build time and bakes them into dist.
|
||||
# Keep these in sync with the values above.
|
||||
VITE_DEV_AUTH_BYPASS=true
|
||||
VITE_AZURE_TENANT_ID=
|
||||
VITE_AZURE_CLIENT_ID=
|
||||
# Public base URL for the SPA — used as the MSAL redirect URI root.
|
||||
# Local dev: http://localhost:5173 ; prod: https://optical-dev.oliver.solutions/marriott-tagging
|
||||
VITE_PUBLIC_BASE=http://localhost:5173
|
||||
|
||||
# ── Scheduler ──────────────────────────────────────────────────────────────────
|
||||
# Cron expression for the recurring tagging pass. Default = daily at 02:00.
|
||||
SCHEDULE_CRON=0 2 * * *
|
||||
# Set to 1 to also fire one pass immediately when the container starts (useful
|
||||
# for verification / first run after a redeploy).
|
||||
RUN_AT_STARTUP=0
|
||||
# Timezone the cron expression is interpreted in (e.g. Europe/London, UTC).
|
||||
TZ=UTC
|
||||
15
.gitignore
vendored
15
.gitignore
vendored
|
|
@ -1,8 +1,10 @@
|
|||
# ── Project-specific (security-critical, do NOT commit) ──────────────────────
|
||||
# Box JWT keypair + client secrets
|
||||
box_config.json
|
||||
# Gemini API key
|
||||
# Gemini API key + DB creds + scheduler config
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
# Local virtualenv
|
||||
env/
|
||||
venv/
|
||||
|
|
@ -10,6 +12,17 @@ venv/
|
|||
__pycache__/
|
||||
*.pyc
|
||||
*.py[cod]
|
||||
# Docker / Postgres bind-mount data (if anyone switches off the named volume)
|
||||
data/
|
||||
pgdata/
|
||||
|
||||
# Generated by deploy.sh — rebuilt from .tmpl every deploy
|
||||
deploy/apache-marriott-tagging.conf
|
||||
|
||||
# Frontend build artefacts
|
||||
frontend/node_modules/
|
||||
frontend/dist/
|
||||
frontend/.vite/
|
||||
|
||||
# ── Bitbucket boilerplate ────────────────────────────────────────────────────
|
||||
# Node artifact files
|
||||
|
|
|
|||
23
Dockerfile
Normal file
23
Dockerfile
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
FROM python:3.12-slim
|
||||
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# System deps: tzdata so cron schedules respect $TZ; ca-certificates for SSL.
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends ca-certificates tzdata \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
RUN pip install -r /app/requirements.txt
|
||||
|
||||
COPY main.py db.py scheduler.py api.py auth.py schema.sql /app/
|
||||
|
||||
RUN useradd --create-home --uid 10001 appuser \
|
||||
&& chown -R appuser:appuser /app
|
||||
USER appuser
|
||||
|
||||
CMD ["python", "-u", "scheduler.py"]
|
||||
172
README.md
172
README.md
|
|
@ -2,6 +2,15 @@
|
|||
|
||||
Batch-processes images **and videos** in a Box folder, analyzes them with Gemini AI, and writes structured metadata back to Box using the `marriottUsa` metadata template. Videos use Box's 480p MP4 proxy representations to keep bandwidth and Gemini token usage manageable.
|
||||
|
||||
Every Gemini-analysed file is also written to a Postgres `tagging_events` table for search/audit, and there's a small React SPA on top (search across all logged fields, click through to the original Box file, trigger an on-demand tagging pass).
|
||||
|
||||
## Components
|
||||
|
||||
- **`scheduler.py` (tagger container)** — APScheduler that fires `main.main()` on a cron (default daily 02:00).
|
||||
- **`api.py` (api container, FastAPI)** — search, list runs, kick off ad-hoc runs in a background thread.
|
||||
- **`db.py` + `schema.sql`** — Postgres logging layer.
|
||||
- **`frontend/` (Vite + React + TS)** — single-page UI, served by Apache from `/var/www/html/marriott-tagging/` in prod. Auth via `@azure/msal-react` with a `VITE_DEV_AUTH_BYPASS` switch.
|
||||
|
||||
## Setup
|
||||
|
||||
### 1. Clone and create virtual environment
|
||||
|
|
@ -47,6 +56,169 @@ The script will:
|
|||
6. For each video: fetch the 480p MP4 proxy from Box, analyze with Gemini, write metadata + description + a scene-breakdown comment to Box
|
||||
7. Print a summary of results
|
||||
|
||||
## Run with Docker
|
||||
|
||||
Brings up Postgres, the scheduler (`tagger`), and the FastAPI backend (`api`). The frontend is built separately by `deploy/deploy.sh` (or served by `npm run dev` locally) and talks to the API over `/api/*`.
|
||||
|
||||
### 1. Prereqs
|
||||
|
||||
- Docker Desktop (or Docker Engine + Compose v2)
|
||||
- `box_config.json` in the project root
|
||||
- A `.env` copied from `.env.example`, filled in
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
# minimum: GEMINI_API_KEY, POSTGRES_PASSWORD
|
||||
# leave DEV_AUTH_BYPASS=true for now if you don't have Azure IDs ready
|
||||
```
|
||||
|
||||
### 2. Build and start the backend services
|
||||
|
||||
```bash
|
||||
docker compose up --build -d
|
||||
```
|
||||
|
||||
This brings up three services:
|
||||
- `db` — Postgres 16, named volume `pgdata`, port bound to `127.0.0.1:${POSTGRES_HOST_PORT:-5432}`.
|
||||
- `tagger` — runs `scheduler.py` (cron-driven Gemini passes).
|
||||
- `api` — runs `uvicorn api:app` on container port 8000, published to `127.0.0.1:${MARRIOTT_API_PORT:-8004}`.
|
||||
|
||||
### 3. Run the frontend (local dev)
|
||||
|
||||
```bash
|
||||
cd frontend
|
||||
npm install
|
||||
npm run dev # http://localhost:5173
|
||||
```
|
||||
|
||||
Vite proxies `/api/*` to the FastAPI host port (default `8004`). With `VITE_DEV_AUTH_BYPASS=true` you'll be auto-signed-in as the dev user.
|
||||
|
||||
### 4. Fire a tagging pass
|
||||
|
||||
Three ways:
|
||||
- **UI** — open the SPA and click **Run now**. Polls live until done.
|
||||
- **API** — `curl -X POST http://127.0.0.1:8004/api/runs` (DEV_AUTH_BYPASS=true) or with a Bearer token in prod.
|
||||
- **Container** — `docker compose exec tagger python main.py` (bypasses the API entirely).
|
||||
|
||||
### 5. Inspect the DB
|
||||
|
||||
```bash
|
||||
docker compose exec db psql -U marriott marriott_tagging -c '\d tagging_events'
|
||||
|
||||
docker compose exec db psql -U marriott marriott_tagging -c \
|
||||
"SELECT status, count(*) FROM tagging_events GROUP BY status;"
|
||||
```
|
||||
|
||||
### Auth: enabling Azure AD SSO
|
||||
|
||||
1. Register (or reuse) an Azure AD app. Redirect URI:
|
||||
- Local dev: `http://localhost:5173`
|
||||
- Prod: `https://optical-dev.oliver.solutions/marriott-tagging/`
|
||||
2. Expose an API with scope `access_as_user` whose audience is the same client ID.
|
||||
3. Fill `.env`:
|
||||
```
|
||||
DEV_AUTH_BYPASS=false
|
||||
AZURE_TENANT_ID=...
|
||||
AZURE_CLIENT_ID=...
|
||||
VITE_DEV_AUTH_BYPASS=false
|
||||
VITE_AZURE_TENANT_ID=...
|
||||
VITE_AZURE_CLIENT_ID=...
|
||||
```
|
||||
4. `docker compose up -d --force-recreate api` and rebuild the SPA (`deploy.sh` does this on the server; locally `cd frontend && npm run build`).
|
||||
|
||||
Backend validates JWT signature against the tenant's JWKS, checks `aud == AZURE_CLIENT_ID` and `iss` matches one of the tenant URLs. With bypass=true, every request is logged as the `DEV_AUTH_EMAIL` user.
|
||||
|
||||
### Stop / tear down
|
||||
|
||||
```bash
|
||||
docker compose down # stops containers, keeps the DB volume
|
||||
docker compose down -v # also deletes the DB volume (destroys data)
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
- Postgres failures never stop the tagger — `db.log_event` swallows errors. Box is the source of truth for "already tagged".
|
||||
- The `marriott-tagger.service` / `.timer` files below remain for the older systemd deployment path; the Docker path is the recommended one. Don't run both on the same host.
|
||||
|
||||
## Server Deployment (Docker — optical-dev.oliver.solutions)
|
||||
|
||||
This is the recommended path on the shared `optical-dev.oliver.solutions` dev server. Apps live under `/opt/<slug>/` with an idempotent `deploy/deploy.sh`. Mirrors the OSOP / adeo split-build pattern: backend in Docker, SPA built and served by Apache from `/var/www/html/marriott-tagging/`.
|
||||
|
||||
**Public URL:** `https://optical-dev.oliver.solutions/marriott-tagging/`
|
||||
|
||||
### First-time setup
|
||||
|
||||
```bash
|
||||
# 1. Clone into /opt
|
||||
sudo git clone git@bitbucket.org:zlalani/marriott-box-image-video-tagging.git \
|
||||
/opt/marriott-box-image-video-tagging
|
||||
sudo chown -R "$USER:$USER" /opt/marriott-box-image-video-tagging
|
||||
cd /opt/marriott-box-image-video-tagging
|
||||
|
||||
# 2. Drop credentials in place (NOT in git)
|
||||
cp .env.example .env
|
||||
$EDITOR .env # GEMINI_API_KEY, POSTGRES_PASSWORD,
|
||||
# Azure IDs (or DEV_AUTH_BYPASS=true)
|
||||
$EDITOR box_config.json # paste the Box JWT config
|
||||
|
||||
# 3. Deploy
|
||||
./deploy/deploy.sh
|
||||
```
|
||||
|
||||
The script will:
|
||||
- Sanity-check `.env`, `box_config.json`, docker, git, compose v2.
|
||||
- Pick free host ports — Postgres (default 5435, range 5435-5499) and API (default 8004, range 8003-8099) — persisted to `.env`.
|
||||
- Render `deploy/apache-marriott-tagging.conf` from `.tmpl` with the picked API port.
|
||||
- `git pull --ff-only`, `docker compose build`, `docker compose up -d` (db + tagger + api).
|
||||
- Build the Vite SPA in a one-shot `node:20` container; rsync `frontend/dist/` to `/var/www/html/marriott-tagging/`.
|
||||
- Poll `/api/health` until ready and verify the tagger container is running.
|
||||
- Print the Apache `Include` line you need to add to the shared vhost.
|
||||
|
||||
**One-time vhost step (manual):**
|
||||
Edit `/etc/apache2/sites-enabled/optical-dev.oliver.solutions.conf` and add **inside** the `<VirtualHost>` block (before the closing `</VirtualHost>`):
|
||||
```apache
|
||||
Include /opt/marriott-box-image-video-tagging/deploy/apache-marriott-tagging.conf
|
||||
```
|
||||
Then:
|
||||
```bash
|
||||
sudo apachectl configtest && sudo systemctl reload apache2
|
||||
```
|
||||
|
||||
### Re-deploying
|
||||
|
||||
```bash
|
||||
cd /opt/marriott-box-image-video-tagging
|
||||
./deploy/deploy.sh
|
||||
```
|
||||
|
||||
Flags:
|
||||
- `--no-pull` skip `git pull`
|
||||
- `--no-build` skip `docker compose build`
|
||||
- `--no-frontend` skip Vite build + SPA sync
|
||||
- `--run-now` also fire a tagging pass via `/api/runs` (works with DEV_AUTH_BYPASS=true)
|
||||
- `--logs` tail scheduler logs after deploy
|
||||
|
||||
### Verifying it ran
|
||||
|
||||
```bash
|
||||
# Scheduler logs (next cron-fired pass is at SCHEDULE_CRON; default 02:00 UTC)
|
||||
docker compose logs -f tagger
|
||||
|
||||
# API logs
|
||||
docker compose logs -f api
|
||||
|
||||
# Postgres request log
|
||||
docker compose exec db psql -U marriott marriott_tagging -c \
|
||||
"SELECT status, count(*) FROM tagging_events GROUP BY status;"
|
||||
```
|
||||
|
||||
Postgres is bound to `127.0.0.1` only — not reachable from outside the server. To inspect from your laptop, tunnel: `ssh -L 55432:127.0.0.1:<POSTGRES_HOST_PORT> user@optical-dev.oliver.solutions`, then `psql postgresql://marriott:***@127.0.0.1:55432/marriott_tagging`.
|
||||
|
||||
### Notes
|
||||
|
||||
- The Docker deploy and the `systemd` deploy below target the same `/opt/marriott-box-image-video-tagging/` directory. Pick one on any given server — don't run both, they'll both fire the tagger and double-write to Box.
|
||||
- The SPA build bakes `VITE_AZURE_*` and `VITE_DEV_AUTH_BYPASS` into the bundle. Flipping the bypass requires a re-build (`./deploy/deploy.sh` does this).
|
||||
|
||||
## Server Deployment (systemd, Ubuntu)
|
||||
|
||||
The repo includes `marriott-tagger.service` and `marriott-tagger.timer` for running the tagger as a scheduled service. These steps are written for **Ubuntu 22.04 / 24.04** but should work on any systemd-based distribution with minor path tweaks (e.g. `/sbin/nologin` instead of `/usr/sbin/nologin` on Red Hat-family).
|
||||
|
|
|
|||
245
api.py
Normal file
245
api.py
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
"""
|
||||
Marriott Box Tagger — FastAPI backend.
|
||||
|
||||
Endpoints (all under /api/, all behind require_auth except /api/health):
|
||||
GET /api/health — liveness + config flags
|
||||
GET /api/me — who am I (after auth)
|
||||
GET /api/events?q=…&limit=… — search tagging_events across all
|
||||
text + JSONB fields
|
||||
POST /api/runs — kick off a tagging pass in a
|
||||
background thread; returns run_id
|
||||
GET /api/runs — recent runs (run_id + counts)
|
||||
GET /api/runs/{run_id}/events — events for a single run, newest first
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
import uuid
|
||||
from contextlib import contextmanager
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import Depends, FastAPI, HTTPException, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from psycopg.rows import dict_row
|
||||
|
||||
import db
|
||||
from auth import User, maybe_auth_info, require_auth
|
||||
|
||||
BOX_FILE_URL = "https://app.box.com/file/{file_id}"
|
||||
|
||||
app = FastAPI(title="Marriott Box Tagger API", version="1.0.0")
|
||||
|
||||
# CORS: only meaningful in dev (when the Vite dev server hits FastAPI cross-origin).
|
||||
# In prod, Apache serves both SPA and API under the same origin.
|
||||
_cors_origins = [o.strip() for o in os.getenv("CORS_ORIGINS", "").split(",") if o.strip()]
|
||||
if _cors_origins:
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=_cors_origins,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# In-memory record of background runs (run_id → state). Survives only as long as the
|
||||
# api container; durable record of what each run produced is in tagging_events.
|
||||
_runs: dict[str, dict] = {}
|
||||
_runs_lock = threading.Lock()
|
||||
|
||||
|
||||
@contextmanager
def _conn():
    """Yield a fresh connection from ``db.get_conn()`` and close it on exit.

    The connection is closed whether the ``with`` body finishes normally or
    raises.
    """
    connection = db.get_conn()
    try:
        yield connection
    finally:
        connection.close()
|
||||
|
||||
|
||||
# ── Health / identity ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@app.get("/api/health")
def health():
    """Liveness probe: report DB reachability and auth configuration.

    The top-level ``ok`` flag is always True (the process is answering); a
    database problem is reported under ``db`` instead of failing the request.
    """
    db_status = {"ok": False, "error": None}
    try:
        with _conn() as connection:
            with connection.cursor() as cursor:
                cursor.execute("SELECT 1")
                cursor.fetchone()
            db_status["ok"] = True
    except Exception as exc:
        db_status["error"] = f"{type(exc).__name__}: {exc}"
    return {"ok": True, "db": db_status, "auth": maybe_auth_info()}
|
||||
|
||||
|
||||
@app.get("/api/me")
def me(user: User = Depends(require_auth)):
    """Return the authenticated caller's identity as a plain dict."""
    identity = user.to_dict()
    return identity
|
||||
|
||||
|
||||
# ── Search ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
SEARCH_SQL = """
|
||||
SELECT id, run_id, created_at, file_id, file_name, folder_path, media_type,
|
||||
gemini_model, description, scenes, validated_metadata, raw_response,
|
||||
metadata_write_success, description_write_success, scene_comment_write_success,
|
||||
status, error_message, duration_ms
|
||||
FROM tagging_events
|
||||
WHERE
|
||||
(%(q)s = '' OR (
|
||||
file_name ILIKE %(like)s
|
||||
OR folder_path ILIKE %(like)s
|
||||
OR description ILIKE %(like)s
|
||||
OR status ILIKE %(like)s
|
||||
OR file_id ILIKE %(like)s
|
||||
OR coalesce(validated_metadata::text, '') ILIKE %(like)s
|
||||
OR coalesce(raw_response::text, '') ILIKE %(like)s
|
||||
OR coalesce(scenes::text, '') ILIKE %(like)s
|
||||
))
|
||||
ORDER BY created_at DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
|
||||
def _event_to_dict(row):
    """Convert a ``tagging_events`` row into a JSON-friendly dict.

    Adds a synthesised ``box_url`` (None when the row has no file_id),
    stringifies ``run_id`` and renders ``created_at`` via ``isoformat()``.
    """
    event = dict(row)

    file_id = event.get("file_id")
    event["box_url"] = None if not file_id else BOX_FILE_URL.format(file_id=file_id)

    run_id = event.get("run_id")
    if run_id is not None:
        event["run_id"] = str(run_id)

    created_at = event.get("created_at")
    if created_at is not None:
        event["created_at"] = created_at.isoformat()

    return event
|
||||
|
||||
|
||||
def _like_pattern(term: str) -> str:
    """Build a "contains" ILIKE pattern that matches ``term`` literally.

    Backslash is Postgres' default LIKE/ILIKE escape character, so ``\\``,
    ``%`` and ``_`` in the user's input are escaped; otherwise a search for
    e.g. ``100%`` or ``box_error`` would be treated as a wildcard pattern.
    """
    escaped = term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    return f"%{escaped}%"


@app.get("/api/events")
def search_events(
    q: str = Query("", description="Free-text search across all fields"),
    limit: int = Query(100, ge=1, le=500),
    user: User = Depends(require_auth),
):
    """Search ``tagging_events`` for rows containing ``q`` in any searched field.

    An empty ``q`` returns the most recent ``limit`` events. The search term
    is matched as literal text (case-insensitively); LIKE metacharacters in it
    are escaped before being bound as a query parameter.

    Returns a dict with the echoed query, the number of rows, and the rows
    converted by ``_event_to_dict``.
    """
    params = {"q": q, "like": _like_pattern(q), "limit": limit}
    with _conn() as c:
        with c.cursor(row_factory=dict_row) as cur:
            cur.execute(SEARCH_SQL, params)
            rows = cur.fetchall()
    return {"q": q, "count": len(rows), "results": [_event_to_dict(r) for r in rows]}
|
||||
|
||||
|
||||
# ── Run-now ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _run_pass_in_thread(run_id: uuid.UUID):
    """Background worker: open a fresh DB conn and call into the tagger pipeline.

    Records the run's lifecycle ("running" -> "completed" / "failed") in the
    in-memory ``_runs`` registry. Every failure path, including a failing
    import of the tagger module, ends with the run marked "failed" so pollers
    never see a run stuck in "running".
    """
    import traceback

    key = str(run_id)

    def _mark(state, error=None):
        # Replace the whole entry under the lock so readers never observe a
        # half-updated state/error pair.
        with _runs_lock:
            _runs[key] = {"run_id": key, "state": state, "error": error}

    _mark("running")

    db_conn = None
    try:
        # Import inside the thread so we don't pay tagger-side init cost at
        # API startup; inside the try so an import failure is recorded too.
        import main as tagger

        db_conn = db.get_conn()
        db.ensure_schema(db_conn)
        tagger._run_pass(run_id, db_conn)
        _mark("completed")
    except SystemExit as e:
        _mark("failed", f"SystemExit({e.code})")
    except Exception as e:
        # The in-memory registry is lost on restart; also leave a trace in
        # the container logs.
        traceback.print_exc()
        _mark("failed", f"{type(e).__name__}: {e}")
    finally:
        if db_conn is not None:
            try:
                db_conn.close()
            except Exception:
                # Best-effort cleanup; the run outcome is already recorded.
                pass
|
||||
|
||||
|
||||
@app.post("/api/runs")
def start_run(user: User = Depends(require_auth)):
    """Kick off a tagging pass in a daemon thread and return its run_id.

    The run is registered as "running" in ``_runs`` *before* the thread is
    started, so a client that polls immediately after this response already
    sees a live state instead of ``None``.
    """
    run_id = uuid.uuid4()
    key = str(run_id)
    with _runs_lock:
        _runs[key] = {"run_id": key, "state": "running", "error": None}
    worker = threading.Thread(target=_run_pass_in_thread, args=(run_id,), daemon=True)
    worker.start()
    return {"run_id": key, "state": "running", "started_by": user.email or user.oid}
|
||||
|
||||
|
||||
@app.get("/api/runs")
def list_runs(user: User = Depends(require_auth), limit: int = Query(20, ge=1, le=100)):
    """Recent runs in the DB, plus the in-memory state if the run is still active."""

    def _iso(ts):
        # Timestamps may be NULL-ish; keep None as None.
        return ts.isoformat() if ts else None

    def _summarise(record):
        rid = str(record["run_id"])
        live = _runs.get(rid)
        return {
            "run_id": rid,
            "started_at": _iso(record["started_at"]),
            "last_event_at": _iso(record["last_event_at"]),
            "events": record["events"],
            "successes": record["successes"],
            "errors": record["errors"],
            "live_state": live["state"] if live else None,
            "live_error": live["error"] if live else None,
        }

    with _conn() as connection:
        with connection.cursor(row_factory=dict_row) as cursor:
            cursor.execute(
                """
                SELECT run_id,
                min(created_at) AS started_at,
                max(created_at) AS last_event_at,
                count(*) AS events,
                count(*) FILTER (WHERE status = 'success') AS successes,
                count(*) FILTER (WHERE status LIKE '%%_error') AS errors
                FROM tagging_events
                GROUP BY run_id
                ORDER BY max(created_at) DESC
                LIMIT %s
                """,
                (limit,),
            )
            records = cursor.fetchall()

    return {"runs": [_summarise(record) for record in records]}
|
||||
|
||||
|
||||
@app.get("/api/runs/{run_id}/events")
def run_events(run_id: str, user: User = Depends(require_auth), limit: int = Query(500, ge=1, le=2000)):
    """Return the events logged for one run, newest first, plus its live state.

    ``run_id`` is parsed as a UUID and normalised to canonical lower-case
    hyphenated form. ``uuid.UUID`` accepts several spellings (upper-case,
    braces, no hyphens), while the in-memory ``_runs`` registry is keyed by
    the canonical string, so the raw path value could miss the live state.

    Raises:
        HTTPException: 400 when ``run_id`` is not a valid UUID.
    """
    try:
        run_id = str(uuid.UUID(run_id))
    except ValueError:
        raise HTTPException(status_code=400, detail="run_id must be a UUID") from None
    with _conn() as c:
        with c.cursor(row_factory=dict_row) as cur:
            cur.execute(
                """
                SELECT id, run_id, created_at, file_id, file_name, folder_path, media_type,
                gemini_model, description, scenes, validated_metadata,
                metadata_write_success, description_write_success,
                scene_comment_write_success, status, error_message, duration_ms
                FROM tagging_events
                WHERE run_id = %s
                ORDER BY created_at DESC
                LIMIT %s
                """,
                (run_id, limit),
            )
            rows = cur.fetchall()
    live = _runs.get(run_id)
    return {
        "run_id": run_id,
        "live_state": live["state"] if live else None,
        "live_error": live["error"] if live else None,
        "count": len(rows),
        "events": [_event_to_dict(r) for r in rows],
    }
|
||||
110
auth.py
Normal file
110
auth.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
"""
|
||||
Azure AD (Entra ID) bearer-token auth for the FastAPI backend.
|
||||
|
||||
- DEV_AUTH_BYPASS=true → skip all validation, return a fixed dev user.
|
||||
- Otherwise: extract Bearer token, fetch the tenant's JWKS once and cache it,
|
||||
verify the JWT signature, and check that `aud` matches AZURE_CLIENT_ID, `iss` is one of the tenant issuer URLs, and the token has not expired.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
import jwt
|
||||
from fastapi import Depends, Header, HTTPException, status
|
||||
from jwt import PyJWKClient
|
||||
|
||||
AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID", "").strip()
|
||||
AZURE_CLIENT_ID = os.getenv("AZURE_CLIENT_ID", "").strip()
|
||||
DEV_AUTH_BYPASS = os.getenv("DEV_AUTH_BYPASS", "").strip().lower() in ("1", "true", "yes")
|
||||
|
||||
JWKS_URL = f"https://login.microsoftonline.com/{AZURE_TENANT_ID}/discovery/v2.0/keys" if AZURE_TENANT_ID else None
|
||||
ISSUERS = (
|
||||
f"https://login.microsoftonline.com/{AZURE_TENANT_ID}/v2.0",
|
||||
f"https://sts.windows.net/{AZURE_TENANT_ID}/",
|
||||
)
|
||||
|
||||
_jwks_client: Optional[PyJWKClient] = None
|
||||
|
||||
|
||||
def _get_jwks_client() -> PyJWKClient:
    """Return the module-wide ``PyJWKClient``, creating it on first use.

    Raises:
        HTTPException: 500 when no JWKS URL is available because
            AZURE_TENANT_ID is not configured.
    """
    global _jwks_client
    if _jwks_client is not None:
        return _jwks_client
    if not JWKS_URL:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="AZURE_TENANT_ID not configured on the server",
        )
    _jwks_client = PyJWKClient(JWKS_URL)
    return _jwks_client
|
||||
|
||||
|
||||
class User:
    """Identity of the caller of an authenticated endpoint.

    All constructor arguments are keyword-only. ``dev`` is True only for the
    synthetic user returned when auth bypass is active.
    """

    def __init__(self, *, oid: str, name: str, email: str, dev: bool = False) -> None:
        self.oid = oid
        self.name = name
        self.email = email
        self.dev = dev

    def __repr__(self) -> str:
        # Readable representation for logs and debugging sessions.
        return (
            f"{type(self).__name__}(oid={self.oid!r}, name={self.name!r}, "
            f"email={self.email!r}, dev={self.dev!r})"
        )

    def to_dict(self) -> dict:
        """Return the user as a plain dict suitable for a JSON response."""
        return {"oid": self.oid, "name": self.name, "email": self.email, "dev": self.dev}
|
||||
|
||||
|
||||
def _bypass_user() -> User:
    """Build the fixed dev user returned when DEV_AUTH_BYPASS is on.

    Name and email come from DEV_AUTH_NAME / DEV_AUTH_EMAIL, read at call
    time, with built-in defaults.
    """
    dev_name = os.getenv("DEV_AUTH_NAME", "Dev User")
    dev_email = os.getenv("DEV_AUTH_EMAIL", "dev@oliver.agency")
    return User(oid="dev-bypass", name=dev_name, email=dev_email, dev=True)
|
||||
|
||||
|
||||
def require_auth(authorization: Optional[str] = Header(default=None)) -> User:
    """
    FastAPI dependency. Validates the Bearer token and returns a User, or
    raises 401. Honors DEV_AUTH_BYPASS for local/dev use.

    Raises:
        HTTPException: 401 for a missing/malformed header, an invalid token,
            or a token whose signing key is not in the tenant's key set;
            500 when the server lacks AZURE_TENANT_ID / AZURE_CLIENT_ID;
            503 when the JWKS endpoint cannot be reached.
    """
    if DEV_AUTH_BYPASS:
        return _bypass_user()

    if not authorization or not authorization.lower().startswith("bearer "):
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing bearer token")

    token = authorization.split(" ", 1)[1].strip()
    if not AZURE_TENANT_ID or not AZURE_CLIENT_ID:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Server missing AZURE_TENANT_ID / AZURE_CLIENT_ID",
        )

    try:
        signing_key = _get_jwks_client().get_signing_key_from_jwt(token).key
        # Accept either v2.0 or v1.0 issuer URLs.
        claims = jwt.decode(
            token,
            signing_key,
            algorithms=["RS256"],
            audience=AZURE_CLIENT_ID,
            issuer=list(ISSUERS),
            options={"verify_aud": True, "verify_iss": True, "verify_exp": True},
        )
    except jwt.InvalidTokenError as e:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=f"Invalid token: {e}") from e
    except jwt.exceptions.PyJWKClientError as e:
        # PyJWKClient fetches keys with urllib and reports problems as
        # PyJWKClientError, which is NOT an InvalidTokenError; without this
        # handler an unknown `kid` or a JWKS outage became a 500.
        # PyJWKClientConnectionError only exists in newer PyJWT releases, so
        # look it up defensively; on older releases every client error is
        # treated as a bad token.
        connection_error = getattr(jwt.exceptions, "PyJWKClientConnectionError", ())
        if isinstance(e, connection_error):
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=f"JWKS fetch failed: {e}"
            ) from e
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=f"Invalid token: {e}") from e

    return User(
        oid=claims.get("oid") or claims.get("sub", "unknown"),
        name=claims.get("name", ""),
        email=claims.get("preferred_username") or claims.get("upn") or claims.get("email", ""),
    )
|
||||
|
||||
|
||||
def maybe_auth_info():
    """Diagnostic helper for /api/health: report whether auth is wired.

    Only booleans are exposed, never the tenant or client ID values.
    """
    tenant_set = bool(AZURE_TENANT_ID)
    client_set = bool(AZURE_CLIENT_ID)
    return {
        "dev_bypass": DEV_AUTH_BYPASS,
        "tenant_configured": tenant_set,
        "client_configured": client_set,
    }
|
||||
112
db.py
Normal file
112
db.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
"""
|
||||
Postgres logging for the Marriott Box tagger.
|
||||
|
||||
One row per file Gemini was called on (success or error). The DB is auxiliary —
|
||||
`log_event` swallows exceptions and prints to stderr so a Postgres outage cannot
|
||||
stop the tagging pass. Box remains the source of truth.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg
|
||||
from psycopg.types.json import Jsonb
|
||||
|
||||
SCHEMA_PATH = Path(__file__).parent / "schema.sql"
|
||||
|
||||
INSERT_SQL = """
|
||||
INSERT INTO tagging_events (
|
||||
run_id, file_id, file_name, folder_path, media_type, gemini_model,
|
||||
prompt, raw_response, description, scenes, validated_metadata,
|
||||
metadata_write_success, description_write_success, scene_comment_write_success,
|
||||
status, error_message, duration_ms
|
||||
) VALUES (
|
||||
%(run_id)s, %(file_id)s, %(file_name)s, %(folder_path)s, %(media_type)s, %(gemini_model)s,
|
||||
%(prompt)s, %(raw_response)s, %(description)s, %(scenes)s, %(validated_metadata)s,
|
||||
%(metadata_write_success)s, %(description_write_success)s, %(scene_comment_write_success)s,
|
||||
%(status)s, %(error_message)s, %(duration_ms)s
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
def _dsn():
    """Return the Postgres connection string from the DATABASE_URL env var.

    Surrounding whitespace is stripped and a blank value is treated the same
    as an unset one, so a stray space or newline in an env file fails fast
    here instead of producing a confusing connection error later.

    Raises:
        RuntimeError: if DATABASE_URL is unset or blank.
    """
    dsn = (os.getenv("DATABASE_URL") or "").strip()
    if not dsn:
        raise RuntimeError("DATABASE_URL not set")
    return dsn
|
||||
|
||||
|
||||
def get_conn():
    """Open an autocommit Postgres connection; the caller must close it.

    Connection errors (and a missing DATABASE_URL) propagate to the caller.
    """
    connection = psycopg.connect(_dsn(), autocommit=True)
    return connection
|
||||
|
||||
|
||||
def ensure_schema(conn):
    """Apply schema.sql idempotently.

    The file is read as UTF-8 explicitly so the result does not depend on the
    process locale. The whole script is sent in a single ``execute`` call with
    no parameters. Errors propagate to the caller.
    """
    sql = SCHEMA_PATH.read_text(encoding="utf-8")
    with conn.cursor() as cur:
        cur.execute(sql)
|
||||
|
||||
|
||||
def _jsonable(value):
    """Wrap ``value`` in ``Jsonb`` for a JSONB column; pass ``None`` through as None."""
    return Jsonb(value) if value is not None else None
|
||||
|
||||
|
||||
def log_event(
    conn,
    *,
    run_id,
    file_id,
    file_name,
    folder_path,
    media_type,
    gemini_model,
    status,
    prompt=None,
    raw_response=None,
    description=None,
    scenes=None,
    validated_metadata=None,
    metadata_write_success=None,
    description_write_success=None,
    scene_comment_write_success=None,
    error_message=None,
    duration_ms=None,
):
    """
    Best-effort insert of a single ``tagging_events`` row.

    All arguments after *conn* are keyword-only and map one-to-one onto the
    named placeholders in ``INSERT_SQL``. ``run_id`` is always stringified,
    ``file_id`` is stringified unless it is ``None``, and ``raw_response``,
    ``scenes`` and ``validated_metadata`` go through ``_jsonable``.

    When *conn* is ``None`` the call is a no-op. Any exception raised while
    executing the INSERT is caught and reported on stderr, so a database
    problem does not interrupt the caller. Returns ``None`` in every case.
    """
    if conn is None:
        return

    row = dict(
        run_id=str(run_id),
        file_id=None if file_id is None else str(file_id),
        file_name=file_name,
        folder_path=folder_path,
        media_type=media_type,
        gemini_model=gemini_model,
        prompt=prompt,
        raw_response=_jsonable(raw_response),
        description=description,
        scenes=_jsonable(scenes),
        validated_metadata=_jsonable(validated_metadata),
        metadata_write_success=metadata_write_success,
        description_write_success=description_write_success,
        scene_comment_write_success=scene_comment_write_success,
        status=status,
        error_message=error_message,
        duration_ms=duration_ms,
    )

    try:
        with conn.cursor() as cursor:
            cursor.execute(INSERT_SQL, row)
    except Exception as exc:
        # Deliberate catch-all: logging is auxiliary and must not stop the caller.
        warning = f" WARN: DB log_event failed ({type(exc).__name__}: {exc}) — continuing"
        print(warning, file=sys.stderr)
|
||||
29
deploy/apache-marriott-tagging.conf.tmpl
Normal file
29
deploy/apache-marriott-tagging.conf.tmpl
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
# Marriott Box Tagger — Apache reverse-proxy include for the shared
|
||||
# optical-dev.oliver.solutions vhost.
|
||||
#
|
||||
# Generated by deploy/deploy.sh; __API_PORT__ is replaced with the live API
|
||||
# container port. Do NOT edit the generated apache-marriott-tagging.conf
|
||||
# by hand — it gets overwritten on every deploy.
|
||||
|
||||
ProxyTimeout 300
|
||||
TimeOut 300
|
||||
|
||||
# Proxy API calls only — SPA static files are served directly by Apache
|
||||
# from /var/www/html/marriott-tagging/.
|
||||
ProxyPass /marriott-tagging/api/ http://127.0.0.1:__API_PORT__/api/ timeout=300
|
||||
ProxyPassReverse /marriott-tagging/api/ http://127.0.0.1:__API_PORT__/api/
|
||||
|
||||
Alias /marriott-tagging /var/www/html/marriott-tagging
|
||||
<Directory /var/www/html/marriott-tagging>
|
||||
Options -Indexes +FollowSymLinks
|
||||
AllowOverride None
|
||||
Require all granted
|
||||
|
||||
# SPA fallback: any path that doesn't map to a real file/dir falls
|
||||
# through to index.html so client-side routing works.
|
||||
RewriteEngine On
|
||||
RewriteBase /marriott-tagging/
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteRule ^ index.html [L]
|
||||
</Directory>
|
||||
317
deploy/deploy.sh
Executable file
317
deploy/deploy.sh
Executable file
|
|
@ -0,0 +1,317 @@
|
|||
#!/usr/bin/env bash
|
||||
# Marriott Box Tagger — deploy script.
|
||||
#
|
||||
# Idempotent. Safe to re-run on the dev server.
|
||||
# Public URL: https://optical-dev.oliver.solutions/marriott-tagging/
|
||||
#
|
||||
# Server layout (mirrors /oliver-sales-ops-platform/, /adeo-maturity/, etc.):
|
||||
# /opt/marriott-box-image-video-tagging/ — repo + docker-compose
|
||||
# /var/www/html/marriott-tagging/ — built SPA, served by Apache
|
||||
# docker volume marriott-tagging_pgdata — Postgres data (survives rebuilds)
|
||||
#
|
||||
# What it does:
|
||||
# 1. Sanity (.env, box_config.json, docker, git, compose v2 on PATH).
|
||||
# 2. Auto-pick free host ports:
|
||||
# - prefers POSTGRES_HOST_PORT (default 5435, range 5435-5499)
|
||||
# - prefers MARRIOTT_API_PORT (default 8004, range 8003-8099)
|
||||
# Persisted to .env so re-deploys keep using them.
|
||||
# 3. Render deploy/apache-marriott-tagging.conf from .tmpl with the picked
|
||||
# API port.
|
||||
# 4. git pull --ff-only origin main (--no-pull to skip).
|
||||
# 5. docker compose build && up -d (--no-build to skip).
|
||||
# 6. Build the Vite SPA in a one-shot node:20 container and rsync dist/
|
||||
# into /var/www/html/marriott-tagging/ (--no-frontend to skip).
|
||||
# 7. Poll /api/health for up to 60s; verify tagger container running.
|
||||
# 8. With --run-now, fire an immediate tagging pass via POST /api/runs.
# 9. Print URLs, ports, the Apache Include line, and reload reminder.
|
||||
#
|
||||
# Flags:
|
||||
# --no-pull skip git pull
|
||||
# --no-build skip docker rebuild
|
||||
# --no-frontend skip Vite build + SPA sync
|
||||
# --run-now also fire a tagging pass immediately (via the API)
|
||||
# --logs tail tagger logs after deploy
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
COMPOSE_PROJECT="marriott-tagging"
|
||||
URL_PATH="/marriott-tagging"
|
||||
WEB_ROOT="/var/www/html/marriott-tagging"
|
||||
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
log() { printf '\033[1;36m[deploy]\033[0m %s\n' "$*"; }
|
||||
err() { printf '\033[1;31m[deploy]\033[0m %s\n' "$*" >&2; }
|
||||
ok() { printf '\033[1;32m[deploy]\033[0m %s\n' "$*"; }
|
||||
warn() { printf '\033[1;33m[deploy]\033[0m %s\n' "$*"; }
|
||||
|
||||
DO_PULL=1
|
||||
DO_BUILD=1
|
||||
DO_FRONTEND=1
|
||||
RUN_NOW=0
|
||||
TAIL_LOGS=0
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--no-pull) DO_PULL=0 ;;
|
||||
--no-build) DO_BUILD=0 ;;
|
||||
--no-frontend) DO_FRONTEND=0 ;;
|
||||
--run-now) RUN_NOW=1 ;;
|
||||
--logs) TAIL_LOGS=1 ;;
|
||||
--help|-h)
|
||||
sed -n '2,/^set/p' "$0" | grep -E '^# ' | sed 's/^# //'
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
err "Unknown flag: $arg (try --help)"
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# ---------- 1. Sanity ----------
|
||||
|
||||
[[ -f docker-compose.yml ]] || { err "docker-compose.yml not found in $REPO_ROOT"; exit 1; }
|
||||
if [[ ! -f .env ]]; then
|
||||
err ".env not found. Copy .env.example and fill it in:"
|
||||
err " cp .env.example .env && \$EDITOR .env"
|
||||
err "Required keys: GEMINI_API_KEY, POSTGRES_PASSWORD"
|
||||
err "MSAL keys: AZURE_TENANT_ID, AZURE_CLIENT_ID, VITE_AZURE_* (or set DEV_AUTH_BYPASS=true)"
|
||||
exit 1
|
||||
fi
|
||||
if [[ ! -f box_config.json ]]; then
|
||||
err "box_config.json not found in $REPO_ROOT"
|
||||
err "Drop the Box JWT config (from the Box Developer Console) in the repo"
|
||||
err "root before deploying. It's bind-mounted into the tagger + api containers."
|
||||
exit 1
|
||||
fi
|
||||
command -v docker >/dev/null 2>&1 || { err "docker not on PATH"; exit 1; }
|
||||
command -v git >/dev/null 2>&1 || { err "git not on PATH"; exit 1; }
|
||||
docker compose version >/dev/null 2>&1 || { err "docker compose v2 not available"; exit 1; }
|
||||
|
||||
# ---------- helpers ----------
|
||||
|
||||
# port_in_use PORT
# Returns 0 when something is listening on TCP PORT on this host, 1 otherwise.
#
# Detection deliberately does not depend on seeing the owning PID: an
# unprivileged user cannot see PIDs of sockets owned by other users (for
# example root-owned docker-proxy listeners), and keying on the PID would
# report such ports as free. Three probes are tried in turn; any positive
# answer wins.
port_in_use() {
  local port=$1

  # 1. ss: with -H (no header) any output line is a matching listener.
  if command -v ss >/dev/null 2>&1; then
    if [[ -n "$(ss -Hltn "sport = :$port" 2>/dev/null || true)" ]]; then
      return 0
    fi
  fi

  # 2. lsof: exits 0 only when it found at least one matching listener.
  if command -v lsof >/dev/null 2>&1; then
    if lsof -nP -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1; then
      return 0
    fi
  fi

  # 3. Connect probe via bash's /dev/tcp. Runs in a subshell so the file
  #    descriptor is closed again as soon as the probe finishes.
  if (exec 3<>"/dev/tcp/127.0.0.1/$port") 2>/dev/null; then
    return 0
  fi

  return 1
}
|
||||
|
||||
# find_free_port PREFERRED START END
# Prints the first port that port_in_use reports as free: PREFERRED is tried
# first, then START..END in ascending order. Prints nothing and returns 1
# when every candidate is busy.
find_free_port() {
  local preferred=$1 lo=$2 hi=$3
  local candidate=$preferred
  local next=$lo

  while :; do
    if ! port_in_use "$candidate"; then
      printf '%s' "$candidate"
      return 0
    fi
    if (( next > hi )); then
      return 1
    fi
    candidate=$next
    next=$(( next + 1 ))
  done
}
|
||||
|
||||
# set_env_var KEY VALUE
# Sets KEY=VALUE in ${REPO_ROOT}/.env: rewrites the existing KEY= line(s) in
# place, or appends a new line when KEY is not present yet.
set_env_var() {
  local key=$1 value=$2 file="${REPO_ROOT}/.env"

  if grep -q "^${key}=" "$file" 2>/dev/null; then
    # Escape the characters that are special in the sed replacement below:
    # backslash, '&' (whole match) and '#' (our delimiter).
    local escaped
    escaped=$(printf '%s' "$value" | sed -e 's/[\\&#]/\\&/g')
    # -i.bak + rm works with both GNU and BSD sed.
    sed -i.bak "s#^${key}=.*#${key}=${escaped}#" "$file"
    rm -f "${file}.bak"
  else
    # If the file does not end in a newline, terminate its last line first;
    # otherwise the new entry would be glued onto the previous value.
    # ($(...) strips a trailing newline, so a non-empty result means the last
    # byte is something else.)
    if [[ -s "$file" && -n "$(tail -c 1 "$file")" ]]; then
      printf '\n' >> "$file"
    fi
    printf '%s=%s\n' "$key" "$value" >> "$file"
  fi
}
|
||||
|
||||
# get_env_var KEY
# Prints the value of the first KEY= line in ${REPO_ROOT}/.env with every
# double-quote character removed. Prints nothing when the key or the file is
# missing. Always returns 0.
get_env_var() {
  local key=$1 env_file="${REPO_ROOT}/.env" line

  # -m1: stop at the first matching line.
  line=$(grep -E -m1 "^${key}=" "$env_file" 2>/dev/null) || return 0

  line=${line#*=}                 # drop "KEY=", keep any later '=' in the value
  printf '%s\n' "${line//\"/}"    # strip all double quotes
}
|
||||
|
||||
# ---------- 2. Pick host ports ----------
|
||||
|
||||
DEFAULT_DB_PORT=5435
|
||||
DEFAULT_API_PORT=8004
|
||||
|
||||
DB_PORT=$(get_env_var POSTGRES_HOST_PORT); DB_PORT=${DB_PORT:-$DEFAULT_DB_PORT}
|
||||
API_PORT=$(get_env_var MARRIOTT_API_PORT); API_PORT=${API_PORT:-$DEFAULT_API_PORT}
|
||||
PREV_API_PORT="$API_PORT"
|
||||
|
||||
log "Resolving host ports (preferred: db=$DB_PORT api=$API_PORT)…"
|
||||
|
||||
RUNNING=$(docker compose ps -q 2>/dev/null | wc -l | tr -d ' ')
|
||||
if [[ "$RUNNING" -gt 0 ]]; then
|
||||
ok "Project '$COMPOSE_PROJECT' already has $RUNNING containers — keeping current port assignment."
|
||||
else
|
||||
NEW_DB_PORT=$(find_free_port "$DB_PORT" 5435 5499) || NEW_DB_PORT=""
|
||||
NEW_API_PORT=$(find_free_port "$API_PORT" 8003 8099) || NEW_API_PORT=""
|
||||
if [[ -z "$NEW_DB_PORT" || -z "$NEW_API_PORT" ]]; then
|
||||
err "Could not find a free port in the configured ranges."
|
||||
err " db desired=$DB_PORT scanned=5435-5499"
|
||||
err " api desired=$API_PORT scanned=8003-8099"
|
||||
exit 1
|
||||
fi
|
||||
[[ "$NEW_DB_PORT" != "$DB_PORT" ]] && warn "db port $DB_PORT busy → using $NEW_DB_PORT"
|
||||
[[ "$NEW_API_PORT" != "$API_PORT" ]] && warn "api port $API_PORT busy → using $NEW_API_PORT"
|
||||
DB_PORT=$NEW_DB_PORT
|
||||
API_PORT=$NEW_API_PORT
|
||||
set_env_var POSTGRES_HOST_PORT "$DB_PORT"
|
||||
set_env_var MARRIOTT_API_PORT "$API_PORT"
|
||||
ok "Ports: db=$DB_PORT api=$API_PORT (persisted to .env)"
|
||||
fi
|
||||
|
||||
# ---------- 3. Render apache include ----------
|
||||
|
||||
APACHE_TMPL="$REPO_ROOT/deploy/apache-marriott-tagging.conf.tmpl"
|
||||
APACHE_CONF="$REPO_ROOT/deploy/apache-marriott-tagging.conf"
|
||||
if [[ -f "$APACHE_TMPL" ]]; then
|
||||
sed "s#__API_PORT__#${API_PORT}#g" "$APACHE_TMPL" > "$APACHE_CONF"
|
||||
ok "Rendered apache-marriott-tagging.conf with api port $API_PORT"
|
||||
else
|
||||
warn "apache-marriott-tagging.conf.tmpl missing — leaving deploy/apache-marriott-tagging.conf untouched."
|
||||
fi
|
||||
|
||||
# ---------- 4. git pull ----------
|
||||
|
||||
if (( DO_PULL )); then
|
||||
log "git pull --ff-only origin main"
|
||||
git pull --ff-only origin main
|
||||
fi
|
||||
|
||||
# ---------- 5. Backend build + up ----------
|
||||
|
||||
if (( DO_BUILD )); then
|
||||
log "docker compose build"
|
||||
docker compose build
|
||||
fi
|
||||
|
||||
log "docker compose up -d (db + tagger + api)"
|
||||
docker compose up -d
|
||||
|
||||
# ---------- 6. Frontend build + sync ----------
|
||||
|
||||
if (( DO_FRONTEND )); then
|
||||
if [[ ! -d "$REPO_ROOT/frontend" ]]; then
|
||||
warn "frontend/ directory missing — skipping SPA build."
|
||||
else
|
||||
BYPASS=$(get_env_var VITE_DEV_AUTH_BYPASS); BYPASS=${BYPASS:-$(get_env_var DEV_AUTH_BYPASS)}
|
||||
TENANT=$(get_env_var VITE_AZURE_TENANT_ID)
|
||||
CLIENT=$(get_env_var VITE_AZURE_CLIENT_ID)
|
||||
PUBLIC_BASE=$(get_env_var VITE_PUBLIC_BASE)
|
||||
PUBLIC_BASE=${PUBLIC_BASE:-"https://optical-dev.oliver.solutions${URL_PATH}"}
|
||||
|
||||
log "Building Vite SPA in node:20 (bypass=${BYPASS:-false} tenant=${TENANT:+set} client=${CLIENT:+set})…"
|
||||
docker run --rm \
|
||||
-v "$REPO_ROOT/frontend:/app" \
|
||||
-w /app \
|
||||
-e VITE_DEV_AUTH_BYPASS="${BYPASS:-false}" \
|
||||
-e VITE_AZURE_TENANT_ID="$TENANT" \
|
||||
-e VITE_AZURE_CLIENT_ID="$CLIENT" \
|
||||
-e VITE_PUBLIC_BASE="$PUBLIC_BASE" \
|
||||
node:20-alpine \
|
||||
sh -c "npm install --silent && npm run build"
|
||||
|
||||
if [[ ! -d "$REPO_ROOT/frontend/dist" ]]; then
|
||||
err "Vite build did not produce frontend/dist — aborting frontend sync."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log "Syncing frontend/dist/ → $WEB_ROOT/"
|
||||
if [[ ! -d "$WEB_ROOT" ]]; then
|
||||
if command -v sudo >/dev/null 2>&1; then sudo mkdir -p "$WEB_ROOT"
|
||||
else mkdir -p "$WEB_ROOT"; fi
|
||||
fi
|
||||
if command -v rsync >/dev/null 2>&1; then
|
||||
if [[ -w "$WEB_ROOT" ]]; then
|
||||
rsync -a --delete "$REPO_ROOT/frontend/dist/" "$WEB_ROOT/"
|
||||
else
|
||||
sudo rsync -a --delete "$REPO_ROOT/frontend/dist/" "$WEB_ROOT/"
|
||||
fi
|
||||
else
|
||||
if [[ -w "$WEB_ROOT" ]]; then
|
||||
rm -rf "$WEB_ROOT"/*
|
||||
cp -a "$REPO_ROOT/frontend/dist/." "$WEB_ROOT/"
|
||||
else
|
||||
sudo rm -rf "$WEB_ROOT"/*
|
||||
sudo cp -a "$REPO_ROOT/frontend/dist/." "$WEB_ROOT/"
|
||||
fi
|
||||
fi
|
||||
ok "SPA synced to $WEB_ROOT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ---------- 7. Readiness ----------
|
||||
|
||||
log "Waiting for API /api/health on :$API_PORT (max 60s)…"
|
||||
for i in $(seq 1 30); do
|
||||
if curl -fsS "http://127.0.0.1:${API_PORT}/api/health" >/dev/null 2>&1; then
|
||||
ok "API healthy"
|
||||
break
|
||||
fi
|
||||
sleep 2
|
||||
if (( i == 30 )); then
|
||||
err "API did not become healthy within 60s. Recent logs:"
|
||||
docker compose logs api --tail 60 || true
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
TAGGER_STATE=$(docker compose ps tagger --format '{{.State}}' 2>/dev/null | head -1)
|
||||
if [[ "$TAGGER_STATE" != "running" ]]; then
|
||||
err "Tagger (scheduler) container is not running (state=${TAGGER_STATE:-unknown}). Recent logs:"
|
||||
docker compose logs tagger --tail 60 || true
|
||||
exit 1
|
||||
fi
|
||||
ok "Tagger scheduler running"
|
||||
|
||||
# ---------- 8. Optional: trigger an immediate pass via the API ----------
|
||||
|
||||
if (( RUN_NOW )); then
|
||||
log "Firing a tagging pass via /api/runs…"
|
||||
# With DEV_AUTH_BYPASS=true the call works unauthenticated. Otherwise
|
||||
# a real token is required; we just print the curl command in that case.
|
||||
BYPASS_NOW=$(get_env_var DEV_AUTH_BYPASS); BYPASS_NOW=$(echo "${BYPASS_NOW:-true}" | tr '[:upper:]' '[:lower:]')
|
||||
if [[ "$BYPASS_NOW" == "true" || "$BYPASS_NOW" == "1" || "$BYPASS_NOW" == "yes" ]]; then
|
||||
curl -fsS -X POST "http://127.0.0.1:${API_PORT}/api/runs" || \
|
||||
warn "POST /api/runs failed — see api logs."
|
||||
echo
|
||||
else
|
||||
warn "DEV_AUTH_BYPASS != true → can't auth from a shell. Run a pass from the SPA, or:"
|
||||
warn " docker compose exec tagger python main.py"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ---------- 9. Report ----------
|
||||
|
||||
POSTGRES_USER_VAL=$(get_env_var POSTGRES_USER); POSTGRES_USER_VAL=${POSTGRES_USER_VAL:-marriott}
|
||||
POSTGRES_DB_VAL=$(get_env_var POSTGRES_DB); POSTGRES_DB_VAL=${POSTGRES_DB_VAL:-marriott_tagging}
|
||||
SCHED=$(get_env_var SCHEDULE_CRON); SCHED=${SCHED:-0 2 * * *}
|
||||
TZ_VAL=$(get_env_var TZ); TZ_VAL=${TZ_VAL:-system}
|
||||
|
||||
ok "Deploy complete."
|
||||
echo
|
||||
echo " Compose project: $COMPOSE_PROJECT"
|
||||
echo " API (local): http://127.0.0.1:${API_PORT}/api/health"
|
||||
echo " Public URL: https://optical-dev.oliver.solutions${URL_PATH}/"
|
||||
echo " SPA on disk: $WEB_ROOT"
|
||||
echo " Postgres (local): 127.0.0.1:${DB_PORT} (db=$POSTGRES_DB_VAL user=$POSTGRES_USER_VAL)"
|
||||
echo " Schedule (cron): '$SCHED' (TZ=$TZ_VAL)"
|
||||
echo " Persisted volume: marriott-tagging_pgdata"
|
||||
echo
|
||||
echo " Apache include line for the merged vhost:"
|
||||
echo " Include $REPO_ROOT/deploy/apache-marriott-tagging.conf"
|
||||
|
||||
NEEDS_RELOAD=0
|
||||
if [[ "$API_PORT" != "$PREV_API_PORT" ]]; then NEEDS_RELOAD=1; fi
|
||||
if ! grep -qF "$REPO_ROOT/deploy/apache-marriott-tagging.conf" /etc/apache2/sites-enabled/*.conf 2>/dev/null; then
|
||||
NEEDS_RELOAD=1
|
||||
fi
|
||||
if (( NEEDS_RELOAD )); then
|
||||
echo
|
||||
warn "API port changed (or first deploy). After adding the Include line:"
|
||||
echo " sudo apachectl configtest && sudo systemctl reload apache2"
|
||||
fi
|
||||
echo
|
||||
|
||||
if (( TAIL_LOGS )); then
|
||||
log "Tailing tagger logs (Ctrl-C to stop)…"
|
||||
docker compose logs -f tagger
|
||||
fi
|
||||
74
docker-compose.yml
Normal file
74
docker-compose.yml
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# Per ~/.claude/CLAUDE.md Docker policy: every compose file must pin a unique
|
||||
# top-level `name:` so multiple apps sharing a server (or `deploy/` parent dir)
|
||||
# don't collide on container/volume names.
|
||||
name: marriott-tagging
|
||||
|
||||
services:
|
||||
db:
|
||||
image: postgres:16
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER:-marriott}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-marriott}
|
||||
POSTGRES_DB: ${POSTGRES_DB:-marriott_tagging}
|
||||
# 127.0.0.1: binding — Postgres must NOT be reachable from outside the
|
||||
# host. On the dev server this prevents the DB from sitting on the
|
||||
# public internet; on a local mac it's a no-op for `psql` from the
|
||||
# same machine. Pick a non-default port via $POSTGRES_HOST_PORT if
|
||||
# 5432 is taken locally (the deploy script auto-picks on the server).
|
||||
ports:
|
||||
- "127.0.0.1:${POSTGRES_HOST_PORT:-5432}:5432"
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-marriott} -d ${POSTGRES_DB:-marriott_tagging}"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
tagger:
|
||||
build: .
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-marriott}:${POSTGRES_PASSWORD:-marriott}@db:5432/${POSTGRES_DB:-marriott_tagging}
|
||||
GEMINI_API_KEY: ${GEMINI_API_KEY}
|
||||
SCHEDULE_CRON: ${SCHEDULE_CRON:-0 2 * * *}
|
||||
RUN_AT_STARTUP: ${RUN_AT_STARTUP:-0}
|
||||
TZ: ${TZ:-UTC}
|
||||
command: ["python", "-u", "scheduler.py"]
|
||||
volumes:
|
||||
# Box JWT config is bind-mounted read-only — never baked into the image.
|
||||
- ./box_config.json:/app/box_config.json:ro
|
||||
|
||||
api:
  build: .
  restart: unless-stopped
  depends_on:
    db:
      condition: service_healthy
  environment:
    DATABASE_URL: postgresql://${POSTGRES_USER:-marriott}:${POSTGRES_PASSWORD:-marriott}@db:5432/${POSTGRES_DB:-marriott_tagging}
    GEMINI_API_KEY: ${GEMINI_API_KEY}
    TZ: ${TZ:-UTC}
    # Auth — bypass is opt-in. Set DEV_AUTH_BYPASS=true in .env to skip MSAL
    # while you wire it up. The default is false so that a missing key can
    # never leave the API unauthenticated.
    DEV_AUTH_BYPASS: ${DEV_AUTH_BYPASS:-false}
    AZURE_TENANT_ID: ${AZURE_TENANT_ID:-}
    AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
    DEV_AUTH_EMAIL: ${DEV_AUTH_EMAIL:-dev@oliver.agency}
    DEV_AUTH_NAME: ${DEV_AUTH_NAME:-Dev User}
    # CORS for local dev: when Vite is on :5173 and FastAPI on host:8004.
    # Empty in prod — Apache serves SPA and API under the same origin.
    CORS_ORIGINS: ${CORS_ORIGINS:-}
  command: ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
  # Published on loopback only; external traffic is expected to arrive
  # through the reverse proxy.
  ports:
    - "127.0.0.1:${MARRIOTT_API_PORT:-8004}:8000"
  volumes:
    # The API may trigger a tagging pass via /api/runs, which calls into the
    # same Box+Gemini pipeline → it needs the same JWT config.
    - ./box_config.json:/app/box_config.json:ro
|
||||
|
||||
volumes:
|
||||
pgdata:
|
||||
18
frontend/index.html
Normal file
18
frontend/index.html
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Marriott Box Tagger</title>
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
||||
<link
|
||||
href="https://fonts.googleapis.com/css2?family=Montserrat:wght@400;500;600;700&display=swap"
|
||||
rel="stylesheet"
|
||||
/>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
1765
frontend/package-lock.json
generated
Normal file
1765
frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
24
frontend/package.json
Normal file
24
frontend/package.json
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "marriott-tagging-frontend",
|
||||
"private": true,
|
||||
"version": "0.1.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "tsc -b && vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@azure/msal-browser": "^3.27.0",
|
||||
"@azure/msal-react": "^2.2.0",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react": "^18.3.12",
|
||||
"@types/react-dom": "^18.3.1",
|
||||
"@vitejs/plugin-react": "^4.3.4",
|
||||
"typescript": "^5.6.3",
|
||||
"vite": "^5.4.11"
|
||||
}
|
||||
}
|
||||
268
frontend/src/App.tsx
Normal file
268
frontend/src/App.tsx
Normal file
|
|
@ -0,0 +1,268 @@
|
|||
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||
import { useAuth } from "./auth";
|
||||
import {
|
||||
Event,
|
||||
Run,
|
||||
listRuns,
|
||||
runEvents,
|
||||
searchEvents,
|
||||
startRun,
|
||||
} from "./api";
|
||||
|
||||
export function App() {
|
||||
const auth = useAuth();
|
||||
const [q, setQ] = useState("");
|
||||
const [results, setResults] = useState<Event[]>([]);
|
||||
const [searching, setSearching] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const [activeRun, setActiveRun] = useState<string | null>(null);
|
||||
const [activeRunEvents, setActiveRunEvents] = useState<Event[]>([]);
|
||||
const [activeRunState, setActiveRunState] = useState<string | null>(null);
|
||||
const [recentRuns, setRecentRuns] = useState<Run[]>([]);
|
||||
const [starting, setStarting] = useState(false);
|
||||
|
||||
const runPollTimer = useRef<number | null>(null);
|
||||
|
||||
const doSearch = useCallback(
|
||||
async (text: string) => {
|
||||
setSearching(true);
|
||||
setError(null);
|
||||
try {
|
||||
const r = await searchEvents(auth.getToken, text);
|
||||
setResults(r.results);
|
||||
} catch (e) {
|
||||
setError((e as Error).message);
|
||||
setResults([]);
|
||||
} finally {
|
||||
setSearching(false);
|
||||
}
|
||||
},
|
||||
[auth.getToken]
|
||||
);
|
||||
|
||||
const refreshRuns = useCallback(async () => {
|
||||
try {
|
||||
const r = await listRuns(auth.getToken, 10);
|
||||
setRecentRuns(r.runs);
|
||||
} catch {
|
||||
/* non-fatal */
|
||||
}
|
||||
}, [auth.getToken]);
|
||||
|
||||
// Initial load: recent rows + recent runs.
|
||||
useEffect(() => {
|
||||
doSearch("");
|
||||
refreshRuns();
|
||||
}, [doSearch, refreshRuns]);
|
||||
|
||||
// Poll active run.
|
||||
useEffect(() => {
  // Clears the shared interval handle, if any.
  const stopPolling = () => {
    if (runPollTimer.current !== null) {
      clearInterval(runPollTimer.current);
      runPollTimer.current = null;
    }
  };

  if (!activeRun) {
    stopPolling();
    return;
  }

  // Set by the cleanup below. A response that arrives after the user switched
  // to another run (or after unmount) must not overwrite the current state.
  let cancelled = false;

  const tick = async () => {
    try {
      const r = await runEvents(auth.getToken, activeRun);
      if (cancelled) return;

      // When the API reports no live state, fall back to the event count:
      // a run that already has events is shown as completed.
      const state = r.live_state || (r.count > 0 ? "completed" : "running");
      setActiveRunEvents(r.events);
      setActiveRunState(state);

      // Stop on the state we actually display, not only on an explicit
      // live_state — otherwise a run shown as "completed" keeps being
      // polled every 2 s forever.
      if (state !== "running") {
        stopPolling();
        refreshRuns();
      }
    } catch (e) {
      if (!cancelled) setError((e as Error).message);
    }
  };

  // Fetch immediately, then every 2 s. The interval handle is assigned
  // synchronously, before the first awaited response can call stopPolling().
  tick();
  runPollTimer.current = window.setInterval(tick, 2000);

  return () => {
    cancelled = true;
    stopPolling();
  };
}, [activeRun, auth.getToken, refreshRuns]);
|
||||
|
||||
const onSubmit = (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
doSearch(q);
|
||||
};
|
||||
|
||||
const onRunNow = async () => {
|
||||
setStarting(true);
|
||||
setError(null);
|
||||
try {
|
||||
const r = await startRun(auth.getToken);
|
||||
setActiveRun(r.run_id);
|
||||
setActiveRunEvents([]);
|
||||
setActiveRunState("running");
|
||||
} catch (e) {
|
||||
setError((e as Error).message);
|
||||
} finally {
|
||||
setStarting(false);
|
||||
}
|
||||
};
|
||||
|
||||
const summary = useMemo(() => {
|
||||
const total = activeRunEvents.length;
|
||||
const ok = activeRunEvents.filter((e) => e.status === "success").length;
|
||||
const err = total - ok;
|
||||
return { total, ok, err };
|
||||
}, [activeRunEvents]);
|
||||
|
||||
return (
|
||||
<div className="page">
|
||||
<header className="topbar">
|
||||
<div className="brand">
|
||||
<span className="brand-mark" />
|
||||
<span className="brand-name">Marriott Box Tagger</span>
|
||||
</div>
|
||||
<div className="user">
|
||||
{auth.user ? (
|
||||
<>
|
||||
<span className="user-name">{auth.user.name || auth.user.email}</span>
|
||||
{auth.bypass ? (
|
||||
<span className="badge">DEV</span>
|
||||
) : (
|
||||
<button className="link" onClick={auth.signOut}>Sign out</button>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<button className="primary" onClick={auth.signIn}>Sign in</button>
|
||||
)}
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main className="content">
|
||||
<section className="card">
|
||||
<h2>Search the tagging log</h2>
|
||||
<form className="searchbar" onSubmit={onSubmit}>
|
||||
<input
|
||||
autoFocus
|
||||
placeholder="Keyword across file name, folder, description, metadata…"
|
||||
value={q}
|
||||
onChange={(e) => setQ(e.target.value)}
|
||||
/>
|
||||
<button className="primary" type="submit" disabled={searching}>
|
||||
{searching ? "Searching…" : "Search"}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
className="run-now"
|
||||
disabled={starting}
|
||||
title="Trigger a tagging pass against the Box folder right now"
|
||||
onClick={onRunNow}
|
||||
>
|
||||
{starting ? "Starting…" : "Run now"}
|
||||
</button>
|
||||
</form>
|
||||
{error && <p className="error">{error}</p>}
|
||||
</section>
|
||||
|
||||
{activeRun && (
|
||||
<section className="card">
|
||||
<h2>
|
||||
Active run
|
||||
<span className="run-id"> {activeRun.slice(0, 8)}…</span>
|
||||
{activeRunState && (
|
||||
<span className={`pill state-${activeRunState}`}>{activeRunState}</span>
|
||||
)}
|
||||
</h2>
|
||||
<p className="muted">
|
||||
{summary.total} events so far • {summary.ok} success • {summary.err} errors
|
||||
</p>
|
||||
<EventList events={activeRunEvents} />
|
||||
</section>
|
||||
)}
|
||||
|
||||
<section className="card">
|
||||
<h2>Recent runs</h2>
|
||||
{recentRuns.length === 0 ? (
|
||||
<p className="muted">No runs yet — click <em>Run now</em> to start one.</p>
|
||||
) : (
|
||||
<table className="runs">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Started</th>
|
||||
<th>Run</th>
|
||||
<th>Events</th>
|
||||
<th>OK</th>
|
||||
<th>Errors</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{recentRuns.map((r) => (
|
||||
<tr key={r.run_id}>
|
||||
<td>{r.started_at?.replace("T", " ").slice(0, 19) || "—"}</td>
|
||||
<td className="mono">{r.run_id.slice(0, 8)}…</td>
|
||||
<td>{r.events}</td>
|
||||
<td>{r.successes}</td>
|
||||
<td>{r.errors}</td>
|
||||
<td>
|
||||
<button className="link" onClick={() => setActiveRun(r.run_id)}>
|
||||
View
|
||||
</button>
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
)}
|
||||
</section>
|
||||
|
||||
<section className="card">
|
||||
<h2>{q ? `Results for "${q}"` : "Latest events"}</h2>
|
||||
<p className="muted">{results.length} item{results.length === 1 ? "" : "s"}</p>
|
||||
<EventList events={results} />
|
||||
</section>
|
||||
</main>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Turn one `validated_metadata` value into display text.
 *
 * Arrays are joined with ", " (null/undefined items render as empty, like
 * `Array.prototype.join`), plain objects are shown as JSON instead of
 * "[object Object]", and everything else goes through `String()`.
 */
function formatMetadataValue(value: unknown): string {
  if (Array.isArray(value)) {
    return value
      .map((item) => (item == null ? "" : formatMetadataValue(item)))
      .join(", ");
  }
  if (value !== null && typeof value === "object") {
    return JSON.stringify(value);
  }
  return String(value);
}

/**
 * Renders a list of tagging events: media type, file name, status pill,
 * optional Box link, folder path, description, metadata tags, error message,
 * and a footer with the creation timestamp and duration in seconds.
 * Shows a placeholder paragraph when `events` is empty.
 */
function EventList({ events }: { events: Event[] }) {
  if (events.length === 0) return <p className="muted">Nothing here yet.</p>;
  return (
    <ul className="events">
      {events.map((e) => (
        <li key={e.id} className={`event status-${e.status}`}>
          <div className="event-head">
            <span className={`badge type-${e.media_type}`}>{e.media_type}</span>
            <span className="event-name">{e.file_name}</span>
            <span className={`pill status-${e.status}`}>{e.status}</span>
            {e.box_url && (
              <a className="boxlink" href={e.box_url} target="_blank" rel="noreferrer">
                Open in Box ↗
              </a>
            )}
          </div>
          {e.folder_path && <div className="event-meta">{e.folder_path}</div>}
          {e.description && <p className="event-desc">{e.description}</p>}
          {e.validated_metadata && Object.keys(e.validated_metadata).length > 0 && (
            <div className="tags">
              {Object.entries(e.validated_metadata).map(([k, v]) => (
                <span key={k} className="tag">
                  <strong>{k}:</strong> {formatMetadataValue(v)}
                </span>
              ))}
            </div>
          )}
          {e.error_message && <p className="event-error">{e.error_message}</p>}
          <div className="event-foot">
            {/* "2024-01-02T03:04:05.678Z" → "2024-01-02 03:04:05" */}
            <span className="mono">{e.created_at.replace("T", " ").slice(0, 19)}</span>
            {e.duration_ms != null && <span>· {(e.duration_ms / 1000).toFixed(1)}s</span>}
          </div>
        </li>
      ))}
    </ul>
  );
}
|
||||
98
frontend/src/api.ts
Normal file
98
frontend/src/api.ts
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
/**
|
||||
* Thin API client. In prod the SPA lives at /marriott-tagging/ and the API
|
||||
* lives at /marriott-tagging/api/; in dev Vite proxies /api → FastAPI.
|
||||
*
|
||||
* We compute the API base from `import.meta.env.BASE_URL` (Vite injects this
|
||||
* as the `base` from vite.config.ts), so the same build works at "/" and at
|
||||
* "/marriott-tagging/".
|
||||
*/
|
||||
const BASE = (import.meta.env.BASE_URL || "/").replace(/\/$/, "");
|
||||
const API_BASE = `${BASE}/api`;
|
||||
|
||||
export type Event = {
|
||||
id: number;
|
||||
run_id: string;
|
||||
created_at: string;
|
||||
file_id: string;
|
||||
file_name: string;
|
||||
folder_path: string | null;
|
||||
media_type: "image" | "video";
|
||||
gemini_model: string;
|
||||
description: string | null;
|
||||
scenes: unknown;
|
||||
validated_metadata: Record<string, unknown> | null;
|
||||
metadata_write_success: boolean | null;
|
||||
description_write_success: boolean | null;
|
||||
scene_comment_write_success: boolean | null;
|
||||
status: string;
|
||||
error_message: string | null;
|
||||
duration_ms: number | null;
|
||||
box_url: string | null;
|
||||
};
|
||||
|
||||
export type Run = {
|
||||
run_id: string;
|
||||
started_at: string | null;
|
||||
last_event_at: string | null;
|
||||
events: number;
|
||||
successes: number;
|
||||
errors: number;
|
||||
live_state: "running" | "completed" | "failed" | null;
|
||||
live_error: string | null;
|
||||
};
|
||||
|
||||
type GetTokenFn = () => Promise<string>;
|
||||
|
||||
/**
 * Perform an API request against `${API_BASE}${path}` and parse the JSON body.
 *
 * @param path     Path below the API base, starting with "/".
 * @param getToken Resolves to a bearer token; an empty string means "send no
 *                 Authorization header".
 * @param init     Extra fetch options. Caller-supplied headers are preserved.
 * @returns The response body parsed as JSON, typed as `T` (not validated).
 * @throws Error "<status> <statusText> — <body text>" on a non-2xx response.
 */
async function req<T>(
  path: string,
  getToken: GetTokenFn,
  init: RequestInit = {}
): Promise<T> {
  const token = await getToken();

  // `Headers` accepts every HeadersInit shape (plain object, tuple array,
  // Headers instance) and matches header names case-insensitively, so a
  // caller-supplied "content-type" is respected rather than duplicated.
  const headers = new Headers(init.headers);
  if (token) headers.set("Authorization", `Bearer ${token}`);
  if (init.body && !headers.has("Content-Type")) {
    headers.set("Content-Type", "application/json");
  }

  const res = await fetch(`${API_BASE}${path}`, { ...init, headers });
  if (!res.ok) {
    // Reading the body is best-effort; fall back to an empty string.
    const text = await res.text().catch(() => "");
    throw new Error(`${res.status} ${res.statusText} — ${text}`);
  }
  return res.json() as Promise<T>;
}
|
||||
|
||||
/**
 * GET /events?q=…&limit=… — search logged events.
 * `q` is sent as-is (including the empty string); `limit` defaults to 100.
 */
export function searchEvents(getToken: GetTokenFn, q: string, limit = 100) {
  type SearchResponse = { q: string; count: number; results: Event[] };

  const params = new URLSearchParams();
  params.set("q", q);
  params.set("limit", String(limit));

  return req<SearchResponse>(`/events?${params}`, getToken);
}
|
||||
|
||||
/** POST /runs with an empty JSON object body; resolves to the new run's id and state. */
export function startRun(getToken: GetTokenFn) {
  type StartRunResponse = { run_id: string; state: string; started_by: string };

  const init: RequestInit = { method: "POST", body: JSON.stringify({}) };
  return req<StartRunResponse>("/runs", getToken, init);
}
|
||||
|
||||
/**
 * Fetch the run listing via `GET /runs`.
 *
 * @param getToken Token supplier forwarded to `req`.
 * @param limit    Value of the `limit` query parameter (default 20).
 */
export function listRuns(getToken: GetTokenFn, limit = 20) {
  const path = "/runs?limit=" + String(limit);
  return req<{ runs: Run[] }>(path, getToken);
}
|
||||
|
||||
/**
 * Fetch the events of one run, plus its live state, via `GET /runs/{id}/events`.
 *
 * @param getToken Token supplier forwarded to `req`.
 * @param runId    Identifier of the run.
 */
export function runEvents(getToken: GetTokenFn, runId: string) {
  // The id becomes a path segment, so encode it: a value containing "/", "?"
  // or "#" must not be able to change which endpoint is requested.
  const segment = encodeURIComponent(runId);
  return req<{
    run_id: string;
    live_state: string | null;
    live_error: string | null;
    count: number;
    events: Event[];
  }>(`/runs/${segment}/events`, getToken);
}
|
||||
|
||||
/**
 * Query `GET /health` without authentication and return the parsed JSON body.
 * The HTTP status is not inspected.
 */
export async function health() {
  const res = await fetch(`${API_BASE}/health`);
  return res.json();
}
|
||||
153
frontend/src/auth.tsx
Normal file
153
frontend/src/auth.tsx
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
/**
|
||||
* MSAL plumbing with a single bypass switch.
|
||||
*
|
||||
* - VITE_DEV_AUTH_BYPASS=true → no Azure roundtrip; getToken() returns "".
|
||||
* Backend matching DEV_AUTH_BYPASS=true accepts that.
|
||||
* - else → MSAL popup flow against the configured tenant/client.
|
||||
*/
|
||||
import {
|
||||
AccountInfo,
|
||||
Configuration,
|
||||
InteractionRequiredAuthError,
|
||||
PublicClientApplication,
|
||||
} from "@azure/msal-browser";
|
||||
import {
|
||||
MsalProvider,
|
||||
useIsAuthenticated,
|
||||
useMsal,
|
||||
} from "@azure/msal-react";
|
||||
import {
|
||||
createContext,
|
||||
ReactNode,
|
||||
useCallback,
|
||||
useContext,
|
||||
useEffect,
|
||||
useMemo,
|
||||
useState,
|
||||
} from "react";
|
||||
|
||||
const BYPASS =
|
||||
String(import.meta.env.VITE_DEV_AUTH_BYPASS || "").toLowerCase() === "true";
|
||||
const TENANT = String(import.meta.env.VITE_AZURE_TENANT_ID || "");
|
||||
const CLIENT = String(import.meta.env.VITE_AZURE_CLIENT_ID || "");
|
||||
const PUBLIC_BASE = String(
|
||||
import.meta.env.VITE_PUBLIC_BASE || window.location.origin
|
||||
);
|
||||
|
||||
const msalConfig: Configuration = {
|
||||
auth: {
|
||||
clientId: CLIENT,
|
||||
authority: TENANT
|
||||
? `https://login.microsoftonline.com/${TENANT}`
|
||||
: "https://login.microsoftonline.com/common",
|
||||
redirectUri: PUBLIC_BASE,
|
||||
},
|
||||
cache: { cacheLocation: "sessionStorage" },
|
||||
};
|
||||
|
||||
const REQUEST_SCOPES = CLIENT ? [`api://${CLIENT}/access_as_user`] : ["User.Read"];
|
||||
|
||||
// Lazily created so that nothing is constructed unless msal() is called.
let pca: PublicClientApplication | null = null;

/** Return the shared MSAL client, constructing it from `msalConfig` on first use. */
function msal(): PublicClientApplication {
  const client = pca ?? new PublicClientApplication(msalConfig);
  pca = client;
  return client;
}
|
||||
|
||||
type AuthCtx = {
|
||||
user: { name: string; email: string } | null;
|
||||
ready: boolean;
|
||||
signIn: () => Promise<void>;
|
||||
signOut: () => Promise<void>;
|
||||
getToken: () => Promise<string>;
|
||||
bypass: boolean;
|
||||
};
|
||||
|
||||
const Ctx = createContext<AuthCtx | null>(null);
|
||||
|
||||
/**
 * Context value used when VITE_DEV_AUTH_BYPASS is on.
 *
 * Built once at module load so the value (and its `getToken`) keeps a stable
 * identity across renders. Consumers that list `getToken` in hook dependency
 * arrays are then not re-triggered by a provider re-render.
 */
const BYPASS_CTX: AuthCtx = {
  user: { name: "Dev User", email: "dev@oliver.agency" },
  ready: true,
  signIn: async () => {},
  signOut: async () => {},
  // An empty token makes the API client omit the Authorization header.
  getToken: async () => "",
  bypass: true,
};

/**
 * Top-level auth provider.
 *
 * With the bypass switch on, children get a fixed dev user and no MSAL code
 * runs. Otherwise children are wrapped in `MsalProvider` + `RealAuthProvider`.
 */
export function AuthProvider({ children }: { children: ReactNode }) {
  if (BYPASS) {
    return <Ctx.Provider value={BYPASS_CTX}>{children}</Ctx.Provider>;
  }
  return (
    <MsalProvider instance={msal()}>
      <RealAuthProvider>{children}</RealAuthProvider>
    </MsalProvider>
  );
}
|
||||
|
||||
/**
 * MSAL-backed implementation of the auth context.
 *
 * Initialises the MSAL instance, exposes popup sign-in/sign-out and a
 * `getToken` that tries silent acquisition first, and automatically opens the
 * sign-in popup once MSAL is ready and no account is authenticated.
 */
function RealAuthProvider({ children }: { children: ReactNode }) {
  const { instance, accounts } = useMsal();
  const authed = useIsAuthenticated();
  const [ready, setReady] = useState(false);

  useEffect(() => {
    // Guards against setting state after unmount (or after StrictMode's
    // mount/unmount/mount cycle has discarded this effect run).
    let cancelled = false;
    instance
      .initialize()
      .then(() => instance.handleRedirectPromise())
      .catch((err: unknown) => {
        // Without this handler a failed init surfaced only as an unhandled
        // promise rejection; `ready` is still set below so the UI can proceed.
        console.error("MSAL initialisation failed", err);
      })
      .finally(() => {
        if (!cancelled) setReady(true);
      });
    return () => {
      cancelled = true;
    };
  }, [instance]);

  const account: AccountInfo | undefined = accounts[0];

  const signIn = useCallback(async () => {
    await instance.loginPopup({ scopes: REQUEST_SCOPES });
  }, [instance]);

  const signOut = useCallback(async () => {
    if (account) await instance.logoutPopup({ account });
  }, [instance, account]);

  const getToken = useCallback(async () => {
    if (!account) throw new Error("Not signed in");
    try {
      const r = await instance.acquireTokenSilent({
        account,
        scopes: REQUEST_SCOPES,
      });
      return r.accessToken;
    } catch (e) {
      if (e instanceof InteractionRequiredAuthError) {
        // Pass the account so the popup targets the signed-in user instead of
        // possibly prompting for (or picking) a different one.
        const r = await instance.acquireTokenPopup({
          account,
          scopes: REQUEST_SCOPES,
        });
        return r.accessToken;
      }
      throw e;
    }
  }, [instance, account]);

  const value: AuthCtx = useMemo(
    () => ({
      user: account ? { name: account.name || "", email: account.username } : null,
      ready,
      signIn,
      signOut,
      getToken,
      bypass: false,
    }),
    [account, ready, signIn, signOut, getToken]
  );

  // Auto-sign-in once MSAL is ready and no account is in cache.
  useEffect(() => {
    if (ready && !authed) {
      signIn().catch(() => {/* user closed popup */});
    }
  }, [ready, authed, signIn]);

  return <Ctx.Provider value={value}>{children}</Ctx.Provider>;
}
|
||||
|
||||
/**
 * Read the auth context.
 *
 * @throws Error when called from a component that is not below `AuthProvider`.
 */
export function useAuth(): AuthCtx {
  const ctx = useContext(Ctx);
  if (ctx === null) {
    throw new Error("useAuth outside AuthProvider");
  }
  return ctx;
}
|
||||
13
frontend/src/main.tsx
Normal file
13
frontend/src/main.tsx
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import React from "react";
|
||||
import ReactDOM from "react-dom/client";
|
||||
import { App } from "./App";
|
||||
import { AuthProvider } from "./auth";
|
||||
import "./styles.css";
|
||||
|
||||
// Fail with a clear message instead of the opaque error createRoot raises
// when handed null (the non-null assertion only silenced the type checker).
const rootElement = document.getElementById("root");
if (!rootElement) {
  throw new Error('Mount point "#root" not found in the document');
}

ReactDOM.createRoot(rootElement).render(
  <React.StrictMode>
    <AuthProvider>
      <App />
    </AuthProvider>
  </React.StrictMode>
);
|
||||
336
frontend/src/styles.css
Normal file
336
frontend/src/styles.css
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
/* Marriott Box Tagger — black / white / #FFC407 / Montserrat. */
|
||||
|
||||
:root {
|
||||
--bg: #000;
|
||||
--surface: #0f0f0f;
|
||||
--surface-alt: #1a1a1a;
|
||||
--line: #2a2a2a;
|
||||
--text: #fff;
|
||||
--text-dim: #b5b5b5;
|
||||
--accent: #ffc407;
|
||||
--accent-dim: #cb9b00;
|
||||
--danger: #ff5151;
|
||||
--ok: #5ee597;
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
html,
|
||||
body,
|
||||
#root {
|
||||
margin: 0;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
body {
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
font-family: "Montserrat", -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||||
font-weight: 400;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
}
|
||||
|
||||
a {
|
||||
color: var(--accent);
|
||||
}
|
||||
|
||||
button {
|
||||
font-family: inherit;
|
||||
font-weight: 600;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
padding: 0.5rem 1rem;
|
||||
cursor: pointer;
|
||||
background: var(--surface-alt);
|
||||
color: var(--text);
|
||||
}
|
||||
button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
button.primary {
|
||||
background: var(--accent);
|
||||
color: #000;
|
||||
}
|
||||
button.primary:hover:not(:disabled) {
|
||||
background: var(--accent-dim);
|
||||
}
|
||||
button.run-now {
|
||||
background: #000;
|
||||
color: var(--accent);
|
||||
border: 2px solid var(--accent);
|
||||
}
|
||||
button.run-now:hover:not(:disabled) {
|
||||
background: var(--accent);
|
||||
color: #000;
|
||||
}
|
||||
button.link {
|
||||
background: transparent;
|
||||
color: var(--accent);
|
||||
padding: 0.25rem 0.5rem;
|
||||
}
|
||||
|
||||
input[type="text"],
|
||||
input:not([type]) {
|
||||
font-family: inherit;
|
||||
font-size: 1rem;
|
||||
background: var(--surface);
|
||||
color: var(--text);
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 4px;
|
||||
padding: 0.6rem 0.8rem;
|
||||
flex: 1;
|
||||
}
|
||||
input:focus {
|
||||
outline: none;
|
||||
border-color: var(--accent);
|
||||
}
|
||||
|
||||
.page {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
min-height: 100vh;
|
||||
}
|
||||
|
||||
.topbar {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 1rem 1.5rem;
|
||||
border-bottom: 1px solid var(--line);
|
||||
background: var(--surface);
|
||||
}
|
||||
.brand {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
.brand-mark {
|
||||
display: inline-block;
|
||||
width: 14px;
|
||||
height: 14px;
|
||||
background: var(--accent);
|
||||
border-radius: 2px;
|
||||
}
|
||||
.brand-name {
|
||||
font-size: 1.05rem;
|
||||
}
|
||||
.user {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
font-size: 0.9rem;
|
||||
color: var(--text-dim);
|
||||
}
|
||||
.user-name {
|
||||
color: var(--text);
|
||||
}
|
||||
.badge {
|
||||
background: var(--accent);
|
||||
color: #000;
|
||||
font-size: 0.7rem;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.05em;
|
||||
padding: 0.1rem 0.4rem;
|
||||
border-radius: 2px;
|
||||
}
|
||||
|
||||
.content {
|
||||
flex: 1;
|
||||
width: 100%;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 1.5rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1.5rem;
|
||||
}
|
||||
|
||||
.card {
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 6px;
|
||||
padding: 1.25rem 1.5rem;
|
||||
}
|
||||
.card h2 {
|
||||
margin: 0 0 0.75rem 0;
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.04em;
|
||||
text-transform: uppercase;
|
||||
color: var(--text-dim);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.card h2 .run-id {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
|
||||
color: var(--accent);
|
||||
text-transform: none;
|
||||
letter-spacing: 0;
|
||||
}
|
||||
|
||||
.muted {
|
||||
color: var(--text-dim);
|
||||
margin: 0.25rem 0 0.75rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
.error {
|
||||
color: var(--danger);
|
||||
margin-top: 0.5rem;
|
||||
}
|
||||
|
||||
.searchbar {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.events {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.event {
|
||||
background: var(--surface-alt);
|
||||
border: 1px solid var(--line);
|
||||
border-left: 3px solid var(--line);
|
||||
border-radius: 4px;
|
||||
padding: 0.6rem 0.8rem;
|
||||
}
|
||||
.event.status-success {
|
||||
border-left-color: var(--ok);
|
||||
}
|
||||
.event.status-gemini_error,
|
||||
.event.status-validation_error,
|
||||
.event.status-metadata_write_error {
|
||||
border-left-color: var(--danger);
|
||||
}
|
||||
.event-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.event-name {
|
||||
font-weight: 600;
|
||||
flex: 1;
|
||||
min-width: 200px;
|
||||
word-break: break-all;
|
||||
}
|
||||
.event-meta {
|
||||
color: var(--text-dim);
|
||||
font-size: 0.85rem;
|
||||
margin-top: 0.2rem;
|
||||
}
|
||||
.event-desc {
|
||||
margin: 0.5rem 0 0.25rem;
|
||||
font-size: 0.92rem;
|
||||
color: #e8e8e8;
|
||||
}
|
||||
.event-error {
|
||||
color: var(--danger);
|
||||
font-size: 0.85rem;
|
||||
margin: 0.4rem 0 0;
|
||||
}
|
||||
.event-foot {
|
||||
margin-top: 0.5rem;
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-dim);
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.boxlink {
|
||||
font-weight: 600;
|
||||
font-size: 0.85rem;
|
||||
text-decoration: none;
|
||||
}
|
||||
.boxlink:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.tags {
|
||||
margin-top: 0.4rem;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
.tag {
|
||||
background: #000;
|
||||
border: 1px solid var(--line);
|
||||
padding: 0.15rem 0.5rem;
|
||||
border-radius: 999px;
|
||||
font-size: 0.78rem;
|
||||
color: var(--text);
|
||||
}
|
||||
.tag strong {
|
||||
color: var(--accent);
|
||||
font-weight: 600;
|
||||
margin-right: 0.2rem;
|
||||
}
|
||||
|
||||
.pill {
|
||||
display: inline-block;
|
||||
font-size: 0.7rem;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.05em;
|
||||
padding: 0.15rem 0.5rem;
|
||||
border-radius: 999px;
|
||||
text-transform: uppercase;
|
||||
background: var(--line);
|
||||
color: var(--text);
|
||||
}
|
||||
.pill.status-success,
|
||||
.pill.state-completed {
|
||||
background: var(--ok);
|
||||
color: #000;
|
||||
}
|
||||
.pill.status-gemini_error,
|
||||
.pill.status-validation_error,
|
||||
.pill.status-metadata_write_error,
|
||||
.pill.state-failed {
|
||||
background: var(--danger);
|
||||
color: #000;
|
||||
}
|
||||
.pill.state-running {
|
||||
background: var(--accent);
|
||||
color: #000;
|
||||
}
|
||||
.badge.type-image {
|
||||
background: #fff;
|
||||
color: #000;
|
||||
}
|
||||
.badge.type-video {
|
||||
background: var(--accent);
|
||||
color: #000;
|
||||
}
|
||||
|
||||
.runs {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
.runs th,
|
||||
.runs td {
|
||||
text-align: left;
|
||||
padding: 0.45rem 0.5rem;
|
||||
border-bottom: 1px solid var(--line);
|
||||
}
|
||||
.runs th {
|
||||
color: var(--text-dim);
|
||||
font-weight: 600;
|
||||
font-size: 0.78rem;
|
||||
letter-spacing: 0.04em;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.mono {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
|
||||
color: var(--text-dim);
|
||||
}
|
||||
12
frontend/src/vite-env.d.ts
vendored
Normal file
12
frontend/src/vite-env.d.ts
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
/// <reference types="vite/client" />
|
||||
|
||||
interface ImportMetaEnv {
|
||||
readonly VITE_DEV_AUTH_BYPASS?: string;
|
||||
readonly VITE_AZURE_TENANT_ID?: string;
|
||||
readonly VITE_AZURE_CLIENT_ID?: string;
|
||||
readonly VITE_PUBLIC_BASE?: string;
|
||||
}
|
||||
|
||||
interface ImportMeta {
|
||||
readonly env: ImportMetaEnv;
|
||||
}
|
||||
20
frontend/tsconfig.json
Normal file
20
frontend/tsconfig.json
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"useDefineForClassFields": true,
|
||||
"lib": ["ES2022", "DOM", "DOM.Iterable"],
|
||||
"module": "ESNext",
|
||||
"skipLibCheck": true,
|
||||
"moduleResolution": "Bundler",
|
||||
"allowImportingTsExtensions": false,
|
||||
"isolatedModules": true,
|
||||
"moduleDetection": "force",
|
||||
"noEmit": true,
|
||||
"jsx": "react-jsx",
|
||||
"strict": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"noFallthroughCasesInSwitch": true
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
33
frontend/vite.config.ts
Normal file
33
frontend/vite.config.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import { defineConfig, loadEnv } from "vite";
|
||||
import react from "@vitejs/plugin-react";
|
||||
|
||||
// In prod the SPA is served from /marriott-tagging/ (Apache alias). In dev (vite)
|
||||
// it's at /. VITE_PUBLIC_BASE controls which one.
|
||||
export default defineConfig(({ mode }) => {
  // Empty prefix: load every variable, not only VITE_*-prefixed ones, so
  // MARRIOTT_API_PORT is visible below.
  const env = loadEnv(mode, process.cwd(), "");
  const publicBase = env.VITE_PUBLIC_BASE || "http://localhost:5173";

  // `base` controls the prefix Vite bakes into asset URLs. It is the path
  // component of VITE_PUBLIC_BASE, normalised to end with "/".
  let pathname = "/";
  try {
    pathname = new URL(publicBase).pathname;
  } catch {
    // Not an absolute URL. Accept a bare path ("/marriott-tagging") as well,
    // rather than silently serving assets from "/" and breaking the deployed
    // SPA; anything else keeps the "/" default.
    if (publicBase.startsWith("/")) pathname = publicBase;
  }
  const base = pathname.endsWith("/") ? pathname : pathname + "/";

  return {
    base,
    plugins: [react()],
    server: {
      port: 5173,
      proxy: {
        // In dev the SPA hits /api/* directly; vite forwards to FastAPI
        // running on host port MARRIOTT_API_PORT (default 8004).
        "/api": {
          target: `http://127.0.0.1:${env.MARRIOTT_API_PORT || "8004"}`,
          changeOrigin: true,
        },
      },
    },
  };
});
|
||||
130
main.py
130
main.py
|
|
@ -11,11 +11,14 @@ import os
|
|||
import re
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from PIL import Image
|
||||
|
||||
import db
|
||||
|
||||
from box_sdk_gen import BoxClient, BoxJWTAuth, JWTConfig, BoxAPIError
|
||||
from box_sdk_gen.managers.file_metadata import (
|
||||
CreateFileMetadataByIdScope,
|
||||
|
|
@ -704,6 +707,29 @@ def main():
|
|||
print("Marriott Box Asset Tagger")
|
||||
print("=" * 60)
|
||||
|
||||
run_id = uuid.uuid4()
|
||||
print(f"Run ID: {run_id}")
|
||||
|
||||
# Open DB connection (best-effort: DB is auxiliary, never blocks the pass).
|
||||
db_conn = None
|
||||
try:
|
||||
db_conn = db.get_conn()
|
||||
db.ensure_schema(db_conn)
|
||||
print("Postgres logging enabled.")
|
||||
except Exception as e:
|
||||
print(f"WARN: Postgres unavailable ({type(e).__name__}: {e}) — continuing without DB logging.")
|
||||
|
||||
try:
|
||||
_run_pass(run_id, db_conn)
|
||||
finally:
|
||||
if db_conn is not None:
|
||||
try:
|
||||
db_conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _run_pass(run_id, db_conn):
|
||||
# Initialize clients
|
||||
box_client = init_box_client()
|
||||
gemini_client = init_gemini_client()
|
||||
|
|
@ -775,19 +801,42 @@ def main():
|
|||
image_prompt = build_gemini_prompt(template_schema, file_name, folder_path)
|
||||
|
||||
# Analyze with Gemini
|
||||
gemini_started = time.monotonic()
|
||||
raw_metadata = analyze_image_with_gemini(gemini_client, image_bytes, mime_type, image_prompt)
|
||||
gemini_elapsed_ms = int((time.monotonic() - gemini_started) * 1000)
|
||||
if raw_metadata is None:
|
||||
img_errored += 1
|
||||
db.log_event(
|
||||
db_conn, run_id=run_id, file_id=file_id, file_name=file_name,
|
||||
folder_path=folder_path, media_type="image", gemini_model=GEMINI_MODEL,
|
||||
prompt=image_prompt, status="gemini_error",
|
||||
error_message="analyze_image_with_gemini returned None",
|
||||
duration_ms=gemini_elapsed_ms,
|
||||
)
|
||||
continue
|
||||
|
||||
# Extract description before validation
|
||||
description = raw_metadata.pop("description", None)
|
||||
# Snapshot the raw Gemini response (including description) for the DB.
|
||||
raw_for_log = dict(raw_metadata)
|
||||
if description is not None:
|
||||
raw_for_log["description"] = description
|
||||
|
||||
# Validate and clean
|
||||
cleaned_metadata = validate_and_clean_metadata(raw_metadata, template_schema)
|
||||
if not cleaned_metadata:
|
||||
print(f" WARNING: No valid metadata fields after validation — skipping")
|
||||
img_errored += 1
|
||||
db.log_event(
|
||||
db_conn, run_id=run_id, file_id=file_id, file_name=file_name,
|
||||
folder_path=folder_path, media_type="image", gemini_model=GEMINI_MODEL,
|
||||
prompt=image_prompt, raw_response=raw_for_log,
|
||||
description=description if isinstance(description, str) else None,
|
||||
validated_metadata=cleaned_metadata,
|
||||
status="validation_error",
|
||||
error_message="No valid metadata fields after validation",
|
||||
duration_ms=gemini_elapsed_ms,
|
||||
)
|
||||
continue
|
||||
|
||||
print(f" Metadata: {json.dumps(cleaned_metadata, indent=2)}")
|
||||
|
|
@ -795,13 +844,36 @@ def main():
|
|||
# Write metadata to Box
|
||||
if not write_metadata_to_box(box_client, file_id, cleaned_metadata, file_name):
|
||||
img_errored += 1
|
||||
db.log_event(
|
||||
db_conn, run_id=run_id, file_id=file_id, file_name=file_name,
|
||||
folder_path=folder_path, media_type="image", gemini_model=GEMINI_MODEL,
|
||||
prompt=image_prompt, raw_response=raw_for_log,
|
||||
description=description if isinstance(description, str) else None,
|
||||
validated_metadata=cleaned_metadata,
|
||||
metadata_write_success=False,
|
||||
status="metadata_write_error",
|
||||
error_message="write_metadata_to_box returned False",
|
||||
duration_ms=gemini_elapsed_ms,
|
||||
)
|
||||
continue
|
||||
|
||||
# Write description to Box
|
||||
description_write_success = None
|
||||
if description and isinstance(description, str):
|
||||
write_description_to_box(box_client, file_id, description, file_name)
|
||||
description_write_success = write_description_to_box(box_client, file_id, description, file_name)
|
||||
|
||||
img_tagged += 1
|
||||
db.log_event(
|
||||
db_conn, run_id=run_id, file_id=file_id, file_name=file_name,
|
||||
folder_path=folder_path, media_type="image", gemini_model=GEMINI_MODEL,
|
||||
prompt=image_prompt, raw_response=raw_for_log,
|
||||
description=description if isinstance(description, str) else None,
|
||||
validated_metadata=cleaned_metadata,
|
||||
metadata_write_success=True,
|
||||
description_write_success=description_write_success,
|
||||
status="success",
|
||||
duration_ms=gemini_elapsed_ms,
|
||||
)
|
||||
|
||||
# Rate limit delay (skip after last file)
|
||||
if i < img_total or video_files:
|
||||
|
|
@ -856,20 +928,45 @@ def main():
|
|||
video_prompt = build_video_prompt(template_schema, file_name, folder_path)
|
||||
|
||||
# Analyze with Gemini
|
||||
gemini_started = time.monotonic()
|
||||
raw_metadata = analyze_video_with_gemini(gemini_client, video_bytes, mime_type, video_prompt)
|
||||
gemini_elapsed_ms = int((time.monotonic() - gemini_started) * 1000)
|
||||
if raw_metadata is None:
|
||||
vid_errored += 1
|
||||
db.log_event(
|
||||
db_conn, run_id=run_id, file_id=file_id, file_name=file_name,
|
||||
folder_path=folder_path, media_type="video", gemini_model=GEMINI_MODEL,
|
||||
prompt=video_prompt, status="gemini_error",
|
||||
error_message="analyze_video_with_gemini returned None",
|
||||
duration_ms=gemini_elapsed_ms,
|
||||
)
|
||||
continue
|
||||
|
||||
# Extract description and scenes before validation
|
||||
description = raw_metadata.pop("description", None)
|
||||
scenes = raw_metadata.pop("scenes", None)
|
||||
raw_for_log = dict(raw_metadata)
|
||||
if description is not None:
|
||||
raw_for_log["description"] = description
|
||||
if scenes is not None:
|
||||
raw_for_log["scenes"] = scenes
|
||||
|
||||
# Validate and clean
|
||||
cleaned_metadata = validate_and_clean_metadata(raw_metadata, template_schema)
|
||||
if not cleaned_metadata:
|
||||
print(f" WARNING: No valid metadata fields after validation — skipping")
|
||||
vid_errored += 1
|
||||
db.log_event(
|
||||
db_conn, run_id=run_id, file_id=file_id, file_name=file_name,
|
||||
folder_path=folder_path, media_type="video", gemini_model=GEMINI_MODEL,
|
||||
prompt=video_prompt, raw_response=raw_for_log,
|
||||
description=description if isinstance(description, str) else None,
|
||||
scenes=scenes,
|
||||
validated_metadata=cleaned_metadata,
|
||||
status="validation_error",
|
||||
error_message="No valid metadata fields after validation",
|
||||
duration_ms=gemini_elapsed_ms,
|
||||
)
|
||||
continue
|
||||
|
||||
print(f" Metadata: {json.dumps(cleaned_metadata, indent=2)}")
|
||||
|
|
@ -879,17 +976,44 @@ def main():
|
|||
# Write metadata to Box
|
||||
if not write_metadata_to_box(box_client, file_id, cleaned_metadata, file_name):
|
||||
vid_errored += 1
|
||||
db.log_event(
|
||||
db_conn, run_id=run_id, file_id=file_id, file_name=file_name,
|
||||
folder_path=folder_path, media_type="video", gemini_model=GEMINI_MODEL,
|
||||
prompt=video_prompt, raw_response=raw_for_log,
|
||||
description=description if isinstance(description, str) else None,
|
||||
scenes=scenes,
|
||||
validated_metadata=cleaned_metadata,
|
||||
metadata_write_success=False,
|
||||
status="metadata_write_error",
|
||||
error_message="write_metadata_to_box returned False",
|
||||
duration_ms=gemini_elapsed_ms,
|
||||
)
|
||||
continue
|
||||
|
||||
# Write description to Box
|
||||
description_write_success = None
|
||||
if description and isinstance(description, str):
|
||||
write_description_to_box(box_client, file_id, description, file_name)
|
||||
description_write_success = write_description_to_box(box_client, file_id, description, file_name)
|
||||
|
||||
# Write scene breakdown as comment
|
||||
scene_comment_write_success = None
|
||||
if scenes and isinstance(scenes, (str, list)):
|
||||
write_scene_comment_to_box(box_client, file_id, scenes, file_name)
|
||||
scene_comment_write_success = write_scene_comment_to_box(box_client, file_id, scenes, file_name)
|
||||
|
||||
vid_tagged += 1
|
||||
db.log_event(
|
||||
db_conn, run_id=run_id, file_id=file_id, file_name=file_name,
|
||||
folder_path=folder_path, media_type="video", gemini_model=GEMINI_MODEL,
|
||||
prompt=video_prompt, raw_response=raw_for_log,
|
||||
description=description if isinstance(description, str) else None,
|
||||
scenes=scenes,
|
||||
validated_metadata=cleaned_metadata,
|
||||
metadata_write_success=True,
|
||||
description_write_success=description_write_success,
|
||||
scene_comment_write_success=scene_comment_write_success,
|
||||
status="success",
|
||||
duration_ms=gemini_elapsed_ms,
|
||||
)
|
||||
|
||||
# Rate limit delay (skip after last video)
|
||||
if i < vid_total:
|
||||
|
|
|
|||
|
|
@ -2,3 +2,9 @@ box-sdk-gen[jwt]
|
|||
google-genai
|
||||
Pillow
|
||||
python-dotenv
|
||||
psycopg[binary]
|
||||
apscheduler
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
PyJWT[crypto]
|
||||
httpx
|
||||
|
|
|
|||
82
scheduler.py
Normal file
82
scheduler.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
"""
|
||||
Long-running scheduler entrypoint for the Marriott Box tagger Docker container.
|
||||
|
||||
Replaces the Ubuntu systemd timer when running under Docker. Fires main.main()
|
||||
on the schedule in $SCHEDULE_CRON (default: daily 02:00). If $RUN_AT_STARTUP=1,
|
||||
also fires an immediate one-off pass on boot.
|
||||
|
||||
DB schema is bootstrapped once at startup if Postgres is reachable; main() also
|
||||
re-checks per run, so a temporary DB outage during startup self-heals.
|
||||
"""
|
||||
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from dotenv import load_dotenv
|
||||
|
||||
import db
|
||||
import main as tagger
|
||||
|
||||
|
||||
def _bootstrap_db():
|
||||
"""Best-effort: open a connection, apply schema. Failures are logged, not fatal."""
|
||||
try:
|
||||
conn = db.get_conn()
|
||||
db.ensure_schema(conn)
|
||||
conn.close()
|
||||
print("[scheduler] Postgres schema ensured.")
|
||||
except Exception as e:
|
||||
print(f"[scheduler] WARN: could not bootstrap Postgres ({type(e).__name__}: {e}).")
|
||||
|
||||
|
||||
def _run_job():
|
||||
print(f"\n[scheduler] Firing tagging pass at {time.strftime('%Y-%m-%d %H:%M:%S %Z')}")
|
||||
try:
|
||||
tagger.main()
|
||||
except SystemExit as e:
|
||||
# main() calls sys.exit() on missing credentials; let the scheduler keep running.
|
||||
print(f"[scheduler] tagging pass exited with code {e.code} — scheduler stays up.")
|
||||
except Exception as e:
|
||||
print(f"[scheduler] tagging pass raised {type(e).__name__}: {e} — scheduler stays up.")
|
||||
|
||||
|
||||
def main():
    """Configure the cron-driven scheduler and block until it is shut down.

    Environment:
        SCHEDULE_CRON: crontab expression for the tagging pass (default daily 02:00).
        TZ: timezone name for the scheduler and trigger; system local if unset.
        RUN_AT_STARTUP: "1", "true" or "yes" (any case) fires one pass immediately.
    """
    load_dotenv()
    _bootstrap_db()

    cron_expr = os.getenv("SCHEDULE_CRON", "0 2 * * *").strip()
    tz = os.getenv("TZ")  # apscheduler reads tzinfo; if unset uses system local
    print(f"[scheduler] Cron schedule: '{cron_expr}' (TZ={tz or 'system'})")

    scheduler = BlockingScheduler(timezone=tz) if tz else BlockingScheduler()
    scheduler.add_job(
        _run_job,
        CronTrigger.from_crontab(cron_expr, timezone=tz) if tz else CronTrigger.from_crontab(cron_expr),
        id="tagging_pass",
        max_instances=1,
        coalesce=True,
        misfire_grace_time=3600,
    )

    stop_requested = False

    # SIGTERM/SIGINT → graceful shutdown
    def _shutdown(signum, frame):
        nonlocal stop_requested
        stop_requested = True
        print(f"[scheduler] Received signal {signum} — shutting down.")
        # shutdown() raises if the scheduler has not been started yet, which
        # is the case while the RUN_AT_STARTUP pass is still executing.
        if scheduler.running:
            scheduler.shutdown(wait=False)
        sys.exit(0)

    signal.signal(signal.SIGTERM, _shutdown)
    signal.signal(signal.SIGINT, _shutdown)

    if os.getenv("RUN_AT_STARTUP", "").strip().lower() in ("1", "true", "yes"):
        print("[scheduler] RUN_AT_STARTUP set — firing one pass now.")
        _run_job()
        if stop_requested:
            # The startup pass runs on the main thread, so the handler's
            # SystemExit is raised inside _run_job(), which absorbs it.
            # Honour the signal here instead of entering the scheduler loop.
            print("[scheduler] Shutdown requested during startup pass — exiting.")
            return

    print("[scheduler] Entering scheduler loop. Ctrl-C / SIGTERM to exit.")
    scheduler.start()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
29
schema.sql
Normal file
29
schema.sql
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
-- Marriott Box Tagger — request log
|
||||
-- One row per file the tagger sent to Gemini (success or error).
|
||||
-- Skipped-as-already-tagged files do not produce rows.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS tagging_events (
    id                          BIGSERIAL PRIMARY KEY,
    -- Groups all rows written during one tagging pass.
    run_id                      UUID NOT NULL,
    created_at                  TIMESTAMPTZ NOT NULL DEFAULT now(),
    file_id                     TEXT NOT NULL,
    file_name                   TEXT NOT NULL,
    folder_path                 TEXT,
    media_type                  TEXT NOT NULL CHECK (media_type IN ('image','video')),
    gemini_model                TEXT NOT NULL,
    prompt                      TEXT,
    -- Gemini response as logged by the tagger; NULL when the call produced none.
    raw_response                JSONB,
    description                 TEXT,
    -- The tagger only supplies this for videos.
    scenes                      JSONB,
    validated_metadata          JSONB,
    -- Box write outcomes; NULL means the write was not attempted.
    metadata_write_success      BOOLEAN,
    description_write_success   BOOLEAN,
    scene_comment_write_success BOOLEAN,
    -- The tagger writes: success, gemini_error, validation_error,
    -- metadata_write_error.
    status                      TEXT NOT NULL,
    error_message               TEXT,
    -- Elapsed time of the Gemini call, in milliseconds.
    duration_ms                 INTEGER
);

-- Lookup by run, lookup by file, and newest-first listing.
CREATE INDEX IF NOT EXISTS tagging_events_run_id_idx ON tagging_events (run_id);
CREATE INDEX IF NOT EXISTS tagging_events_file_id_idx ON tagging_events (file_id);
CREATE INDEX IF NOT EXISTS tagging_events_created_idx ON tagging_events (created_at DESC);
|
||||
Loading…
Add table
Reference in a new issue