diff --git a/.env.example b/.env.example index 8517bf6..ff734d7 100644 --- a/.env.example +++ b/.env.example @@ -32,6 +32,13 @@ DEV_AUTH_EMAIL=dev@oliver.agency # goes in both the backend (token validation) and the frontend (MSAL login). AZURE_TENANT_ID= AZURE_CLIENT_ID= +# Comma-separated list of admin emails. Only these accounts can click +# "Run now" or "Backfill from Box". Everyone else gets a read-only UI. +# Example: ADMIN_EMAILS=alice@oliver.agency,bob@oliver.agency +ADMIN_EMAILS= +# When DEV_AUTH_BYPASS=true the dev user is admin by default. Flip to false +# to test the read-only UI without enabling SSO. +DEV_AUTH_IS_ADMIN=true # Frontend mirrors — Vite reads VITE_* at build time and bakes them into dist. # Keep these in sync with the values above. VITE_DEV_AUTH_BYPASS=true diff --git a/api.py b/api.py index 08ca8d6..906c234 100644 --- a/api.py +++ b/api.py @@ -23,7 +23,7 @@ from fastapi.middleware.cors import CORSMiddleware from psycopg.rows import dict_row import db -from auth import User, maybe_auth_info, require_auth +from auth import User, maybe_auth_info, require_admin, require_auth BOX_FILE_URL = "https://app.box.com/file/{file_id}" @@ -86,26 +86,83 @@ def me(user: User = Depends(require_auth)): # ── Search ────────────────────────────────────────────────────────────────── -SEARCH_SQL = """ -SELECT id, run_id, created_at, file_id, file_name, folder_path, media_type, - gemini_model, description, scenes, validated_metadata, raw_response, - metadata_write_success, description_write_success, scene_comment_write_success, - status, error_message, duration_ms -FROM tagging_events -WHERE - (%(q)s = '' OR ( - file_name ILIKE %(like)s - OR folder_path ILIKE %(like)s - OR description ILIKE %(like)s - OR status ILIKE %(like)s - OR file_id ILIKE %(like)s - OR coalesce(validated_metadata::text, '') ILIKE %(like)s - OR coalesce(raw_response::text, '') ILIKE %(like)s - OR coalesce(scenes::text, '') ILIKE %(like)s - )) -ORDER BY created_at DESC -LIMIT %(limit)s -""" 
+# Columns the search ILIKE walks over (substring match, case-insensitive). +_SEARCH_COLS = [ + "file_name", + "folder_path", + "description", + "status", + "file_id", + "coalesce(validated_metadata::text, '')", + "coalesce(raw_response::text, '')", + "coalesce(scenes::text, '')", +] + +# Single concatenated text used for trigram similarity (fuzzy / typo tolerance). +_SEARCH_BLOB = ( + "coalesce(file_name,'')||' '||coalesce(folder_path,'')||' '||" + "coalesce(description,'')||' '||coalesce(validated_metadata::text,'')||' '||" + "coalesce(scenes::text,'')" +) + +# Fuzzy threshold for trigram similarity. 0.3 catches typos like +# "femalle" → "female" without flooding the results with noise. +_SIM_THRESHOLD = 0.3 + +# Short tokens (1-2 chars) are too noisy for trigrams — fall back to substring +# match only for those. +_MIN_FUZZY_TOKEN_LEN = 3 + + +def _build_search_sql(q: str, limit: int): + """ + Tokenise the query on whitespace, AND-match every token across the columns, + where each token may match by substring OR by trigram similarity. Results + ranked by summed similarity score, then recency. 
+ """ + tokens = [t for t in q.strip().split() if t] + common_cols = ( + "id, run_id, created_at, file_id, file_name, folder_path, media_type, " + "gemini_model, description, scenes, validated_metadata, raw_response, " + "metadata_write_success, description_write_success, scene_comment_write_success, " + "status, error_message, duration_ms" + ) + + if not tokens: + return ( + f"SELECT {common_cols} FROM tagging_events " + f"ORDER BY created_at DESC LIMIT %(limit)s", + {"limit": limit}, + ) + + params: dict = {"limit": limit} + clauses: list[str] = [] + score_terms: list[str] = [] + for i, tok in enumerate(tokens): + like_key = f"like_{i}" + sim_key = f"sim_{i}" + params[like_key] = f"%{tok}%" + params[sim_key] = tok + col_ors = " OR ".join(f"{c} ILIKE %({like_key})s" for c in _SEARCH_COLS) + if len(tok) >= _MIN_FUZZY_TOKEN_LEN: + clauses.append( + f"(({col_ors}) " + f"OR similarity({_SEARCH_BLOB}, %({sim_key})s) > {_SIM_THRESHOLD})" + ) + score_terms.append(f"similarity({_SEARCH_BLOB}, %({sim_key})s)") + else: + clauses.append(f"({col_ors})") + + where = " AND ".join(clauses) + score_sql = " + ".join(score_terms) if score_terms else "0" + sql = ( + f"SELECT {common_cols}, ({score_sql}) AS _score " + f"FROM tagging_events " + f"WHERE {where} " + f"ORDER BY _score DESC, created_at DESC " + f"LIMIT %(limit)s" + ) + return sql, params def _event_to_dict(row): @@ -125,10 +182,10 @@ def search_events( limit: int = Query(100, ge=1, le=500), user: User = Depends(require_auth), ): - like = f"%{q}%" + sql, params = _build_search_sql(q, limit) with _conn() as c: with c.cursor(row_factory=dict_row) as cur: - cur.execute(SEARCH_SQL, {"q": q, "like": like, "limit": limit}) + cur.execute(sql, params) rows = cur.fetchall() return {"q": q, "count": len(rows), "results": [_event_to_dict(r) for r in rows]} @@ -168,7 +225,7 @@ def _run_pass_in_thread(run_id: uuid.UUID): @app.post("/api/runs") -def start_run(user: User = Depends(require_auth)): +def start_run(user: User = 
Depends(require_admin)): run_id = uuid.uuid4() t = threading.Thread(target=_run_pass_in_thread, args=(run_id,), daemon=True) t.start() @@ -205,7 +262,7 @@ def _run_backfill_in_thread(run_id: uuid.UUID): @app.post("/api/backfill") -def start_backfill(user: User = Depends(require_auth)): +def start_backfill(user: User = Depends(require_admin)): """ Walk the Box folder and mirror any existing marriottUsa metadata into the local DB as `status='backfilled'` rows. Use this after first deploy (or diff --git a/auth.py b/auth.py index 4f40284..b907170 100644 --- a/auth.py +++ b/auth.py @@ -19,6 +19,17 @@ AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID", "").strip() AZURE_CLIENT_ID = os.getenv("AZURE_CLIENT_ID", "").strip() DEV_AUTH_BYPASS = os.getenv("DEV_AUTH_BYPASS", "").strip().lower() in ("1", "true", "yes") +# Comma-separated allowlist of admin emails. Case-insensitive. Members can +# trigger destructive endpoints (Run now, Backfill). Everyone else can read. +_ADMIN_EMAILS = { + e.strip().lower() + for e in os.getenv("ADMIN_EMAILS", "").split(",") + if e.strip() +} +# In bypass mode the dev user is admin by default — set DEV_AUTH_IS_ADMIN=false +# to test the non-admin UX without flipping to MSAL. 
# Whether the bypass (dev) user is treated as an admin. Defaults to on; any
# value other than 1/true/yes (case-insensitive) turns it off.
_DEV_AUTH_IS_ADMIN = os.getenv("DEV_AUTH_IS_ADMIN", "true").strip().lower() in ("1", "true", "yes")


class User:
    """The authenticated caller as seen by the API layer.

    All constructor arguments are keyword-only. ``dev`` and ``is_admin``
    both default to ``False``.
    """

    def __init__(self, *, oid: str, name: str, email: str, dev: bool = False, is_admin: bool = False):
        self.oid, self.name, self.email = oid, name, email
        self.dev = dev
        self.is_admin = is_admin

    def to_dict(self):
        """Return the user's attributes as a plain dict."""
        exported = ("oid", "name", "email", "dev", "is_admin")
        return {attr: getattr(self, attr) for attr in exported}


def _is_admin_email(email: str) -> bool:
    """Return True when *email*, trimmed and lower-cased, is in ``_ADMIN_EMAILS``."""
    if not email:
        # Empty / missing address can never be an admin.
        return False
    return email.strip().lower() in _ADMIN_EMAILS


def _bypass_user() -> User:
    """Build the synthetic user handed out in dev-bypass mode.

    Name and email come from ``DEV_AUTH_NAME`` / ``DEV_AUTH_EMAIL``. The user
    is an admin when ``_DEV_AUTH_IS_ADMIN`` is set or when the dev email is
    itself on the admin allowlist.
    """
    dev_email = os.getenv("DEV_AUTH_EMAIL", "dev@oliver.agency")
    dev_name = os.getenv("DEV_AUTH_NAME", "Dev User")
    admin = _DEV_AUTH_IS_ADMIN or _is_admin_email(dev_email)
    return User(
        oid="dev-bypass",
        name=dev_name,
        email=dev_email,
        dev=True,
        is_admin=admin,
    )
def require_admin(user: User = Depends(require_auth)) -> User:
    """FastAPI dependency that lets only admin callers through.

    The caller is first resolved by ``require_auth``; the same ``User`` is
    returned unchanged when its ``is_admin`` flag is set.

    Raises:
        HTTPException: 403 with detail "Admin-only endpoint" for any
            authenticated caller that is not an admin.
    """
    # NOTE(review): for SSO users is_admin is derived from a token claim
    # (preferred_username / upn / email) matched against ADMIN_EMAILS.
    # Microsoft documents preferred_username as mutable and not meant for
    # authorization decisions - confirm this is acceptable for this tenant.
    if user.is_admin:
        return user
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Admin-only endpoint",
    )


def maybe_auth_info():
    """Diagnostic helper for /api/health: summarise how auth is configured."""
    info = {"dev_bypass": DEV_AUTH_BYPASS}
    info["tenant_configured"] = bool(AZURE_TENANT_ID)
    info["client_configured"] = bool(AZURE_CLIENT_ID)
    # A count rather than a flag: how many addresses ADMIN_EMAILS yielded.
    info["admin_emails_configured"] = len(_ADMIN_EMAILS)
    return info
CORS_ORIGINS: ${CORS_ORIGINS:-} diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 3bfa22e..69aef9d 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -4,6 +4,7 @@ import { Event, Run, listRuns, + me as fetchMe, runEvents, searchEvents, startBackfill, @@ -17,6 +18,8 @@ export function App() { const [searching, setSearching] = useState(false); const [error, setError] = useState(null); + const [isAdmin, setIsAdmin] = useState(false); + const [activeRun, setActiveRun] = useState(null); const [activeRunKind, setActiveRunKind] = useState<"tag" | "backfill" | null>(null); const [activeRunEvents, setActiveRunEvents] = useState([]); @@ -52,11 +55,14 @@ export function App() { } }, [auth.getToken]); - // Initial load: recent rows + recent runs. + // Initial load: who am I + recent rows + recent runs. useEffect(() => { + fetchMe(auth.getToken) + .then((u) => setIsAdmin(Boolean(u.is_admin))) + .catch(() => setIsAdmin(false)); doSearch(""); refreshRuns(); - }, [doSearch, refreshRuns]); + }, [auth.getToken, doSearch, refreshRuns]); // Poll active run. useEffect(() => { @@ -180,24 +186,28 @@ export function App() { - - + {isAdmin && ( + <> + + + + )} {error &&

{error}

} diff --git a/frontend/src/api.ts b/frontend/src/api.ts index 8997902..f2dd264 100644 --- a/frontend/src/api.ts +++ b/frontend/src/api.ts @@ -104,3 +104,13 @@ export function runEvents(getToken: GetTokenFn, runId: string) { export function health() { return fetch(`${API_BASE}/health`).then((r) => r.json()); } + +export function me(getToken: GetTokenFn) { + return req<{ + oid: string; + name: string; + email: string; + dev: boolean; + is_admin: boolean; + }>(`/me`, getToken); +} diff --git a/schema.sql b/schema.sql index 8b088de..9d36a21 100644 --- a/schema.sql +++ b/schema.sql @@ -2,6 +2,9 @@ -- One row per file the tagger sent to Gemini (success or error). -- Skipped-as-already-tagged files do not produce rows. +-- pg_trgm powers the fuzzy `similarity()` call in /api/events. Idempotent. +CREATE EXTENSION IF NOT EXISTS pg_trgm; + CREATE TABLE IF NOT EXISTS tagging_events ( id BIGSERIAL PRIMARY KEY, run_id UUID NOT NULL,