ai_qc/backend/box_client.py
nickviljoen 7c3945417a Compute Box OAuth redirect URI from the request
Caught a redirect_uri_mismatch on the dev server: the env file was the
localhost one (BOX_REDIRECT_URI=http://localhost:7183/auth/box/callback)
which deploy.sh resets on every deploy, so the dev server kept telling Box
"redirect me to localhost". Same thing would have hit prod.

Switched to request-based detection so the same code works on laptop, dev,
and prod:
- box_client.build_authorize_url and exchange_code_for_tokens now take
  redirect_uri as an explicit parameter (the two URIs MUST match — Box
  rejects the token exchange otherwise).
- New _box_redirect_uri() helper in api_server: prefers BOX_REDIRECT_URI
  if explicitly set (escape hatch), otherwise reads X-Forwarded-Host (set
  by Apache when behind the optical-dev / optical-prod reverse proxy,
  where the app is mounted at /ai_qc/), and falls back to request.host
  for direct local access.
- Dropped the per-env BOX_REDIRECT_URI from the four env files. Templates
  keep it commented out as documentation, and now also list all three
  redirect URIs you'll need to register in the Box developer console.
- box_client.is_configured() no longer gates on the redirect URI.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 15:50:59 +02:00

189 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""
Thin Box.com OAuth + REST client.
We use raw `requests` rather than the boxsdk package — the OAuth flow plus the
handful of folder/file endpoints we need is small enough that the SDK's extra
dependencies and abstractions aren't worth it. Tokens are persisted by
box_tokens.py.
OAuth state is signed with the Flask SECRET_KEY (HMAC via PyJWT) so a callback
can't be forged: the state encodes the originating user's email and is
verified before we exchange the code.
"""
import os
import time
import urllib.parse
from typing import Optional
import jwt
import requests
import box_tokens
# --- Box endpoints ---------------------------------------------------------
# The consent page is served from account.box.com; token exchange, token
# revocation and the 2.0 REST API are served from api.box.com.
BOX_AUTHORIZE_URL = 'https://account.box.com/api/oauth2/authorize'
BOX_TOKEN_URL = 'https://api.box.com/oauth2/token'
BOX_REVOKE_URL = 'https://api.box.com/oauth2/revoke'
BOX_API_BASE = 'https://api.box.com/2.0'

# --- OAuth scopes ----------------------------------------------------------
# "root_readwrite" is Box's blanket read/write scope over every folder the
# user can reach. The watcher has to read, move and upload files, so a
# read-only scope would not be sufficient.
DEFAULT_SCOPES = ['root_readwrite']

# --- State token lifetime --------------------------------------------------
# How long a signed OAuth state stays valid: the user has 10 minutes between
# hitting /auth/box/login and arriving back at the callback.
STATE_TTL_SECONDS = 10 * 60
def _client_id() -> str:
    """Return the Box OAuth client ID taken from ``BOX_CLIENT_ID``.

    The environment is consulted on every call (nothing is cached at import
    time). An unset variable yields the empty string.
    """
    return os.getenv('BOX_CLIENT_ID', '')
def _client_secret() -> str:
    """Return the Box OAuth client secret taken from ``BOX_CLIENT_SECRET``.

    The environment is consulted on every call (nothing is cached at import
    time). An unset variable yields the empty string.
    """
    return os.getenv('BOX_CLIENT_SECRET', '')
def _secret_key() -> str:
    """Return the key used to sign and verify the OAuth state token.

    Taken from ``SECRET_KEY`` -- the same secret used for MSAL session
    signing, which avoids having a separate key to manage. When the variable
    is unset a fixed development placeholder is returned instead.
    """
    # NOTE(review): the placeholder is a constant in the source tree, so a
    # deployment that leaves SECRET_KEY unset signs state with a guessable
    # key. Confirm every non-local environment sets SECRET_KEY.
    return os.getenv('SECRET_KEY', 'dev-secret-key-change-me')
def is_configured() -> bool:
    """Report whether both the Box client ID and client secret are set.

    The redirect URI is deliberately not part of this check: it is derived
    from each incoming request rather than from configuration.
    """
    if not _client_id():
        return False
    return bool(_client_secret())
def make_state(user_email: str) -> str:
    """Build the signed OAuth ``state`` token for a Box login attempt.

    The token is an HS256 JWT signed with ``_secret_key()``. It carries the
    initiating user's email (stripped and lower-cased; ``None`` is treated as
    an empty string), a fixed ``purpose`` marker of ``'box_oauth'``, and
    ``iat`` / ``exp`` timestamps exactly ``STATE_TTL_SECONDS`` apart.

    Args:
        user_email: Email of the user starting the OAuth flow.

    Returns:
        The encoded token as produced by ``jwt.encode``.
    """
    # Read the clock once. Two separate time.time() calls can fall on either
    # side of a second boundary, which would make exp - iat one second longer
    # than the intended TTL.
    issued_at = int(time.time())
    payload = {
        'email': (user_email or '').strip().lower(),
        'purpose': 'box_oauth',
        'iat': issued_at,
        'exp': issued_at + STATE_TTL_SECONDS,
    }
    return jwt.encode(payload, _secret_key(), algorithm='HS256')
def verify_state(state: str) -> Optional[str]:
    """Validate an OAuth ``state`` token and recover the user's email.

    Args:
        state: The ``state`` value received on the OAuth callback.

    Returns:
        The ``email`` claim when the token decodes under ``_secret_key()``
        with HS256, carries ``purpose == 'box_oauth'`` and has a non-empty
        email. ``None`` when ``state`` is empty, when decoding raises
        ``jwt.PyJWTError`` (invalid or expired token), when the purpose does
        not match, or when the email claim is missing or empty.
    """
    if not state:
        return None

    try:
        decoded = jwt.decode(state, _secret_key(), algorithms=['HS256'])
    except jwt.PyJWTError:
        return None

    # Only accept tokens minted for this flow.
    if decoded.get('purpose') == 'box_oauth':
        return decoded.get('email') or None
    return None
def build_authorize_url(state: str, redirect_uri: str) -> str:
    """Compose the Box authorize URL the user's browser is sent to.

    Args:
        state: Signed state token to round-trip through Box.
        redirect_uri: Callback URL Box should send the user back to.

    Returns:
        ``BOX_AUTHORIZE_URL`` followed by a URL-encoded query string holding
        the response type, client ID, redirect URI, state and the
        space-joined ``DEFAULT_SCOPES``.
    """
    query = urllib.parse.urlencode({
        'response_type': 'code',
        'client_id': _client_id(),
        'redirect_uri': redirect_uri,
        'state': state,
        'scope': ' '.join(DEFAULT_SCOPES),
    })
    return f'{BOX_AUTHORIZE_URL}?{query}'
def exchange_code_for_tokens(code: str, redirect_uri: str) -> dict:
    """Swap an authorization code for an access + refresh token pair.

    Args:
        code: Authorization code received on the OAuth callback.
        redirect_uri: MUST be the same value that was sent in the authorize
            call -- Box rejects the exchange otherwise.

    Returns:
        The decoded JSON body of Box's token response.

    Raises:
        requests.HTTPError: Box answered with an error status.
        requests.RequestException: The request itself failed (network
            error, timeout after 15 seconds, ...).
    """
    form = {
        'grant_type': 'authorization_code',
        'code': code,
        'client_id': _client_id(),
        'client_secret': _client_secret(),
        'redirect_uri': redirect_uri,
    }
    response = requests.post(BOX_TOKEN_URL, data=form, timeout=15)
    response.raise_for_status()
    return response.json()
def refresh_tokens(refresh_token: str) -> dict:
    """Exchange a refresh token for a NEW access + refresh token pair.

    Box rotates the refresh token on every call, so the caller must persist
    the returned pair and discard the token passed in.

    Args:
        refresh_token: The currently stored refresh token.

    Returns:
        The decoded JSON body of Box's token response.

    Raises:
        requests.HTTPError: Box answered with an error status.
        requests.RequestException: The request itself failed (network
            error, timeout after 15 seconds, ...).
    """
    form = {
        'grant_type': 'refresh_token',
        'refresh_token': refresh_token,
        'client_id': _client_id(),
        'client_secret': _client_secret(),
    }
    response = requests.post(BOX_TOKEN_URL, data=form, timeout=15)
    response.raise_for_status()
    return response.json()
def revoke_tokens(token: str) -> bool:
    """Ask Box to revoke ``token``; best-effort, never raises on failure.

    Args:
        token: The token to submit to Box's revoke endpoint.

    Returns:
        ``True`` when Box answered with a success status, ``False`` when it
        answered with an error status or the request failed (network error,
        timeout after 10 seconds, ...).
    """
    form = {
        'client_id': _client_id(),
        'client_secret': _client_secret(),
        'token': token,
    }
    try:
        response = requests.post(BOX_REVOKE_URL, data=form, timeout=10)
    except requests.RequestException:
        # Transport-level failure: report it as "not revoked".
        return False
    return response.ok
def get_box_user(access_token: str) -> dict:
    """Fetch the current Box user via ``GET /users/me``.

    Used after the token exchange so the Box identity can be recorded.

    Args:
        access_token: Bearer token to authenticate the request with.

    Returns:
        The decoded JSON body of Box's response.

    Raises:
        requests.HTTPError: Box answered with an error status.
        requests.RequestException: The request itself failed (network
            error, timeout after 10 seconds, ...).
    """
    bearer = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(f'{BOX_API_BASE}/users/me', headers=bearer, timeout=10)
    response.raise_for_status()
    return response.json()
def get_valid_access_token(user_email: str) -> Optional[str]:
    """Return a usable Box access token for ``user_email``.

    Looks up the stored token record; if its access token has not expired it
    is returned as-is. Otherwise the stored refresh token is exchanged for a
    new pair, the new pair is persisted via ``box_tokens.save_tokens``, and
    the new access token is returned.

    Args:
        user_email: The user whose Box tokens should be used.

    Returns:
        The access token, or ``None`` when no token record exists for the
        user or when the refresh request fails.
    """
    stored = box_tokens.get_tokens(user_email)
    if not stored:
        # The user never connected Box.
        return None

    if box_tokens.access_token_is_expired(stored):
        try:
            rotated = refresh_tokens(stored['refresh_token'])
        except requests.RequestException as e:
            print(f'[box_client] refresh failed for {user_email}: {e}')
            return None
        # Persist straight away: the refresh call returns a new refresh
        # token that must replace the stored one.
        box_tokens.save_tokens(user_email, rotated)
        return rotated.get('access_token')

    return stored['access_token']
def list_folder_items(access_token: str, folder_id: str, limit: int = 100) -> dict:
    """List a folder's contents via ``GET /folders/{id}/items``.

    A minimal smoke-test wrapper around the Box endpoint.

    Args:
        access_token: Bearer token to authenticate the request with.
        folder_id: ID of the folder to list; interpolated into the URL path
            as given.
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        The decoded JSON body of Box's response.

    Raises:
        requests.HTTPError: Box answered with an error status.
        requests.RequestException: The request itself failed (network
            error, timeout after 15 seconds, ...).
    """
    endpoint = f'{BOX_API_BASE}/folders/{folder_id}/items'
    query = {'limit': limit, 'fields': 'id,name,type,size,created_at,created_by'}
    bearer = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(endpoint, headers=bearer, params=query, timeout=15)
    response.raise_for_status()
    return response.json()