- Create cloud_run/saliency: FastAPI service running DeepGaze I/IIE/III on Cloud Run (4 vCPU, 16GB RAM); pre-downloads model weights in Docker build to eliminate cold-start delays; returns saliency map + gaze sequence + hotspots + design scores - Create cloud_run/processing: lightweight FastAPI service for heatmap generation and gaze sequence visualization (2 vCPU, 4GB RAM) - Add cloud_run/deploy.sh for gcloud deployment to project optical-414516 in region europe-west2 - Refactor analysis pipeline to route via Cloud Run when CLOUD_RUN_SALIENCY_URL is set, with local fallback for dev mode - Add cloud_run_client.py with sync httpx wrappers for background tasks - Split pyproject.toml: base = API-only deps, [ml] = torch/deepgaze for local dev; production Dockerfile is now lightweight (~no PyTorch) - Preserve Dockerfile.full + docker-compose.dev.yml for local ML dev - Auth via X-Internal-Secret header (CLOUD_RUN_SECRET env var) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
82 lines
2.6 KiB
Python
82 lines
2.6 KiB
Python
"""HTTP client for OliVAS Cloud Run services.
|
||
|
||
Provides sync wrappers suitable for use inside BackgroundTasks threadpool.
|
||
The fallback to local processing when Cloud Run URLs are not configured is handled by the calling analysis pipeline, not by this module.
|
||
"""
|
||
import base64
|
||
import logging
|
||
from typing import Any
|
||
|
||
import httpx
|
||
|
||
from app.config import settings
|
||
|
||
logger = logging.getLogger("olivas.cloud_run")

# Saliency inference: 360 s (6-minute) default for every timeout phase except
# connect, which is capped at 15 s. DeepGaze on CPU can take ~30–60s when warm,
# but the first request after a cold start may take longer.
_SALIENCY_TIMEOUT = httpx.Timeout(timeout=360.0, connect=15.0)

# Processing service: 120 s default for every timeout phase, 10 s for connect.
_PROCESSING_TIMEOUT = httpx.Timeout(timeout=120.0, connect=10.0)
|
||
|
||
|
||
def _headers() -> dict[str, str]:
    """Build the auth headers attached to every Cloud Run request.

    Returns:
        ``{"X-Internal-Secret": <secret>}`` when ``settings.CLOUD_RUN_SECRET``
        is truthy, otherwise an empty dict (no auth header is sent).
    """
    secret = settings.CLOUD_RUN_SECRET
    return {"X-Internal-Secret": secret} if secret else {}
|
||
|
||
|
||
def call_saliency(image_data: bytes, model_name: str) -> dict[str, Any]:
    """POST image to olivas-saliency and return JSON with saliency + metrics.

    Args:
        image_data: Raw image bytes. Uploaded as the multipart field ``image``
            with filename ``image.png`` and content type ``image/png``.
        model_name: Sent to the service as the ``model`` form field.

    Returns:
        The decoded JSON body of the service response.

    Raises:
        RuntimeError: If ``settings.CLOUD_RUN_SALIENCY_URL`` is unset/empty, or
            the service answers with any status code other than 200.
        httpx.HTTPError: On transport-level failures such as timeouts.
        ValueError: If a 200 response body is not valid JSON.
    """
    base_url = settings.CLOUD_RUN_SALIENCY_URL
    if not base_url:
        # Fail with an explicit message instead of an AttributeError on None
        # or an httpx "missing protocol" error on an empty string.
        raise RuntimeError("CLOUD_RUN_SALIENCY_URL is not configured")

    url = base_url.rstrip("/") + "/predict"
    logger.info(
        "Calling saliency service: model=%s image=%sB", model_name, len(image_data)
    )

    with httpx.Client(timeout=_SALIENCY_TIMEOUT) as client:
        response = client.post(
            url,
            files={"image": ("image.png", image_data, "image/png")},
            data={"model": model_name},
            headers=_headers(),
        )

    if response.status_code != 200:
        # Truncate the body so a large error page does not flood the logs.
        raise RuntimeError(
            f"Saliency service returned {response.status_code}: {response.text[:500]}"
        )

    result = response.json()
    logger.info(
        "Saliency done: score=%s entropy=%s",
        result.get("overall_score"),
        result.get("entropy_score"),
    )
    return result
|
||
|
||
|
||
def call_processing(
    image_data: bytes,
    saliency_b64: str,
    shape: list[int],
    gaze_sequence: list[dict],
) -> dict[str, str]:
    """POST image + saliency to olivas-processing and return base64-encoded result images.

    Args:
        image_data: Raw image bytes; base64-encoded here and sent as ``image_b64``.
        saliency_b64: Forwarded unchanged as ``saliency_b64``.
        shape: Forwarded unchanged as ``shape`` (presumably the saliency map
            dimensions; confirm against the processing service).
        gaze_sequence: Forwarded unchanged as ``gaze_sequence``.

    Returns:
        The decoded JSON body of the service response.

    Raises:
        RuntimeError: If ``settings.CLOUD_RUN_PROCESSING_URL`` is unset/empty,
            or the service answers with any status code other than 200.
        httpx.HTTPError: On transport-level failures such as timeouts.
        ValueError: If a 200 response body is not valid JSON.
    """
    base_url = settings.CLOUD_RUN_PROCESSING_URL
    if not base_url:
        # Fail with an explicit message instead of an AttributeError on None
        # or an httpx "missing protocol" error on an empty string.
        raise RuntimeError("CLOUD_RUN_PROCESSING_URL is not configured")

    url = base_url.rstrip("/") + "/process"
    image_b64 = base64.b64encode(image_data).decode()

    logger.info(
        "Calling processing service: image=%sB shape=%s", len(image_data), shape
    )

    payload = {
        "image_b64": image_b64,
        "saliency_b64": saliency_b64,
        "shape": shape,
        "gaze_sequence": gaze_sequence,
    }
    with httpx.Client(timeout=_PROCESSING_TIMEOUT) as client:
        response = client.post(url, json=payload, headers=_headers())

    if response.status_code != 200:
        # Truncate the body so a large error page does not flood the logs.
        raise RuntimeError(
            f"Processing service returned {response.status_code}: {response.text[:500]}"
        )

    return response.json()
|