olivas/backend/app/services/cloud_run_client.py
Vadym Samoilenko 2c5e17c7c4 Add Google Cloud Run offloading for ML inference and image processing
- Create cloud_run/saliency: FastAPI service running DeepGaze I/IIE/III
  on Cloud Run (4 vCPU, 16GB RAM); pre-downloads model weights in Docker
  build to eliminate cold-start delays; returns saliency map + gaze
  sequence + hotspots + design scores
- Create cloud_run/processing: lightweight FastAPI service for heatmap
  generation and gaze sequence visualization (2 vCPU, 4GB RAM)
- Add cloud_run/deploy.sh for gcloud deployment to project optical-414516
  in region europe-west2
- Refactor analysis pipeline to route via Cloud Run when
  CLOUD_RUN_SALIENCY_URL is set, with local fallback for dev mode
- Add cloud_run_client.py with sync httpx wrappers for background tasks
- Split pyproject.toml: base = API-only deps, [ml] = torch/deepgaze for
  local dev; production Dockerfile is now lightweight (~no PyTorch)
- Preserve Dockerfile.full + docker-compose.dev.yml for local ML dev
- Auth via X-Internal-Secret header (CLOUD_RUN_SECRET env var)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-04 19:39:52 +00:00

82 lines
2.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""HTTP client for OliVAS Cloud Run services.
Provides sync wrappers suitable for use inside BackgroundTasks threadpool.
Local fallback when the Cloud Run URLs are not configured is handled by the callers, not by this module.
"""
import base64
import logging
from typing import Any
import httpx
from app.config import settings
logger = logging.getLogger("olivas.cloud_run")
# 6-minute (360 s) timeout for saliency inference, 15 s to connect: DeepGaze on
# CPU can take ~30-60s on a warm instance, but the first request after a cold
# start may take longer.
_SALIENCY_TIMEOUT = httpx.Timeout(360.0, connect=15.0)
# Processing service: 120 s timeout, 10 s to connect.
_PROCESSING_TIMEOUT = httpx.Timeout(120.0, connect=10.0)
def _headers() -> dict[str, str]:
    """Build the auth headers attached to every Cloud Run request.

    Returns:
        A dict carrying ``settings.CLOUD_RUN_SECRET`` under the
        ``X-Internal-Secret`` key, or an empty dict when the secret is falsy.
    """
    secret = settings.CLOUD_RUN_SECRET
    return {"X-Internal-Secret": secret} if secret else {}
def call_saliency(image_data: bytes, model_name: str) -> dict[str, Any]:
    """POST an image to the olivas-saliency service and return its JSON reply.

    Args:
        image_data: Raw image bytes, uploaded as the multipart ``image`` field
            (always labelled ``image.png`` / ``image/png``).
        model_name: Sent to the service as the ``model`` form field.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If ``settings.CLOUD_RUN_SALIENCY_URL`` is not set, or if
            the service answers with any status code other than 200.
        httpx.HTTPError: On transport-level failures (timeouts, connection
            errors); these propagate unchanged.
    """
    base_url = settings.CLOUD_RUN_SALIENCY_URL
    if not base_url:
        # Fail with a clear message rather than an AttributeError on None or
        # an opaque URL error raised from inside httpx.
        raise RuntimeError("CLOUD_RUN_SALIENCY_URL is not configured")
    url = base_url.rstrip("/") + "/predict"

    # Lazy %-style arguments: the message is only built if the record is emitted.
    logger.info(
        "Calling saliency service: model=%s image=%dB", model_name, len(image_data)
    )
    with httpx.Client(timeout=_SALIENCY_TIMEOUT) as client:
        response = client.post(
            url,
            files={"image": ("image.png", image_data, "image/png")},
            data={"model": model_name},
            headers=_headers(),
        )

    if response.status_code != 200:
        # Truncate the body so a large error page does not flood the logs.
        raise RuntimeError(
            f"Saliency service returned {response.status_code}: {response.text[:500]}"
        )

    result = response.json()
    logger.info(
        "Saliency done: score=%s entropy=%s",
        result.get("overall_score"),
        result.get("entropy_score"),
    )
    return result
def call_processing(
    image_data: bytes,
    saliency_b64: str,
    shape: list[int],
    gaze_sequence: list[dict],
) -> dict[str, str]:
    """POST image + saliency to olivas-processing and return its JSON reply.

    The image bytes are base64-encoded here; the remaining arguments are
    forwarded unchanged in the JSON request body.

    Args:
        image_data: Raw image bytes, sent base64-encoded as ``image_b64``.
        saliency_b64: Sent as ``saliency_b64``.
        shape: Sent as ``shape``.
        gaze_sequence: Sent as ``gaze_sequence``.

    Returns:
        The decoded JSON body of the response (base64-encoded result images,
        per the service contract).

    Raises:
        RuntimeError: If ``settings.CLOUD_RUN_PROCESSING_URL`` is not set, or
            if the service answers with any status code other than 200.
        httpx.HTTPError: On transport-level failures (timeouts, connection
            errors); these propagate unchanged.
    """
    base_url = settings.CLOUD_RUN_PROCESSING_URL
    if not base_url:
        # Fail with a clear message rather than an AttributeError on None or
        # an opaque URL error raised from inside httpx.
        raise RuntimeError("CLOUD_RUN_PROCESSING_URL is not configured")
    url = base_url.rstrip("/") + "/process"

    image_b64 = base64.b64encode(image_data).decode()
    # Lazy %-style arguments: the message is only built if the record is emitted.
    logger.info(
        "Calling processing service: image=%dB shape=%s", len(image_data), shape
    )
    with httpx.Client(timeout=_PROCESSING_TIMEOUT) as client:
        response = client.post(
            url,
            json={
                "image_b64": image_b64,
                "saliency_b64": saliency_b64,
                "shape": shape,
                "gaze_sequence": gaze_sequence,
            },
            headers=_headers(),
        )

    if response.status_code != 200:
        # Truncate the body so a large error page does not flood the logs.
        raise RuntimeError(
            f"Processing service returned {response.status_code}: {response.text[:500]}"
        )
    return response.json()