Add Google Cloud Run offloading for ML inference and image processing

- Create cloud_run/saliency: FastAPI service running DeepGaze I/IIE/III
  on Cloud Run (4 vCPU, 16GB RAM); pre-downloads model weights in Docker
  build to eliminate cold-start delays; returns saliency map + gaze
  sequence + hotspots + design scores
- Create cloud_run/processing: lightweight FastAPI service for heatmap
  generation and gaze sequence visualization (2 vCPU, 4GB RAM)
- Add cloud_run/deploy.sh for gcloud deployment to project optical-414516
  in region europe-west2
- Refactor analysis pipeline to route via Cloud Run when
  CLOUD_RUN_SALIENCY_URL is set, with local fallback for dev mode
- Add cloud_run_client.py with sync httpx wrappers for background tasks
- Split pyproject.toml: base = API-only deps, [ml] = torch/deepgaze for
  local dev; production Dockerfile is now lightweight (~no PyTorch)
- Preserve Dockerfile.full + docker-compose.dev.yml for local ML dev
- Auth via X-Internal-Secret header (CLOUD_RUN_SECRET env var)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-03-04 19:39:52 +00:00
parent c1b80eb9a7
commit 2c5e17c7c4
15 changed files with 910 additions and 87 deletions

View file

@ -3,17 +3,14 @@ FROM python:3.12-slim
WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \
libgl1 libglib2.0-0 curl git && \
libgl1 libglib2.0-0 curl && \
rm -rf /var/lib/apt/lists/*
# Copy dependency spec first for layer caching
COPY pyproject.toml .
# Install dependencies (non-editable, just deps)
RUN pip install --no-cache-dir . && \
pip install --no-cache-dir psycopg2-binary einops ftfy regex && \
pip install --no-cache-dir "clip @ git+https://github.com/openai/CLIP.git" && \
pip install --no-cache-dir "deepgaze-pytorch @ git+https://github.com/matthias-k/DeepGaze.git"
# Install lightweight dependencies only (no PyTorch/DeepGaze — those run on Cloud Run)
RUN pip install --no-cache-dir ".[api]"
# Copy application source
COPY . .

23
backend/Dockerfile.full Normal file
View file

@ -0,0 +1,23 @@
# Full backend image: API plus the local ML stack (the ".[ml]" extra, CLIP, DeepGaze).
FROM python:3.12-slim
WORKDIR /app
# git is required for the "git+https://" pip installs further down.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 libglib2.0-0 curl git && \
    rm -rf /var/lib/apt/lists/*
# Copy dependency spec first for layer caching
COPY pyproject.toml .
# Install all dependencies including ML stack
RUN pip install --no-cache-dir ".[ml]" && \
    pip install --no-cache-dir "clip @ git+https://github.com/openai/CLIP.git" && \
    pip install --no-cache-dir "deepgaze-pytorch @ git+https://github.com/matthias-k/DeepGaze.git"
# Copy application source
COPY . .
# Create the uploads directory inside the image.
RUN mkdir -p data/uploads
EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

View file

@ -94,29 +94,44 @@ async def create_analysis(
return AnalysisStatus(id=analysis_id, status="pending")
def run_analysis_pipeline(analysis_id: str, image_data: bytes, model_name: str):
"""Background task: full saliency analysis pipeline. Runs sync in threadpool."""
import asyncio
import logging
import numpy as np
from app.services.saliency.model_manager import model_manager
from app.services.image_processing import prepare_for_inference, upscale_saliency
from app.services.heatmap import generate_heatmap_overlay, generate_standalone_heatmap
from app.services.gaze_sequence import extract_gaze_sequence
logger = logging.getLogger("olivas.pipeline")
# Use sync DB connection for background thread
def _make_sync_engine():
    """Create a synchronous SQLAlchemy engine from the configured ``DATABASE_URL``.

    The ``+asyncpg`` driver suffix in the URL is replaced with ``+psycopg2``.
    If creating that engine raises, the suffix is removed entirely and
    engine creation is retried with the resulting URL.

    Returns:
        A new engine; every call builds a fresh one.
    """
    from sqlalchemy import create_engine
    from app.config import settings

    configured_url = settings.DATABASE_URL
    try:
        return create_engine(configured_url.replace("+asyncpg", "+psycopg2"))
    except Exception:
        # Deliberately broad best-effort fallback: retry without an explicit driver.
        return create_engine(configured_url.replace("+asyncpg", ""))
def _save_file(path, data: bytes) -> None:
    """Write ``data`` to ``path`` in binary mode, overwriting any existing file."""
    with open(path, "wb") as sink:
        sink.write(data)
def run_analysis_pipeline(analysis_id: str, image_data: bytes, model_name: str):
"""Background task: full saliency analysis pipeline. Runs sync in threadpool.
Routes to Cloud Run if CLOUD_RUN_SALIENCY_URL is configured, otherwise runs locally.
"""
from app.config import settings
sync_url = settings.DATABASE_URL.replace("+asyncpg", "").replace("postgresql://", "postgresql+psycopg2://")
# Use psycopg2 if available, otherwise fallback
try:
sync_engine = create_engine(settings.DATABASE_URL.replace("+asyncpg", "+psycopg2"))
except Exception:
sync_engine = create_engine(settings.DATABASE_URL.replace("+asyncpg", ""))
if settings.use_cloud_run:
_run_pipeline_cloud_run(analysis_id, image_data, model_name)
else:
_run_pipeline_local(analysis_id, image_data, model_name)
def _run_pipeline_cloud_run(analysis_id: str, image_data: bytes, model_name: str):
"""Pipeline using Google Cloud Run for saliency + image processing."""
import base64
import logging
import numpy as np
from sqlalchemy.orm import Session
from app.services.cloud_run_client import call_saliency, call_processing
from app.config import settings
logger = logging.getLogger("olivas.pipeline")
sync_engine = _make_sync_engine()
try:
with Session(sync_engine) as db:
@ -124,75 +139,63 @@ def run_analysis_pipeline(analysis_id: str, image_data: bytes, model_name: str):
analysis.status = "processing"
db.commit()
logger.info(f"Starting analysis {analysis_id}")
logger.info(f"[cloud-run] Starting analysis {analysis_id}")
# 1. Saliency inference via Cloud Run
sal_result = call_saliency(image_data, model_name)
saliency_b64: str = sal_result["saliency_b64"]
shape: list[int] = sal_result["shape"]
gaze_seq: list[dict] = sal_result["gaze_sequence"]
hotspots: list[dict] = sal_result["hotspots"]
overall_score: float = sal_result["overall_score"]
entropy_score: float = sal_result["entropy_score"]
# 2. Save raw saliency locally
h, w = shape
saliency_full = np.frombuffer(
base64.b64decode(saliency_b64), dtype=np.float32
).reshape(h, w)
np.save(str(storage.get_path(analysis_id, "saliency_raw.npy")), saliency_full)
# 3. Save saliency grayscale PNG locally
saliency_uint8 = (saliency_full * 255).astype(np.uint8)
saliency_img = Image.fromarray(saliency_uint8, mode="L")
buf = io.BytesIO()
saliency_img.save(buf, format="PNG")
_save_file(storage.get_path(analysis_id, "saliency_gray.png"), buf.getvalue())
# 4. Image post-processing via Cloud Run (or local fallback)
if settings.CLOUD_RUN_PROCESSING_URL:
proc_result = call_processing(image_data, saliency_b64, shape, gaze_seq)
heatmap_overlay_data = base64.b64decode(proc_result["heatmap_overlay_b64"])
heatmap_standalone_data = base64.b64decode(proc_result["heatmap_standalone_b64"])
gaze_img_data = base64.b64decode(proc_result["gaze_sequence_img_b64"])
else:
# Local fallback for image processing
from app.services.heatmap import generate_heatmap_overlay, generate_standalone_heatmap
image = Image.open(io.BytesIO(image_data)).convert("RGB")
heatmap_overlay_data = _img_to_png_bytes(generate_heatmap_overlay(image, saliency_full))
heatmap_standalone_data = _img_to_png_bytes(generate_standalone_heatmap(saliency_full))
gaze_img_data = _img_to_png_bytes(_draw_gaze_sequence(image, gaze_seq))
# 1. Resize for inference
resized, scale = prepare_for_inference(image)
logger.info(f"Image resized: {image.size} -> {resized.size}")
_save_file(storage.get_path(analysis_id, "heatmap_overlay.png"), heatmap_overlay_data)
_save_file(storage.get_path(analysis_id, "heatmap_standalone.png"), heatmap_standalone_data)
_save_file(storage.get_path(analysis_id, "gaze_sequence.png"), gaze_img_data)
# 2. Run saliency model
logger.info(f"Running {model_name} inference...")
saliency = model_manager.predict(resized, model_name)
logger.info("Inference complete")
# 3. Upscale to original dimensions
saliency_full = upscale_saliency(saliency, image.height, image.width)
# 4. Save raw saliency as .npy
np.save(str(storage.get_path(analysis_id, "saliency_raw.npy")), saliency_full)
# 5. Save saliency as grayscale PNG
saliency_uint8 = (saliency_full * 255).astype(np.uint8)
saliency_img = Image.fromarray(saliency_uint8, mode="L")
buf = io.BytesIO()
saliency_img.save(buf, format="PNG")
with open(storage.get_path(analysis_id, "saliency_gray.png"), "wb") as f:
f.write(buf.getvalue())
# 6. Generate heatmap overlay
heatmap_overlay = generate_heatmap_overlay(image, saliency_full)
buf = io.BytesIO()
heatmap_overlay.save(buf, format="PNG")
with open(storage.get_path(analysis_id, "heatmap_overlay.png"), "wb") as f:
f.write(buf.getvalue())
# 7. Generate standalone heatmap
heatmap_standalone = generate_standalone_heatmap(saliency_full)
buf = io.BytesIO()
heatmap_standalone.save(buf, format="PNG")
with open(storage.get_path(analysis_id, "heatmap_standalone.png"), "wb") as f:
f.write(buf.getvalue())
# 8. Extract gaze sequence
gaze_seq = extract_gaze_sequence(saliency_full, num_fixations=5)
# 9. Extract hotspots (needed for composite score)
hotspots = _extract_hotspots(saliency_full, num_hotspots=5)
# 10. Compute composite design effectiveness score
overall_score, entropy_score = _compute_design_score(
saliency_full, hotspots, gaze_seq
)
# 11. Generate gaze sequence image
gaze_img = _draw_gaze_sequence(image, gaze_seq)
buf = io.BytesIO()
gaze_img.save(buf, format="PNG")
with open(storage.get_path(analysis_id, "gaze_sequence.png"), "wb") as f:
f.write(buf.getvalue())
# Update DB
# 5. Update DB
with Session(sync_engine) as db:
analysis = db.get(Analysis, analysis_id)
analysis.status = "completed"
analysis.gaze_sequence = gaze_seq
analysis.hotspots = hotspots
analysis.overall_score = overall_score
analysis.entropy_score = entropy_score
db.commit()
logger.info(f"Analysis {analysis_id} completed (score={overall_score}, entropy={entropy_score})")
logger.info(f"[cloud-run] Analysis {analysis_id} completed (score={overall_score})")
except Exception as e:
logger.error(f"Analysis {analysis_id} failed: {e}", exc_info=True)
logger.error(f"[cloud-run] Analysis {analysis_id} failed: {e}", exc_info=True)
try:
with Session(sync_engine) as db:
analysis = db.get(Analysis, analysis_id)
@ -203,6 +206,84 @@ def run_analysis_pipeline(analysis_id: str, image_data: bytes, model_name: str):
pass
def _run_pipeline_local(analysis_id: str, image_data: bytes, model_name: str):
    """Pipeline running entirely locally (dev mode / no Cloud Run).

    Marks the analysis as "processing", runs saliency inference with the local
    model manager, writes the raw saliency array and the PNG artefacts to
    storage, then stores gaze sequence, hotspots and scores on the analysis row
    and marks it "completed". Any exception is logged and the analysis is
    marked "failed" instead of propagating.

    Args:
        analysis_id: Primary key of the ``Analysis`` row to update.
        image_data: Encoded image file bytes.
        model_name: Saliency model identifier passed to the model manager.
    """
    import logging
    import numpy as np
    from sqlalchemy.orm import Session
    from app.services.saliency.model_manager import model_manager
    from app.services.image_processing import prepare_for_inference, upscale_saliency
    from app.services.heatmap import generate_heatmap_overlay, generate_standalone_heatmap
    from app.services.gaze_sequence import extract_gaze_sequence

    logger = logging.getLogger("olivas.pipeline")
    sync_engine = _make_sync_engine()
    try:
        with Session(sync_engine) as db:
            analysis = db.get(Analysis, analysis_id)
            analysis.status = "processing"
            db.commit()
            logger.info(f"[local] Starting analysis {analysis_id}")

            image = Image.open(io.BytesIO(image_data)).convert("RGB")
            # The scale factor is not needed: upscaling targets the original size.
            resized, _ = prepare_for_inference(image)
            logger.info(f"Image resized: {image.size} -> {resized.size}")
            logger.info(f"Running {model_name} inference...")
            saliency = model_manager.predict(resized, model_name)
            logger.info("Inference complete")
            saliency_full = upscale_saliency(saliency, image.height, image.width)

            # Raw saliency array plus its grayscale PNG rendering.
            np.save(str(storage.get_path(analysis_id, "saliency_raw.npy")), saliency_full)
            saliency_uint8 = (saliency_full * 255).astype(np.uint8)
            saliency_img = Image.fromarray(saliency_uint8, mode="L")
            buf = io.BytesIO()
            saliency_img.save(buf, format="PNG")
            _save_file(storage.get_path(analysis_id, "saliency_gray.png"), buf.getvalue())

            heatmap_overlay = generate_heatmap_overlay(image, saliency_full)
            _save_file(
                storage.get_path(analysis_id, "heatmap_overlay.png"),
                _img_to_png_bytes(heatmap_overlay),
            )
            heatmap_standalone = generate_standalone_heatmap(saliency_full)
            _save_file(
                storage.get_path(analysis_id, "heatmap_standalone.png"),
                _img_to_png_bytes(heatmap_standalone),
            )

            gaze_seq = extract_gaze_sequence(saliency_full, num_fixations=5)
            hotspots = _extract_hotspots(saliency_full, num_hotspots=5)
            overall_score, entropy_score = _compute_design_score(saliency_full, hotspots, gaze_seq)
            gaze_img = _draw_gaze_sequence(image, gaze_seq)
            _save_file(
                storage.get_path(analysis_id, "gaze_sequence.png"),
                _img_to_png_bytes(gaze_img),
            )

            analysis.status = "completed"
            analysis.gaze_sequence = gaze_seq
            analysis.hotspots = hotspots
            analysis.overall_score = overall_score
            analysis.entropy_score = entropy_score
            db.commit()
            logger.info(f"[local] Analysis {analysis_id} completed (score={overall_score}, entropy={entropy_score})")
    except Exception as e:
        logger.error(f"[local] Analysis {analysis_id} failed: {e}", exc_info=True)
        try:
            with Session(sync_engine) as db:
                analysis = db.get(Analysis, analysis_id)
                if analysis:
                    analysis.status = "failed"
                    db.commit()
        except Exception:
            # Still best-effort, but leave a trace instead of failing silently.
            logger.error(
                f"[local] Could not mark analysis {analysis_id} as failed",
                exc_info=True,
            )
def _img_to_png_bytes(img: Image.Image) -> bytes:
    """Serialise ``img`` as PNG and return the encoded bytes."""
    png_buffer = io.BytesIO()
    img.save(png_buffer, format="PNG")
    encoded = png_buffer.getvalue()
    return encoded
def _compute_design_score(saliency_full, hotspots, gaze_seq):
"""Compute composite Design Effectiveness Score (0-100) and raw entropy score.

View file

@ -10,6 +10,17 @@ class Settings(BaseSettings):
BACKEND_HOST: str = "0.0.0.0"
BACKEND_PORT: int = 8000
# Google Cloud Run service URLs (empty = use local processing)
CLOUD_RUN_SALIENCY_URL: str = "" # e.g. https://olivas-saliency-xxx-ew.a.run.app
CLOUD_RUN_PROCESSING_URL: str = "" # e.g. https://olivas-processing-xxx-ew.a.run.app
CLOUD_RUN_SECRET: str = "" # Shared secret for X-Internal-Secret header
GOOGLE_CLOUD_PROJECT: str = "optical-414516"
@property
def use_cloud_run(self) -> bool:
return bool(self.CLOUD_RUN_SALIENCY_URL)
@property
def device(self) -> str:
if self.DEVICE == "auto":

View file

@ -0,0 +1,82 @@
"""HTTP client for OliVAS Cloud Run services.
Provides sync wrappers suitable for use inside BackgroundTasks threadpool.
The fallback to local processing when the Cloud Run URLs are not configured is handled by the calling pipeline, not by this module.
"""
import base64
import logging
from typing import Any
import httpx
from app.config import settings
logger = logging.getLogger("olivas.cloud_run")
# 6-minute (360 s) read timeout for saliency inference (DeepGaze on CPU can take ~30–60 s when warm,
# but the first request after a cold start may take longer)
_SALIENCY_TIMEOUT = httpx.Timeout(360.0, connect=15.0)
_PROCESSING_TIMEOUT = httpx.Timeout(120.0, connect=10.0)
def _headers() -> dict[str, str]:
    """Return the shared-secret header, or an empty dict when no secret is configured."""
    secret = settings.CLOUD_RUN_SECRET
    return {"X-Internal-Secret": secret} if secret else {}
def call_saliency(image_data: bytes, model_name: str) -> dict[str, Any]:
    """Send an image to the saliency service and return its decoded JSON reply.

    Args:
        image_data: Encoded image file bytes, uploaded as a multipart file.
        model_name: Model identifier sent in the ``model`` form field.

    Returns:
        The parsed JSON body of the ``/predict`` response.

    Raises:
        RuntimeError: If the service answers with any status other than 200.
    """
    endpoint = settings.CLOUD_RUN_SALIENCY_URL.rstrip("/") + "/predict"
    logger.info(f"Calling saliency service: model={model_name} image={len(image_data)}B")

    upload = {"image": ("image.png", image_data, "image/png")}
    form_fields = {"model": model_name}
    with httpx.Client(timeout=_SALIENCY_TIMEOUT) as client:
        response = client.post(endpoint, files=upload, data=form_fields, headers=_headers())

    if response.status_code != 200:
        raise RuntimeError(
            f"Saliency service returned {response.status_code}: {response.text[:500]}"
        )

    result = response.json()
    logger.info(
        f"Saliency done: score={result.get('overall_score')} entropy={result.get('entropy_score')}"
    )
    return result
def call_processing(
    image_data: bytes,
    saliency_b64: str,
    shape: list[int],
    gaze_sequence: list[dict],
) -> dict[str, str]:
    """Send image and saliency data to the processing service and return its JSON reply.

    Args:
        image_data: Encoded image file bytes; base64-encoded before sending.
        saliency_b64: Saliency map, already base64-encoded.
        shape: Saliency map shape, forwarded unchanged.
        gaze_sequence: Fixation dicts, forwarded unchanged.

    Returns:
        The parsed JSON body of the ``/process`` response.

    Raises:
        RuntimeError: If the service answers with any status other than 200.
    """
    endpoint = settings.CLOUD_RUN_PROCESSING_URL.rstrip("/") + "/process"
    payload = {
        "image_b64": base64.b64encode(image_data).decode(),
        "saliency_b64": saliency_b64,
        "shape": shape,
        "gaze_sequence": gaze_sequence,
    }
    logger.info(f"Calling processing service: image={len(image_data)}B shape={shape}")

    with httpx.Client(timeout=_PROCESSING_TIMEOUT) as client:
        response = client.post(endpoint, json=payload, headers=_headers())

    if response.status_code != 200:
        raise RuntimeError(
            f"Processing service returned {response.status_code}: {response.text[:500]}"
        )
    return response.json()

View file

@ -10,21 +10,33 @@ dependencies = [
"sqlalchemy[asyncio]>=2.0",
"alembic>=1.13",
"asyncpg>=0.29",
"psycopg2-binary>=2.9",
"pydantic>=2.0",
"pydantic-settings>=2.0",
"pillow>=10.0",
"numpy>=1.26",
"torch>=2.1",
"torchvision>=0.16",
"scipy>=1.11",
"matplotlib>=3.8",
"scikit-image>=0.22",
"reportlab>=4.0",
"aiofiles>=23.0",
"anthropic>=0.40",
"httpx>=0.27",
]
[project.optional-dependencies]
# Lightweight production install (same as base — alias for clarity)
api = []
# Full local ML stack for development without Cloud Run
ml = [
"torch>=2.1",
"torchvision>=0.16",
"scikit-image>=0.22",
"einops",
"ftfy",
"regex",
]
dev = [
"pytest>=8.0",
"pytest-asyncio>=0.23",

96
cloud_run/deploy.sh Executable file
View file

@ -0,0 +1,96 @@
#!/usr/bin/env bash
# Deploy OliVAS Cloud Run services to Google Cloud Run
# Project: optical-414516 | Region: europe-west2
#
# Usage:
# ./cloud_run/deploy.sh [saliency|processing|all]
#
# Requirements:
# - gcloud CLI authenticated: gcloud auth login
# - Project set: gcloud config set project optical-414516
# - Cloud Run API enabled
# - CLOUD_RUN_SECRET env var set (shared auth secret)
# Abort on command errors, on use of unset variables, and on failures inside pipelines.
set -euo pipefail
PROJECT="optical-414516"
REGION="europe-west2"
# Shared secret passed to both services; empty when CLOUD_RUN_SECRET is not set.
SECRET="${CLOUD_RUN_SECRET:-}"
if [ -z "$SECRET" ]; then
  echo "WARNING: CLOUD_RUN_SECRET is not set. Services will be unauthenticated."
fi
# Absolute directory of this script, so the --source paths work from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Switches the active project of the local gcloud configuration.
gcloud config set project "$PROJECT"
# Build and deploy the saliency service from ./saliency, then print its URL.
# Sets the global SALIENCY_URL.
deploy_saliency() {
  echo "=== Deploying olivas-saliency ==="
  local -a deploy_flags=(
    --source "$SCRIPT_DIR/saliency"
    --region "$REGION"
    --platform managed
    --allow-unauthenticated
    --cpu 4
    --memory 16Gi
    --timeout 300
    --concurrency 1
    --min-instances 0
    --max-instances 3
    --set-env-vars "CLOUD_RUN_SECRET=${SECRET},DEVICE=cpu"
    --no-cpu-throttling
  )
  gcloud run deploy olivas-saliency "${deploy_flags[@]}"
  SALIENCY_URL=$(gcloud run services describe olivas-saliency \
    --region "$REGION" --format "value(status.url)")
  echo "olivas-saliency deployed: $SALIENCY_URL"
}
# Build and deploy the processing service from ./processing, then print its URL.
# Sets the global PROCESSING_URL.
deploy_processing() {
  echo "=== Deploying olivas-processing ==="
  local -a deploy_flags=(
    --source "$SCRIPT_DIR/processing"
    --region "$REGION"
    --platform managed
    --allow-unauthenticated
    --cpu 2
    --memory 4Gi
    --timeout 120
    --concurrency 2
    --min-instances 0
    --max-instances 5
    --set-env-vars "CLOUD_RUN_SECRET=${SECRET}"
  )
  gcloud run deploy olivas-processing "${deploy_flags[@]}"
  PROCESSING_URL=$(gcloud run services describe olivas-processing \
    --region "$REGION" --format "value(status.url)")
  echo "olivas-processing deployed: $PROCESSING_URL"
}
# Dispatch on the first argument (default: all).
TARGET="${1:-all}"
case "$TARGET" in
  saliency)
    deploy_saliency
    ;;
  processing)
    deploy_processing
    ;;
  all)
    deploy_saliency
    deploy_processing
    echo ""
    echo "=== Deployment complete ==="
    echo "Set these env vars on your VPS (in .env or docker-compose.yml):"
    # Fall back to a placeholder if a URL cannot be looked up.
    SALIENCY_URL=$(gcloud run services describe olivas-saliency \
      --region "$REGION" --format "value(status.url)" 2>/dev/null || echo "<saliency-url>")
    PROCESSING_URL=$(gcloud run services describe olivas-processing \
      --region "$REGION" --format "value(status.url)" 2>/dev/null || echo "<processing-url>")
    echo " CLOUD_RUN_SALIENCY_URL=$SALIENCY_URL"
    echo " CLOUD_RUN_PROCESSING_URL=$PROCESSING_URL"
    # Do not print the secret itself: stdout often ends up in CI logs or
    # terminal scrollback. The caller already has the value in its environment.
    if [ -n "$SECRET" ]; then
      echo " CLOUD_RUN_SECRET=<same value as the CLOUD_RUN_SECRET used for this deploy>"
    else
      echo " CLOUD_RUN_SECRET="
    fi
    ;;
  *)
    echo "Usage: $0 [saliency|processing|all]"
    exit 1
    ;;
esac

View file

@ -0,0 +1,19 @@
# Image for the processing service (main.py served by uvicorn).
FROM python:3.12-slim
WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 libglib2.0-0 && \
    rm -rf /var/lib/apt/lists/*
# Install dependencies before copying the source so this layer stays cached.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY main.py .
ENV PYTHONUNBUFFERED=1
# NOTE(review): PORT is exported, but the CMD below hard-codes 8080 rather than reading it.
ENV PORT=8080
EXPOSE 8080
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

View file

@ -0,0 +1,112 @@
"""OliVAS Processing Cloud Run Service.
Handles image post-processing from saliency maps:
- Heatmap overlay generation
- Standalone heatmap generation
- Gaze sequence visualization image
"""
import base64
import io
import logging
import os
import matplotlib

# Select the non-interactive Agg backend.
matplotlib.use("Agg")
# NOTE: `matplotlib.colormaps` is a colormap registry exposed as an attribute of
# the `matplotlib` package, not an importable submodule. `import
# matplotlib.colormaps` therefore fails with ModuleNotFoundError at start-up;
# the plain `import matplotlib` above is all that
# `matplotlib.colormaps.get_cmap(...)` needs.
import numpy as np
from fastapi import FastAPI, Header, HTTPException
from PIL import Image, ImageDraw, ImageFont
from pydantic import BaseModel
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("olivas.processing")
INTERNAL_SECRET = os.environ.get("CLOUD_RUN_SECRET", "")
app = FastAPI(title="OliVAS Processing Service")
def _check_auth(x_internal_secret: str | None) -> None:
    """Reject the request unless it carries the configured shared secret.

    When ``INTERNAL_SECRET`` is empty, authentication is disabled and every
    request is accepted.

    Args:
        x_internal_secret: Value of the ``X-Internal-Secret`` header, or
            ``None`` when the header is absent.

    Raises:
        HTTPException: 401 if a secret is configured and the header does not match.
    """
    import hmac

    if not INTERNAL_SECRET:
        return
    # Constant-time comparison so response timing does not leak how much of the
    # secret matched. Compare bytes: compare_digest rejects non-ASCII str input.
    provided = (x_internal_secret or "").encode("utf-8")
    if not hmac.compare_digest(provided, INTERNAL_SECRET.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorized")
class ProcessRequest(BaseModel):
    """JSON body accepted by the ``/process`` endpoint."""

    # Base64-encoded image file bytes; the handler opens them with PIL.
    image_b64: str
    # Base64 of the raw float32 saliency values (decoded and reshaped to `shape`).
    saliency_b64: str
    shape: list[int]  # [H, W]
    # Fixation dicts; the gaze renderer reads the "x", "y" and "rank" keys.
    gaze_sequence: list[dict]
def _img_to_b64(img: Image.Image) -> str:
    """Encode ``img`` as PNG and return the bytes as a base64 string."""
    png_buffer = io.BytesIO()
    img.save(png_buffer, format="PNG")
    png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode()
def _decode_saliency(saliency_b64: str, shape: list[int]) -> np.ndarray:
    """Decode a base64 float32 buffer into a 2-D array of the given ``[H, W]`` shape.

    Raises:
        ValueError: If ``shape`` does not have exactly two entries or does not
            match the number of decoded values.
    """
    rows, cols = shape
    flat_values = np.frombuffer(base64.b64decode(saliency_b64), dtype=np.float32)
    return flat_values.reshape(rows, cols)
def _generate_heatmap_overlay(image: Image.Image, saliency: np.ndarray) -> Image.Image:
    """Blend a "jet"-coloured rendering of ``saliency`` 50/50 over ``image``."""
    jet = matplotlib.colormaps.get_cmap("jet")
    rgba = jet(saliency)
    # Drop the alpha channel and convert to 8-bit RGB.
    rgb_uint8 = (rgba[:, :, :3] * 255).astype(np.uint8)
    heatmap = Image.fromarray(rgb_uint8)
    heatmap = heatmap.resize(image.size, Image.LANCZOS)
    base_rgb = image.convert("RGB")
    return Image.blend(base_rgb, heatmap, 0.5)
def _generate_standalone_heatmap(saliency: np.ndarray) -> Image.Image:
    """Render ``saliency`` with the "jet" colormap as an RGB image of the same size."""
    jet = matplotlib.colormaps.get_cmap("jet")
    rgba = jet(saliency)
    rgb_uint8 = (rgba[:, :, :3] * 255).astype(np.uint8)
    return Image.fromarray(rgb_uint8)
def _draw_gaze_sequence(image: Image.Image, gaze_seq: list[dict]) -> Image.Image:
    """Return a copy of ``image`` annotated with numbered fixation circles.

    Each fixation gets a circle and its ``rank`` label; consecutive fixations
    are joined by a line drawn in the colour of the earlier one.
    """
    palette = ["#FF4444", "#FF8800", "#FFCC00", "#44CC44", "#4488FF"]
    radius = 25

    annotated = image.copy()
    canvas = ImageDraw.Draw(annotated)
    label_font = ImageFont.load_default(size=24)
    last_index = len(gaze_seq) - 1

    for idx, fixation in enumerate(gaze_seq):
        cx, cy = fixation["x"], fixation["y"]
        colour = palette[idx % len(palette)]
        bbox = [cx - radius, cy - radius, cx + radius, cy + radius]
        canvas.ellipse(bbox, outline=colour, width=3)
        canvas.text((cx - 6, cy - 12), str(fixation["rank"]), fill=colour, font=label_font)
        if idx == last_index:
            continue
        following = gaze_seq[idx + 1]
        canvas.line([cx, cy, following["x"], following["y"]], fill=colour, width=2)
    return annotated
@app.get("/health")
async def health():
    """Health probe; always reports ``{"status": "ok"}``."""
    return {"status": "ok"}
@app.post("/process")
async def process_images(
    request: ProcessRequest,
    x_internal_secret: str | None = Header(None),
):
    """Generate the heatmap overlay, standalone heatmap and gaze-sequence image.

    Args:
        request: Image, saliency buffer, saliency shape and gaze sequence.
        x_internal_secret: Shared-secret header checked by ``_check_auth``.

    Returns:
        A dict with three base64-encoded PNGs under ``heatmap_overlay_b64``,
        ``heatmap_standalone_b64`` and ``gaze_sequence_img_b64``.

    Raises:
        HTTPException: 401 on a bad secret, 400 when the image or saliency
            payload cannot be decoded.
    """
    _check_auth(x_internal_secret)
    try:
        image_data = base64.b64decode(request.image_b64)
        saliency = _decode_saliency(request.saliency_b64, request.shape)
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
    except (ValueError, OSError) as exc:
        # Bad base64, a shape/buffer mismatch, or an unreadable image is a
        # client error, not a server fault.
        raise HTTPException(status_code=400, detail=f"Invalid payload: {exc}") from exc
    logger.info(f"Processing image {image.size}, saliency {saliency.shape}")

    overlay = _generate_heatmap_overlay(image, saliency)
    standalone = _generate_standalone_heatmap(saliency)
    gaze_img = _draw_gaze_sequence(image, request.gaze_sequence)
    return {
        "heatmap_overlay_b64": _img_to_b64(overlay),
        "heatmap_standalone_b64": _img_to_b64(standalone),
        "gaze_sequence_img_b64": _img_to_b64(gaze_img),
    }

View file

@ -0,0 +1,8 @@
fastapi>=0.115.0
uvicorn[standard]>=0.30.0
pillow>=10.0
numpy>=1.26
matplotlib>=3.8
scipy>=1.11
reportlab>=4.0
pydantic>=2.0

View file

@ -0,0 +1,30 @@
# Image for the saliency service (main.py served by uvicorn).
FROM python:3.12-slim
WORKDIR /app
# git is required for the "git+https://" pip installs below.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 libglib2.0-0 curl git && \
    rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir einops ftfy regex && \
    pip install --no-cache-dir "clip @ git+https://github.com/openai/CLIP.git" && \
    pip install --no-cache-dir "deepgaze-pytorch @ git+https://github.com/matthias-k/DeepGaze.git"
# Pre-download DeepGaze IIE weights during build to eliminate cold-start downloads
# (only the IIE variant is instantiated here).
RUN python -c "\
import deepgaze_pytorch; \
print('Pre-loading DeepGaze IIE weights...'); \
deepgaze_pytorch.DeepGazeIIE(pretrained=True); \
print('Weights cached.')"
COPY main.py .
ENV PYTHONUNBUFFERED=1
# NOTE(review): PORT is exported, but the CMD below hard-codes 8080 rather than reading it.
ENV PORT=8080
EXPOSE 8080
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

331
cloud_run/saliency/main.py Normal file
View file

@ -0,0 +1,331 @@
"""OliVAS Saliency Cloud Run Service.
Runs DeepGaze saliency inference and returns:
- saliency map (base64 float32 bytes)
- gaze sequence
- hotspots
- design effectiveness scores
"""
import base64
import io
import logging
import os
from contextlib import asynccontextmanager
import numpy as np
from fastapi import FastAPI, File, Form, Header, HTTPException, UploadFile
from PIL import Image
from scipy.ndimage import gaussian_filter, zoom
from scipy.special import logsumexp
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("olivas.saliency")
INTERNAL_SECRET = os.environ.get("CLOUD_RUN_SECRET", "")
DEVICE = os.environ.get("DEVICE", "auto")
# Global model cache: {model_key: {"model": ..., "centerbias": ...}}
_model_cache: dict = {}
VARIANT_MAP = {
"deepgaze_i": ("DeepGazeI", "I"),
"deepgaze_iie": ("DeepGazeIIE", "IIE"),
"deepgaze_iii": ("DeepGazeIII", "III"),
}
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: log start-up, then clear the model cache on shutdown."""
    logger.info("OliVAS Saliency service starting")
    yield
    # Everything after the yield runs when the application shuts down.
    _model_cache.clear()
    logger.info("OliVAS Saliency service stopped")
app = FastAPI(title="OliVAS Saliency Service", lifespan=lifespan)
def _check_auth(x_internal_secret: str | None) -> None:
    """Reject the request unless it carries the configured shared secret.

    When ``INTERNAL_SECRET`` is empty, authentication is disabled and every
    request is accepted.

    Args:
        x_internal_secret: Value of the ``X-Internal-Secret`` header, or
            ``None`` when the header is absent.

    Raises:
        HTTPException: 401 if a secret is configured and the header does not match.
    """
    import hmac

    if not INTERNAL_SECRET:
        return
    # Constant-time comparison so response timing does not leak how much of the
    # secret matched. Compare bytes: compare_digest rejects non-ASCII str input.
    provided = (x_internal_secret or "").encode("utf-8")
    if not hmac.compare_digest(provided, INTERNAL_SECRET.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorized")
def _resolve_device() -> str:
    """Return the device name to run on.

    An explicit ``DEVICE`` setting is returned unchanged. For ``"auto"``,
    "cuda" is chosen when torch reports CUDA as available, otherwise "cpu"
    (also when torch cannot be imported).
    """
    if DEVICE != "auto":
        return DEVICE
    try:
        import torch

        has_cuda = torch.cuda.is_available()
    except ImportError:
        return "cpu"
    return "cuda" if has_cuda else "cpu"
def _get_model(model_name: str) -> dict:
    """Return the cached model bundle for ``model_name``, loading it on first use.

    The bundle is a dict with the keys ``"model"`` (the loaded network in eval
    mode), ``"centerbias"`` (a 1024x1024 template array) and ``"device"`` (the
    torch device). Bundles are cached per model name and device.

    Raises:
        ValueError: If ``model_name`` is not a key of ``VARIANT_MAP``.
    """
    device = _resolve_device()
    cache_key = f"{model_name}:{device}"
    cached = _model_cache.get(cache_key)
    if cache_key in _model_cache:
        return cached

    if model_name not in VARIANT_MAP:
        raise ValueError(f"Unknown model: {model_name}. Choose from {list(VARIANT_MAP)}")
    class_name = VARIANT_MAP[model_name][0]

    import torch
    import deepgaze_pytorch

    logger.info(f"Loading {class_name} on {device}...")
    device_obj = torch.device(device)
    network = getattr(deepgaze_pytorch, class_name)(pretrained=True).to(device_obj)
    network.eval()

    # Pre-compute centerbias template
    size = 1024
    axis = np.linspace(-1, 1, size)
    xx, yy = np.meshgrid(axis, axis)
    centerbias = -0.5 * (xx**2 + yy**2) / 0.5**2

    bundle = {"model": network, "centerbias": centerbias, "device": device_obj}
    _model_cache[cache_key] = bundle
    logger.info(f"Loaded {class_name}")
    return bundle
def _run_inference(image: Image.Image, model_name: str) -> np.ndarray:
    """Run the saliency model on ``image`` and return a min-max normalised map.

    Args:
        image: Input image; converted to RGB before inference.
        model_name: Key of ``VARIANT_MAP`` selecting the model.

    Returns:
        The exponentiated model output rescaled to [0, 1]; all zeros when the
        output is (numerically) constant.
    """
    import torch

    model_data = _get_model(model_name)
    model = model_data["model"]
    centerbias_template = model_data["centerbias"]
    device_obj = model_data["device"]

    img_np = np.array(image.convert("RGB"))
    h, w = img_np.shape[:2]
    # Build the (1, 3, H, W) batch straight from the array; torch.tensor([ndarray])
    # goes through a slow list-of-arrays conversion.
    chw = np.ascontiguousarray(img_np.transpose(2, 0, 1))
    image_tensor = torch.from_numpy(chw).unsqueeze(0).float().to(device_obj)

    # Resize the centerbias template to the image size and renormalise it in log space.
    cb = zoom(
        centerbias_template,
        (h / centerbias_template.shape[0], w / centerbias_template.shape[1]),
        order=0,
    )
    cb -= logsumexp(cb)
    centerbias_tensor = torch.from_numpy(cb).unsqueeze(0).float().to(device_obj)

    # NOTE(review): the model is called with (image, centerbias) only; confirm
    # that every class listed in VARIANT_MAP accepts this call signature.
    with torch.no_grad():
        log_density = model(image_tensor, centerbias_tensor)
    saliency = torch.exp(log_density).cpu().numpy().squeeze()

    sal_min, sal_max = saliency.min(), saliency.max()
    if sal_max - sal_min > 1e-10:
        saliency = (saliency - sal_min) / (sal_max - sal_min)
    else:
        saliency = np.zeros_like(saliency)
    return saliency
def _prepare_for_inference(image: Image.Image, max_size: int = 1024) -> tuple[Image.Image, float]:
    """Downscale ``image`` so its longer side is at most ``max_size`` pixels.

    Args:
        image: Input image.
        max_size: Upper bound for the longer side.

    Returns:
        ``(image, scale)``. Images that already fit are returned unchanged
        with a scale of 1.0; otherwise the LANCZOS-resized image and the
        applied scale factor.
    """
    w, h = image.size
    scale = max_size / max(w, h)
    if scale >= 1.0:
        return image, 1.0
    # Clamp to 1 px: for extreme aspect ratios the short side would otherwise
    # truncate to 0 and the resize would fail.
    new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
    return image.resize(new_size, Image.LANCZOS), scale
def _upscale_saliency(saliency: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
    """Resize ``saliency`` to ``(target_h, target_w)`` with linear interpolation.

    The input array itself is returned when it already has the target shape.
    """
    if saliency.shape != (target_h, target_w):
        zoom_factors = (target_h / saliency.shape[0], target_w / saliency.shape[1])
        return zoom(saliency, zoom_factors, order=1)
    return saliency
def _extract_gaze_sequence(saliency: np.ndarray, num_fixations: int = 5) -> list[dict]:
    """Pick up to ``num_fixations`` fixation points by repeated peak-finding.

    On each round the working map is smoothed, its maximum becomes the next
    fixation, and a disc around it (radius 10% of the longer side) is zeroed
    so later rounds look elsewhere. Extraction stops early once the smoothed
    map is effectively empty.

    Returns:
        A list of dicts with ``rank``, pixel ``x``/``y``, percentage
        ``x_pct``/``y_pct`` and ``probability`` (the unsmoothed saliency value
        at the peak).
    """
    working = saliency.copy().astype(np.float64)
    height, width = working.shape
    longer_side = max(height, width)
    inhibition_radius = int(longer_side * 0.1)
    smoothing_sigma = longer_side * 0.01
    rows, cols = np.ogrid[:height, :width]

    fixations = []
    rank = 1
    while rank <= num_fixations:
        smoothed = gaussian_filter(working, sigma=smoothing_sigma)
        if smoothed.max() < 1e-10:
            break
        peak_row, peak_col = np.unravel_index(np.argmax(smoothed), smoothed.shape)
        py, px = int(peak_row), int(peak_col)
        fixations.append({
            "rank": rank,
            "x": px,
            "y": py,
            "x_pct": round(px / width * 100, 1),
            "y_pct": round(py / height * 100, 1),
            "probability": round(float(saliency[py, px]), 4),
        })
        # Inhibition of return: suppress the neighbourhood of the chosen peak.
        inhibited = (cols - px) ** 2 + (rows - py) ** 2 <= inhibition_radius**2
        working[inhibited] = 0.0
        rank += 1
    return fixations
def _extract_hotspots(saliency: np.ndarray, num_hotspots: int = 5) -> list[dict]:
    """Return ``num_hotspots`` hotspot boxes found by repeated peak-finding.

    Each round smooths the working map, takes its maximum as the hotspot
    centre, records a box of half-size ``radius`` (8% of the longer side)
    clipped to the image, then zeroes a disc of that radius around the centre.

    Returns:
        A list of dicts with ``rank``, ``center_x``/``center_y``, the box
        ``x``/``y``/``width``/``height`` and ``intensity`` (the unsmoothed
        saliency value at the centre).
    """
    working = saliency.copy()
    height, width = working.shape
    longer_side = max(height, width)
    radius = int(longer_side * 0.08)
    smoothing_sigma = longer_side * 0.015
    rows, cols = np.ogrid[:height, :width]

    hotspots = []
    for rank in range(1, num_hotspots + 1):
        smoothed = gaussian_filter(working, sigma=smoothing_sigma)
        peak_row, peak_col = np.unravel_index(np.argmax(smoothed), smoothed.shape)
        py, px = int(peak_row), int(peak_col)

        left, top = max(0, px - radius), max(0, py - radius)
        right, bottom = min(width, px + radius), min(height, py + radius)
        hotspots.append({
            "rank": rank,
            "center_x": px,
            "center_y": py,
            "x": left,
            "y": top,
            "width": right - left,
            "height": bottom - top,
            "intensity": round(float(saliency[py, px]), 4),
        })

        # Suppress this peak so the next round finds a different region.
        suppressed = (cols - px) ** 2 + (rows - py) ** 2 <= radius**2
        working[suppressed] = 0.0
    return hotspots
def _compute_design_score(
    saliency_full: np.ndarray, hotspots: list[dict], gaze_seq: list[dict]
) -> tuple[float, float]:
    """Compute the composite design score and the raw entropy score.

    The composite is a weighted sum of four components, each on a 0-100 scale:
    peak dominance (30%), hierarchy clarity (25%), gaze coherence (25%) and a
    square-root-adjusted entropy concentration (20%).

    Args:
        saliency_full: Saliency map; only its sum, size and value
            distribution are used.
        hotspots: Hotspot dicts in rank order; only ``"intensity"`` is read.
        gaze_seq: Fixation dicts in rank order; only ``"x"`` and ``"y"`` are read.

    Returns:
        ``(overall_score, entropy_score)``, both clipped to [0, 100] and
        rounded to one decimal.
    """
    # --- Entropy concentration: 100 = all mass in one spot, 0 = uniform map.
    sal_sum = saliency_full.sum()
    # A map with fewer than two pixels has zero maximum entropy; treat it like
    # an empty map instead of dividing by zero (which produced NaN scores).
    max_entropy = np.log2(saliency_full.size) if saliency_full.size > 1 else 0.0
    if sal_sum > 0 and max_entropy > 0:
        prob_dist = saliency_full / sal_sum
        prob_dist = prob_dist[prob_dist > 0]
        entropy = -np.sum(prob_dist * np.log2(prob_dist))
        raw_concentration = (1 - entropy / max_entropy) * 100
    else:
        raw_concentration = 0.0
    entropy_score = round(float(np.clip(raw_concentration, 0, 100)), 1)
    entropy_adjusted = float(np.sqrt(max(raw_concentration, 0) / 100)) * 100

    intensities = [spot["intensity"] for spot in hotspots]
    n = len(intensities)

    # --- Peak dominance: how strongly the top hotspot outweighs the rest.
    if n >= 2:
        top_intensity = intensities[0]
        rest_mean = float(np.mean(intensities[1:]))
        dominance_ratio = top_intensity / rest_mean if rest_mean > 0 else 10.0
        peak_dominance = float(100 * (1 - np.exp(-0.5 * dominance_ratio)))
    elif n == 1:
        peak_dominance = 95.0
    else:
        peak_dominance = 50.0

    # --- Hierarchy clarity: are intensities strictly decreasing, and by how much?
    if n >= 2:
        concordant = sum(
            1 for i in range(n) for j in range(i + 1, n) if intensities[i] > intensities[j]
        )
        total_pairs = n * (n - 1) // 2
        monotonicity = concordant / total_pairs if total_pairs > 0 else 1.0
        drop_ratio = 1 - (intensities[-1] / intensities[0]) if intensities[0] > 0 else 0.0
        hierarchy_clarity = float((0.6 * monotonicity + 0.4 * drop_ratio) * 100)
    else:
        hierarchy_clarity = 70.0

    # --- Gaze coherence: smooth turning angles and a direct overall path.
    gaze_points = [(g["x"], g["y"]) for g in gaze_seq]
    ng = len(gaze_points)
    if ng >= 3:
        steps = [
            (gaze_points[i + 1][0] - gaze_points[i][0], gaze_points[i + 1][1] - gaze_points[i][1])
            for i in range(ng - 1)
        ]
        lengths = [np.sqrt(dx**2 + dy**2) for dx, dy in steps]

        angles = []
        for i in range(ng - 2):
            (ax, ay), (bx, by) = steps[i], steps[i + 1]
            mag_a, mag_b = lengths[i], lengths[i + 1]
            if mag_a > 0 and mag_b > 0:
                cos_angle = np.clip((ax * bx + ay * by) / (mag_a * mag_b), -1, 1)
                angles.append(float(np.degrees(np.arccos(cos_angle))))
        avg_angle = float(np.mean(angles)) if angles else 70.0
        angle_smoothness = max(0.0, 100 - (avg_angle / 180) * 100)

        total_path = sum(lengths)
        direct_dist = np.sqrt(
            (gaze_points[-1][0] - gaze_points[0][0]) ** 2
            + (gaze_points[-1][1] - gaze_points[0][1]) ** 2
        )
        path_efficiency = float(direct_dist / total_path) if total_path > 0 else 1.0
        gaze_coherence = 0.7 * angle_smoothness + 0.3 * (path_efficiency * 100)
    else:
        gaze_coherence = 70.0

    composite = (
        0.30 * peak_dominance
        + 0.25 * hierarchy_clarity
        + 0.25 * gaze_coherence
        + 0.20 * entropy_adjusted
    )
    overall_score = round(float(np.clip(composite, 0, 100)), 1)
    return overall_score, entropy_score
@app.get("/health")
async def health():
    """Health probe; reports ``"ok"`` together with the resolved device name."""
    return {"status": "ok", "device": _resolve_device()}
@app.post("/predict")
async def predict(
    image: UploadFile = File(...),
    model: str = Form("deepgaze_iie"),
    x_internal_secret: str | None = Header(None),
):
    """Run saliency inference on an uploaded image and return map plus metrics.

    Args:
        image: Uploaded image file.
        model: Key of ``VARIANT_MAP`` selecting the saliency model.
        x_internal_secret: Shared-secret header checked by ``_check_auth``.

    Returns:
        A dict with ``saliency_b64`` (base64 of the float32 map at original
        resolution), ``shape`` as ``[H, W]``, ``gaze_sequence``, ``hotspots``,
        ``overall_score`` and ``entropy_score``.

    Raises:
        HTTPException: 401 on a bad secret, 400 for an unknown model name or
            an upload that cannot be decoded as an image.
    """
    _check_auth(x_internal_secret)
    # Reject bad input up front so it surfaces as a client error, not a 500.
    if model not in VARIANT_MAP:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown model: {model}. Choose from {list(VARIANT_MAP)}",
        )
    image_data = await image.read()
    try:
        pil_image = Image.open(io.BytesIO(image_data)).convert("RGB")
    except OSError as exc:
        raise HTTPException(status_code=400, detail=f"Invalid image: {exc}") from exc

    orig_w, orig_h = pil_image.size
    resized, _ = _prepare_for_inference(pil_image)
    logger.info(f"Inference: model={model} original={orig_w}x{orig_h} resized={resized.size}")
    saliency = _run_inference(resized, model)
    saliency_full = _upscale_saliency(saliency, orig_h, orig_w)

    gaze_sequence = _extract_gaze_sequence(saliency_full, num_fixations=5)
    hotspots = _extract_hotspots(saliency_full, num_hotspots=5)
    overall_score, entropy_score = _compute_design_score(saliency_full, hotspots, gaze_sequence)

    saliency_b64 = base64.b64encode(saliency_full.astype(np.float32).tobytes()).decode()
    logger.info(f"Done: score={overall_score} entropy={entropy_score}")
    return {
        "saliency_b64": saliency_b64,
        "shape": [orig_h, orig_w],
        "gaze_sequence": gaze_sequence,
        "hotspots": hotspots,
        "overall_score": overall_score,
        "entropy_score": entropy_score,
    }

View file

@ -0,0 +1,11 @@
fastapi>=0.115.0
uvicorn[standard]>=0.30.0
python-multipart>=0.0.9
pillow>=10.0
numpy>=1.26
torch>=2.1
torchvision>=0.16
scipy>=1.11
einops
ftfy
regex

View file

@ -2,9 +2,14 @@ services:
backend:
build:
context: ./backend
dockerfile: Dockerfile.full # Full image with PyTorch + DeepGaze for local dev
command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
environment:
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
CLOUD_RUN_SALIENCY_URL: ${CLOUD_RUN_SALIENCY_URL:-}
CLOUD_RUN_PROCESSING_URL: ${CLOUD_RUN_PROCESSING_URL:-}
CLOUD_RUN_SECRET: ${CLOUD_RUN_SECRET:-}
GOOGLE_CLOUD_PROJECT: ${GOOGLE_CLOUD_PROJECT:-optical-414516}
volumes:
- ./backend:/app
- uploads:/app/data/uploads

View file

@ -25,6 +25,11 @@ services:
DEVICE: auto
CORS_ORIGINS: http://localhost:1577
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
# Google Cloud Run — set these to enable Cloud Run offloading
CLOUD_RUN_SALIENCY_URL: ${CLOUD_RUN_SALIENCY_URL:-}
CLOUD_RUN_PROCESSING_URL: ${CLOUD_RUN_PROCESSING_URL:-}
CLOUD_RUN_SECRET: ${CLOUD_RUN_SECRET:-}
GOOGLE_CLOUD_PROJECT: ${GOOGLE_CLOUD_PROJECT:-optical-414516}
volumes:
- uploads:/app/data/uploads
depends_on: