Add Google Cloud Run offloading for ML inference and image processing

- Create cloud_run/saliency: FastAPI service running DeepGaze I/IIE/III on Cloud Run (4 vCPU, 16GB RAM); pre-downloads model weights in Docker build to eliminate cold-start delays; returns saliency map + gaze sequence + hotspots + design scores
- Create cloud_run/processing: lightweight FastAPI service for heatmap generation and gaze sequence visualization (2 vCPU, 4GB RAM)
- Add cloud_run/deploy.sh for gcloud deployment to project optical-414516 in region europe-west2
- Refactor analysis pipeline to route via Cloud Run when CLOUD_RUN_SALIENCY_URL is set, with local fallback for dev mode
- Add cloud_run_client.py with sync httpx wrappers for background tasks
- Split pyproject.toml: base = API-only deps, [ml] = torch/deepgaze for local dev; production Dockerfile is now lightweight (~no PyTorch)
- Preserve Dockerfile.full + docker-compose.dev.yml for local ML dev
- Auth via X-Internal-Secret header (CLOUD_RUN_SECRET env var)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-04 19:39:52 +00:00 · 2026-03-04 19:39:52 +00:00 · 2c5e17c7c4
commit 2c5e17c7c4
parent c1b80eb9a7
15 changed files with 910 additions and 87 deletions
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -3,17 +3,14 @@ FROM python:3.12-slim
 WORKDIR /app

 RUN apt-get update && apt-get install -y --no-install-recommends \
-    libgl1 libglib2.0-0 curl git && \
+    libgl1 libglib2.0-0 curl && \
    rm -rf /var/lib/apt/lists/*

 # Copy dependency spec first for layer caching
 COPY pyproject.toml .

-# Install dependencies (non-editable, just deps)
-RUN pip install --no-cache-dir . && \
-    pip install --no-cache-dir psycopg2-binary einops ftfy regex && \
-    pip install --no-cache-dir "clip @ git+https://github.com/openai/CLIP.git" && \
-    pip install --no-cache-dir "deepgaze-pytorch @ git+https://github.com/matthias-k/DeepGaze.git"
+# Install lightweight dependencies only (no PyTorch/DeepGaze — those run on Cloud Run)
+RUN pip install --no-cache-dir ".[api]"

 # Copy application source
 COPY . .
--- a/backend/Dockerfile.full
+++ b/backend/Dockerfile.full
@ -0,0 +1,23 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 libglib2.0-0 curl git && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy dependency spec first for layer caching
+COPY pyproject.toml .
+
+# Install all dependencies including ML stack
+RUN pip install --no-cache-dir ".[ml]" && \
+    pip install --no-cache-dir "clip @ git+https://github.com/openai/CLIP.git" && \
+    pip install --no-cache-dir "deepgaze-pytorch @ git+https://github.com/matthias-k/DeepGaze.git"
+
+# Copy application source
+COPY . .
+
+RUN mkdir -p data/uploads
+
+EXPOSE 8000
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/backend/app/api/endpoints/analysis.py
+++ b/backend/app/api/endpoints/analysis.py
@ -94,29 +94,44 @@ async def create_analysis(
    return AnalysisStatus(id=analysis_id, status="pending")


-def run_analysis_pipeline(analysis_id: str, image_data: bytes, model_name: str):
-    """Background task: full saliency analysis pipeline. Runs sync in threadpool."""
-    import asyncio
-    import logging
-    import numpy as np
-    from app.services.saliency.model_manager import model_manager
-    from app.services.image_processing import prepare_for_inference, upscale_saliency
-    from app.services.heatmap import generate_heatmap_overlay, generate_standalone_heatmap
-    from app.services.gaze_sequence import extract_gaze_sequence
-
-    logger = logging.getLogger("olivas.pipeline")
-
-    # Use sync DB connection for background thread
+def _make_sync_engine():
    from sqlalchemy import create_engine
-    from sqlalchemy.orm import Session
+    from app.config import settings
+    try:
+        return create_engine(settings.DATABASE_URL.replace("+asyncpg", "+psycopg2"))
+    except Exception:
+        return create_engine(settings.DATABASE_URL.replace("+asyncpg", ""))
+
+
+def _save_file(path, data: bytes) -> None:
+    with open(path, "wb") as f:
+        f.write(data)
+
+
+def run_analysis_pipeline(analysis_id: str, image_data: bytes, model_name: str):
+    """Background task: full saliency analysis pipeline. Runs sync in threadpool.
+
+    Routes to Cloud Run if CLOUD_RUN_SALIENCY_URL is configured, otherwise runs locally.
+    """
    from app.config import settings

-    sync_url = settings.DATABASE_URL.replace("+asyncpg", "").replace("postgresql://", "postgresql+psycopg2://")
-    # Use psycopg2 if available, otherwise fallback
-    try:
-        sync_engine = create_engine(settings.DATABASE_URL.replace("+asyncpg", "+psycopg2"))
-    except Exception:
-        sync_engine = create_engine(settings.DATABASE_URL.replace("+asyncpg", ""))
+    if settings.use_cloud_run:
+        _run_pipeline_cloud_run(analysis_id, image_data, model_name)
+    else:
+        _run_pipeline_local(analysis_id, image_data, model_name)
+
+
+def _run_pipeline_cloud_run(analysis_id: str, image_data: bytes, model_name: str):
+    """Pipeline using Google Cloud Run for saliency + image processing."""
+    import base64
+    import logging
+    import numpy as np
+    from sqlalchemy.orm import Session
+    from app.services.cloud_run_client import call_saliency, call_processing
+    from app.config import settings
+
+    logger = logging.getLogger("olivas.pipeline")
+    sync_engine = _make_sync_engine()

    try:
        with Session(sync_engine) as db:
@ -124,75 +139,63 @@ def run_analysis_pipeline(analysis_id: str, image_data: bytes, model_name: str):
            analysis.status = "processing"
            db.commit()

-            logger.info(f"Starting analysis {analysis_id}")
+        logger.info(f"[cloud-run] Starting analysis {analysis_id}")
+
+        # 1. Saliency inference via Cloud Run
+        sal_result = call_saliency(image_data, model_name)
+
+        saliency_b64: str = sal_result["saliency_b64"]
+        shape: list[int] = sal_result["shape"]
+        gaze_seq: list[dict] = sal_result["gaze_sequence"]
+        hotspots: list[dict] = sal_result["hotspots"]
+        overall_score: float = sal_result["overall_score"]
+        entropy_score: float = sal_result["entropy_score"]
+
+        # 2. Save raw saliency locally
+        h, w = shape
+        saliency_full = np.frombuffer(
+            base64.b64decode(saliency_b64), dtype=np.float32
+        ).reshape(h, w)
+        np.save(str(storage.get_path(analysis_id, "saliency_raw.npy")), saliency_full)
+
+        # 3. Save saliency grayscale PNG locally
+        saliency_uint8 = (saliency_full * 255).astype(np.uint8)
+        saliency_img = Image.fromarray(saliency_uint8, mode="L")
+        buf = io.BytesIO()
+        saliency_img.save(buf, format="PNG")
+        _save_file(storage.get_path(analysis_id, "saliency_gray.png"), buf.getvalue())
+
+        # 4. Image post-processing via Cloud Run (or local fallback)
+        if settings.CLOUD_RUN_PROCESSING_URL:
+            proc_result = call_processing(image_data, saliency_b64, shape, gaze_seq)
+            heatmap_overlay_data = base64.b64decode(proc_result["heatmap_overlay_b64"])
+            heatmap_standalone_data = base64.b64decode(proc_result["heatmap_standalone_b64"])
+            gaze_img_data = base64.b64decode(proc_result["gaze_sequence_img_b64"])
+        else:
+            # Local fallback for image processing
+            from app.services.heatmap import generate_heatmap_overlay, generate_standalone_heatmap
            image = Image.open(io.BytesIO(image_data)).convert("RGB")
+            heatmap_overlay_data = _img_to_png_bytes(generate_heatmap_overlay(image, saliency_full))
+            heatmap_standalone_data = _img_to_png_bytes(generate_standalone_heatmap(saliency_full))
+            gaze_img_data = _img_to_png_bytes(_draw_gaze_sequence(image, gaze_seq))

-            # 1. Resize for inference
-            resized, scale = prepare_for_inference(image)
-            logger.info(f"Image resized: {image.size} -> {resized.size}")
+        _save_file(storage.get_path(analysis_id, "heatmap_overlay.png"), heatmap_overlay_data)
+        _save_file(storage.get_path(analysis_id, "heatmap_standalone.png"), heatmap_standalone_data)
+        _save_file(storage.get_path(analysis_id, "gaze_sequence.png"), gaze_img_data)

-            # 2. Run saliency model
-            logger.info(f"Running {model_name} inference...")
-            saliency = model_manager.predict(resized, model_name)
-            logger.info("Inference complete")
-
-            # 3. Upscale to original dimensions
-            saliency_full = upscale_saliency(saliency, image.height, image.width)
-
-            # 4. Save raw saliency as .npy
-            np.save(str(storage.get_path(analysis_id, "saliency_raw.npy")), saliency_full)
-
-            # 5. Save saliency as grayscale PNG
-            saliency_uint8 = (saliency_full * 255).astype(np.uint8)
-            saliency_img = Image.fromarray(saliency_uint8, mode="L")
-            buf = io.BytesIO()
-            saliency_img.save(buf, format="PNG")
-            with open(storage.get_path(analysis_id, "saliency_gray.png"), "wb") as f:
-                f.write(buf.getvalue())
-
-            # 6. Generate heatmap overlay
-            heatmap_overlay = generate_heatmap_overlay(image, saliency_full)
-            buf = io.BytesIO()
-            heatmap_overlay.save(buf, format="PNG")
-            with open(storage.get_path(analysis_id, "heatmap_overlay.png"), "wb") as f:
-                f.write(buf.getvalue())
-
-            # 7. Generate standalone heatmap
-            heatmap_standalone = generate_standalone_heatmap(saliency_full)
-            buf = io.BytesIO()
-            heatmap_standalone.save(buf, format="PNG")
-            with open(storage.get_path(analysis_id, "heatmap_standalone.png"), "wb") as f:
-                f.write(buf.getvalue())
-
-            # 8. Extract gaze sequence
-            gaze_seq = extract_gaze_sequence(saliency_full, num_fixations=5)
-
-            # 9. Extract hotspots (needed for composite score)
-            hotspots = _extract_hotspots(saliency_full, num_hotspots=5)
-
-            # 10. Compute composite design effectiveness score
-            overall_score, entropy_score = _compute_design_score(
-                saliency_full, hotspots, gaze_seq
-            )
-
-            # 11. Generate gaze sequence image
-            gaze_img = _draw_gaze_sequence(image, gaze_seq)
-            buf = io.BytesIO()
-            gaze_img.save(buf, format="PNG")
-            with open(storage.get_path(analysis_id, "gaze_sequence.png"), "wb") as f:
-                f.write(buf.getvalue())
-
-            # Update DB
+        # 5. Update DB
+        with Session(sync_engine) as db:
+            analysis = db.get(Analysis, analysis_id)
            analysis.status = "completed"
            analysis.gaze_sequence = gaze_seq
            analysis.hotspots = hotspots
            analysis.overall_score = overall_score
            analysis.entropy_score = entropy_score
            db.commit()
-            logger.info(f"Analysis {analysis_id} completed (score={overall_score}, entropy={entropy_score})")
+            logger.info(f"[cloud-run] Analysis {analysis_id} completed (score={overall_score})")

    except Exception as e:
-        logger.error(f"Analysis {analysis_id} failed: {e}", exc_info=True)
+        logger.error(f"[cloud-run] Analysis {analysis_id} failed: {e}", exc_info=True)
        try:
            with Session(sync_engine) as db:
                analysis = db.get(Analysis, analysis_id)
@ -203,6 +206,84 @@ def run_analysis_pipeline(analysis_id: str, image_data: bytes, model_name: str):
            pass


+def _run_pipeline_local(analysis_id: str, image_data: bytes, model_name: str):
+    """Pipeline running entirely locally (dev mode / no Cloud Run)."""
+    import logging
+    import numpy as np
+    from sqlalchemy.orm import Session
+    from app.services.saliency.model_manager import model_manager
+    from app.services.image_processing import prepare_for_inference, upscale_saliency
+    from app.services.heatmap import generate_heatmap_overlay, generate_standalone_heatmap
+    from app.services.gaze_sequence import extract_gaze_sequence
+
+    logger = logging.getLogger("olivas.pipeline")
+    sync_engine = _make_sync_engine()
+
+    try:
+        with Session(sync_engine) as db:
+            analysis = db.get(Analysis, analysis_id)
+            analysis.status = "processing"
+            db.commit()
+
+            logger.info(f"[local] Starting analysis {analysis_id}")
+            image = Image.open(io.BytesIO(image_data)).convert("RGB")
+
+            resized, scale = prepare_for_inference(image)
+            logger.info(f"Image resized: {image.size} -> {resized.size}")
+
+            logger.info(f"Running {model_name} inference...")
+            saliency = model_manager.predict(resized, model_name)
+            logger.info("Inference complete")
+
+            saliency_full = upscale_saliency(saliency, image.height, image.width)
+
+            np.save(str(storage.get_path(analysis_id, "saliency_raw.npy")), saliency_full)
+
+            saliency_uint8 = (saliency_full * 255).astype(np.uint8)
+            saliency_img = Image.fromarray(saliency_uint8, mode="L")
+            buf = io.BytesIO()
+            saliency_img.save(buf, format="PNG")
+            _save_file(storage.get_path(analysis_id, "saliency_gray.png"), buf.getvalue())
+
+            heatmap_overlay = generate_heatmap_overlay(image, saliency_full)
+            _save_file(storage.get_path(analysis_id, "heatmap_overlay.png"), _img_to_png_bytes(heatmap_overlay))
+
+            heatmap_standalone = generate_standalone_heatmap(saliency_full)
+            _save_file(storage.get_path(analysis_id, "heatmap_standalone.png"), _img_to_png_bytes(heatmap_standalone))
+
+            gaze_seq = extract_gaze_sequence(saliency_full, num_fixations=5)
+            hotspots = _extract_hotspots(saliency_full, num_hotspots=5)
+            overall_score, entropy_score = _compute_design_score(saliency_full, hotspots, gaze_seq)
+
+            gaze_img = _draw_gaze_sequence(image, gaze_seq)
+            _save_file(storage.get_path(analysis_id, "gaze_sequence.png"), _img_to_png_bytes(gaze_img))
+
+            analysis.status = "completed"
+            analysis.gaze_sequence = gaze_seq
+            analysis.hotspots = hotspots
+            analysis.overall_score = overall_score
+            analysis.entropy_score = entropy_score
+            db.commit()
+            logger.info(f"[local] Analysis {analysis_id} completed (score={overall_score}, entropy={entropy_score})")
+
+    except Exception as e:
+        logger.error(f"[local] Analysis {analysis_id} failed: {e}", exc_info=True)
+        try:
+            with Session(sync_engine) as db:
+                analysis = db.get(Analysis, analysis_id)
+                if analysis:
+                    analysis.status = "failed"
+                    db.commit()
+        except Exception:
+            pass
+
+
+def _img_to_png_bytes(img: Image.Image) -> bytes:
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return buf.getvalue()
+
+
 def _compute_design_score(saliency_full, hotspots, gaze_seq):
    """Compute composite Design Effectiveness Score (0-100) and raw entropy score.

--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -10,6 +10,17 @@ class Settings(BaseSettings):
    BACKEND_HOST: str = "0.0.0.0"
    BACKEND_PORT: int = 8000

+    # Google Cloud Run service URLs (empty = use local processing)
+    CLOUD_RUN_SALIENCY_URL: str = ""    # e.g. https://olivas-saliency-xxx-ew.a.run.app
+    CLOUD_RUN_PROCESSING_URL: str = ""  # e.g. https://olivas-processing-xxx-ew.a.run.app
+    CLOUD_RUN_SECRET: str = ""          # Shared secret for X-Internal-Secret header
+
+    GOOGLE_CLOUD_PROJECT: str = "optical-414516"
+
+    @property
+    def use_cloud_run(self) -> bool:
+        return bool(self.CLOUD_RUN_SALIENCY_URL)
+
    @property
    def device(self) -> str:
        if self.DEVICE == "auto":
--- a/backend/app/services/cloud_run_client.py
+++ b/backend/app/services/cloud_run_client.py
@ -0,0 +1,82 @@
+"""HTTP client for OliVAS Cloud Run services.
+
+Provides sync wrappers suitable for use inside the BackgroundTasks threadpool.
+This module performs no local fallback itself: callers decide whether to use
+Cloud Run or local processing based on whether the service URLs are configured.
+"""
+import base64
+import logging
+from typing import Any
+
+import httpx
+
+from app.config import settings
+
+logger = logging.getLogger("olivas.cloud_run")
+
+# 6-minute (360 s) timeout for saliency inference, with a 15 s connect timeout
+# (DeepGaze on CPU can take ~30–60s when warm, but the first request after a
+# cold start may take longer)
+_SALIENCY_TIMEOUT = httpx.Timeout(360.0, connect=15.0)
+_PROCESSING_TIMEOUT = httpx.Timeout(120.0, connect=10.0)
+
+
+def _headers() -> dict[str, str]:
+    if settings.CLOUD_RUN_SECRET:
+        return {"X-Internal-Secret": settings.CLOUD_RUN_SECRET}
+    return {}
+
+
+def call_saliency(image_data: bytes, model_name: str) -> dict[str, Any]:
+    """POST image to olivas-saliency and return JSON with saliency + metrics."""
+    url = settings.CLOUD_RUN_SALIENCY_URL.rstrip("/") + "/predict"
+    logger.info(f"Calling saliency service: model={model_name} image={len(image_data)}B")
+
+    with httpx.Client(timeout=_SALIENCY_TIMEOUT) as client:
+        response = client.post(
+            url,
+            files={"image": ("image.png", image_data, "image/png")},
+            data={"model": model_name},
+            headers=_headers(),
+        )
+
+    if response.status_code != 200:
+        raise RuntimeError(
+            f"Saliency service returned {response.status_code}: {response.text[:500]}"
+        )
+
+    result = response.json()
+    logger.info(
+        f"Saliency done: score={result.get('overall_score')} entropy={result.get('entropy_score')}"
+    )
+    return result
+
+
+def call_processing(
+    image_data: bytes,
+    saliency_b64: str,
+    shape: list[int],
+    gaze_sequence: list[dict],
+) -> dict[str, str]:
+    """POST image + saliency to olivas-processing and return base64-encoded result images."""
+    url = settings.CLOUD_RUN_PROCESSING_URL.rstrip("/") + "/process"
+    image_b64 = base64.b64encode(image_data).decode()
+
+    logger.info(f"Calling processing service: image={len(image_data)}B shape={shape}")
+
+    with httpx.Client(timeout=_PROCESSING_TIMEOUT) as client:
+        response = client.post(
+            url,
+            json={
+                "image_b64": image_b64,
+                "saliency_b64": saliency_b64,
+                "shape": shape,
+                "gaze_sequence": gaze_sequence,
+            },
+            headers=_headers(),
+        )
+
+    if response.status_code != 200:
+        raise RuntimeError(
+            f"Processing service returned {response.status_code}: {response.text[:500]}"
+        )
+
+    return response.json()
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@ -10,21 +10,33 @@ dependencies = [
    "sqlalchemy[asyncio]>=2.0",
    "alembic>=1.13",
    "asyncpg>=0.29",
+    "psycopg2-binary>=2.9",
    "pydantic>=2.0",
    "pydantic-settings>=2.0",
    "pillow>=10.0",
    "numpy>=1.26",
-    "torch>=2.1",
-    "torchvision>=0.16",
    "scipy>=1.11",
    "matplotlib>=3.8",
-    "scikit-image>=0.22",
    "reportlab>=4.0",
    "aiofiles>=23.0",
    "anthropic>=0.40",
+    "httpx>=0.27",
 ]

 [project.optional-dependencies]
+# Lightweight production install (same as base — alias for clarity)
+api = []
+
+# Full local ML stack for development without Cloud Run
+ml = [
+    "torch>=2.1",
+    "torchvision>=0.16",
+    "scikit-image>=0.22",
+    "einops",
+    "ftfy",
+    "regex",
+]
+
 dev = [
    "pytest>=8.0",
    "pytest-asyncio>=0.23",
--- a/cloud_run/deploy.sh
+++ b/cloud_run/deploy.sh
@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# Deploy OliVAS Cloud Run services to Google Cloud Run
+# Project: optical-414516 | Region: europe-west2
+#
+# Usage:
+#   ./cloud_run/deploy.sh [saliency|processing|all]
+#
+# Requirements:
+#   - gcloud CLI authenticated: gcloud auth login
+#   - Project set: gcloud config set project optical-414516
+#   - Cloud Run API enabled
+#   - CLOUD_RUN_SECRET env var set (shared auth secret)
+
+set -euo pipefail
+
+PROJECT="optical-414516"
+REGION="europe-west2"
+SECRET="${CLOUD_RUN_SECRET:-}"
+
+if [ -z "$SECRET" ]; then
+  echo "WARNING: CLOUD_RUN_SECRET is not set. Services will be unauthenticated."
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+gcloud config set project "$PROJECT"
+
+deploy_saliency() {
+  echo "=== Deploying olivas-saliency ==="
+  gcloud run deploy olivas-saliency \
+    --source "$SCRIPT_DIR/saliency" \
+    --region "$REGION" \
+    --platform managed \
+    --allow-unauthenticated \
+    --cpu 4 \
+    --memory 16Gi \
+    --timeout 300 \
+    --concurrency 1 \
+    --min-instances 0 \
+    --max-instances 3 \
+    --set-env-vars "CLOUD_RUN_SECRET=${SECRET},DEVICE=cpu" \
+    --no-cpu-throttling
+
+  SALIENCY_URL=$(gcloud run services describe olivas-saliency \
+    --region "$REGION" --format "value(status.url)")
+  echo "olivas-saliency deployed: $SALIENCY_URL"
+}
+
+deploy_processing() {
+  echo "=== Deploying olivas-processing ==="
+  gcloud run deploy olivas-processing \
+    --source "$SCRIPT_DIR/processing" \
+    --region "$REGION" \
+    --platform managed \
+    --allow-unauthenticated \
+    --cpu 2 \
+    --memory 4Gi \
+    --timeout 120 \
+    --concurrency 2 \
+    --min-instances 0 \
+    --max-instances 5 \
+    --set-env-vars "CLOUD_RUN_SECRET=${SECRET}"
+
+  PROCESSING_URL=$(gcloud run services describe olivas-processing \
+    --region "$REGION" --format "value(status.url)")
+  echo "olivas-processing deployed: $PROCESSING_URL"
+}
+
+TARGET="${1:-all}"
+
+case "$TARGET" in
+  saliency)
+    deploy_saliency
+    ;;
+  processing)
+    deploy_processing
+    ;;
+  all)
+    deploy_saliency
+    deploy_processing
+    echo ""
+    echo "=== Deployment complete ==="
+    echo "Set these env vars on your VPS (in .env or docker-compose.yml):"
+    SALIENCY_URL=$(gcloud run services describe olivas-saliency \
+      --region "$REGION" --format "value(status.url)" 2>/dev/null || echo "<saliency-url>")
+    PROCESSING_URL=$(gcloud run services describe olivas-processing \
+      --region "$REGION" --format "value(status.url)" 2>/dev/null || echo "<processing-url>")
+    echo "  CLOUD_RUN_SALIENCY_URL=$SALIENCY_URL"
+    echo "  CLOUD_RUN_PROCESSING_URL=$PROCESSING_URL"
+    echo "  CLOUD_RUN_SECRET=$SECRET"
+    ;;
+  *)
+    echo "Usage: $0 [saliency|processing|all]"
+    exit 1
+    ;;
+esac
--- a/cloud_run/processing/Dockerfile
+++ b/cloud_run/processing/Dockerfile
@ -0,0 +1,19 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 libglib2.0-0 && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY main.py .
+
+ENV PYTHONUNBUFFERED=1
+ENV PORT=8080
+
+EXPOSE 8080
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
--- a/cloud_run/processing/main.py
+++ b/cloud_run/processing/main.py
@ -0,0 +1,112 @@
+"""OliVAS Processing Cloud Run Service.
+
+Handles image post-processing from saliency maps:
+- Heatmap overlay generation
+- Standalone heatmap generation
+- Gaze sequence visualization image
+"""
+import base64
+import io
+import logging
+import os
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.colormaps
+import numpy as np
+from fastapi import FastAPI, Header, HTTPException
+from PIL import Image, ImageDraw, ImageFont
+from pydantic import BaseModel
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("olivas.processing")
+
+INTERNAL_SECRET = os.environ.get("CLOUD_RUN_SECRET", "")
+
+app = FastAPI(title="OliVAS Processing Service")
+
+
+def _check_auth(x_internal_secret: str | None) -> None:
+    if INTERNAL_SECRET and x_internal_secret != INTERNAL_SECRET:
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+
+class ProcessRequest(BaseModel):
+    image_b64: str
+    saliency_b64: str
+    shape: list[int]  # [H, W]
+    gaze_sequence: list[dict]
+
+
+def _img_to_b64(img: Image.Image) -> str:
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return base64.b64encode(buf.getvalue()).decode()
+
+
+def _decode_saliency(saliency_b64: str, shape: list[int]) -> np.ndarray:
+    h, w = shape
+    raw = base64.b64decode(saliency_b64)
+    return np.frombuffer(raw, dtype=np.float32).reshape(h, w)
+
+
+def _generate_heatmap_overlay(image: Image.Image, saliency: np.ndarray) -> Image.Image:
+    cmap = matplotlib.colormaps.get_cmap("jet")
+    heatmap_rgba = cmap(saliency)
+    heatmap_rgb = (heatmap_rgba[:, :, :3] * 255).astype(np.uint8)
+    heatmap_img = Image.fromarray(heatmap_rgb).resize(image.size, Image.LANCZOS)
+    return Image.blend(image.convert("RGB"), heatmap_img, 0.5)
+
+
+def _generate_standalone_heatmap(saliency: np.ndarray) -> Image.Image:
+    cmap = matplotlib.colormaps.get_cmap("jet")
+    heatmap_rgba = cmap(saliency)
+    return Image.fromarray((heatmap_rgba[:, :, :3] * 255).astype(np.uint8))
+
+
+def _draw_gaze_sequence(image: Image.Image, gaze_seq: list[dict]) -> Image.Image:
+    img = image.copy()
+    draw = ImageDraw.Draw(img)
+    font = ImageFont.load_default(size=24)
+    colors = ["#FF4444", "#FF8800", "#FFCC00", "#44CC44", "#4488FF"]
+
+    for i, point in enumerate(gaze_seq):
+        x, y = point["x"], point["y"]
+        color = colors[i % len(colors)]
+        r = 25
+        draw.ellipse([x - r, y - r, x + r, y + r], outline=color, width=3)
+        draw.text((x - 6, y - 12), str(point["rank"]), fill=color, font=font)
+        if i < len(gaze_seq) - 1:
+            nx, ny = gaze_seq[i + 1]["x"], gaze_seq[i + 1]["y"]
+            draw.line([x, y, nx, ny], fill=color, width=2)
+
+    return img
+
+
+@app.get("/health")
+async def health():
+    return {"status": "ok"}
+
+
+@app.post("/process")
+async def process_images(
+    request: ProcessRequest,
+    x_internal_secret: str | None = Header(None),
+):
+    _check_auth(x_internal_secret)
+
+    image_data = base64.b64decode(request.image_b64)
+    saliency = _decode_saliency(request.saliency_b64, request.shape)
+    image = Image.open(io.BytesIO(image_data)).convert("RGB")
+
+    logger.info(f"Processing image {image.size}, saliency {saliency.shape}")
+
+    overlay = _generate_heatmap_overlay(image, saliency)
+    standalone = _generate_standalone_heatmap(saliency)
+    gaze_img = _draw_gaze_sequence(image, request.gaze_sequence)
+
+    return {
+        "heatmap_overlay_b64": _img_to_b64(overlay),
+        "heatmap_standalone_b64": _img_to_b64(standalone),
+        "gaze_sequence_img_b64": _img_to_b64(gaze_img),
+    }
--- a/cloud_run/processing/requirements.txt
+++ b/cloud_run/processing/requirements.txt
@ -0,0 +1,8 @@
+fastapi>=0.115.0
+uvicorn[standard]>=0.30.0
+pillow>=10.0
+numpy>=1.26
+matplotlib>=3.8
+scipy>=1.11
+reportlab>=4.0
+pydantic>=2.0
--- a/cloud_run/saliency/Dockerfile
+++ b/cloud_run/saliency/Dockerfile
@ -0,0 +1,30 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 libglib2.0-0 curl git && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt && \
+    pip install --no-cache-dir einops ftfy regex && \
+    pip install --no-cache-dir "clip @ git+https://github.com/openai/CLIP.git" && \
+    pip install --no-cache-dir "deepgaze-pytorch @ git+https://github.com/matthias-k/DeepGaze.git"
+
+# Pre-download DeepGaze IIE weights during build to eliminate cold-start downloads
+RUN python -c "\
+import deepgaze_pytorch; \
+print('Pre-loading DeepGaze IIE weights...'); \
+deepgaze_pytorch.DeepGazeIIE(pretrained=True); \
+print('Weights cached.')"
+
+COPY main.py .
+
+ENV PYTHONUNBUFFERED=1
+ENV PORT=8080
+
+EXPOSE 8080
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
--- a/cloud_run/saliency/main.py
+++ b/cloud_run/saliency/main.py
@ -0,0 +1,331 @@
+"""OliVAS Saliency Cloud Run Service.
+
+Runs DeepGaze saliency inference and returns:
+- saliency map (base64 float32 bytes)
+- gaze sequence
+- hotspots
+- design effectiveness scores
+"""
+import base64
+import io
+import logging
+import os
+from contextlib import asynccontextmanager
+
+import numpy as np
+from fastapi import FastAPI, File, Form, Header, HTTPException, UploadFile
+from PIL import Image
+from scipy.ndimage import gaussian_filter, zoom
+from scipy.special import logsumexp
+
+# Configure root logging once at import time; all service messages go through `logger`.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("olivas.saliency")
+
+# Shared secret compared against the request's internal-secret header.
+# An empty value (the default) disables the check entirely.
+INTERNAL_SECRET = os.environ.get("CLOUD_RUN_SECRET", "")
+# Torch device name, or "auto" to use CUDA when available and CPU otherwise.
+DEVICE = os.environ.get("DEVICE", "auto")
+
+# Global model cache: {model_key: {"model": ..., "centerbias": ...}}
+# Keys have the form "<model_name>:<device>"; each entry also stores the torch device.
+_model_cache: dict = {}
+
+# Public model name -> (class name looked up on deepgaze_pytorch, short variant label).
+# Only the class name is read by the code in this module.
+VARIANT_MAP = {
+    "deepgaze_i": ("DeepGazeI", "I"),
+    "deepgaze_iie": ("DeepGazeIIE", "IIE"),
+    "deepgaze_iii": ("DeepGazeIII", "III"),
+}
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan hook: log start-up, then release cached models on shutdown.
+
+    Models are not loaded here; they are loaded lazily by ``_get_model`` on first use.
+    """
+    logger.info("OliVAS Saliency service starting")
+    yield
+    # Everything after the yield runs when the application shuts down.
+    _model_cache.clear()
+    logger.info("OliVAS Saliency service stopped")
+
+
+# ASGI application object; the Dockerfile starts it as "main:app".
+app = FastAPI(title="OliVAS Saliency Service", lifespan=lifespan)
+
+
+def _check_auth(x_internal_secret: str | None) -> None:
+    """Reject the request unless it carries the configured internal secret.
+
+    When ``INTERNAL_SECRET`` is empty the check is skipped, so the service is
+    open (intended for local development).
+
+    Args:
+        x_internal_secret: Value of the internal-secret request header, or
+            ``None`` when the header is absent.
+
+    Raises:
+        HTTPException: 401 when a secret is configured and the supplied value
+            is missing or different.
+    """
+    import hmac
+
+    if not INTERNAL_SECRET:
+        return
+
+    # Compare as bytes in constant time: a plain ``!=`` leaks how many leading
+    # characters matched, and compare_digest rejects non-ASCII ``str`` inputs.
+    supplied = (x_internal_secret or "").encode("utf-8")
+    if not hmac.compare_digest(supplied, INTERNAL_SECRET.encode("utf-8")):
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+
+def _resolve_device() -> str:
+    """Return the torch device name to use for inference.
+
+    An explicit ``DEVICE`` setting is returned unchanged. With ``"auto"`` the
+    result is ``"cuda"`` when torch reports a CUDA device and ``"cpu"``
+    otherwise, including when torch cannot be imported.
+    """
+    if DEVICE != "auto":
+        return DEVICE
+
+    try:
+        import torch
+
+        has_cuda = torch.cuda.is_available()
+    except ImportError:
+        return "cpu"
+    return "cuda" if has_cuda else "cpu"
+
+
+def _get_model(model_name: str) -> dict:
+    """Return the cached model bundle for ``model_name``, loading it on first use.
+
+    The bundle is a dict with the keys ``"model"`` (network in eval mode),
+    ``"centerbias"`` (1024x1024 template) and ``"device"`` (torch device). It is
+    cached per ``"<model_name>:<device>"``.
+
+    Raises:
+        ValueError: if ``model_name`` is not a key of ``VARIANT_MAP``.
+    """
+    device = _resolve_device()
+    cache_key = f"{model_name}:{device}"
+
+    cached = _model_cache.get(cache_key)
+    if cached is not None:
+        return cached
+
+    if model_name not in VARIANT_MAP:
+        raise ValueError(f"Unknown model: {model_name}. Choose from {list(VARIANT_MAP)}")
+
+    class_name = VARIANT_MAP[model_name][0]
+
+    # Imported lazily so the heavy ML stack is only touched when a model is needed.
+    import torch
+    import deepgaze_pytorch
+
+    logger.info(f"Loading {class_name} on {device}...")
+    target = torch.device(device)
+    network = getattr(deepgaze_pytorch, class_name)(pretrained=True).to(target)
+    network.eval()
+
+    # Centerbias template: log of an unnormalised isotropic Gaussian (sigma 0.5)
+    # over a [-1, 1] x [-1, 1] grid; it is resized per image at inference time.
+    side = 1024
+    axis = np.linspace(-1, 1, side)
+    grid_x, grid_y = np.meshgrid(axis, axis)
+    centerbias = -0.5 * (grid_x**2 + grid_y**2) / 0.5**2
+
+    entry = {"model": network, "centerbias": centerbias, "device": target}
+    _model_cache[cache_key] = entry
+    logger.info(f"Loaded {class_name}")
+    return entry
+
+
+def _run_inference(image: Image.Image, model_name: str) -> np.ndarray:
+    """Run the requested DeepGaze variant on ``image`` and return a [0, 1] saliency map.
+
+    The image is passed to the model as a ``(1, 3, H, W)`` float tensor together
+    with the centerbias template resized to ``H x W`` and normalised with
+    ``logsumexp``. The model output is exponentiated, squeezed and min-max
+    scaled; a (near-)constant map is returned as all zeros.
+
+    Raises:
+        ValueError: propagated from ``_get_model`` for an unknown ``model_name``.
+    """
+    import torch
+
+    model_data = _get_model(model_name)
+    model = model_data["model"]
+    centerbias_template = model_data["centerbias"]
+    device_obj = model_data["device"]
+
+    img_np = np.array(image.convert("RGB"))
+    h, w = img_np.shape[:2]
+
+    # Build the batch from a single contiguous array. Wrapping the array in a
+    # Python list and calling torch.tensor() takes a much slower conversion path.
+    chw = np.ascontiguousarray(img_np.transpose(2, 0, 1))
+    image_tensor = torch.from_numpy(chw).unsqueeze(0).float().to(device_obj)
+
+    # Nearest-neighbour resize of the template to the image size, then
+    # normalise it in log space.
+    cb = zoom(
+        centerbias_template,
+        (h / centerbias_template.shape[0], w / centerbias_template.shape[1]),
+        order=0,
+    )
+    cb -= logsumexp(cb)
+    centerbias_tensor = (
+        torch.from_numpy(np.ascontiguousarray(cb)).unsqueeze(0).float().to(device_obj)
+    )
+
+    # NOTE(review): every variant is called as model(image, centerbias); confirm
+    # that DeepGazeIII does not additionally require fixation-history inputs.
+    with torch.no_grad():
+        log_density = model(image_tensor, centerbias_tensor)
+
+    saliency = torch.exp(log_density).cpu().numpy().squeeze()
+    sal_min, sal_max = saliency.min(), saliency.max()
+    if sal_max - sal_min > 1e-10:
+        saliency = (saliency - sal_min) / (sal_max - sal_min)
+    else:
+        # Flat output carries no ranking information.
+        saliency = np.zeros_like(saliency)
+
+    return saliency
+
+
+def _prepare_for_inference(image: Image.Image, max_size: int = 1024) -> tuple[Image.Image, float]:
+    """Downscale ``image`` so its longer side is at most ``max_size`` pixels.
+
+    Args:
+        image: Source image.
+        max_size: Upper bound for the longer side of the returned image.
+
+    Returns:
+        ``(image, scale)``. Images that already fit are returned unchanged with
+        a scale of ``1.0``; otherwise a LANCZOS-resampled copy and the applied
+        scale factor are returned.
+    """
+    w, h = image.size
+    scale = max_size / max(w, h)
+    if scale >= 1.0:
+        return image, 1.0
+
+    # int() truncates, so a very elongated image could get a 0-pixel short
+    # side; clamp both dimensions to at least one pixel.
+    new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
+    return image.resize(new_size, Image.LANCZOS), scale
+
+
+def _upscale_saliency(saliency: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
+    """Resize ``saliency`` to ``(target_h, target_w)`` with linear interpolation.
+
+    The input array itself is returned when it already has the target shape.
+    """
+    if saliency.shape != (target_h, target_w):
+        factors = (target_h / saliency.shape[0], target_w / saliency.shape[1])
+        return zoom(saliency, factors, order=1)
+    return saliency
+
+
+def _extract_gaze_sequence(saliency: np.ndarray, num_fixations: int = 5) -> list[dict]:
+    """Predict an ordered list of fixations by repeated peak picking.
+
+    Each round smooths the working map, takes its maximum as the next fixation
+    and zeroes a disc (radius 10% of the longer side) around it so later rounds
+    move elsewhere. Extraction stops early once the smoothed map is
+    effectively empty.
+
+    Returns:
+        Up to ``num_fixations`` dicts with ``rank`` (1-based), pixel ``x``/``y``,
+        percentage ``x_pct``/``y_pct`` and ``probability`` (value of the
+        original, unsuppressed map at the fixation).
+    """
+    working = saliency.copy().astype(np.float64)
+    h, w = working.shape
+    longest = max(h, w)
+    inhibition_radius = int(longest * 0.1)
+    sigma = longest * 0.01
+    # Coordinate grids do not change between rounds, so build them once.
+    rows, cols = np.ogrid[:h, :w]
+
+    fixations: list[dict] = []
+    for rank in range(1, num_fixations + 1):
+        smoothed = gaussian_filter(working, sigma=sigma)
+        if smoothed.max() < 1e-10:
+            break
+
+        # Flat argmax -> (row, column) for a row-major 2-D map.
+        py, px = divmod(int(np.argmax(smoothed)), w)
+
+        fixations.append({
+            "rank": rank,
+            "x": px,
+            "y": py,
+            "x_pct": round(px / w * 100, 1),
+            "y_pct": round(py / h * 100, 1),
+            "probability": round(float(saliency[py, px]), 4),
+        })
+
+        # Inhibition of return: blank out the neighbourhood just visited.
+        visited = (cols - px) ** 2 + (rows - py) ** 2 <= inhibition_radius**2
+        working[visited] = 0.0
+
+    return fixations
+
+
+def _extract_hotspots(saliency: np.ndarray, num_hotspots: int = 5) -> list[dict]:
+    """Find the strongest attention regions by repeated peak picking.
+
+    Each round smooths the working map, takes its maximum as the hotspot
+    centre, records a bounding box clipped to the image, and zeroes a disc
+    (radius 8% of the longer side) around the centre. Extraction stops early
+    once the smoothed map is effectively empty, so fewer than ``num_hotspots``
+    entries may be returned.
+
+    Returns:
+        Dicts with ``rank`` (1-based), ``center_x``/``center_y``, the box
+        ``x``/``y``/``width``/``height`` and ``intensity`` (value of the
+        original, unsuppressed map at the centre).
+    """
+    sal = saliency.copy()
+    h, w = sal.shape
+    longest = max(h, w)
+    radius = int(longest * 0.08)
+    sigma = longest * 0.015
+    # Coordinate grids do not change between rounds, so build them once.
+    yy, xx = np.ogrid[:h, :w]
+
+    hotspots: list[dict] = []
+    for i in range(num_hotspots):
+        smoothed = gaussian_filter(sal, sigma=sigma)
+        # Same stop condition as _extract_gaze_sequence: without it an exhausted
+        # map keeps yielding zero-intensity hotspots at (0, 0).
+        if smoothed.max() < 1e-10:
+            break
+
+        peak_idx = np.unravel_index(np.argmax(smoothed), smoothed.shape)
+        py, px = int(peak_idx[0]), int(peak_idx[1])
+        intensity = float(saliency[py, px])
+
+        x1, y1 = max(0, px - radius), max(0, py - radius)
+        x2, y2 = min(w, px + radius), min(h, py + radius)
+
+        hotspots.append({
+            "rank": i + 1,
+            "center_x": px,
+            "center_y": py,
+            "x": x1,
+            "y": y1,
+            "width": x2 - x1,
+            "height": y2 - y1,
+            "intensity": round(intensity, 4),
+        })
+
+        # Suppress the region just reported so the next round moves elsewhere.
+        mask = (xx - px) ** 2 + (yy - py) ** 2 <= radius**2
+        sal[mask] = 0.0
+
+    return hotspots
+
+
+def _attention_concentration(saliency_full: np.ndarray) -> float:
+    """Return ``100 * (1 - H / H_max)`` for the map treated as a distribution."""
+    sal_sum = saliency_full.sum()
+    # A single-pixel map has H_max == log2(1) == 0; dividing by it gives NaN,
+    # so treat it like an empty map.
+    if saliency_full.size < 2 or not sal_sum > 0:
+        return 0.0
+    prob_dist = saliency_full / sal_sum
+    prob_dist = prob_dist[prob_dist > 0]
+    entropy = -np.sum(prob_dist * np.log2(prob_dist))
+    max_entropy = np.log2(saliency_full.size)
+    return float((1 - entropy / max_entropy) * 100)
+
+
+def _peak_dominance(hotspots: list[dict]) -> float:
+    """Score how strongly the top hotspot outweighs the mean of the others."""
+    if not hotspots:
+        return 50.0
+    if len(hotspots) == 1:
+        return 95.0
+    top_intensity = hotspots[0]["intensity"]
+    rest_mean = float(np.mean([h["intensity"] for h in hotspots[1:]]))
+    dominance_ratio = top_intensity / rest_mean if rest_mean > 0 else 10.0
+    return float(100 * (1 - np.exp(-0.5 * dominance_ratio)))
+
+
+def _hierarchy_clarity(hotspots: list[dict]) -> float:
+    """Score how cleanly hotspot intensities fall off from first to last."""
+    intensities = [h["intensity"] for h in hotspots]
+    n = len(intensities)
+    if n < 2:
+        return 70.0
+    # Share of ordered pairs in which the earlier hotspot is strictly stronger.
+    concordant = sum(
+        1 for i in range(n) for j in range(i + 1, n) if intensities[i] > intensities[j]
+    )
+    monotonicity = concordant / (n * (n - 1) // 2)
+    drop_ratio = 1 - (intensities[-1] / intensities[0]) if intensities[0] > 0 else 0.0
+    return float((0.6 * monotonicity + 0.4 * drop_ratio) * 100)
+
+
+def _gaze_coherence(gaze_seq: list[dict]) -> float:
+    """Score the gaze path: 70% turn smoothness, 30% path efficiency."""
+    gaze_points = [(g["x"], g["y"]) for g in gaze_seq]
+    ng = len(gaze_points)
+    if ng < 3:
+        return 70.0
+
+    # Turning angle between each pair of consecutive segments.
+    angles = []
+    for i in range(ng - 2):
+        ax = gaze_points[i + 1][0] - gaze_points[i][0]
+        ay = gaze_points[i + 1][1] - gaze_points[i][1]
+        bx = gaze_points[i + 2][0] - gaze_points[i + 1][0]
+        by = gaze_points[i + 2][1] - gaze_points[i + 1][1]
+        mag_a = np.sqrt(ax**2 + ay**2)
+        mag_b = np.sqrt(bx**2 + by**2)
+        if mag_a > 0 and mag_b > 0:
+            cos_angle = np.clip((ax * bx + ay * by) / (mag_a * mag_b), -1, 1)
+            angles.append(float(np.degrees(np.arccos(cos_angle))))
+
+    avg_angle = float(np.mean(angles)) if angles else 70.0
+    angle_smoothness = max(0.0, 100 - (avg_angle / 180) * 100)
+
+    # Straight-line distance between first and last fixation over path length.
+    total_path = sum(
+        np.sqrt(
+            (gaze_points[i + 1][0] - gaze_points[i][0]) ** 2
+            + (gaze_points[i + 1][1] - gaze_points[i][1]) ** 2
+        )
+        for i in range(ng - 1)
+    )
+    direct_dist = np.sqrt(
+        (gaze_points[-1][0] - gaze_points[0][0]) ** 2
+        + (gaze_points[-1][1] - gaze_points[0][1]) ** 2
+    )
+    path_efficiency = float(direct_dist / total_path) if total_path > 0 else 1.0
+    return 0.7 * angle_smoothness + 0.3 * (path_efficiency * 100)
+
+
+def _compute_design_score(
+    saliency_full: np.ndarray, hotspots: list[dict], gaze_seq: list[dict]
+) -> tuple[float, float]:
+    """Combine four attention metrics into an overall design score.
+
+    Args:
+        saliency_full: 2-D saliency map.
+        hotspots: Hotspot dicts carrying an ``intensity`` key, strongest first.
+        gaze_seq: Fixation dicts carrying ``x`` and ``y`` keys, in viewing order.
+
+    Returns:
+        ``(overall_score, entropy_score)``, both rounded to one decimal and
+        clipped to ``[0, 100]``. The overall score weights peak dominance
+        (30%), hierarchy clarity (25%), gaze coherence (25%) and the
+        square-root-adjusted concentration (20%).
+    """
+    raw_concentration = _attention_concentration(saliency_full)
+    entropy_score = round(float(np.clip(raw_concentration, 0, 100)), 1)
+    # The square root lifts low concentration values before weighting.
+    entropy_adjusted = float(np.sqrt(max(raw_concentration, 0) / 100)) * 100
+
+    composite = (
+        0.30 * _peak_dominance(hotspots)
+        + 0.25 * _hierarchy_clarity(hotspots)
+        + 0.25 * _gaze_coherence(gaze_seq)
+        + 0.20 * entropy_adjusted
+    )
+    overall_score = round(float(np.clip(composite, 0, 100)), 1)
+    return overall_score, entropy_score
+
+
+@app.get("/health")
+async def health():
+    """Liveness probe: report an OK status and the device inference would use."""
+    device = _resolve_device()
+    return {"status": "ok", "device": device}
+
+
+@app.post("/predict")
+async def predict(
+    image: UploadFile = File(...),
+    model: str = Form("deepgaze_iie"),
+    x_internal_secret: str | None = Header(None),
+):
+    """Run saliency inference on an uploaded image.
+
+    Args:
+        image: Uploaded image file (multipart form field).
+        model: Model name; must be a key of ``VARIANT_MAP``.
+        x_internal_secret: Shared-secret header checked by ``_check_auth``.
+
+    Returns:
+        A dict with ``saliency_b64`` (base64 of the float32 map bytes at the
+        original resolution), ``shape`` (``[height, width]``),
+        ``gaze_sequence``, ``hotspots``, ``overall_score`` and
+        ``entropy_score``.
+
+    Raises:
+        HTTPException: 401 on a failed secret check; 400 for an unknown model
+            name or an upload that cannot be decoded as an image.
+    """
+    _check_auth(x_internal_secret)
+
+    # Validate client input up front so mistakes surface as 400 rather than as
+    # an unhandled exception (HTTP 500) from deeper in the pipeline.
+    if model not in VARIANT_MAP:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unknown model: {model}. Choose from {list(VARIANT_MAP)}",
+        )
+
+    image_data = await image.read()
+    try:
+        pil_image = Image.open(io.BytesIO(image_data)).convert("RGB")
+    except (OSError, ValueError, Image.DecompressionBombError) as exc:
+        raise HTTPException(status_code=400, detail="Invalid or unsupported image") from exc
+    orig_w, orig_h = pil_image.size
+
+    resized, _ = _prepare_for_inference(pil_image)
+    logger.info(f"Inference: model={model} original={orig_w}x{orig_h} resized={resized.size}")
+
+    # NOTE(review): inference is a synchronous call inside this coroutine, so the
+    # event loop (including /health) is blocked until it returns.
+    saliency = _run_inference(resized, model)
+    saliency_full = _upscale_saliency(saliency, orig_h, orig_w)
+
+    gaze_sequence = _extract_gaze_sequence(saliency_full, num_fixations=5)
+    hotspots = _extract_hotspots(saliency_full, num_hotspots=5)
+    overall_score, entropy_score = _compute_design_score(saliency_full, hotspots, gaze_sequence)
+
+    # Raw float32 bytes; the receiver needs "shape" to rebuild the array.
+    saliency_b64 = base64.b64encode(saliency_full.astype(np.float32).tobytes()).decode()
+
+    logger.info(f"Done: score={overall_score} entropy={entropy_score}")
+
+    return {
+        "saliency_b64": saliency_b64,
+        "shape": [orig_h, orig_w],
+        "gaze_sequence": gaze_sequence,
+        "hotspots": hotspots,
+        "overall_score": overall_score,
+        "entropy_score": entropy_score,
+    }
--- a/cloud_run/saliency/requirements.txt
+++ b/cloud_run/saliency/requirements.txt
@ -0,0 +1,11 @@
+fastapi>=0.115.0
+uvicorn[standard]>=0.30.0
+python-multipart>=0.0.9
+pillow>=10.0
+numpy>=1.26
+torch>=2.1
+torchvision>=0.16
+scipy>=1.11
+einops
+ftfy
+regex
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@ -2,9 +2,14 @@ services:
  backend:
    build:
      context: ./backend
+      dockerfile: Dockerfile.full  # Full image with PyTorch + DeepGaze for local dev
    command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
    environment:
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
+      CLOUD_RUN_SALIENCY_URL: ${CLOUD_RUN_SALIENCY_URL:-}
+      CLOUD_RUN_PROCESSING_URL: ${CLOUD_RUN_PROCESSING_URL:-}
+      CLOUD_RUN_SECRET: ${CLOUD_RUN_SECRET:-}
+      GOOGLE_CLOUD_PROJECT: ${GOOGLE_CLOUD_PROJECT:-optical-414516}
    volumes:
      - ./backend:/app
      - uploads:/app/data/uploads
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -25,6 +25,11 @@ services:
      DEVICE: auto
      CORS_ORIGINS: http://localhost:1577
      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
+      # Google Cloud Run — set these to enable Cloud Run offloading
+      CLOUD_RUN_SALIENCY_URL: ${CLOUD_RUN_SALIENCY_URL:-}
+      CLOUD_RUN_PROCESSING_URL: ${CLOUD_RUN_PROCESSING_URL:-}
+      CLOUD_RUN_SECRET: ${CLOUD_RUN_SECRET:-}
+      GOOGLE_CLOUD_PROJECT: ${GOOGLE_CLOUD_PROJECT:-optical-414516}
    volumes:
      - uploads:/app/data/uploads
    depends_on: