# video-accessibility-old/backend/Dockerfile.whisper-service

# =============================================================================
# Dockerfile for Whisper HTTP Service - Cloud Run Deployment
# =============================================================================
# This Dockerfile creates a FastAPI-based HTTP service for Whisper transcription,
# designed for deployment on Google Cloud Run with autoscaling.
#
# Key features:
# - Pre-downloads the Whisper model during build (no model download on cold start)
# - Runs FastAPI with Uvicorn for HTTP handling
# - Optimized for 8 vCPU / 32GB RAM Cloud Run instances
# =============================================================================

# -----------------------------------------------------------------------------
# Stage 1: Builder - Install Python dependencies using Poetry
# -----------------------------------------------------------------------------
FROM python:3.11-slim AS builder

# Install build dependencies (compiler toolchain and curl).
# The apt package index is removed in the same layer so it never persists
# in this stage's filesystem.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Poetry, pinned to an exact version for reproducible builds
RUN pip install --no-cache-dir poetry==2.1.4

# Poetry settings, supplied through environment variables:
# - POETRY_NO_INTERACTION: never prompt during the build
# - POETRY_VIRTUALENVS_CREATE=false: install into the system interpreter
#   rather than a virtualenv, so packages land in
#   /usr/local/lib/python3.11/site-packages
# - POETRY_CACHE_DIR: keep Poetry's cache in a throwaway location
ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_CREATE=false \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Copy only the dependency manifests so this layer and the install below
# stay cached until pyproject.toml or poetry.lock changes
COPY pyproject.toml poetry.lock ./

# Install main-group dependencies only (no other groups, no project package).
# POETRY_VIRTUALENVS_CREATE=false above already disables virtualenv creation,
# so a separate `poetry config virtualenvs.create false` call is not needed.
# The cache is deleted in the same layer that created it. The variable is
# quoted and guarded with :? so an unset/empty value aborts the step instead
# of expanding to a bare `rm -rf`.
RUN poetry install --only main --no-root --no-interaction --no-ansi \
    && rm -rf "${POETRY_CACHE_DIR:?}"

# -----------------------------------------------------------------------------
# Stage 2: Runtime - Whisper HTTP Service
# -----------------------------------------------------------------------------
FROM python:3.11-slim AS runtime

# Install runtime dependencies (sorted alphabetically for easier diffs):
# - curl: used by the HEALTHCHECK below
# - ffmpeg, libmagic1: native tools/libraries for the application
#   (presumably media decoding and file-type detection; confirm against app code)
# - tini: minimal init process used as the ENTRYPOINT
# The apt package index is removed in the same layer. An explicit
# `apt-get clean` is not needed: official Debian-based images are configured
# to clean the package cache automatically.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    ffmpeg \
    libmagic1 \
    tini \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user for security (fixed UID/GID 1000, home at /home/app)
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --shell /bin/bash --create-home app

# Copy installed Python packages and their console scripts (e.g. uvicorn)
# from the builder stage
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Set environment variables
# - PYTHONPATH=/app: make the application package importable
# - PYTHONUNBUFFERED=1: flush stdout/stderr immediately so logs are not delayed
# - PYTHONDONTWRITEBYTECODE=1: do not write .pyc files at runtime
# - APP_ENV=prod: environment name (read by the application; verify in app config)
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    APP_ENV=prod

WORKDIR /app

# Switch to non-root user before downloading the model so the cache is
# written under the app user's home directory
USER app

# Pre-download Whisper medium model during build
# This prevents cold start delays when the service scales up
# Model is cached in ~/.cache/huggingface/hub (~1.5GB)
# This step only needs the installed packages, so it runs BEFORE the
# application code is copied: source changes then no longer invalidate this
# layer and force a fresh ~1.5GB download on every build.
RUN python -c "from faster_whisper import WhisperModel; WhisperModel('medium', device='cpu', compute_type='int8')"

# Copy application code (changes most often, so it comes after the heavy layers)
COPY --chown=app:app . .

# Health check: probe the service's /health endpoint, allowing 60s for startup
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1

# Expose HTTP port (Cloud Run uses 8080 by default)
EXPOSE 8080

# Use tini as init system (forwards signals and reaps child processes)
ENTRYPOINT ["tini", "--"]

# Start Uvicorn server
# - 1 worker since Cloud Run uses containerConcurrency=1
# - Bind to 0.0.0.0:8080 for Cloud Run
# - --timeout-keep-alive 300: keep idle HTTP keep-alive connections open for
#   up to 300s. This is NOT a per-request processing limit; the maximum
#   request duration for long transcriptions is governed by the Cloud Run
#   service's request timeout (NOTE(review): confirm it is set appropriately
#   in the deployment config).
CMD ["uvicorn", "app.services.whisper_http_service:app", \
     "--host", "0.0.0.0", \
     "--port", "8080", \
     "--workers", "1", \
     "--timeout-keep-alive", "300"]