# File: video-accessibility-old/backend/Dockerfile.whisper-service

# =============================================================================
# Dockerfile for Whisper HTTP Service - Cloud Run Deployment
# =============================================================================
# This Dockerfile creates a FastAPI-based HTTP service for Whisper transcription,
# designed for deployment on Google Cloud Run with autoscaling.
#
# Key features:
# - Pre-downloads Whisper model during build (no model download at cold start)
# - Runs FastAPI with Uvicorn for HTTP handling
# - Optimized for 8 vCPU / 32GB RAM Cloud Run instances
# =============================================================================
# -----------------------------------------------------------------------------
# Stage 1: Builder - Install Python dependencies using Poetry
# -----------------------------------------------------------------------------
FROM python:3.11-slim AS builder

# OS-level build tooling (compiler toolchain and curl) available while Poetry
# installs dependencies. The apt package lists are removed in the same layer
# so they do not add to the layer size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Poetry is pinned to an exact version.
RUN pip install --no-cache-dir poetry==2.1.4

# Poetry settings for this stage:
#   - install into the system interpreter instead of a virtualenv, so the
#     runtime stage can copy /usr/local/lib/python3.11/site-packages as-is
#   - never prompt for input
#   - keep Poetry's cache in a path that is deleted after the install
ENV POETRY_VIRTUALENVS_CREATE=false \
    POETRY_NO_INTERACTION=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Only the dependency manifests are copied here, so this layer and the install
# layer below are reused from cache until pyproject.toml or poetry.lock change.
COPY pyproject.toml poetry.lock ./

# Install the main dependency group only (no project root package), then drop
# the Poetry cache within the same layer.
RUN poetry config virtualenvs.create false \
    && poetry install --only main --no-root --no-interaction --no-ansi \
    && rm -rf "$POETRY_CACHE_DIR"
# -----------------------------------------------------------------------------
# Stage 2: Runtime - Whisper HTTP Service
# -----------------------------------------------------------------------------
FROM python:3.11-slim AS runtime

# Runtime OS packages (sorted alphabetically):
#   curl      - used by the HEALTHCHECK probe below
#   ffmpeg    - presumably used by the service for audio/video decoding (TODO confirm)
#   libmagic1 - presumably required by a file-type detection dependency (TODO confirm)
#   tini      - minimal init process used as the ENTRYPOINT
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libmagic1 \
        tini \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Create a non-root user (fixed UID/GID 1000) with a home directory; the home
# directory is where the model cache is written during the build step below.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --shell /bin/bash --create-home app

# Bring in the Python packages and console scripts (e.g. uvicorn) that the
# builder stage installed into the system interpreter.
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Runtime environment: make /app importable, stream logs unbuffered, skip
# writing .pyc files, and mark the environment as production.
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    APP_ENV=prod

WORKDIR /app

# Drop privileges before downloading the model so the cache is created in the
# app user's home directory and is readable by the service at runtime.
USER app

# Pre-download the Whisper medium model during build so instances do not have
# to download it when the service scales up.
# Model is cached in ~/.cache/huggingface/hub (~1.5GB).
# This step is intentionally placed BEFORE the application code is copied:
# it only depends on the installed packages, so source-code changes no longer
# invalidate this large layer and force the model to be downloaded again.
RUN python -c "from faster_whisper import WhisperModel; WhisperModel('medium', device='cpu', compute_type='int8')"

# Copy application code last, since it changes most often.
COPY --chown=app:app . .

# Container-level health probe against the service's /health endpoint.
# NOTE(review): Cloud Run is believed to use its own startup/liveness probes
# rather than the Dockerfile HEALTHCHECK; this mainly helps local Docker runs - confirm.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1

# Expose HTTP port (Cloud Run uses 8080 by default). Documentation only.
EXPOSE 8080

# Run under tini so signals are forwarded and child processes are reaped.
ENTRYPOINT ["tini", "--"]

# Start the Uvicorn server:
# - 1 worker, since the service is deployed with containerConcurrency=1
# - bind to 0.0.0.0:8080 for Cloud Run
# - --timeout-keep-alive 300 keeps idle keep-alive connections open for up to
#   300s; it is NOT a per-request timeout, so long transcriptions are bounded
#   by the platform's request timeout rather than by this flag
CMD ["uvicorn", "app.services.whisper_http_service:app", \
     "--host", "0.0.0.0", \
     "--port", "8080", \
     "--workers", "1", \
     "--timeout-keep-alive", "300"]