# =============================================================================
# Dockerfile for Whisper HTTP Service - Cloud Run Deployment
# =============================================================================
# This Dockerfile creates a FastAPI-based HTTP service for Whisper transcription,
# designed for deployment on Google Cloud Run with autoscaling.
#
# Key features:
# - Pre-downloads the Whisper model during build (no model download at cold start)
# - Runs FastAPI with Uvicorn for HTTP handling
# - Optimized for 8 vCPU / 32GB RAM Cloud Run instances
# =============================================================================

# -----------------------------------------------------------------------------
# Stage 1: Builder - Install Python dependencies using Poetry
# -----------------------------------------------------------------------------
FROM python:3.11-slim AS builder

# OS-level build dependencies for this stage; the apt package index is removed
# in the same layer so it does not persist in the layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Poetry itself, pinned to an exact version.
RUN pip install --no-cache-dir poetry==2.1.4

# Poetry settings: non-interactive, no virtual environment (packages are
# installed into the image's own Python environment), and a cache directory
# that is deleted after the install step below.
ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_CREATE=false \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Only the dependency manifests are copied here, so the install layer below is
# reused until pyproject.toml or poetry.lock changes.
COPY pyproject.toml poetry.lock ./

# Install the main dependency group only (the project itself is not
# installed), then drop Poetry's cache in the same layer.
RUN poetry config virtualenvs.create false \
    && poetry install \
        --only main \
        --no-root \
        --no-interaction \
        --no-ansi \
    && rm -rf "${POETRY_CACHE_DIR}"

# -----------------------------------------------------------------------------
# Stage 2: Runtime - Whisper HTTP Service
# -----------------------------------------------------------------------------
FROM python:3.11-slim AS runtime

# Install runtime dependencies (kept in alphabetical order for easy diffing).
# curl is used by the HEALTHCHECK below and tini by the ENTRYPOINT.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libmagic1 \
        tini \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user for security (fixed UID/GID 1000, home at /home/app)
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --shell /bin/bash --create-home app

# Copy Python packages and their console scripts (e.g. uvicorn) from builder
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Set environment variables
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    APP_ENV=prod

WORKDIR /app

# Switch to non-root user. Done before the model download so the model cache
# lands in this user's home directory, where the service will look for it.
USER app

# Pre-download Whisper medium model during build.
# This prevents cold start delays when the service scales up.
# Model is cached in ~/.cache/huggingface/hub (~1.5GB).
# This step only needs the installed packages, so it runs BEFORE the
# application code is copied: source changes then do not invalidate this
# layer and force the model to be downloaded again.
RUN python -c "from faster_whisper import WhisperModel; WhisperModel('medium', device='cpu', compute_type='int8')"

# Copy application code (last, because it changes most often)
COPY --chown=app:app . .

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1

# Expose HTTP port (Cloud Run uses 8080 by default)
EXPOSE 8080

# Use tini as init system
ENTRYPOINT ["tini", "--"]

# Start Uvicorn server
# - 1 worker since Cloud Run uses containerConcurrency=1
# - Bind to 0.0.0.0:8080 for Cloud Run
# - --timeout-keep-alive 300 keeps idle keep-alive connections open for up to
#   300s; it is NOT a per-request timeout, so it does not bound how long a
#   transcription may run
CMD ["uvicorn", "app.services.whisper_http_service:app", \
     "--host", "0.0.0.0", \
     "--port", "8080", \
     "--workers", "1", \
     "--timeout-keep-alive", "300"]