feat: use all available CPU cores for Whisper transcription

Dynamically detects the CPU count with os.cpu_count() instead of using a
hardcoded 4 threads. Falls back to 4 if os.cpu_count() returns None.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2025-12-28 21:59:31 -06:00
parent dc78dc6fb5
commit 01c96da95c

View file

@ -2,6 +2,7 @@
from __future__ import annotations
import os
import time
from dataclasses import dataclass
@ -63,13 +64,17 @@ class WhisperService:
def model(self) -> WhisperModel:
"""Lazy-load Whisper model on first use."""
if self._model is None:
logger.info(f"Loading Whisper model '{self._model_name}' (device=cpu, compute_type=int8)...")
cpu_threads = os.cpu_count() or 4 # Fallback to 4 if cpu_count() returns None
logger.info(
f"Loading Whisper model '{self._model_name}' "
f"(device=cpu, compute_type=int8, cpu_threads={cpu_threads})..."
)
load_start = time.time()
self._model = WhisperModel(
self._model_name,
device="cpu",
compute_type="int8", # Quantized for faster CPU inference
cpu_threads=4
cpu_threads=cpu_threads
)
load_time = time.time() - load_start
logger.info(f"Whisper model '{self._model_name}' loaded successfully in {load_time:.2f}s")