feat: use all available CPU cores for Whisper transcription

Dynamically detects the CPU count with os.cpu_count() instead of using a hardcoded 4 threads. Falls back to 4 threads if os.cpu_count() returns None. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-28 21:59:31 -06:00 · 2025-12-28 21:59:31 -06:00 · 01c96da95c
commit 01c96da95c
parent dc78dc6fb5
1 changed file with 7 additions and 2 deletions
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@@ -2,6 +2,7 @@

 from __future__ import annotations

+import os
 import time
 from dataclasses import dataclass

@@ -63,13 +64,17 @@ class WhisperService:
    def model(self) -> WhisperModel:
        """Lazy-load Whisper model on first use."""
        if self._model is None:
-            logger.info(f"Loading Whisper model '{self._model_name}' (device=cpu, compute_type=int8)...")
+            cpu_threads = os.cpu_count() or 4  # Fallback to 4 if cpu_count() returns None
+            logger.info(
+                f"Loading Whisper model '{self._model_name}' "
+                f"(device=cpu, compute_type=int8, cpu_threads={cpu_threads})..."
+            )
            load_start = time.time()
            self._model = WhisperModel(
                self._model_name,
                device="cpu",
                compute_type="int8",  # Quantized for faster CPU inference
-                cpu_threads=4
+                cpu_threads=cpu_threads
            )
            load_time = time.time() - load_start
            logger.info(f"Whisper model '{self._model_name}' loaded successfully in {load_time:.2f}s")