From 01c96da95c1ba02ebbf9770b4373bdf16df86ad0 Mon Sep 17 00:00:00 2001
From: michael <michael@modernfreedom.com>
Date: Sun, 28 Dec 2025 21:59:31 -0600
Subject: [PATCH] feat: use all available CPU cores for Whisper transcription
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Detect the CPU count with os.cpu_count() when the Whisper model is
loaded, instead of using a hardcoded 4 threads. Fall back to 4 if
os.cpu_count() returns None.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/app/services/whisper_service.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py
index 9e57642..0c127e3 100644
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import os
 import time
 from dataclasses import dataclass
 
@@ -63,13 +64,17 @@ class WhisperService:
     def model(self) -> WhisperModel:
         """Lazy-load Whisper model on first use."""
         if self._model is None:
-            logger.info(f"Loading Whisper model '{self._model_name}' (device=cpu, compute_type=int8)...")
+            cpu_threads = os.cpu_count() or 4  # Fallback to 4 if cpu_count() returns None
+            logger.info(
+                f"Loading Whisper model '{self._model_name}' "
+                f"(device=cpu, compute_type=int8, cpu_threads={cpu_threads})..."
+            )
             load_start = time.time()
             self._model = WhisperModel(
                 self._model_name,
                 device="cpu",
                 compute_type="int8",  # Quantized for faster CPU inference
-                cpu_threads=4
+                cpu_threads=cpu_threads
             )
             load_time = time.time() - load_start
             logger.info(f"Whisper model '{self._model_name}' loaded successfully in {load_time:.2f}s")