From 01c96da95c1ba02ebbf9770b4373bdf16df86ad0 Mon Sep 17 00:00:00 2001
From: michael
Date: Sun, 28 Dec 2025 21:59:31 -0600
Subject: [PATCH] feat: use all available CPU cores for Whisper transcription
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dynamically detects CPU count with os.cpu_count() instead of hardcoded 4
threads. Falls back to 4 if detection fails.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 backend/app/services/whisper_service.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py
index 9e57642..0c127e3 100644
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import os
 import time
 from dataclasses import dataclass
 
@@ -63,13 +64,17 @@ class WhisperService:
     def model(self) -> WhisperModel:
         """Lazy-load Whisper model on first use."""
         if self._model is None:
-            logger.info(f"Loading Whisper model '{self._model_name}' (device=cpu, compute_type=int8)...")
+            cpu_threads = os.cpu_count() or 4  # Fallback to 4 if cpu_count() returns None
+            logger.info(
+                f"Loading Whisper model '{self._model_name}' "
+                f"(device=cpu, compute_type=int8, cpu_threads={cpu_threads})..."
+            )
             load_start = time.time()
             self._model = WhisperModel(
                 self._model_name,
                 device="cpu",
                 compute_type="int8",  # Quantized for faster CPU inference
-                cpu_threads=4
+                cpu_threads=cpu_threads
             )
             load_time = time.time() - load_start
             logger.info(f"Whisper model '{self._model_name}' loaded successfully in {load_time:.2f}s")