From 53caf193afc9fe241b1350cb2d90eda7bf6802ae Mon Sep 17 00:00:00 2001
From: Cal Corum
Date: Wed, 11 Feb 2026 14:38:58 -0600
Subject: [PATCH] fix: fall back to CPU when CUDA inference fails at
 transcribe time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CUDA model loading can succeed even when runtime libs like libcublas are
missing — the error only surfaces during model.transcribe(). Catch that
and retry on CPU so transcription still works without the full CUDA
toolkit installed.

Co-Authored-By: Claude Opus 4.6
---
 src/my_memory/transcriber.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/my_memory/transcriber.py b/src/my_memory/transcriber.py
index 5475a5b..9b3a81f 100644
--- a/src/my_memory/transcriber.py
+++ b/src/my_memory/transcriber.py
@@ -52,8 +52,30 @@ def _load_model(config: WhisperConfig):
 def transcribe_file(wav_path: str, config: WhisperConfig) -> str:
     """Transcribe a WAV file and return the text."""
     model = _load_model(config)
-    segments, _info = model.transcribe(wav_path, beam_size=5)
-    return " ".join(segment.text.strip() for segment in segments).strip()
+    try:
+        segments, _info = model.transcribe(wav_path, beam_size=5)
+        return " ".join(segment.text.strip() for segment in segments).strip()
+    except Exception as e:
+        # CUDA may load but fail at inference (e.g. missing libcublas)
+        if config.device in ("auto", "cuda") and _model is not None:
+            logger.warning("CUDA transcription failed (%s), retrying on CPU", e)
+            _force_cpu_model(config)
+            segments, _info = _model.transcribe(wav_path, beam_size=5)
+            return " ".join(segment.text.strip() for segment in segments).strip()
+        raise
+
+
+def _force_cpu_model(config: WhisperConfig) -> None:
+    """Replace the current model with a CPU-only version."""
+    global _model, _model_config
+    from faster_whisper import WhisperModel
+
+    logger.info("Loading Whisper model '%s' on CPU (fallback)...", config.model_size)
+    _model = WhisperModel(
+        config.model_size, device="cpu", compute_type=config.cpu_fallback_compute_type
+    )
+    _model_config = config
+    logger.info("Whisper model loaded on CPU")
 
 
 class TranscriptionWorker(QObject):