From 53caf193afc9fe241b1350cb2d90eda7bf6802ae Mon Sep 17 00:00:00 2001
From: Cal Corum
Date: Wed, 11 Feb 2026 14:38:58 -0600
Subject: [PATCH] fix: fall back to CPU when CUDA inference fails at
 transcribe time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CUDA model loading can succeed even when runtime libs like libcublas are
missing — the error only surfaces during model.transcribe(). Catch that
and retry on CPU so transcription still works without the full CUDA
toolkit installed.

Co-Authored-By: Claude Opus 4.6
---
 src/my_memory/transcriber.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/my_memory/transcriber.py b/src/my_memory/transcriber.py
index 5475a5b..9b3a81f 100644
--- a/src/my_memory/transcriber.py
+++ b/src/my_memory/transcriber.py
@@ -52,8 +52,30 @@ def _load_model(config: WhisperConfig):
 def transcribe_file(wav_path: str, config: WhisperConfig) -> str:
     """Transcribe a WAV file and return the text."""
     model = _load_model(config)
-    segments, _info = model.transcribe(wav_path, beam_size=5)
-    return " ".join(segment.text.strip() for segment in segments).strip()
+    try:
+        segments, _info = model.transcribe(wav_path, beam_size=5)
+        return " ".join(segment.text.strip() for segment in segments).strip()
+    except Exception as e:
+        # CUDA may load but fail at inference (e.g. missing libcublas)
+        if config.device in ("auto", "cuda") and _model is not None:
+            logger.warning("CUDA transcription failed (%s), retrying on CPU", e)
+            _force_cpu_model(config)
+            segments, _info = _model.transcribe(wav_path, beam_size=5)
+            return " ".join(segment.text.strip() for segment in segments).strip()
+        raise
+
+
+def _force_cpu_model(config: WhisperConfig) -> None:
+    """Replace the current model with a CPU-only version."""
+    global _model, _model_config
+    from faster_whisper import WhisperModel
+
+    logger.info("Loading Whisper model '%s' on CPU (fallback)...", config.model_size)
+    _model = WhisperModel(
+        config.model_size, device="cpu", compute_type=config.cpu_fallback_compute_type
+    )
+    _model_config = config
+    logger.info("Whisper model loaded on CPU")
 
 
 class TranscriptionWorker(QObject):