fix: fall back to CPU when CUDA inference fails at transcribe time

CUDA model loading can succeed even when runtime libs like libcublas
are missing — the error only surfaces during model.transcribe(). Catch
that and retry on CPU so transcription still works without the full
CUDA toolkit installed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2026-02-11 14:38:58 -06:00
parent 195cd443cb
commit 53caf193af

View File

@ -52,8 +52,30 @@ def _load_model(config: WhisperConfig):
def transcribe_file(wav_path: str, config: WhisperConfig) -> str:
    """Transcribe a WAV file and return the joined text.

    Args:
        wav_path: Path to the WAV file to transcribe.
        config: Whisper configuration (model size, device, compute types).

    Returns:
        The transcription as a single whitespace-normalized string.

    Raises:
        Exception: Re-raises the original inference error when no CPU
            fallback is applicable (device is already "cpu") or no model
            is loaded.
    """
    model = _load_model(config)
    try:
        return _segments_to_text(model, wav_path)
    # Broad catch is deliberate: CUDA model loading can succeed even when
    # runtime libs like libcublas are missing, and the failure type raised
    # at inference time comes from the third-party backend, so we cannot
    # narrow it reliably here.
    except Exception as e:
        # NOTE(review): the fallback uses the module-level `_model` (swapped
        # by _force_cpu_model) rather than the local `model`, which still
        # points at the failed CUDA instance.
        if config.device in ("auto", "cuda") and _model is not None:
            logger.warning("CUDA transcription failed (%s), retrying on CPU", e)
            _force_cpu_model(config)
            return _segments_to_text(_model, wav_path)
        raise


def _segments_to_text(model, wav_path: str) -> str:
    """Run beam-search transcription on *model* and join segment texts."""
    segments, _info = model.transcribe(wav_path, beam_size=5)
    return " ".join(segment.text.strip() for segment in segments).strip()
def _force_cpu_model(config: WhisperConfig) -> None:
    """Swap the cached module-level model for a CPU-only instance.

    Used as a fallback when CUDA inference fails at transcribe time
    (e.g. missing libcublas). Rebuilds the model with the configured
    CPU compute type and updates the module-level cache.
    """
    global _model, _model_config

    # Imported lazily, matching the original: keeps module import cheap.
    from faster_whisper import WhisperModel

    logger.info("Loading Whisper model '%s' on CPU (fallback)...", config.model_size)
    cpu_model = WhisperModel(
        config.model_size,
        device="cpu",
        compute_type=config.cpu_fallback_compute_type,
    )
    _model = cpu_model
    _model_config = config
    logger.info("Whisper model loaded on CPU")
class TranscriptionWorker(QObject): class TranscriptionWorker(QObject):