From a410683edf10dbae05c95c8a8c48a2f915a244f7 Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Wed, 27 May 2026 13:02:42 -0400
Subject: [PATCH] fix: fail fast when LLM_BACKEND=llamacpp but LlamaCppClient
 is unconfigured
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, embed_one() only logged a warning and fell back to Ollama
embeddings, which would load nomic-embed-text into VRAM alongside
llama-swap — wasting memory on an unintended model. It now returns an
error with an actionable message instead.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ai/mod.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/ai/mod.rs b/src/ai/mod.rs
index 93a2edc..c54b113 100644
--- a/src/ai/mod.rs
+++ b/src/ai/mod.rs
@@ -78,8 +78,9 @@ pub async fn embed_one(
                 .pop()
                 .ok_or_else(|| anyhow::anyhow!("llama-swap returned no embeddings"));
         }
-        log::warn!(
-            "LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured; falling back to Ollama embeddings"
+        anyhow::bail!(
+            "LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — \
+             set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
         );
     }
     ollama.generate_embedding(text).await