fix: fail fast when LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured

Previously, embed_one() only logged a warning and fell back to Ollama embeddings, which would load nomic-embed-text into VRAM alongside llama-swap — wasting memory on an unintended model. It now returns an error with an actionable message instead.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:02:42 -04:00
parent 2818936739
commit a410683edf
1 changed files with 3 additions and 2 deletions
@@ -78,8 +78,9 @@ pub async fn embed_one(
                .pop()
                .ok_or_else(|| anyhow::anyhow!("llama-swap returned no embeddings"));
        }
-        log::warn!(
-            "LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured; falling back to Ollama embeddings"
+        anyhow::bail!(
+            "LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — \
+             set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
        );
    }
    ollama.generate_embedding(text).await