fix: fail fast when LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured
Previously, embed_one() only logged a warning and fell back to Ollama embeddings, which would load nomic-embed-text into VRAM alongside llama-swap — wasting memory on an unintended model. It now returns an error with an actionable message instead. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+3
-2
@@ -78,8 +78,9 @@ pub async fn embed_one(
|
||||
.pop()
|
||||
.ok_or_else(|| anyhow::anyhow!("llama-swap returned no embeddings"));
|
||||
}
|
||||
log::warn!(
|
||||
"LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured; falling back to Ollama embeddings"
|
||||
anyhow::bail!(
|
||||
"LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — \
|
||||
set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
|
||||
);
|
||||
}
|
||||
ollama.generate_embedding(text).await
|
||||
|
||||
Reference in New Issue
Block a user