Fix RAG vector-space mismatch and search_rag retrieval quality
Queries embedded via llama-swap were searching corpora embedded via
Ollama (measured: spaces diverged). Introduce LocalLlm — the local
Ollama + llama-swap pair with LLM_BACKEND dispatch baked in — and route
all embedding writers through it; anything embedding via a concrete
client reintroduces the bug.
- search_rag: embed the model's query verbatim (no metadata boilerplate),
make date optional — no time-decay when omitted, so "when did X
happen?" queries rank purely by similarity across all time
- reembed_embeddings bin: re-embed summaries / calendar / search /
knowledge entities via the active backend, with an old-vs-new cosine
similarity report per table and truncate-and-retry for inputs that
exceed the embed server's physical batch size
- import_calendar, import_search_history: embed through LocalLlm
- search_messages / get_sms_messages: render sender → recipient so sent
messages are attributable to a conversation
- insight job failures: store the one-line anyhow context chain ({:#})
instead of the Debug dump that was previously shown verbatim to the client
- serialize env_dispatch tests behind a lock (parallel-runner flake)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+18
-16
@@ -186,21 +186,7 @@ impl AppState {
|
||||
impl Default for AppState {
|
||||
fn default() -> Self {
|
||||
// Initialize AI clients
|
||||
let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL").unwrap_or_else(|_| {
|
||||
env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
|
||||
});
|
||||
let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
|
||||
let ollama_primary_model = env::var("OLLAMA_PRIMARY_MODEL")
|
||||
.or_else(|_| env::var("OLLAMA_MODEL"))
|
||||
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
|
||||
let ollama_fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();
|
||||
|
||||
let ollama = OllamaClient::new(
|
||||
ollama_primary_url,
|
||||
ollama_fallback_url,
|
||||
ollama_primary_model,
|
||||
ollama_fallback_model,
|
||||
);
|
||||
let ollama = build_ollama_from_env();
|
||||
|
||||
let openrouter = build_openrouter_from_env();
|
||||
let openrouter_allowed_models = parse_openrouter_allowed_models();
|
||||
@@ -375,13 +361,29 @@ fn parse_openrouter_allowed_models() -> Vec<String> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Build the `OllamaClient` from environment variables — the canonical
|
||||
/// `OLLAMA_*` wiring shared by the server (`AppState::default`) and the
|
||||
/// standalone binaries (which predate this helper and used to copy it).
///
/// Variables read, in precedence order:
/// - primary URL: `OLLAMA_PRIMARY_URL`, then `OLLAMA_URL`, then the
///   default `http://localhost:11434`
/// - primary model: `OLLAMA_PRIMARY_MODEL`, then `OLLAMA_MODEL`, then the
///   default `nemotron-3-nano:30b`
/// - fallback URL / model: `OLLAMA_FALLBACK_URL` / `OLLAMA_FALLBACK_MODEL`,
///   each `None` when the variable cannot be read
///
/// A variable that is unset or not valid Unicode makes `env::var` return
/// `Err`, so it is treated the same as absent and the next source is used.
|
||||
pub fn build_ollama_from_env() -> OllamaClient {
|
||||
// Primary endpoint: specific variable first, then the legacy/general
// `OLLAMA_URL`, then the hard-coded localhost default.
let primary_url = env::var("OLLAMA_PRIMARY_URL").unwrap_or_else(|_| {
|
||||
env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
|
||||
});
|
||||
// Optional: there is no default fallback endpoint.
let fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
|
||||
// Primary model follows the same specific -> general -> default chain
// as the primary URL.
let primary_model = env::var("OLLAMA_PRIMARY_MODEL")
|
||||
.or_else(|_| env::var("OLLAMA_MODEL"))
|
||||
.unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
|
||||
// Optional: there is no default fallback model.
let fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();
|
||||
|
||||
OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model)
|
||||
}
|
||||
|
||||
/// Build a `LlamaCppClient` from environment variables. Returns `None` when
|
||||
/// `LLAMA_SWAP_URL` is unset. The client is constructed unconditionally
|
||||
/// when the URL is set (so it's available even under `LLM_BACKEND=ollama`
|
||||
/// for ad-hoc tooling), but the agentic / chat paths only route through it
|
||||
/// when `LLM_BACKEND=llamacpp`. Slot ids default to the names the bundled
|
||||
/// `llama-swap/config.yaml` uses — `chat` / `vision` / `embed`.
|
||||
fn build_llamacpp_from_env() -> Option<Arc<LlamaCppClient>> {
|
||||
pub fn build_llamacpp_from_env() -> Option<Arc<LlamaCppClient>> {
|
||||
let base_url = env::var("LLAMA_SWAP_URL").ok()?;
|
||||
let primary_model = env::var("LLAMA_SWAP_PRIMARY_MODEL").ok();
|
||||
let mut client = LlamaCppClient::new(Some(base_url), primary_model);
|
||||
|
||||
Reference in New Issue
Block a user