From efd05db5234d0a28f6579a1169484bc933e1c0a7 Mon Sep 17 00:00:00 2001
From: Cameron Cordes
Date: Thu, 11 Jun 2026 21:40:40 -0400
Subject: [PATCH] Make the embedding model swappable via env for A/B testing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trialing Qwen3-Embedding-0.6B (1024-dim, instruct-prefixed queries)
against nomic required code changes at every hardcoded seam; now it's a
config flip plus a reembed_embeddings run.

- EMBEDDING_DIM env (default 768) replaces every hardcoded dim check:
  daily summary / calendar / search / location DAOs, Ollama batch
  validation, reembed_embeddings
- entities gains the dim guard it never had — a wrong-dim vector
  silently kills dedup/recall (cosine over mismatched lengths is 0), so
  store None and warn instead
- embed_query / embed_document split with EMBED_QUERY_PREFIX /
  EMBED_DOCUMENT_PREFIX (literal \n expanded): retrieval models treat
  the two sides differently — nomic wants
  search_query:/search_document:, Qwen3 wants Instruct:...\nQuery: on
  queries only. All query-side call sites and all corpus writers now
  declare their side.
- document the contract in CLAUDE.md: change the model or any of these vars → re-run reembed_embeddings or search is garbage Co-Authored-By: Claude Fable 5 --- CLAUDE.md | 8 +++++ src/ai/insight_generator.rs | 48 ++++++++++++++++++++--------- src/ai/local_llm.rs | 14 +++++---- src/ai/mod.rs | 51 +++++++++++++++++++++++++++++++ src/ai/ollama.rs | 9 +++--- src/bin/import_calendar.rs | 3 +- src/bin/import_search_history.rs | 2 +- src/bin/reembed_embeddings.rs | 15 ++++----- src/database/calendar_dao.rs | 23 ++++++++------ src/database/daily_summary_dao.rs | 21 +++++++------ src/database/location_dao.rs | 9 +++--- src/database/search_dao.rs | 23 ++++++++------ 12 files changed, 159 insertions(+), 67 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index b63ed4c..816391b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -645,6 +645,14 @@ OPENROUTER_APP_TITLE=ImageApi # Optional attribution header # re-embedding — mixed vector spaces break similarity search. LLM_BACKEND=ollama +# Embedding model contract. Corpus and queries must be embedded by the same +# model with matching prefixes — after changing the embed model or any of +# these, run `cargo run --bin reembed_embeddings` (all tables) or search is +# garbage. Prefix values may contain a literal \n (expanded to a newline). +EMBEDDING_DIM=768 # 768 = nomic-embed-text v1.5; 1024 = Qwen3-Embedding-0.6B +EMBED_QUERY_PREFIX= # nomic: "search_query: " | Qwen3: "Instruct: \nQuery: " +EMBED_DOCUMENT_PREFIX= # nomic: "search_document: " | Qwen3: leave empty + # llama.cpp / llama-swap (used when LLM_BACKEND=llamacpp). OpenAI-compatible # proxy hosting one or more llama-server processes. Chat models receive # images directly via content-parts (all models assumed vision-capable). diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 1ac4db3..645caa2 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -535,7 +535,7 @@ impl InsightGenerator { // (`LLM_BACKEND` switch). 
Must match the backend that populated the // daily-summary embeddings or similarity search will be garbage. let query_embedding = - crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &query).await?; + crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query).await?; // Search for similar daily summaries with time-based weighting // This prioritizes summaries temporally close to the query date @@ -601,7 +601,7 @@ impl InsightGenerator { // Must use the same backend that populated the daily-summary // embeddings or similarity search is garbage (see embed_one docs). let query_embedding = - crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), query).await?; + crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), query).await?; let mut summary_dao = self .daily_summary_dao @@ -687,7 +687,7 @@ impl InsightGenerator { let calendar_cx = parent_cx.with_span(span); let query_embedding = if let Some(loc) = location { - match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), loc).await { + match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), loc).await { Ok(emb) => Some(emb), Err(e) => { log::warn!("Failed to generate embedding for location '{}': {}", loc, e); @@ -859,7 +859,8 @@ impl InsightGenerator { }; let query_embedding = - match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &query_text).await { + match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query_text).await + { Ok(emb) => emb, Err(e) => { log::warn!("Failed to generate search embedding: {}", e); @@ -2942,17 +2943,34 @@ Return ONLY the summary, nothing else."#, // Generate embedding for name + description (best-effort) via the // configured local backend. 
let embed_text = format!("{} {}", name, description); - let embedding: Option> = - match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &embed_text).await { - Ok(vec) => { - let bytes: Vec = vec.iter().flat_map(|f| f.to_le_bytes()).collect(); - Some(bytes) - } - Err(e) => { - log::warn!("Embedding generation failed for entity '{}': {}", name, e); - None - } - }; + let embedding: Option> = match crate::ai::embed_document( + &self.ollama, + self.llamacpp.as_deref(), + &embed_text, + ) + .await + { + // The entities table has no dim check at the DAO layer, and a + // wrong-dim vector silently kills dedup/recall (cosine over + // mismatched lengths is 0) — guard here, store None instead. + Ok(vec) if vec.len() == crate::ai::embedding_dim() => { + let bytes: Vec = vec.iter().flat_map(|f| f.to_le_bytes()).collect(); + Some(bytes) + } + Ok(vec) => { + log::warn!( + "Entity '{}' embedding has {} dims (expected {}) — storing without embedding", + name, + vec.len(), + crate::ai::embedding_dim() + ); + None + } + Err(e) => { + log::warn!("Embedding generation failed for entity '{}': {}", name, e); + None + } + }; let now = chrono::Utc::now().timestamp(); let insert = InsertEntity { diff --git a/src/ai/local_llm.rs b/src/ai/local_llm.rs index 8344a87..bf3510c 100644 --- a/src/ai/local_llm.rs +++ b/src/ai/local_llm.rs @@ -43,14 +43,16 @@ impl LocalLlm { ) } - /// Embed one string via the `LLM_BACKEND`-selected client. - pub async fn embed(&self, text: &str) -> Result> { - super::embed_one(&self.ollama, self.llamacpp.as_deref(), text).await + /// Embed a search query (applies `EMBED_QUERY_PREFIX`). Callers must + /// pick query vs document — retrieval models treat the two sides + /// differently and an unmarked embed invites prefix-mismatch bugs. + pub async fn embed_query(&self, text: &str) -> Result> { + super::embed_query(&self.ollama, self.llamacpp.as_deref(), text).await } - /// Embed a batch via the `LLM_BACKEND`-selected client. 
- pub async fn embed_batch(&self, texts: &[&str]) -> Result>> { - super::embed_many(&self.ollama, self.llamacpp.as_deref(), texts).await + /// Embed corpus text (applies `EMBED_DOCUMENT_PREFIX`). + pub async fn embed_document(&self, text: &str) -> Result> { + super::embed_document(&self.ollama, self.llamacpp.as_deref(), text).await } /// Single-shot local text generation via the `LLM_BACKEND`-selected diff --git a/src/ai/mod.rs b/src/ai/mod.rs index 5125a96..d358f6c 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -75,6 +75,57 @@ pub fn local_backend_is_llamacpp() -> bool { ) } +/// Expected embedding dimensionality, env-overridable via `EMBEDDING_DIM` +/// (default 768, nomic-embed-text). Every store/query dim check reads this — +/// swapping to a different-dim model (e.g. Qwen3-Embedding-0.6B at 1024) is +/// then a config flip plus a `reembed_embeddings` run, not a code change. +/// Cached for the process lifetime; a flip requires a restart anyway since +/// the corpus must be re-embedded with it. +pub fn embedding_dim() -> usize { + static DIM: std::sync::OnceLock = std::sync::OnceLock::new(); + *DIM.get_or_init(|| { + std::env::var("EMBEDDING_DIM") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(768) + }) +} + +/// Read an embedding prefix from the environment. `.env` values can't hold +/// real newlines, so a literal `\n` in the value is expanded — Qwen3-style +/// query instructions need one ("Instruct: ...\nQuery: "). +fn embed_prefix(key: &str) -> String { + std::env::var(key) + .map(|v| v.replace("\\n", "\n")) + .unwrap_or_default() +} + +/// Embed a search query. Applies `EMBED_QUERY_PREFIX` (default empty) — +/// retrieval models distinguish query-side from document-side text: +/// nomic v1.5 wants `search_query: `, Qwen3-Embedding wants +/// `Instruct: \nQuery: `. Must pair with the document prefix the +/// corpus was embedded with or similarity degrades. 
+pub async fn embed_query( + ollama: &OllamaClient, + llamacpp: Option<&LlamaCppClient>, + text: &str, +) -> anyhow::Result> { + let prefixed = format!("{}{}", embed_prefix("EMBED_QUERY_PREFIX"), text); + embed_one(ollama, llamacpp, &prefixed).await +} + +/// Embed corpus text (the stored side of retrieval). Applies +/// `EMBED_DOCUMENT_PREFIX` (default empty; nomic v1.5 wants +/// `search_document: `, Qwen3-Embedding wants none). +pub async fn embed_document( + ollama: &OllamaClient, + llamacpp: Option<&LlamaCppClient>, + text: &str, +) -> anyhow::Result> { + let prefixed = format!("{}{}", embed_prefix("EMBED_DOCUMENT_PREFIX"), text); + embed_one(ollama, llamacpp, &prefixed).await +} + /// Embed a batch of strings via the configured local backend. Routes /// through llama-swap when `LLM_BACKEND=llamacpp` (and a client is /// configured), else Ollama. See [`local_backend_is_llamacpp`] for the diff --git a/src/ai/ollama.rs b/src/ai/ollama.rs index 75c8a02..518c7ec 100644 --- a/src/ai/ollama.rs +++ b/src/ai/ollama.rs @@ -1046,13 +1046,14 @@ Analyze the image and use specific details from both the visual content and the } }; - // Validate embedding dimensions (should be 768 for nomic-embed-text:v1.5) + // Validate embedding dimensions (EMBEDDING_DIM; 768 for nomic-embed-text:v1.5) for (i, embedding) in embeddings.iter().enumerate() { - if embedding.len() != 768 { + if embedding.len() != crate::ai::embedding_dim() { log::warn!( - "Unexpected embedding dimensions for item {}: {} (expected 768)", + "Unexpected embedding dimensions for item {}: {} (expected {})", i, - embedding.len() + embedding.len(), + crate::ai::embedding_dim() ); } } diff --git a/src/bin/import_calendar.rs b/src/bin/import_calendar.rs index 629a794..98b3f37 100644 --- a/src/bin/import_calendar.rs +++ b/src/bin/import_calendar.rs @@ -87,7 +87,8 @@ async fn main() -> Result<()> { ); match tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(async { llm.embed(&text).await }) + 
tokio::runtime::Handle::current() + .block_on(async { llm.embed_document(&text).await }) }) { Ok(emb) => Some(emb), Err(e) => { diff --git a/src/bin/import_search_history.rs b/src/bin/import_search_history.rs index 7494392..93605cc 100644 --- a/src/bin/import_search_history.rs +++ b/src/bin/import_search_history.rs @@ -64,7 +64,7 @@ async fn main() -> Result<()> { async move { let mut embeddings = Vec::new(); for query in &queries { - match llm.embed(query).await { + match llm.embed_document(query).await { Ok(emb) => embeddings.push(Some(emb)), Err(e) => { pb_for_warn.println(format!("embedding failed for '{}': {}", query, e)); diff --git a/src/bin/reembed_embeddings.rs b/src/bin/reembed_embeddings.rs index 26b2fde..a2fdd4c 100644 --- a/src/bin/reembed_embeddings.rs +++ b/src/bin/reembed_embeddings.rs @@ -141,7 +141,7 @@ async fn embed_with_truncation(llm: &LocalLlm, text: &str) -> Result<(Vec, let mut text = text.to_string(); let mut truncated = false; loop { - match llm.embed(&text).await { + match llm.embed_document(&text).await { Ok(emb) => return Ok((emb, truncated)), Err(e) if e.to_string().contains("too large to process") && text.chars().count() > 64 => @@ -194,14 +194,15 @@ async fn reembed_table( } }; - // The whole pipeline (DAO checks, stored corpora) assumes 768 dims. - // A different dim means the active backend is not serving a - // nomic-compatible model — stop rather than corrupt the table. + // The whole pipeline (DAO checks, stored corpora) assumes + // EMBEDDING_DIM dims. A mismatch means the active embed slot is not + // serving the configured model — stop rather than corrupt the table. 
anyhow::ensure!( - new_emb.len() == 768, - "backend returned {}-dim embedding (expected 768) — '{}' is not \ - serving a nomic-embed-text-v1.5-compatible model", + new_emb.len() == image_api::ai::embedding_dim(), + "backend returned {}-dim embedding (expected {}) — '{}' does not \ + match the configured EMBEDDING_DIM", new_emb.len(), + image_api::ai::embedding_dim(), llm.embedding_model_version() ); diff --git a/src/database/calendar_dao.rs b/src/database/calendar_dao.rs index 4ebd21c..f739d87 100644 --- a/src/database/calendar_dao.rs +++ b/src/database/calendar_dao.rs @@ -222,11 +222,12 @@ impl CalendarEventDao for SqliteCalendarEventDao { // Validate embedding dimensions if provided if let Some(ref emb) = event.embedding - && emb.len() != 768 + && emb.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid embedding dimensions: {} (expected 768)", - emb.len() + "Invalid embedding dimensions: {} (expected {})", + emb.len(), + crate::ai::embedding_dim() )); } @@ -293,7 +294,7 @@ impl CalendarEventDao for SqliteCalendarEventDao { for event in events { // Validate embedding if provided if let Some(ref emb) = event.embedding - && emb.len() != 768 + && emb.len() != crate::ai::embedding_dim() { log::warn!( "Skipping event with invalid embedding dimensions: {}", @@ -385,10 +386,11 @@ impl CalendarEventDao for SqliteCalendarEventDao { trace_db_call(context, "query", "find_similar_events", |_span| { let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao"); - if query_embedding.len() != 768 { + if query_embedding.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid query embedding dimensions: {} (expected 768)", - query_embedding.len() + "Invalid query embedding dimensions: {} (expected {})", + query_embedding.len(), + crate::ai::embedding_dim() )); } @@ -461,10 +463,11 @@ impl CalendarEventDao for SqliteCalendarEventDao { // Step 2: If query embedding provided, rank by semantic similarity if let 
Some(query_emb) = query_embedding { - if query_emb.len() != 768 { + if query_emb.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid query embedding dimensions: {} (expected 768)", - query_emb.len() + "Invalid query embedding dimensions: {} (expected {})", + query_emb.len(), + crate::ai::embedding_dim() )); } diff --git a/src/database/daily_summary_dao.rs b/src/database/daily_summary_dao.rs index 521c1a5..af1d16f 100644 --- a/src/database/daily_summary_dao.rs +++ b/src/database/daily_summary_dao.rs @@ -150,10 +150,11 @@ impl DailySummaryDao for SqliteDailySummaryDao { .expect("Unable to get DailySummaryDao"); // Validate embedding dimensions - if summary.embedding.len() != 768 { + if summary.embedding.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid embedding dimensions: {} (expected 768)", - summary.embedding.len() + "Invalid embedding dimensions: {} (expected {})", + summary.embedding.len(), + crate::ai::embedding_dim() )); } @@ -202,10 +203,11 @@ impl DailySummaryDao for SqliteDailySummaryDao { trace_db_call(context, "query", "find_similar_summaries", |_span| { let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao"); - if query_embedding.len() != 768 { + if query_embedding.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid query embedding dimensions: {} (expected 768)", - query_embedding.len() + "Invalid query embedding dimensions: {} (expected {})", + query_embedding.len(), + crate::ai::embedding_dim() )); } @@ -299,10 +301,11 @@ impl DailySummaryDao for SqliteDailySummaryDao { trace_db_call(context, "query", "find_similar_summaries_with_time_weight", |_span| { let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao"); - if query_embedding.len() != 768 { + if query_embedding.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid query embedding dimensions: {} (expected 768)", - query_embedding.len() + "Invalid query 
embedding dimensions: {} (expected {})", + query_embedding.len(), + crate::ai::embedding_dim() )); } diff --git a/src/database/location_dao.rs b/src/database/location_dao.rs index 8bb0ac4..9840279 100644 --- a/src/database/location_dao.rs +++ b/src/database/location_dao.rs @@ -216,11 +216,12 @@ impl LocationHistoryDao for SqliteLocationHistoryDao { // Validate embedding dimensions if provided (rare for location data) if let Some(ref emb) = location.embedding - && emb.len() != 768 + && emb.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid embedding dimensions: {} (expected 768)", - emb.len() + "Invalid embedding dimensions: {} (expected {})", + emb.len(), + crate::ai::embedding_dim() )); } @@ -292,7 +293,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao { for location in locations { // Validate embedding if provided (rare) if let Some(ref emb) = location.embedding - && emb.len() != 768 + && emb.len() != crate::ai::embedding_dim() { log::warn!( "Skipping location with invalid embedding dimensions: {}", diff --git a/src/database/search_dao.rs b/src/database/search_dao.rs index ee7d0ad..a73c9fb 100644 --- a/src/database/search_dao.rs +++ b/src/database/search_dao.rs @@ -189,10 +189,11 @@ impl SearchHistoryDao for SqliteSearchHistoryDao { .expect("Unable to get SearchHistoryDao"); // Validate embedding dimensions (REQUIRED for searches) - if search.embedding.len() != 768 { + if search.embedding.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid embedding dimensions: {} (expected 768)", - search.embedding.len() + "Invalid embedding dimensions: {} (expected {})", + search.embedding.len(), + crate::ai::embedding_dim() )); } @@ -245,7 +246,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao { conn.transaction::<_, anyhow::Error, _>(|conn| { for search in searches { // Validate embedding (REQUIRED) - if search.embedding.len() != 768 { + if search.embedding.len() != crate::ai::embedding_dim() { log::warn!( 
"Skipping search with invalid embedding dimensions: {}", search.embedding.len() @@ -325,10 +326,11 @@ impl SearchHistoryDao for SqliteSearchHistoryDao { .lock() .expect("Unable to get SearchHistoryDao"); - if query_embedding.len() != 768 { + if query_embedding.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid query embedding dimensions: {} (expected 768)", - query_embedding.len() + "Invalid query embedding dimensions: {} (expected {})", + query_embedding.len(), + crate::ai::embedding_dim() )); } @@ -406,10 +408,11 @@ impl SearchHistoryDao for SqliteSearchHistoryDao { // Step 2: If query embedding provided, rank by semantic similarity if let Some(query_emb) = query_embedding { - if query_emb.len() != 768 { + if query_emb.len() != crate::ai::embedding_dim() { return Err(anyhow::anyhow!( - "Invalid query embedding dimensions: {} (expected 768)", - query_emb.len() + "Invalid query embedding dimensions: {} (expected {})", + query_emb.len(), + crate::ai::embedding_dim() )); }