Make the embedding model swappable via env for A/B testing

Trialing Qwen3-Embedding-0.6B (1024-dim, instruct-prefixed queries)
against nomic required code changes at every hardcoded seam; now it's a
config flip plus a reembed_embeddings run.

- EMBEDDING_DIM env (default 768) replaces every hardcoded dim check:
  daily summary / calendar / search / location DAOs, Ollama batch
  validation, reembed_embeddings
- entities gains the dim guard it never had — a wrong-dim vector
  silently kills dedup/recall (cosine similarity over mismatched
  lengths comes out as 0), so store None and warn instead
- embed_query / embed_document split with EMBED_QUERY_PREFIX /
  EMBED_DOCUMENT_PREFIX (a literal \n in the value is expanded to a
  newline): retrieval models treat the two sides differently — nomic
  wants search_query:/search_document:, Qwen3 wants
  Instruct:...\nQuery: on queries only. All query-side call sites and
  all corpus writers now declare their side.
- document the contract in CLAUDE.md: change the model or any of these
  vars → re-run reembed_embeddings or search is garbage

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-06-11 21:40:40 -04:00
parent b1493f5aca
commit efd05db523
12 changed files with 159 additions and 67 deletions
+33 -15
View File
@@ -535,7 +535,7 @@ impl InsightGenerator {
// (`LLM_BACKEND` switch). Must match the backend that populated the
// daily-summary embeddings or similarity search will be garbage.
let query_embedding =
crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &query).await?;
crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query).await?;
// Search for similar daily summaries with time-based weighting
// This prioritizes summaries temporally close to the query date
@@ -601,7 +601,7 @@ impl InsightGenerator {
// Must use the same backend that populated the daily-summary
// embeddings or similarity search is garbage (see embed_one docs).
let query_embedding =
crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), query).await?;
crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), query).await?;
let mut summary_dao = self
.daily_summary_dao
@@ -687,7 +687,7 @@ impl InsightGenerator {
let calendar_cx = parent_cx.with_span(span);
let query_embedding = if let Some(loc) = location {
match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), loc).await {
match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), loc).await {
Ok(emb) => Some(emb),
Err(e) => {
log::warn!("Failed to generate embedding for location '{}': {}", loc, e);
@@ -859,7 +859,8 @@ impl InsightGenerator {
};
let query_embedding =
match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &query_text).await {
match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query_text).await
{
Ok(emb) => emb,
Err(e) => {
log::warn!("Failed to generate search embedding: {}", e);
@@ -2942,17 +2943,34 @@ Return ONLY the summary, nothing else."#,
// Generate embedding for name + description (best-effort) via the
// configured local backend.
let embed_text = format!("{} {}", name, description);
let embedding: Option<Vec<u8>> =
match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &embed_text).await {
Ok(vec) => {
let bytes: Vec<u8> = vec.iter().flat_map(|f| f.to_le_bytes()).collect();
Some(bytes)
}
Err(e) => {
log::warn!("Embedding generation failed for entity '{}': {}", name, e);
None
}
};
let embedding: Option<Vec<u8>> = match crate::ai::embed_document(
&self.ollama,
self.llamacpp.as_deref(),
&embed_text,
)
.await
{
// The entities table has no dim check at the DAO layer, and a
// wrong-dim vector silently kills dedup/recall (cosine over
// mismatched lengths is 0) — guard here, store None instead.
Ok(vec) if vec.len() == crate::ai::embedding_dim() => {
let bytes: Vec<u8> = vec.iter().flat_map(|f| f.to_le_bytes()).collect();
Some(bytes)
}
Ok(vec) => {
log::warn!(
"Entity '{}' embedding has {} dims (expected {}) — storing without embedding",
name,
vec.len(),
crate::ai::embedding_dim()
);
None
}
Err(e) => {
log::warn!("Embedding generation failed for entity '{}': {}", name, e);
None
}
};
let now = chrono::Utc::now().timestamp();
let insert = InsertEntity {