Make the embedding model swappable via env for A/B testing
Trialing Qwen3-Embedding-0.6B (1024-dim, instruct-prefixed queries) against nomic required code changes at every hardcoded seam; now it's a config flip plus a reembed_embeddings run.

- EMBEDDING_DIM env (default 768) replaces every hardcoded dim check: daily summary / calendar / search / location DAOs, Ollama batch validation, reembed_embeddings
- entities gains the dim guard it never had — a wrong-dim vector silently kills dedup/recall (cosine over mismatched lengths is 0), so store None and warn instead
- embed_query / embed_document split with EMBED_QUERY_PREFIX / EMBED_DOCUMENT_PREFIX (literal \n expanded): retrieval models treat the two sides differently — nomic wants search_query:/search_document:, Qwen3 wants Instruct:...\nQuery: on queries only. All query-side call sites and all corpus writers now declare their side.
- document the contract in CLAUDE.md: change the model or any of these vars → re-run reembed_embeddings or search is garbage

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+33
-15
@@ -535,7 +535,7 @@ impl InsightGenerator {
|
||||
// (`LLM_BACKEND` switch). Must match the backend that populated the
|
||||
// daily-summary embeddings or similarity search will be garbage.
|
||||
let query_embedding =
|
||||
crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &query).await?;
|
||||
crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query).await?;
|
||||
|
||||
// Search for similar daily summaries with time-based weighting
|
||||
// This prioritizes summaries temporally close to the query date
|
||||
@@ -601,7 +601,7 @@ impl InsightGenerator {
|
||||
// Must use the same backend that populated the daily-summary
|
||||
// embeddings or similarity search is garbage (see embed_one docs).
|
||||
let query_embedding =
|
||||
crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), query).await?;
|
||||
crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), query).await?;
|
||||
|
||||
let mut summary_dao = self
|
||||
.daily_summary_dao
|
||||
@@ -687,7 +687,7 @@ impl InsightGenerator {
|
||||
let calendar_cx = parent_cx.with_span(span);
|
||||
|
||||
let query_embedding = if let Some(loc) = location {
|
||||
match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), loc).await {
|
||||
match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), loc).await {
|
||||
Ok(emb) => Some(emb),
|
||||
Err(e) => {
|
||||
log::warn!("Failed to generate embedding for location '{}': {}", loc, e);
|
||||
@@ -859,7 +859,8 @@ impl InsightGenerator {
|
||||
};
|
||||
|
||||
let query_embedding =
|
||||
match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &query_text).await {
|
||||
match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query_text).await
|
||||
{
|
||||
Ok(emb) => emb,
|
||||
Err(e) => {
|
||||
log::warn!("Failed to generate search embedding: {}", e);
|
||||
@@ -2942,17 +2943,34 @@ Return ONLY the summary, nothing else."#,
|
||||
// Generate embedding for name + description (best-effort) via the
|
||||
// configured local backend.
|
||||
let embed_text = format!("{} {}", name, description);
|
||||
let embedding: Option<Vec<u8>> =
|
||||
match crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), &embed_text).await {
|
||||
Ok(vec) => {
|
||||
let bytes: Vec<u8> = vec.iter().flat_map(|f| f.to_le_bytes()).collect();
|
||||
Some(bytes)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Embedding generation failed for entity '{}': {}", name, e);
|
||||
None
|
||||
}
|
||||
};
|
||||
let embedding: Option<Vec<u8>> = match crate::ai::embed_document(
|
||||
&self.ollama,
|
||||
self.llamacpp.as_deref(),
|
||||
&embed_text,
|
||||
)
|
||||
.await
|
||||
{
|
||||
// The entities table has no dim check at the DAO layer, and a
|
||||
// wrong-dim vector silently kills dedup/recall (cosine over
|
||||
// mismatched lengths is 0) — guard here, store None instead.
|
||||
Ok(vec) if vec.len() == crate::ai::embedding_dim() => {
|
||||
let bytes: Vec<u8> = vec.iter().flat_map(|f| f.to_le_bytes()).collect();
|
||||
Some(bytes)
|
||||
}
|
||||
Ok(vec) => {
|
||||
log::warn!(
|
||||
"Entity '{}' embedding has {} dims (expected {}) — storing without embedding",
|
||||
name,
|
||||
vec.len(),
|
||||
crate::ai::embedding_dim()
|
||||
);
|
||||
None
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Embedding generation failed for entity '{}': {}", name, e);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let now = chrono::Utc::now().timestamp();
|
||||
let insert = InsertEntity {
|
||||
|
||||
Reference in New Issue
Block a user