Fix RAG vector-space mismatch and search_rag retrieval quality

Queries embedded via llama-swap were searching corpora embedded via
Ollama (measured: spaces diverged). Introduce LocalLlm — the local
Ollama + llama-swap pair with LLM_BACKEND dispatch baked in — and route
all embedding writers through it; anything embedding via a concrete
client reintroduces the bug.

- search_rag: embed the model's query verbatim (no metadata boilerplate),
  make date optional — no time-decay when omitted, so "when did X
  happen?" queries rank purely by similarity across all time
- reembed_embeddings bin: re-embed summaries / calendar / search /
  knowledge entities via the active backend, with old-new cosine report
  per table and truncate-and-retry for inputs over the embed server's
  physical batch size
- import_calendar, import_search_history: embed through LocalLlm
- search_messages / get_sms_messages: render sender → recipient so sent
  messages are attributable to a conversation
- insight job failures: store the one-line anyhow context chain ({:#})
  instead of the Debug dump the client was shown verbatim
- serialize env_dispatch tests behind a lock (parallel-runner flake)

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-06-11 19:06:52 -04:00
parent 0accc4ef2f
commit a022a3d15d
8 changed files with 738 additions and 99 deletions
+121 -35
View File
@@ -575,6 +575,67 @@ impl InsightGenerator {
Ok(formatted)
}
/// Runs a semantic lookup against the daily-summary corpus on behalf of the
/// agentic `search_rag` tool.
///
/// `query` is embedded exactly as passed in — nothing is prepended or
/// appended. When `date` is `Some`, the time-weighted DAO search is used
/// with that date (formatted `YYYY-MM-DD`) as the anchor; when it is `None`
/// the plain similarity search is used, so no time weighting is applied.
/// `limit` is forwarded to the DAO unchanged.
///
/// Each hit is rendered as `[date] contact (N messages):\n<summary>`.
///
/// # Errors
/// Fails if embedding the query fails or if the DAO search reports an error.
///
/// # Panics
/// Panics if the `DailySummaryDao` mutex cannot be locked.
async fn search_summaries_semantic(
    &self,
    query: &str,
    date: Option<chrono::NaiveDate>,
    limit: usize,
) -> Result<Vec<String>> {
    // Open a child span under whatever context is current and record the
    // search parameters on it before handing it to the DAO.
    let tracer = global_tracer();
    let parent_cx = opentelemetry::Context::current();
    let mut rag_span = tracer.start_with_context("ai.rag.search_daily_summaries", &parent_cx);
    rag_span.set_attribute(KeyValue::new("query", query.to_string()));
    rag_span.set_attribute(KeyValue::new("limit", limit as i64));
    rag_span.set_attribute(KeyValue::new("time_weighted", date.is_some()));
    if let Some(anchor) = date {
        rag_span.set_attribute(KeyValue::new("date", anchor.to_string()));
    }
    let span_cx = parent_cx.with_span(rag_span);

    log::info!("RAG QUERY: {} (anchor date: {:?})", query, date);

    // The query has to be embedded by the same backend that produced the
    // stored daily-summary embeddings, otherwise similarity scores are
    // meaningless (see the embed_one docs).
    let embedding =
        crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), query).await?;

    // Lock only after the await above, so the guard is never held across it.
    let mut dao = self
        .daily_summary_dao
        .lock()
        .expect("Unable to lock DailySummaryDao");

    let lookup = if let Some(anchor) = date {
        let anchor_str = anchor.format("%Y-%m-%d").to_string();
        dao.find_similar_summaries_with_time_weight(&span_cx, &embedding, &anchor_str, limit)
    } else {
        dao.find_similar_summaries(&span_cx, &embedding, limit)
    };
    let hits =
        lookup.map_err(|e| anyhow::anyhow!("Failed to find similar summaries: {:?}", e))?;

    span_cx
        .span()
        .set_attribute(KeyValue::new("results_count", hits.len() as i64));
    span_cx.span().set_status(Status::Ok);

    let mut rendered = Vec::with_capacity(hits.len());
    for hit in hits {
        rendered.push(format!(
            "[{}] {} ({} messages):\n{}",
            hit.date, hit.contact, hit.message_count, hit.summary
        ));
    }
    Ok(rendered)
}
/// Build a metadata-based query (fallback when no topics available)
fn build_metadata_query(
date: chrono::NaiveDate,
@@ -1737,13 +1798,12 @@ Return ONLY the summary, nothing else."#,
Some(q) => q.to_string(),
None => return "Error: missing required parameter 'query'".to_string(),
};
let date_str = match args.get("date").and_then(|v| v.as_str()) {
Some(d) => d,
None => return "Error: missing required parameter 'date'".to_string(),
};
let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
Ok(d) => d,
Err(e) => return format!("Error: failed to parse date '{}': {}", date_str, e),
let date = match args.get("date").and_then(|v| v.as_str()) {
Some(d) => match NaiveDate::parse_from_str(d, "%Y-%m-%d") {
Ok(d) => Some(d),
Err(e) => return format!("Error: failed to parse date '{}': {}", d, e),
},
None => None,
};
let contact = args
.get("contact")
@@ -1756,7 +1816,7 @@ Return ONLY the summary, nothing else."#,
.clamp(1, 25) as usize;
log::info!(
"tool_search_rag: query='{}', date={}, contact={:?}, limit={}",
"tool_search_rag: query='{}', date={:?}, contact={:?}, limit={}",
query,
date,
contact,
@@ -1777,15 +1837,17 @@ Return ONLY the summary, nothing else."#,
limit
};
// Embed the model's query verbatim — a soft contact bias is the
// only decoration. The metadata boilerplate ("On <date>, it was a
// <weekday>") that find_relevant_messages_rag prepends drowns the
// semantic signal, so the tool path deliberately bypasses it.
let search_query = match contact.as_deref() {
Some(c) => format!("{} (conversation with {})", query, c),
None => query.clone(),
};
let results = match self
.find_relevant_messages_rag(
date,
None,
contact.as_deref(),
None,
candidate_limit,
Some(&query),
)
.search_summaries_semantic(&search_query, date, candidate_limit)
.await
{
Ok(results) if !results.is_empty() => results,
@@ -2062,12 +2124,15 @@ Return ONLY the summary, nothing else."#,
/// Render a list of [`SmsSearchHit`] for the LLM. Prefers the SMS-API
/// snippet (which already excerpts the matched span and is the only
/// preview MMS-attachment-only matches have) over the full body, and
/// strips the `<mark>` tags the snippet ships with.
/// strips the `<mark>` tags the snippet ships with. Each line names
/// both parties (`sender → recipient`) — results can span multiple
/// conversations, and a sender-only label leaves sent messages
/// unattributable to a thread.
fn format_search_hits(hits: &[SmsSearchHit], mode: &str, date_filtered: bool) -> String {
let user_name = user_display_name();
let mut out = String::new();
out.push_str(&format!(
"Found {} messages (mode: {}{}):\n\n",
"Found {} messages (mode: {}{}, sender → recipient):\n\n",
hits.len(),
mode,
if date_filtered { ", date-filtered" } else { "" }
@@ -2076,10 +2141,10 @@ Return ONLY the summary, nothing else."#,
let date = chrono::DateTime::from_timestamp(h.date, 0)
.map(|dt| dt.format("%Y-%m-%d").to_string())
.unwrap_or_else(|| h.date.to_string());
let direction: &str = if h.type_ == 2 {
&user_name
let direction = if h.type_ == 2 {
format!("{} → {}", user_name, h.contact_name)
} else {
&h.contact_name
format!("{} → {}", h.contact_name, user_name)
};
let score = h
.similarity_score
@@ -2150,11 +2215,18 @@ Return ONLY the summary, nothing else."#,
{
Ok(messages) if !messages.is_empty() => {
let user_name = user_display_name();
// Name both parties — without a contact filter the window
// spans every conversation, and a sender-only label leaves
// sent messages unattributable to a thread.
let formatted: Vec<String> = messages
.iter()
.take(limit)
.map(|m| {
let sender: &str = if m.is_sent { &user_name } else { &m.contact };
let direction = if m.is_sent {
format!("{} → {}", user_name, m.contact)
} else {
format!("{} → {}", m.contact, user_name)
};
let ts = DateTime::from_timestamp(m.timestamp, 0)
.map(|dt| {
dt.with_timezone(&Local)
@@ -2162,7 +2234,7 @@ Return ONLY the summary, nothing else."#,
.to_string()
})
.unwrap_or_else(|| "unknown".to_string());
format!("[{}] {}: {}", ts, sender, m.body)
format!("[{}] {}: {}", ts, direction, m.body)
})
.collect();
format!(
@@ -3206,21 +3278,25 @@ Return ONLY the summary, nothing else."#,
if opts.daily_summaries_present {
tools.push(Tool::function(
"search_rag",
"Date-anchored semantic search over the user's daily-summary corpus. \
Returns up to `limit` summaries most semantically similar to `query`, \
weighted toward summaries near `date`. For raw message text across all \
time, prefer `search_messages`. \
Examples: `{query: \"family dinner\", date: \"2018-12-24\"}` — what \
"Semantic search over the user's daily-summary corpus. Returns up to \
`limit` summaries most semantically similar to `query`. Pass `date` \
to anchor in time: summaries near that date rank higher and matches \
months away decay sharply. Omit `date` to rank purely by semantic \
similarity across all time — do this for \"when did X happen?\" \
questions where the date is unknown. For raw message text, prefer \
`search_messages`. \
Examples: `{query: \"family dinner\"}` — best matches across all \
time. `{query: \"family dinner\", date: \"2018-12-24\"}` — what \
daily summaries near Christmas Eve mention family / dinner / gathering. \
`{query: \"work travel\", date: \"2019-06-15\", contact: \"Alice\"}` — \
narrowed to summaries that involve Alice.",
biased toward summaries that involve Alice.",
serde_json::json!({
"type": "object",
"required": ["query", "date"],
"required": ["query"],
"properties": {
"query": { "type": "string", "description": "Free-text query, semantically matched." },
"date": { "type": "string", "description": "Anchor date, YYYY-MM-DD. Summaries near this date rank higher." },
"contact": { "type": "string", "description": "Optional contact name to bias toward conversations with that person." },
"date": { "type": "string", "description": "Optional anchor date, YYYY-MM-DD. When set, summaries near this date rank higher; omit to search all time evenly." },
"contact": { "type": "string", "description": "Optional contact name to bias toward conversations with that person (soft semantic bias, not a hard filter)." },
"limit": { "type": "integer", "description": "Max summaries to return (default 10, max 25)." }
}
}),
@@ -4763,12 +4839,22 @@ mod tests {
let hit = make_search_hit(1, "Sarah", "see you at the lake tomorrow", None, 1);
let out = InsightGenerator::format_search_hits(&[hit], "fts5", false);
assert!(out.starts_with("Found 1 messages (mode: fts5):"));
assert!(out.starts_with("Found 1 messages (mode: fts5"));
assert!(out.contains("see you at the lake tomorrow"));
assert!(out.contains("Sarah —"));
// Received message: contact is the sender.
assert!(out.contains("Sarah →"));
assert!(!out.contains("date-filtered"));
}
#[test]
fn format_search_hits_labels_sent_direction() {
    // A hit built with type 2 is rendered with the contact on the recipient
    // side of the arrow. Search results can mix several conversations, so a
    // line that only named the sender could not be tied to a thread.
    let sent_hit = make_search_hit(5, "Sarah", "on my way", None, 2);
    let rendered = InsightGenerator::format_search_hits(&[sent_hit], "fts5", false);
    assert!(rendered.contains("→ Sarah —"));
}
#[test]
fn format_search_hits_prefers_snippet_over_body_and_strips_marks() {
let hit = make_search_hit(
@@ -4799,7 +4885,7 @@ mod tests {
assert!(out.contains("birthday_cake.jpg"));
assert!(!out.contains("<mark>"));
assert!(out.contains("Mom —"));
assert!(out.contains("Mom →"));
}
#[test]