Fix RAG vector-space mismatch and search_rag retrieval quality
Queries embedded via llama-swap were searching corpora embedded via
Ollama (measured: spaces diverged). Introduce LocalLlm — the local
Ollama + llama-swap pair with LLM_BACKEND dispatch baked in — and route
all embedding writers through it; anything embedding via a concrete
client reintroduces the bug.
- search_rag: embed the model's query verbatim (no metadata boilerplate),
make date optional — no time-decay when omitted, so "when did X
happen?" queries rank purely by similarity across all time
- reembed_embeddings bin: re-embed summaries / calendar / search /
knowledge entities via the active backend, with an old-vs-new cosine
similarity report per table and truncate-and-retry for inputs that
exceed the embed server's physical batch size
- import_calendar, import_search_history: embed through LocalLlm
- search_messages / get_sms_messages: render sender → recipient so sent
messages are attributable to a conversation
- insight job failures: store the one-line anyhow context chain ({:#})
instead of the Debug dump, which was being shown to the client verbatim
- serialize env_dispatch tests behind a lock (parallel-runner flake)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+121
-35
@@ -575,6 +575,67 @@ impl InsightGenerator {
|
||||
Ok(formatted)
|
||||
}
|
||||
|
||||
/// Semantic search over daily summaries for the agentic `search_rag`
|
||||
/// tool. Embeds the caller's query as-is (no metadata boilerplate) and
|
||||
/// only applies time weighting when an anchor date is provided —
|
||||
/// without one, results rank purely by similarity across all time.
|
||||
async fn search_summaries_semantic(
|
||||
&self,
|
||||
query: &str,
|
||||
date: Option<chrono::NaiveDate>,
|
||||
limit: usize,
|
||||
) -> Result<Vec<String>> {
|
||||
let tracer = global_tracer();
|
||||
let current_cx = opentelemetry::Context::current();
|
||||
let mut span = tracer.start_with_context("ai.rag.search_daily_summaries", ¤t_cx);
|
||||
span.set_attribute(KeyValue::new("query", query.to_string()));
|
||||
span.set_attribute(KeyValue::new("limit", limit as i64));
|
||||
span.set_attribute(KeyValue::new("time_weighted", date.is_some()));
|
||||
if let Some(d) = date {
|
||||
span.set_attribute(KeyValue::new("date", d.to_string()));
|
||||
}
|
||||
let search_cx = current_cx.with_span(span);
|
||||
|
||||
log::info!("RAG QUERY: {} (anchor date: {:?})", query, date);
|
||||
|
||||
// Must use the same backend that populated the daily-summary
|
||||
// embeddings or similarity search is garbage (see embed_one docs).
|
||||
let query_embedding =
|
||||
crate::ai::embed_one(&self.ollama, self.llamacpp.as_deref(), query).await?;
|
||||
|
||||
let mut summary_dao = self
|
||||
.daily_summary_dao
|
||||
.lock()
|
||||
.expect("Unable to lock DailySummaryDao");
|
||||
|
||||
let similar_summaries = match date {
|
||||
Some(d) => summary_dao.find_similar_summaries_with_time_weight(
|
||||
&search_cx,
|
||||
&query_embedding,
|
||||
&d.format("%Y-%m-%d").to_string(),
|
||||
limit,
|
||||
),
|
||||
None => summary_dao.find_similar_summaries(&search_cx, &query_embedding, limit),
|
||||
}
|
||||
.map_err(|e| anyhow::anyhow!("Failed to find similar summaries: {:?}", e))?;
|
||||
|
||||
search_cx.span().set_attribute(KeyValue::new(
|
||||
"results_count",
|
||||
similar_summaries.len() as i64,
|
||||
));
|
||||
search_cx.span().set_status(Status::Ok);
|
||||
|
||||
Ok(similar_summaries
|
||||
.into_iter()
|
||||
.map(|s| {
|
||||
format!(
|
||||
"[{}] {} ({} messages):\n{}",
|
||||
s.date, s.contact, s.message_count, s.summary
|
||||
)
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Build a metadata-based query (fallback when no topics available)
|
||||
fn build_metadata_query(
|
||||
date: chrono::NaiveDate,
|
||||
@@ -1737,13 +1798,12 @@ Return ONLY the summary, nothing else."#,
|
||||
Some(q) => q.to_string(),
|
||||
None => return "Error: missing required parameter 'query'".to_string(),
|
||||
};
|
||||
let date_str = match args.get("date").and_then(|v| v.as_str()) {
|
||||
Some(d) => d,
|
||||
None => return "Error: missing required parameter 'date'".to_string(),
|
||||
};
|
||||
let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
|
||||
Ok(d) => d,
|
||||
Err(e) => return format!("Error: failed to parse date '{}': {}", date_str, e),
|
||||
let date = match args.get("date").and_then(|v| v.as_str()) {
|
||||
Some(d) => match NaiveDate::parse_from_str(d, "%Y-%m-%d") {
|
||||
Ok(d) => Some(d),
|
||||
Err(e) => return format!("Error: failed to parse date '{}': {}", d, e),
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
let contact = args
|
||||
.get("contact")
|
||||
@@ -1756,7 +1816,7 @@ Return ONLY the summary, nothing else."#,
|
||||
.clamp(1, 25) as usize;
|
||||
|
||||
log::info!(
|
||||
"tool_search_rag: query='{}', date={}, contact={:?}, limit={}",
|
||||
"tool_search_rag: query='{}', date={:?}, contact={:?}, limit={}",
|
||||
query,
|
||||
date,
|
||||
contact,
|
||||
@@ -1777,15 +1837,17 @@ Return ONLY the summary, nothing else."#,
|
||||
limit
|
||||
};
|
||||
|
||||
// Embed the model's query verbatim — a soft contact bias is the
|
||||
// only decoration. The metadata boilerplate ("On <date>, it was a
|
||||
// <weekday>") that find_relevant_messages_rag prepends drowns the
|
||||
// semantic signal, so the tool path deliberately bypasses it.
|
||||
let search_query = match contact.as_deref() {
|
||||
Some(c) => format!("{} (conversation with {})", query, c),
|
||||
None => query.clone(),
|
||||
};
|
||||
|
||||
let results = match self
|
||||
.find_relevant_messages_rag(
|
||||
date,
|
||||
None,
|
||||
contact.as_deref(),
|
||||
None,
|
||||
candidate_limit,
|
||||
Some(&query),
|
||||
)
|
||||
.search_summaries_semantic(&search_query, date, candidate_limit)
|
||||
.await
|
||||
{
|
||||
Ok(results) if !results.is_empty() => results,
|
||||
@@ -2062,12 +2124,15 @@ Return ONLY the summary, nothing else."#,
|
||||
/// Render a list of [`SmsSearchHit`] for the LLM. Prefers the SMS-API
|
||||
/// snippet (which already excerpts the matched span and is the only
|
||||
/// preview MMS-attachment-only matches have) over the full body, and
|
||||
/// strips the `<mark>` tags the snippet ships with.
|
||||
/// strips the `<mark>` tags the snippet ships with. Each line names
|
||||
/// both parties (`sender → recipient`) — results can span multiple
|
||||
/// conversations, and a sender-only label leaves sent messages
|
||||
/// unattributable to a thread.
|
||||
fn format_search_hits(hits: &[SmsSearchHit], mode: &str, date_filtered: bool) -> String {
|
||||
let user_name = user_display_name();
|
||||
let mut out = String::new();
|
||||
out.push_str(&format!(
|
||||
"Found {} messages (mode: {}{}):\n\n",
|
||||
"Found {} messages (mode: {}{}, sender → recipient):\n\n",
|
||||
hits.len(),
|
||||
mode,
|
||||
if date_filtered { ", date-filtered" } else { "" }
|
||||
@@ -2076,10 +2141,10 @@ Return ONLY the summary, nothing else."#,
|
||||
let date = chrono::DateTime::from_timestamp(h.date, 0)
|
||||
.map(|dt| dt.format("%Y-%m-%d").to_string())
|
||||
.unwrap_or_else(|| h.date.to_string());
|
||||
let direction: &str = if h.type_ == 2 {
|
||||
&user_name
|
||||
let direction = if h.type_ == 2 {
|
||||
format!("{} → {}", user_name, h.contact_name)
|
||||
} else {
|
||||
&h.contact_name
|
||||
format!("{} → {}", h.contact_name, user_name)
|
||||
};
|
||||
let score = h
|
||||
.similarity_score
|
||||
@@ -2150,11 +2215,18 @@ Return ONLY the summary, nothing else."#,
|
||||
{
|
||||
Ok(messages) if !messages.is_empty() => {
|
||||
let user_name = user_display_name();
|
||||
// Name both parties — without a contact filter the window
|
||||
// spans every conversation, and a sender-only label leaves
|
||||
// sent messages unattributable to a thread.
|
||||
let formatted: Vec<String> = messages
|
||||
.iter()
|
||||
.take(limit)
|
||||
.map(|m| {
|
||||
let sender: &str = if m.is_sent { &user_name } else { &m.contact };
|
||||
let direction = if m.is_sent {
|
||||
format!("{} → {}", user_name, m.contact)
|
||||
} else {
|
||||
format!("{} → {}", m.contact, user_name)
|
||||
};
|
||||
let ts = DateTime::from_timestamp(m.timestamp, 0)
|
||||
.map(|dt| {
|
||||
dt.with_timezone(&Local)
|
||||
@@ -2162,7 +2234,7 @@ Return ONLY the summary, nothing else."#,
|
||||
.to_string()
|
||||
})
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
format!("[{}] {}: {}", ts, sender, m.body)
|
||||
format!("[{}] {}: {}", ts, direction, m.body)
|
||||
})
|
||||
.collect();
|
||||
format!(
|
||||
@@ -3206,21 +3278,25 @@ Return ONLY the summary, nothing else."#,
|
||||
if opts.daily_summaries_present {
|
||||
tools.push(Tool::function(
|
||||
"search_rag",
|
||||
"Date-anchored semantic search over the user's daily-summary corpus. \
|
||||
Returns up to `limit` summaries most semantically similar to `query`, \
|
||||
weighted toward summaries near `date`. For raw message text across all \
|
||||
time, prefer `search_messages`. \
|
||||
Examples: `{query: \"family dinner\", date: \"2018-12-24\"}` — what \
|
||||
"Semantic search over the user's daily-summary corpus. Returns up to \
|
||||
`limit` summaries most semantically similar to `query`. Pass `date` \
|
||||
to anchor in time: summaries near that date rank higher and matches \
|
||||
months away decay sharply. Omit `date` to rank purely by semantic \
|
||||
similarity across all time — do this for \"when did X happen?\" \
|
||||
questions where the date is unknown. For raw message text, prefer \
|
||||
`search_messages`. \
|
||||
Examples: `{query: \"family dinner\"}` — best matches across all \
|
||||
time. `{query: \"family dinner\", date: \"2018-12-24\"}` — what \
|
||||
daily summaries near Christmas Eve mention family / dinner / gathering. \
|
||||
`{query: \"work travel\", date: \"2019-06-15\", contact: \"Alice\"}` — \
|
||||
narrowed to summaries that involve Alice.",
|
||||
biased toward summaries that involve Alice.",
|
||||
serde_json::json!({
|
||||
"type": "object",
|
||||
"required": ["query", "date"],
|
||||
"required": ["query"],
|
||||
"properties": {
|
||||
"query": { "type": "string", "description": "Free-text query, semantically matched." },
|
||||
"date": { "type": "string", "description": "Anchor date, YYYY-MM-DD. Summaries near this date rank higher." },
|
||||
"contact": { "type": "string", "description": "Optional contact name to bias toward conversations with that person." },
|
||||
"date": { "type": "string", "description": "Optional anchor date, YYYY-MM-DD. When set, summaries near this date rank higher; omit to search all time evenly." },
|
||||
"contact": { "type": "string", "description": "Optional contact name to bias toward conversations with that person (soft semantic bias, not a hard filter)." },
|
||||
"limit": { "type": "integer", "description": "Max summaries to return (default 10, max 25)." }
|
||||
}
|
||||
}),
|
||||
@@ -4763,12 +4839,22 @@ mod tests {
|
||||
let hit = make_search_hit(1, "Sarah", "see you at the lake tomorrow", None, 1);
|
||||
let out = InsightGenerator::format_search_hits(&[hit], "fts5", false);
|
||||
|
||||
assert!(out.starts_with("Found 1 messages (mode: fts5):"));
|
||||
assert!(out.starts_with("Found 1 messages (mode: fts5"));
|
||||
assert!(out.contains("see you at the lake tomorrow"));
|
||||
assert!(out.contains("Sarah —"));
|
||||
// Received message: contact is the sender.
|
||||
assert!(out.contains("Sarah →"));
|
||||
assert!(!out.contains("date-filtered"));
|
||||
}
|
||||
|
||||
#[test]
fn format_search_hits_labels_sent_direction() {
    // A hit built with type 2 is rendered on the "sent" branch, where the
    // contact appears on the recipient side of the arrow. Search results
    // can mix several conversations, so the recipient has to be named.
    let sent_hit = make_search_hit(5, "Sarah", "on my way", None, 2);
    let rendered = InsightGenerator::format_search_hits(&[sent_hit], "fts5", false);

    let names_recipient = rendered.contains("→ Sarah —");
    assert!(names_recipient);
}
|
||||
|
||||
#[test]
|
||||
fn format_search_hits_prefers_snippet_over_body_and_strips_marks() {
|
||||
let hit = make_search_hit(
|
||||
@@ -4799,7 +4885,7 @@ mod tests {
|
||||
|
||||
assert!(out.contains("birthday_cake.jpg"));
|
||||
assert!(!out.contains("<mark>"));
|
||||
assert!(out.contains("Mom —"));
|
||||
assert!(out.contains("Mom →"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user