Add contact name filter to SMS search tool + misc improvements

- sms search tool: accept contact name, trim/validate, skip when
  contact_id is set, pass to API client
- sms_client: new contact field in SmsSearchParams, URL-encode on wire
- Tool description clarifies contact_id takes precedence when both given
- Add parse_title_body helper for LLM response parsing
- llamacpp backend improvements
This commit is contained in:
Cameron Cordes
2026-05-25 21:46:18 -04:00
parent b9175e2718
commit 0a627f4880
4 changed files with 165 additions and 67 deletions
+109 -38
View File
@@ -28,6 +28,39 @@ use crate::otel::global_tracer;
use crate::tags::TagDao;
use crate::utils::{earliest_fs_time, normalize_path};
/// Split a model response shaped like `Title: <title>\n\n<body>` into `(title, body)`.
///
/// Resolution order:
/// 1. A leading `Title:` label (ASCII case-insensitive). The title is the first
///    non-empty line after the label and the body is everything after that
///    line. One pair of double quotes wrapping the title is removed.
/// 2. Otherwise the first sentence — the text before the first `.` that is
///    followed by whitespace — becomes the title, provided it is non-empty,
///    at most 100 bytes long, and some body text follows it.
/// 3. Otherwise the title is the first 60 characters of the input and the body
///    is the whole trimmed input.
///
/// Surrounding whitespace is trimmed from the input, the title and the body.
pub(crate) fn parse_title_body(raw: &str) -> (String, String) {
    const LABEL: &str = "title:";

    // Drop one pair of double quotes wrapping `s`; anything else is returned as-is.
    fn unquote(s: &str) -> &str {
        s.strip_prefix('"')
            .and_then(|inner| inner.strip_suffix('"'))
            .map(str::trim)
            .unwrap_or(s)
    }

    let trimmed = raw.trim();

    // `get` yields None when the input is shorter than the label or the cut
    // would land inside a multi-byte character, so the slice below cannot panic.
    let labelled = trimmed
        .get(..LABEL.len())
        .filter(|head| head.eq_ignore_ascii_case(LABEL))
        .map(|_| trimmed[LABEL.len()..].trim_start());

    // Split at the FIRST newline: a blank-line separator always starts with a
    // newline, so looking for "\n\n" first would only ever find a later one and
    // swallow the first body paragraph into the title.
    if let Some((title_line, rest)) = labelled.and_then(|text| text.split_once('\n')) {
        let title = unquote(title_line.trim());
        let body = rest.trim();
        if !title.is_empty() && !body.is_empty() {
            return (title.to_string(), body.to_string());
        }
    }

    // Fallback: the earliest sentence boundary, whichever whitespace follows the period.
    let sentence_end = trimmed
        .match_indices('.')
        .map(|(idx, _)| idx)
        .find(|&idx| trimmed[idx + 1..].starts_with(char::is_whitespace));
    if let Some(pos) = sentence_end {
        let title = &trimmed[..pos];
        let body = trimmed[pos + 1..].trim();
        if !title.is_empty() && title.len() <= 100 && !body.is_empty() {
            return (title.to_string(), body.to_string());
        }
    }

    // Last resort: truncate by characters (not bytes) so multi-byte text is never cut mid-character.
    let title: String = trimmed.chars().take(60).collect();
    (title.trim_end().to_string(), trimmed.to_string())
}
/// Combine an optional personal Apollo Place with an optional Nominatim
/// reverse-geocoded city, falling back to bare coordinates when neither
/// resolves. Free function so we can test it cheaply without spinning up
@@ -1927,6 +1960,11 @@ Return ONLY the summary, nothing else."#,
.unwrap_or(20)
.clamp(1, 50) as usize;
let contact_id = args.get("contact_id").and_then(|v| v.as_i64());
let contact = args.get("contact")
.and_then(|v| v.as_str())
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.filter(|_| contact_id.is_none());
let start_ts = args.get("start_ts").and_then(|v| v.as_i64());
let end_ts = args.get("end_ts").and_then(|v| v.as_i64());
let is_mms = args.get("is_mms").and_then(|v| v.as_bool());
@@ -1934,10 +1972,11 @@ Return ONLY the summary, nothing else."#,
let has_date_filter = start_ts.is_some() || end_ts.is_some();
log::info!(
"tool_search_messages: query='{}', mode={}, contact_id={:?}, range=[{:?}, {:?}], is_mms={:?}, has_media={:?}, limit={}",
"tool_search_messages: query='{}', mode={}, contact_id={:?}, contact={:?}, range=[{:?}, {:?}], is_mms={:?}, has_media={:?}, limit={}",
query,
mode,
contact_id,
contact,
start_ts,
end_ts,
is_mms,
@@ -1952,6 +1991,7 @@ Return ONLY the summary, nothing else."#,
mode: mode.as_str(),
limit: user_limit,
contact_id,
contact,
date_from: start_ts,
date_to: end_ts,
is_mms,
@@ -3033,28 +3073,31 @@ Return ONLY the summary, nothing else."#,
}
tools.push(Tool::function(
"search_messages",
"search_messages",
"Search SMS/MMS messages — bodies and (for MMS) attachment text + filenames. \
Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \
`semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \
degrades to fts5 when embeddings absent). Optional filters: `start_ts` / `end_ts` (real-UTC unix \
seconds), `contact_id`, `is_mms` (true = MMS only, false = SMS only), `has_media` (true = messages \
with image/video/audio attachments only). For pure date / contact browsing without keywords, prefer \
`get_sms_messages`. \
\n\nFTS5 query syntax (works in fts5 + hybrid modes):\n\
- Phrase: `\"trader joe's\"` — exact word sequence (use double quotes).\n\
- Prefix: `restaur*` — matches restaurant, restaurants, restauranteur, ….\n\
- Boolean: `dinner AND tahoe`, `wedding OR reception OR ceremony`, `vacation NOT work` (operators must be UPPERCASE).\n\
- Proximity: `NEAR(meeting work, 5)` — both terms within 5 tokens of each other.\n\
- Combine: `(reception OR ceremony) AND tahoe*` — group with parens.\n\
Unquoted multi-word queries are treated as implicit AND. Apostrophes / hyphens / colons are safe — they no longer downgrade to a slow LIKE scan. Use `mode: \"fts5\"` when you want the operators above to be authoritative; `hybrid` still respects them but may surface semantically-similar non-keyword hits alongside.\n\n\
Examples:\n\
- `{query: \"trader joe's\"}` — phrase across all time.\n\
- `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week.\n\
- `{query: \"vacation\", has_media: true}` — only matches that include photos / videos.\n\
- `{query: \"wedding OR reception OR ceremony\", mode: \"fts5\"}` — any of several synonyms.\n\
- `{query: \"restaur*\", mode: \"fts5\"}` — prefix expansion for varying word forms.\n\
- `{query: \"NEAR(birthday cake, 5)\", mode: \"fts5\"}` — terms close together but in any order.",
Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \
`semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \
degrades to fts5 when embeddings absent). Optional filters: `start_ts` / `end_ts` (real-UTC unix \
seconds), `contact` (contact name, case-insensitive), `contact_id` (numeric), `is_mms` \
(true = MMS only, false = SMS only), `has_media` (true = messages with image/video/audio \
attachments only). Prefer `contact` over `contact_id` — the name is resolved server-side. \
If both are provided, `contact_id` takes precedence. \
For pure date / contact browsing without keywords, prefer `get_sms_messages`. \
\n\nFTS5 query syntax (works in fts5 + hybrid modes):\n\
- Phrase: `\"trader joe's\"` — exact word sequence (use double quotes).\n\
- Prefix: `restaur*` — matches restaurant, restaurants, restauranteur, ….\n\
- Boolean: `dinner AND tahoe`, `wedding OR reception OR ceremony`, `vacation NOT work` (operators must be UPPERCASE).\n\
- Proximity: `NEAR(meeting work, 5)` — both terms within 5 tokens of each other.\n\
- Combine: `(reception OR ceremony) AND tahoe*` — group with parens.\n\
Unquoted multi-word queries are treated as implicit AND. Apostrophes / hyphens / colons are safe — they no longer downgrade to a slow LIKE scan. Use `mode: \"fts5\"` when you want the operators above to be authoritative; `hybrid` still respects them but may surface semantically-similar non-keyword hits alongside.\n\n\
Examples:\n\
- `{query: \"trader joe's\"}` — phrase across all time.\n\
- `{query: \"dinner\", contact: \"Mom\"}` — keyword scoped to Mom's messages.\n\
- `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week.\n\
- `{query: \"vacation\", has_media: true}` — only matches that include photos / videos.\n\
- `{query: \"wedding OR reception OR ceremony\", mode: \"fts5\"}` — any of several synonyms.\n\
- `{query: \"restaur*\", mode: \"fts5\"}` — prefix expansion for varying word forms.\n\
- `{query: \"NEAR(birthday cake, 5)\", mode: \"fts5\"}` — terms close together but in any order.",
serde_json::json!({
"type": "object",
"required": ["query"],
@@ -3063,6 +3106,7 @@ Return ONLY the summary, nothing else."#,
"mode": { "type": "string", "enum": ["fts5", "semantic", "hybrid"], "description": "Search strategy. Default: hybrid." },
"limit": { "type": "integer", "description": "Max results (default 20, max 50)." },
"contact_id": { "type": "integer", "description": "Optional numeric contact id to scope the search." },
"contact": { "type": "string", "description": "Optional contact name (case-insensitive). Resolved to contact_id server-side. Use this when you know the name but not the ID." },
"start_ts": { "type": "integer", "description": "Optional inclusive lower bound, real-UTC unix seconds." },
"end_ts": { "type": "integer", "description": "Optional inclusive upper bound, real-UTC unix seconds." },
"is_mms": { "type": "boolean", "description": "Optional: true to restrict to MMS, false to restrict to SMS." },
@@ -3534,7 +3578,8 @@ Return ONLY the summary, nothing else."#,
- When you identify people / places / events / things, use store_entity + store_fact to grow the persistent memory.\n\
- Before store_entity, call recall_entities to check whether a similar name already exists; reuse the existing entity_id rather than creating a near-duplicate (e.g. \"Sara\" vs \"Sarah J.\"). The DAO will collapse obvious cosine matches, but choosing the existing id keeps facts and photo links consolidated.\n\
- Predicates should be relationship-shaped verbs that encode a queryable claim — `lives_in`, `works_at`, `attended`, `is_friend_of`, `is_parent_of`, `interested_in`, `married_to`, `owns`. DO NOT use vague speech-act predicates like `expressed`, `said`, `mentioned`, `stated`, `quoted`, `noted`, `discussed`, `thought`, `wondered`. DO NOT store quotations or sentence fragments as `object_value` — paraphrase into a structured claim. Bad: `(Cameron, expressed, \"I'm tempted to get a part-time job there\")`. Good: `(Cameron, considered_employment_at, <Place>)` or `(Cameron, interested_in, \"part-time work\")`.\n\
- A tool returning no results is informative; continue with the others.",
- A tool returning no results is informative; continue with the others.\n\
- When writing your final answer, start with \"Title: <short title>\" (max 8 words) on the first line, then a blank line, then the body.",
);
let mut out = identity;
@@ -4059,7 +4104,7 @@ Return ONLY the summary, nothing else."#,
iterations_used
);
messages.push(ChatMessage::user(format!(
"Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as {}.",
"Based on the context gathered, please write the final photo insight. Start with \"Title: <short title>\" on the first line (max 8 words), then a blank line, then the detailed personal summary. Write in first person as {}.",
user_display_name()
)));
let (final_response, prompt_tokens, eval_tokens) = backend
@@ -4077,21 +4122,11 @@ Return ONLY the summary, nothing else."#,
.set_attribute(KeyValue::new("iterations_used", iterations_used as i64));
loop_cx.span().set_status(Status::Ok);
// 13. Generate title via the same backend so voice stays consistent.
let title_prompt = format!(
"Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\nCapture the key moment or theme. Return ONLY the title, nothing else.",
final_content
);
let title_system = custom_system_prompt.as_deref().unwrap_or(
"You are my long term memory assistant. Use only the information provided. Do not invent details.",
);
let title_raw = backend
.chat()
.generate(&title_prompt, Some(title_system), None)
.await?;
let title = title_raw.trim().trim_matches('"').to_string();
// 13. Parse title from the model's inline response.
let (title, body) = parse_title_body(&final_content);
final_content = body;
log::info!("Agentic generated title: {}", title);
log::info!("Agentic parsed title: {}", title);
let summary_preview: String = final_content.chars().take(200).collect();
log::info!(
"Agentic generated summary ({} chars): {}",
@@ -4742,4 +4777,40 @@ mod tests {
assert!(out.contains("-> empty (pivoted)"));
assert!(out.contains("Final insight: Final title"));
}
#[test]
fn parse_title_body_standard_format() {
    // Canonical shape: "Title:" line, a blank line, then the body.
    let raw = "Title: Summer at the Lake\n\nWe spent the afternoon...";
    let (title, body) = parse_title_body(raw);
    assert_eq!(title, "Summer at the Lake");
    assert_eq!(body, "We spent the afternoon...");
}
#[test]
fn parse_title_body_single_newline() {
    // The blank line between title and body is optional.
    let raw = "Title: Morning Walk\nThe sun was rising...";
    let (title, body) = parse_title_body(raw);
    assert_eq!(title, "Morning Walk");
    assert_eq!(body, "The sun was rising...");
}
#[test]
fn parse_title_body_lowercase_prefix() {
    // A lowercase "title:" label is accepted as well.
    let raw = "title: Garden Party\n\nEveryone gathered...";
    let (title, body) = parse_title_body(raw);
    assert_eq!(title, "Garden Party");
    assert_eq!(body, "Everyone gathered...");
}
#[test]
fn parse_title_body_fallback_first_sentence() {
    // Without a "Title:" label the first sentence is used as the title.
    let raw = "A warm summer day. We gathered at the park for a picnic.";
    let (title, body) = parse_title_body(raw);
    assert_eq!(title, "A warm summer day");
    assert_eq!(body, "We gathered at the park for a picnic.");
}
#[test]
fn parse_title_body_fallback_truncate() {
    // No label and no sentence boundary: the title is a truncated copy and
    // the body is the untouched input.
    let raw = "A single long paragraph with no periods or title prefix that just keeps going on and on";
    let (title, body) = parse_title_body(raw);
    assert!(title.len() <= 60);
    assert_eq!(body, raw);
}
}