From 0a627f48806732629e6cf4535f4dd904cd755eac Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Mon, 25 May 2026 21:46:18 -0400 Subject: [PATCH] Add contact name filter to SMS search tool + misc improvements - sms search tool: accept contact name, trim/validate, skip when contact_id is set, pass to API client - sms_client: new contact field in SmsSearchParams, URL-encode on wire - Tool description clarifies contact_id takes precedence when both given - Add parse_title_body helper for LLM response parsing - llamacpp backend improvements --- src/ai/insight_chat.rs | 33 +------- src/ai/insight_generator.rs | 147 ++++++++++++++++++++++++++---------- src/ai/llamacpp.rs | 46 +++++++++++ src/ai/sms_client.rs | 6 ++ 4 files changed, 165 insertions(+), 67 deletions(-) diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index 6d52f8b..86671af 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -817,7 +817,8 @@ impl InsightChatService { let mut amended_insight_id: Option = None; if req.amend { - let title = self.generate_title(&backend, &final_content).await?; + let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content); + let final_content = body; // Amended rows intentionally do not inherit the parent's // `fewshot_source_ids`. 
/// Split a model response shaped like `Title: <title>\n\n<body>` into
/// `(title, body)`.
///
/// Strategies are tried in order; the first one that yields a non-empty
/// title *and* a non-empty body wins:
///
/// 1. **Explicit prefix** — the trimmed text starts with `Title:` (ASCII
///    case-insensitive). The title is the first non-blank line after the
///    prefix, with surrounding whitespace and double quotes removed; the body
///    is everything after that line, trimmed. One newline or a blank line
///    between title and body are both accepted.
/// 2. **First sentence** — the text up to the earliest `". "` or `".\n"`
///    becomes the title (without the period) provided it is at most 100
///    bytes long; the remainder is the body.
/// 3. **Truncation** — the first 60 characters (trailing whitespace removed)
///    become the title and the whole trimmed text is kept as the body.
///
/// Never panics: every slice boundary comes from `str::get`, `split_once`
/// or `find` on an ASCII pattern, so it always falls on a char boundary.
pub(crate) fn parse_title_body(raw: &str) -> (String, String) {
    const PREFIX: &str = "title:";
    const MAX_SENTENCE_TITLE_BYTES: usize = 100;
    const MAX_TRUNCATED_TITLE_CHARS: usize = 60;

    let trimmed = raw.trim();

    // 1. Explicit "Title:" prefix. `get` returns None when the text is too
    //    short or byte 6 is not a char boundary, so non-ASCII input is safe.
    let after_prefix = trimmed
        .get(..PREFIX.len())
        .filter(|head| head.eq_ignore_ascii_case(PREFIX))
        .map(|_| trimmed[PREFIX.len()..].trim_start());
    // Split at the FIRST newline. Searching for "\n\n" first would pull the
    // opening body paragraph into the title whenever the model separates the
    // title with a single newline but uses blank lines inside the body.
    if let Some((first_line, remainder)) = after_prefix.and_then(|rest| rest.split_once('\n')) {
        // Models frequently wrap the title in quotes; drop them.
        let title = first_line.trim().trim_matches('"').trim();
        let body = remainder.trim();
        if !title.is_empty() && !body.is_empty() {
            return (title.to_string(), body.to_string());
        }
    }

    // 2. First sentence. Take the earliest terminator of either kind so a
    //    heading line ending in ".\n" is not extended to a later ". ".
    let sentence_end = [". ", ".\n"]
        .iter()
        .filter_map(|sep| trimmed.find(*sep))
        .min();
    if let Some(pos) = sentence_end {
        let title = &trimmed[..pos];
        // `pos + 1` skips the one-byte '.'; the separator's whitespace is trimmed.
        let body = trimmed[pos + 1..].trim();
        if title.len() <= MAX_SENTENCE_TITLE_BYTES && !body.is_empty() {
            return (title.to_string(), body.to_string());
        }
    }

    // 3. Last resort: character-based truncation; the body stays intact.
    let title: String = trimmed.chars().take(MAX_TRUNCATED_TITLE_CHARS).collect();
    (title.trim_end().to_string(), trimmed.to_string())
}
Free function so we can test it cheaply without spinning up @@ -1927,6 +1960,11 @@ Return ONLY the summary, nothing else."#, .unwrap_or(20) .clamp(1, 50) as usize; let contact_id = args.get("contact_id").and_then(|v| v.as_i64()); + let contact = args.get("contact") + .and_then(|v| v.as_str()) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .filter(|_| contact_id.is_none()); let start_ts = args.get("start_ts").and_then(|v| v.as_i64()); let end_ts = args.get("end_ts").and_then(|v| v.as_i64()); let is_mms = args.get("is_mms").and_then(|v| v.as_bool()); @@ -1934,10 +1972,11 @@ Return ONLY the summary, nothing else."#, let has_date_filter = start_ts.is_some() || end_ts.is_some(); log::info!( - "tool_search_messages: query='{}', mode={}, contact_id={:?}, range=[{:?}, {:?}], is_mms={:?}, has_media={:?}, limit={}", + "tool_search_messages: query='{}', mode={}, contact_id={:?}, contact={:?}, range=[{:?}, {:?}], is_mms={:?}, has_media={:?}, limit={}", query, mode, contact_id, + contact, start_ts, end_ts, is_mms, @@ -1952,6 +1991,7 @@ Return ONLY the summary, nothing else."#, mode: mode.as_str(), limit: user_limit, contact_id, + contact, date_from: start_ts, date_to: end_ts, is_mms, @@ -3033,28 +3073,31 @@ Return ONLY the summary, nothing else."#, } tools.push(Tool::function( - "search_messages", +"search_messages", "Search SMS/MMS messages — bodies and (for MMS) attachment text + filenames. \ - Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \ - `semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \ - degrades to fts5 when embeddings absent). Optional filters: `start_ts` / `end_ts` (real-UTC unix \ - seconds), `contact_id`, `is_mms` (true = MMS only, false = SMS only), `has_media` (true = messages \ - with image/video/audio attachments only). For pure date / contact browsing without keywords, prefer \ - `get_sms_messages`. 
\ - \n\nFTS5 query syntax (works in fts5 + hybrid modes):\n\ - - Phrase: `\"trader joe's\"` — exact word sequence (use double quotes).\n\ - - Prefix: `restaur*` — matches restaurant, restaurants, restauranteur, ….\n\ - - Boolean: `dinner AND tahoe`, `wedding OR reception OR ceremony`, `vacation NOT work` (operators must be UPPERCASE).\n\ - - Proximity: `NEAR(meeting work, 5)` — both terms within 5 tokens of each other.\n\ - - Combine: `(reception OR ceremony) AND tahoe*` — group with parens.\n\ - Unquoted multi-word queries are treated as implicit AND. Apostrophes / hyphens / colons are safe — they no longer downgrade to a slow LIKE scan. Use `mode: \"fts5\"` when you want the operators above to be authoritative; `hybrid` still respects them but may surface semantically-similar non-keyword hits alongside.\n\n\ - Examples:\n\ - - `{query: \"trader joe's\"}` — phrase across all time.\n\ - - `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week.\n\ - - `{query: \"vacation\", has_media: true}` — only matches that include photos / videos.\n\ - - `{query: \"wedding OR reception OR ceremony\", mode: \"fts5\"}` — any of several synonyms.\n\ - - `{query: \"restaur*\", mode: \"fts5\"}` — prefix expansion for varying word forms.\n\ - - `{query: \"NEAR(birthday cake, 5)\", mode: \"fts5\"}` — terms close together but in any order.", + Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \ + `semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \ + degrades to fts5 when embeddings absent). Optional filters: `start_ts` / `end_ts` (real-UTC unix \ + seconds), `contact` (contact name, case-insensitive), `contact_id` (numeric), `is_mms` \ + (true = MMS only, false = SMS only), `has_media` (true = messages with image/video/audio \ + attachments only). Prefer `contact` over `contact_id` — the name is resolved server-side. 
\ + If both are provided, `contact_id` takes precedence. \ + For pure date / contact browsing without keywords, prefer `get_sms_messages`. \ + \n\nFTS5 query syntax (works in fts5 + hybrid modes):\n\ + - Phrase: `\"trader joe's\"` — exact word sequence (use double quotes).\n\ + - Prefix: `restaur*` — matches restaurant, restaurants, restauranteur, ….\n\ + - Boolean: `dinner AND tahoe`, `wedding OR reception OR ceremony`, `vacation NOT work` (operators must be UPPERCASE).\n\ + - Proximity: `NEAR(meeting work, 5)` — both terms within 5 tokens of each other.\n\ + - Combine: `(reception OR ceremony) AND tahoe*` — group with parens.\n\ + Unquoted multi-word queries are treated as implicit AND. Apostrophes / hyphens / colons are safe — they no longer downgrade to a slow LIKE scan. Use `mode: \"fts5\"` when you want the operators above to be authoritative; `hybrid` still respects them but may surface semantically-similar non-keyword hits alongside.\n\n\ + Examples:\n\ + - `{query: \"trader joe's\"}` — phrase across all time.\n\ + - `{query: \"dinner\", contact: \"Mom\"}` — keyword scoped to Mom's messages.\n\ + - `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week.\n\ + - `{query: \"vacation\", has_media: true}` — only matches that include photos / videos.\n\ + - `{query: \"wedding OR reception OR ceremony\", mode: \"fts5\"}` — any of several synonyms.\n\ + - `{query: \"restaur*\", mode: \"fts5\"}` — prefix expansion for varying word forms.\n\ + - `{query: \"NEAR(birthday cake, 5)\", mode: \"fts5\"}` — terms close together but in any order.", serde_json::json!({ "type": "object", "required": ["query"], @@ -3063,6 +3106,7 @@ Return ONLY the summary, nothing else."#, "mode": { "type": "string", "enum": ["fts5", "semantic", "hybrid"], "description": "Search strategy. Default: hybrid." }, "limit": { "type": "integer", "description": "Max results (default 20, max 50)." 
}, "contact_id": { "type": "integer", "description": "Optional numeric contact id to scope the search." }, + "contact": { "type": "string", "description": "Optional contact name (case-insensitive). Resolved to contact_id server-side. Use this when you know the name but not the ID." }, "start_ts": { "type": "integer", "description": "Optional inclusive lower bound, real-UTC unix seconds." }, "end_ts": { "type": "integer", "description": "Optional inclusive upper bound, real-UTC unix seconds." }, "is_mms": { "type": "boolean", "description": "Optional: true to restrict to MMS, false to restrict to SMS." }, @@ -3534,7 +3578,8 @@ Return ONLY the summary, nothing else."#, - When you identify people / places / events / things, use store_entity + store_fact to grow the persistent memory.\n\ - Before store_entity, call recall_entities to check whether a similar name already exists; reuse the existing entity_id rather than creating a near-duplicate (e.g. \"Sara\" vs \"Sarah J.\"). The DAO will collapse obvious cosine matches, but choosing the existing id keeps facts and photo links consolidated.\n\ - Predicates should be relationship-shaped verbs that encode a queryable claim — `lives_in`, `works_at`, `attended`, `is_friend_of`, `is_parent_of`, `interested_in`, `married_to`, `owns`. DO NOT use vague speech-act predicates like `expressed`, `said`, `mentioned`, `stated`, `quoted`, `noted`, `discussed`, `thought`, `wondered`. DO NOT store quotations or sentence fragments as `object_value` — paraphrase into a structured claim. Bad: `(Cameron, expressed, \"I'm tempted to get a part-time job there\")`. 
Good: `(Cameron, considered_employment_at, <Place>)` or `(Cameron, interested_in, \"part-time work\")`.\n\ - - A tool returning no results is informative; continue with the others.", + - A tool returning no results is informative; continue with the others.\n\ + - When writing your final answer, start with \"Title: <short title>\" (max 8 words) on the first line, then a blank line, then the body.", ); let mut out = identity; @@ -4059,7 +4104,7 @@ Return ONLY the summary, nothing else."#, iterations_used ); messages.push(ChatMessage::user(format!( - "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as {}.", + "Based on the context gathered, please write the final photo insight. Start with \"Title: <short title>\" on the first line (max 8 words), then a blank line, then the detailed personal summary. Write in first person as {}.", user_display_name() ))); let (final_response, prompt_tokens, eval_tokens) = backend @@ -4077,21 +4122,11 @@ Return ONLY the summary, nothing else."#, .set_attribute(KeyValue::new("iterations_used", iterations_used as i64)); loop_cx.span().set_status(Status::Ok); - // 13. Generate title via the same backend so voice stays consistent. - let title_prompt = format!( - "Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\nCapture the key moment or theme. Return ONLY the title, nothing else.", - final_content - ); - let title_system = custom_system_prompt.as_deref().unwrap_or( - "You are my long term memory assistant. Use only the information provided. Do not invent details.", - ); - let title_raw = backend - .chat() - .generate(&title_prompt, Some(title_system), None) - .await?; - let title = title_raw.trim().trim_matches('"').to_string(); + // 13. Parse title from the model's inline response. 
/// Canonical shape: `Title:` line, blank line, body.
#[test]
fn parse_title_body_standard_format() {
    let (t, b) = parse_title_body("Title: Summer at the Lake\n\nWe spent the afternoon...");
    assert_eq!(t, "Summer at the Lake");
    assert_eq!(b, "We spent the afternoon...");
}

/// A single newline between title and body is accepted as well.
#[test]
fn parse_title_body_single_newline() {
    let (t, b) = parse_title_body("Title: Morning Walk\nThe sun was rising...");
    assert_eq!(t, "Morning Walk");
    assert_eq!(b, "The sun was rising...");
}

/// The lowercase `title:` spelling is recognised.
#[test]
fn parse_title_body_lowercase_prefix() {
    let (t, b) = parse_title_body("title: Garden Party\n\nEveryone gathered...");
    assert_eq!(t, "Garden Party");
    assert_eq!(b, "Everyone gathered...");
}

/// Without a prefix, the first sentence (minus its period) becomes the title.
#[test]
fn parse_title_body_fallback_first_sentence() {
    let (t, b) = parse_title_body("A warm summer day. We gathered at the park for a picnic.");
    assert_eq!(t, "A warm summer day");
    assert_eq!(b, "We gathered at the park for a picnic.");
}

/// With neither a prefix nor a sentence break, the title is the first 60
/// characters and the body is the untouched input.
#[test]
fn parse_title_body_fallback_truncate() {
    let input = "A single long paragraph with no periods or title prefix that just keeps going on and on";
    let (t, b) = parse_title_body(input);
    // Pin the exact value rather than only an upper bound, and measure in
    // chars because the implementation truncates with `chars().take(60)`.
    assert_eq!(t, "A single long paragraph with no periods or title prefix that");
    assert!(t.chars().count() <= 60);
    assert_eq!(b, input);
}
llama-server /// returns errors as `{"error": {"message": "...", "code": N, "type": "..."}}`; +fn log_timings(parsed: &Value, prompt_tokens: Option<i32>, completion_tokens: Option<i32>) { + let timings = match parsed.get("timings") { + Some(t) => t, + None => return, + }; + let prompt_tps = timings.get("prompt_per_second").and_then(|v| v.as_f64()); + let gen_tps = timings.get("predicted_per_second").and_then(|v| v.as_f64()); + let prompt_ms = timings.get("prompt_ms").and_then(|v| v.as_f64()); + let gen_ms = timings.get("predicted_ms").and_then(|v| v.as_f64()); + + let mut parts: Vec<String> = Vec::new(); + if let Some(c) = prompt_tokens { + let mut s = format!("prompt={} tok", c); + if let Some(ms) = prompt_ms { + s.push_str(&format!(" ({:.0} ms", ms)); + if let Some(tps) = prompt_tps { + s.push_str(&format!(", {:.1} tok/s", tps)); + } + s.push(')'); + } + parts.push(s); + } + if let Some(c) = completion_tokens { + let mut s = format!("gen={} tok", c); + if let Some(ms) = gen_ms { + s.push_str(&format!(" ({:.0} ms", ms)); + if let Some(tps) = gen_tps { + s.push_str(&format!(", {:.1} tok/s", tps)); + } + s.push(')'); + } + parts.push(s); + } + if !parts.is_empty() { + log::info!("llama-swap chat metrics — {}", parts.join(", ")); + } +} + /// llama-swap itself sometimes wraps subprocess failures with its own /// `{"error": "..."}` flat shape. Surface either when present, otherwise fall /// back to a truncated raw-JSON view. 
diff --git a/src/ai/sms_client.rs b/src/ai/sms_client.rs index 6661bac..d5e175f 100644 --- a/src/ai/sms_client.rs +++ b/src/ai/sms_client.rs @@ -281,6 +281,9 @@ impl SmsApiClient { if let Some(cid) = params.contact_id { url.push_str(&format!("&contact_id={}", cid)); } + if let Some(ref c) = params.contact { + url.push_str(&format!("&contact={}", urlencoding::encode(c))); + } if let Some(off) = params.offset { url.push_str(&format!("&offset={}", off)); } @@ -413,6 +416,9 @@ pub struct SmsSearchParams<'a> { pub mode: &'a str, pub limit: usize, pub contact_id: Option<i64>, + /// Contact name (case-insensitive). Resolved to a numeric ID by the + /// SMS-API server when `contact_id` is not set. + pub contact: Option<String>, /// Unix-seconds inclusive lower bound on `date`. pub date_from: Option<i64>, /// Unix-seconds inclusive upper bound on `date`.