From 0a627f48806732629e6cf4535f4dd904cd755eac Mon Sep 17 00:00:00 2001
From: Cameron Cordes <cameronc.dev@gmail.com>
Date: Mon, 25 May 2026 21:46:18 -0400
Subject: [PATCH] Add contact name filter to SMS search tool + misc
 improvements

- sms search tool: accept contact name, trim/validate, skip when
  contact_id is set, pass to API client
- sms_client: new contact field in SmsSearchParams, URL-encode on wire
- Tool description clarifies contact_id takes precedence when both given
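- Generate titles inline: prompts now ask for a leading "Title: ..." line,
  parsed by the new parse_title_body helper; the separate generate_title
  LLM call is removed
- llamacpp: log per-request token counts and timings from the response's
  `timings` object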
---
 src/ai/insight_chat.rs      |  33 +-------
 src/ai/insight_generator.rs | 147 ++++++++++++++++++++++++++----------
 src/ai/llamacpp.rs          |  46 +++++++++++
 src/ai/sms_client.rs        |   6 ++
 4 files changed, 165 insertions(+), 67 deletions(-)
diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs
index 6d52f8b..86671af 100644
--- a/src/ai/insight_chat.rs
+++ b/src/ai/insight_chat.rs
@@ -817,7 +817,8 @@ impl InsightChatService {
 
         let mut amended_insight_id: Option<i32> = None;
         if req.amend {
-            let title = self.generate_title(&backend, &final_content).await?;
+            let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
+            let final_content = body;
 
             // Amended rows intentionally do not inherit the parent's
             // `fewshot_source_ids`. The parent's few-shot influence is still
@@ -1001,7 +1002,7 @@ impl InsightChatService {
             final_content,
         } = outcome;
 
-        let title = self.generate_title(&backend, &final_content).await?;
+        let (title, body) = crate::ai::insight_generator::parse_title_body(&final_content);
 
         let json = serde_json::to_string(&messages)
             .map_err(|e| anyhow!("failed to serialize chat history: {}", e))?;
@@ -1009,7 +1010,7 @@ impl InsightChatService {
             library_id: req.library_id,
             file_path: normalized.clone(),
             title,
-            summary: final_content,
+            summary: body,
             generated_at: Utc::now().timestamp(),
             model_version: model_used.clone(),
             is_current: true,
@@ -1045,32 +1046,6 @@ impl InsightChatService {
         Ok(())
     }
 
-    /// Generate a short title via the same chat backend so voice stays
-    /// consistent with the body. Mirrors generate_agentic_insight_for_photo's
-    /// titling pass.
-    async fn generate_title(
-        &self,
-        backend: &ResolvedBackend,
-        final_content: &str,
-    ) -> Result<String> {
-        let title_prompt = format!(
-            "Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\n\
-             Capture the key moment or theme. Return ONLY the title, nothing else.",
-            final_content
-        );
-        let title_raw = backend
-            .chat()
-            .generate(
-                &title_prompt,
-                Some(
-                    "You are my long term memory assistant. Use only the information provided. Do not invent details.",
-                ),
-                None,
-            )
-            .await?;
-        Ok(title_raw.trim().trim_matches('"').to_string())
-    }
-
     /// Drive the agentic loop with streaming SSE events. Shared between
     /// bootstrap and continuation. Mutates `messages` in place (response
     /// turns + tool results are appended) and returns counters + the
diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs
index 80a7627..b0cd54c 100644
--- a/src/ai/insight_generator.rs
+++ b/src/ai/insight_generator.rs
@@ -28,6 +28,39 @@ use crate::otel::global_tracer;
 use crate::tags::TagDao;
 use crate::utils::{earliest_fs_time, normalize_path};
 
+/// Parse a "Title: ...\n\n<body>" response into (title, body). The title is
+/// the first line after a `Title:` / `title:` prefix, minus any wrapping
+/// double quotes; the body is everything after that line, trimmed.
+/// Falls back to the first sentence as the title (cut at the earliest `. `
+/// or `.\n`, at most 100 bytes), then to the first 60 chars with the full
+/// text as the body, if the model didn't follow the format.
+pub(crate) fn parse_title_body(raw: &str) -> (String, String) {
+    let trimmed = raw.trim();
+
+    // Split at the FIRST newline: preferring "\n\n" pulled the opening
+    // paragraph into the title for "Title: X\n<para>\n\n<para>" responses.
+    let prefixed = trimmed.strip_prefix("Title:").or_else(|| trimmed.strip_prefix("title:"));
+    if let Some((title, body)) = prefixed.and_then(|rest| rest.trim_start().split_once('\n')) {
+        let (title, body) = (title.trim().trim_matches('"').trim(), body.trim());
+        if !title.is_empty() && !body.is_empty() {
+            return (title.to_string(), body.to_string());
+        }
+    }
+
+    // Fallback: first sentence, ending at whichever terminator comes first.
+    let sentence_end = trimmed.find(". ").into_iter().chain(trimmed.find(".\n")).min();
+    if let Some(pos) = sentence_end {
+        let (title, body) = (&trimmed[..pos], trimmed[pos + 1..].trim());
+        if title.len() <= 100 && !body.is_empty() {
+            return (title.to_string(), body.to_string());
+        }
+    }
+
+    // Last resort: truncate to 60 chars for title, full text as body
+    let title: String = trimmed.chars().take(60).collect();
+    (title.trim_end().to_string(), trimmed.to_string())
+}
+
 /// Combine an optional personal Apollo Place with an optional Nominatim
 /// reverse-geocoded city, falling back to bare coordinates when neither
 /// resolves. Free function so we can test it cheaply without spinning up
@@ -1927,6 +1960,11 @@ Return ONLY the summary, nothing else."#,
             .unwrap_or(20)
             .clamp(1, 50) as usize;
         let contact_id = args.get("contact_id").and_then(|v| v.as_i64());
+        let contact = args.get("contact")
+            .and_then(|v| v.as_str())
+            .map(|s| s.trim().to_string())
+            .filter(|s| !s.is_empty())
+            .filter(|_| contact_id.is_none());
         let start_ts = args.get("start_ts").and_then(|v| v.as_i64());
         let end_ts = args.get("end_ts").and_then(|v| v.as_i64());
         let is_mms = args.get("is_mms").and_then(|v| v.as_bool());
@@ -1934,10 +1972,11 @@ Return ONLY the summary, nothing else."#,
         let has_date_filter = start_ts.is_some() || end_ts.is_some();
 
         log::info!(
-            "tool_search_messages: query='{}', mode={}, contact_id={:?}, range=[{:?}, {:?}], is_mms={:?}, has_media={:?}, limit={}",
+            "tool_search_messages: query='{}', mode={}, contact_id={:?}, contact={:?}, range=[{:?}, {:?}], is_mms={:?}, has_media={:?}, limit={}",
             query,
             mode,
             contact_id,
+            contact,
             start_ts,
             end_ts,
             is_mms,
@@ -1952,6 +1991,7 @@ Return ONLY the summary, nothing else."#,
             mode: mode.as_str(),
             limit: user_limit,
             contact_id,
+            contact,
             date_from: start_ts,
             date_to: end_ts,
             is_mms,
@@ -3033,28 +3073,31 @@ Return ONLY the summary, nothing else."#,
         }
 
         tools.push(Tool::function(
-            "search_messages",
+            "search_messages",
             "Search SMS/MMS messages — bodies and (for MMS) attachment text + filenames. \
-             Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \
-             `semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \
-             degrades to fts5 when embeddings absent). Optional filters: `start_ts` / `end_ts` (real-UTC unix \
-             seconds), `contact_id`, `is_mms` (true = MMS only, false = SMS only), `has_media` (true = messages \
-             with image/video/audio attachments only). For pure date / contact browsing without keywords, prefer \
-             `get_sms_messages`. \
-             \n\nFTS5 query syntax (works in fts5 + hybrid modes):\n\
-             - Phrase:   `\"trader joe's\"` — exact word sequence (use double quotes).\n\
-             - Prefix:   `restaur*` — matches restaurant, restaurants, restauranteur, ….\n\
-             - Boolean:  `dinner AND tahoe`, `wedding OR reception OR ceremony`, `vacation NOT work` (operators must be UPPERCASE).\n\
-             - Proximity: `NEAR(meeting work, 5)` — both terms within 5 tokens of each other.\n\
-             - Combine:  `(reception OR ceremony) AND tahoe*` — group with parens.\n\
-             Unquoted multi-word queries are treated as implicit AND. Apostrophes / hyphens / colons are safe — they no longer downgrade to a slow LIKE scan. Use `mode: \"fts5\"` when you want the operators above to be authoritative; `hybrid` still respects them but may surface semantically-similar non-keyword hits alongside.\n\n\
-             Examples:\n\
-             - `{query: \"trader joe's\"}` — phrase across all time.\n\
-             - `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week.\n\
-             - `{query: \"vacation\", has_media: true}` — only matches that include photos / videos.\n\
-             - `{query: \"wedding OR reception OR ceremony\", mode: \"fts5\"}` — any of several synonyms.\n\
-             - `{query: \"restaur*\", mode: \"fts5\"}` — prefix expansion for varying word forms.\n\
-             - `{query: \"NEAR(birthday cake, 5)\", mode: \"fts5\"}` — terms close together but in any order.",
+              Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \
+              `semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \
+              degrades to fts5 when embeddings absent). Optional filters: `start_ts` / `end_ts` (real-UTC unix \
+              seconds), `contact` (contact name, case-insensitive), `contact_id` (numeric), `is_mms` \
+              (true = MMS only, false = SMS only), `has_media` (true = messages with image/video/audio \
+              attachments only). Prefer `contact` over `contact_id` — the name is resolved server-side. \
+              If both are provided, `contact_id` takes precedence. \
+              For pure date / contact browsing without keywords, prefer `get_sms_messages`. \
+              \n\nFTS5 query syntax (works in fts5 + hybrid modes):\n\
+              - Phrase:   `\"trader joe's\"` — exact word sequence (use double quotes).\n\
+              - Prefix:   `restaur*` — matches restaurant, restaurants, restauranteur, ….\n\
+              - Boolean:  `dinner AND tahoe`, `wedding OR reception OR ceremony`, `vacation NOT work` (operators must be UPPERCASE).\n\
+              - Proximity: `NEAR(meeting work, 5)` — both terms within 5 tokens of each other.\n\
+              - Combine:  `(reception OR ceremony) AND tahoe*` — group with parens.\n\
+              Unquoted multi-word queries are treated as implicit AND. Apostrophes / hyphens / colons are safe — they no longer downgrade to a slow LIKE scan. Use `mode: \"fts5\"` when you want the operators above to be authoritative; `hybrid` still respects them but may surface semantically-similar non-keyword hits alongside.\n\n\
+              Examples:\n\
+              - `{query: \"trader joe's\"}` — phrase across all time.\n\
+              - `{query: \"dinner\", contact: \"Mom\"}` — keyword scoped to Mom's messages.\n\
+              - `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week.\n\
+              - `{query: \"vacation\", has_media: true}` — only matches that include photos / videos.\n\
+              - `{query: \"wedding OR reception OR ceremony\", mode: \"fts5\"}` — any of several synonyms.\n\
+              - `{query: \"restaur*\", mode: \"fts5\"}` — prefix expansion for varying word forms.\n\
+              - `{query: \"NEAR(birthday cake, 5)\", mode: \"fts5\"}` — terms close together but in any order.",
             serde_json::json!({
                 "type": "object",
                 "required": ["query"],
@@ -3063,6 +3106,7 @@ Return ONLY the summary, nothing else."#,
                     "mode":  { "type": "string", "enum": ["fts5", "semantic", "hybrid"], "description": "Search strategy. Default: hybrid." },
                     "limit": { "type": "integer", "description": "Max results (default 20, max 50)." },
                     "contact_id": { "type": "integer", "description": "Optional numeric contact id to scope the search." },
+                    "contact":    { "type": "string", "description": "Optional contact name (case-insensitive). Resolved to contact_id server-side. Use this when you know the name but not the ID." },
                     "start_ts": { "type": "integer", "description": "Optional inclusive lower bound, real-UTC unix seconds." },
                     "end_ts":   { "type": "integer", "description": "Optional inclusive upper bound, real-UTC unix seconds." },
                     "is_mms":   { "type": "boolean", "description": "Optional: true to restrict to MMS, false to restrict to SMS." },
@@ -3534,7 +3578,8 @@ Return ONLY the summary, nothing else."#,
              - When you identify people / places / events / things, use store_entity + store_fact to grow the persistent memory.\n\
              - Before store_entity, call recall_entities to check whether a similar name already exists; reuse the existing entity_id rather than creating a near-duplicate (e.g. \"Sara\" vs \"Sarah J.\"). The DAO will collapse obvious cosine matches, but choosing the existing id keeps facts and photo links consolidated.\n\
              - Predicates should be relationship-shaped verbs that encode a queryable claim — `lives_in`, `works_at`, `attended`, `is_friend_of`, `is_parent_of`, `interested_in`, `married_to`, `owns`. DO NOT use vague speech-act predicates like `expressed`, `said`, `mentioned`, `stated`, `quoted`, `noted`, `discussed`, `thought`, `wondered`. DO NOT store quotations or sentence fragments as `object_value` — paraphrase into a structured claim. Bad: `(Cameron, expressed, \"I'm tempted to get a part-time job there\")`. Good: `(Cameron, considered_employment_at, <Place>)` or `(Cameron, interested_in, \"part-time work\")`.\n\
-             - A tool returning no results is informative; continue with the others.",
+             - A tool returning no results is informative; continue with the others.\n\
+             - When writing your final answer, start with \"Title: <short title>\" (max 8 words) on the first line, then a blank line, then the body.",
         );
 
         let mut out = identity;
@@ -4059,7 +4104,7 @@ Return ONLY the summary, nothing else."#,
                 iterations_used
             );
             messages.push(ChatMessage::user(format!(
-                "Based on the context gathered, please write the final photo insight: a title and a detailed personal summary. Write in first person as {}.",
+                "Based on the context gathered, please write the final photo insight. Start with \"Title: <short title>\" on the first line (max 8 words), then a blank line, then the detailed personal summary. Write in first person as {}.",
                 user_display_name()
             )));
             let (final_response, prompt_tokens, eval_tokens) = backend
@@ -4077,21 +4122,11 @@ Return ONLY the summary, nothing else."#,
             .set_attribute(KeyValue::new("iterations_used", iterations_used as i64));
         loop_cx.span().set_status(Status::Ok);
 
-        // 13. Generate title via the same backend so voice stays consistent.
-        let title_prompt = format!(
-            "Create a short title (maximum 8 words) for the following journal entry:\n\n{}\n\nCapture the key moment or theme. Return ONLY the title, nothing else.",
-            final_content
-        );
-        let title_system = custom_system_prompt.as_deref().unwrap_or(
-            "You are my long term memory assistant. Use only the information provided. Do not invent details.",
-        );
-        let title_raw = backend
-            .chat()
-            .generate(&title_prompt, Some(title_system), None)
-            .await?;
-        let title = title_raw.trim().trim_matches('"').to_string();
+        // 13. Parse title from the model's inline response.
+        let (title, body) = parse_title_body(&final_content);
+        final_content = body;
 
-        log::info!("Agentic generated title: {}", title);
+        log::info!("Agentic parsed title: {}", title);
         let summary_preview: String = final_content.chars().take(200).collect();
         log::info!(
             "Agentic generated summary ({} chars): {}",
@@ -4742,4 +4777,40 @@ mod tests {
         assert!(out.contains("-> empty (pivoted)"));
         assert!(out.contains("Final insight: Final title"));
     }
+
+    #[test]
+    fn parse_title_body_standard_format() {
+        // Canonical shape: prefix, title line, blank separator, body.
+        let (t, b) = parse_title_body("Title: Summer at the Lake\n\nWe spent the afternoon...");
+        assert_eq!((t.as_str(), b.as_str()), ("Summer at the Lake", "We spent the afternoon..."));
+    }
+
+    #[test]
+    fn parse_title_body_single_newline() {
+        // A lone "\n" between title and body is accepted as the separator.
+        let (t, b) = parse_title_body("Title: Morning Walk\nThe sun was rising...");
+        assert_eq!((t.as_str(), b.as_str()), ("Morning Walk", "The sun was rising..."));
+    }
+
+    #[test]
+    fn parse_title_body_lowercase_prefix() {
+        // The prefix may also be spelled in lowercase: "title:".
+        let (t, b) = parse_title_body("title: Garden Party\n\nEveryone gathered...");
+        assert_eq!((t.as_str(), b.as_str()), ("Garden Party", "Everyone gathered..."));
+    }
+
+    #[test]
+    fn parse_title_body_fallback_first_sentence() {
+        let (t, b) = parse_title_body("A warm summer day. We gathered at the park for a picnic.");
+        let expected = ("A warm summer day", "We gathered at the park for a picnic.");
+        assert_eq!((t.as_str(), b.as_str()), expected);
+    }
+
+    #[test]
+    fn parse_title_body_fallback_truncate() {
+        let input = "A single long paragraph with no periods or title prefix that just keeps going on and on";
+        let (t, b) = parse_title_body(input);
+        assert!(!t.is_empty() && t.chars().count() <= 60, "title must be 1..=60 chars: {:?}", t);
+        assert!(input.starts_with(&t), "title must be a prefix of the input: {:?}", t);
+        assert_eq!(b, input);
+    }
 }
diff --git a/src/ai/llamacpp.rs b/src/ai/llamacpp.rs
index 77cd05f..8ea1063 100644
--- a/src/ai/llamacpp.rs
+++ b/src/ai/llamacpp.rs
@@ -354,6 +354,8 @@ impl LlamaCppClient {
             .and_then(|v| v.as_i64())
             .map(|n| n as i32);
 
+        log_timings(&parsed, prompt_tokens, completion_tokens);
+
         Ok((chat_msg, prompt_tokens, completion_tokens))
     }
 }
@@ -456,6 +458,7 @@ impl LlmClient for LlamaCppClient {
             let mut role = "assistant".to_string();
             let mut prompt_tokens: Option<i32> = None;
             let mut completion_tokens: Option<i32> = None;
+            let mut last_frame: Option<Value> = None;
             let mut done_seen = false;
 
             while let Some(chunk) = byte_stream.next().await {
@@ -505,6 +508,7 @@ impl LlmClient for LlamaCppClient {
                                 .get("completion_tokens")
                                 .and_then(|n| n.as_i64())
                                 .map(|n| n as i32);
+                            last_frame = Some(v.clone());
                         }
 
                         let Some(choices) = v.get("choices").and_then(|c| c.as_array())
@@ -587,6 +591,10 @@ impl LlmClient for LlamaCppClient {
                 Some(v)
             };
 
+            if let Some(ref frame) = last_frame {
+                log_timings(frame, prompt_tokens, completion_tokens);
+            }
+
             let message = ChatMessage {
                 role,
                 content: accumulated_content,
@@ -720,6 +728,44 @@ impl LlamaCppClient {
 /// Extract a diagnostic fragment from a llama-swap / llama-server response
 /// that doesn't match the expected `{choices: [...]}` shape. llama-server
 /// returns errors as `{"error": {"message": "...", "code": N, "type": "..."}}`;
+fn log_timings(parsed: &Value, prompt_tokens: Option<i32>, completion_tokens: Option<i32>) {
+    let timings = match parsed.get("timings") {
+        Some(t) => t,
+        None => return,
+    };
+    let prompt_tps = timings.get("prompt_per_second").and_then(|v| v.as_f64());
+    let gen_tps = timings.get("predicted_per_second").and_then(|v| v.as_f64());
+    let prompt_ms = timings.get("prompt_ms").and_then(|v| v.as_f64());
+    let gen_ms = timings.get("predicted_ms").and_then(|v| v.as_f64());
+
+    let mut parts: Vec<String> = Vec::new();
+    if let Some(c) = prompt_tokens {
+        let mut s = format!("prompt={} tok", c);
+        if let Some(ms) = prompt_ms {
+            s.push_str(&format!(" ({:.0} ms", ms));
+            if let Some(tps) = prompt_tps {
+                s.push_str(&format!(", {:.1} tok/s", tps));
+            }
+            s.push(')');
+        }
+        parts.push(s);
+    }
+    if let Some(c) = completion_tokens {
+        let mut s = format!("gen={} tok", c);
+        if let Some(ms) = gen_ms {
+            s.push_str(&format!(" ({:.0} ms", ms));
+            if let Some(tps) = gen_tps {
+                s.push_str(&format!(", {:.1} tok/s", tps));
+            }
+            s.push(')');
+        }
+        parts.push(s);
+    }
+    if !parts.is_empty() {
+        log::info!("llama-swap chat metrics — {}", parts.join(", "));
+    }
+}
+
 /// llama-swap itself sometimes wraps subprocess failures with its own
 /// `{"error": "..."}` flat shape. Surface either when present, otherwise fall
 /// back to a truncated raw-JSON view.
diff --git a/src/ai/sms_client.rs b/src/ai/sms_client.rs
index 6661bac..d5e175f 100644
--- a/src/ai/sms_client.rs
+++ b/src/ai/sms_client.rs
@@ -281,6 +281,9 @@ impl SmsApiClient {
         if let Some(cid) = params.contact_id {
             url.push_str(&format!("&contact_id={}", cid));
         }
+        if let Some(ref c) = params.contact {
+            url.push_str(&format!("&contact={}", urlencoding::encode(c)));
+        }
         if let Some(off) = params.offset {
             url.push_str(&format!("&offset={}", off));
         }
@@ -413,6 +416,9 @@ pub struct SmsSearchParams<'a> {
     pub mode: &'a str,
     pub limit: usize,
     pub contact_id: Option<i64>,
+    /// Contact name (case-insensitive). Resolved to a numeric ID by the
+    /// SMS-API server when `contact_id` is not set.
+    pub contact: Option<String>,
     /// Unix-seconds inclusive lower bound on `date`.
     pub date_from: Option<i64>,
     /// Unix-seconds inclusive upper bound on `date`.