From f50d32667b0e9374f4d3175c0eadac7a6b2ecc46 Mon Sep 17 00:00:00 2001 From: Cameron Cordes Date: Thu, 7 May 2026 14:56:58 -0400 Subject: [PATCH] insight-chat: ToolGateOpts + per-tool description rewrites Tools whose backing tables are empty (calendar, location_history, daily_summaries) drop out of the catalog so the LLM doesn't waste iteration budget calling them only to receive "no results found". Vision and apollo gates already existed; this generalizes the pattern. search_messages gains start_ts/end_ts/contact_id filters (date filter is a client-side post-filter; SMS-API only accepts contact_id natively on the search endpoint). Descriptions follow a consistent convention: one sentence (what + when), param semantics, examples for tools with non-obvious param choices. No more all-caps headers, no more identity-prescriptive language inside descriptions. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ai/insight_chat.rs | 20 +- src/ai/insight_generator.rs | 629 +++++++++++++++++------------- src/ai/sms_client.rs | 41 ++ src/database/daily_summary_dao.rs | 25 ++ 4 files changed, 437 insertions(+), 278 deletions(-) diff --git a/src/ai/insight_chat.rs b/src/ai/insight_chat.rs index e782070..93c3e55 100644 --- a/src/ai/insight_chat.rs +++ b/src/ai/insight_chat.rs @@ -364,10 +364,12 @@ impl InsightChatService { .map(|imgs| !imgs.is_empty()) .unwrap_or(false); let offer_describe_tool = !is_hybrid && local_first_user_has_image; - let tools = InsightGenerator::build_tool_definitions( - offer_describe_tool, - self.generator.apollo_enabled(), - ); + // current_gate_opts(has_vision) sets gate_opts.has_vision = has_vision + // and probes the per-table presence flags. Pass `offer_describe_tool` + // directly — the `!is_hybrid && local_first_user_has_image` decision + // is the chat-path's vision predicate. 
+ let gate_opts = self.generator.current_gate_opts(offer_describe_tool); + let tools = InsightGenerator::build_tool_definitions(gate_opts); // Image base64 only needed when describe_photo is on the menu. Load // lazily to avoid disk IO when the loop never invokes it. @@ -810,10 +812,12 @@ impl InsightChatService { .map(|imgs| !imgs.is_empty()) .unwrap_or(false); let offer_describe_tool = !is_hybrid && local_first_user_has_image; - let tools = InsightGenerator::build_tool_definitions( - offer_describe_tool, - self.generator.apollo_enabled(), - ); + // current_gate_opts(has_vision) sets gate_opts.has_vision = has_vision + // and probes the per-table presence flags. Pass `offer_describe_tool` + // directly — the `!is_hybrid && local_first_user_has_image` decision + // is the chat-path's vision predicate. + let gate_opts = self.generator.current_gate_opts(offer_describe_tool); + let tools = InsightGenerator::build_tool_definitions(gate_opts); let image_base64: Option = if offer_describe_tool { self.generator.load_image_as_base64(&normalized).ok() diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index 911068b..1cba095 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -89,6 +89,19 @@ pub struct InsightGenerator { libraries: Vec, } +/// Per-call gating flags for `build_tool_definitions`. Tools whose backing +/// data is empty (or whose env-var guard is unset) are dropped from the +/// catalog so the LLM doesn't reach for a tool that always returns "No +/// results found." — that wastes iteration budget. 
+#[derive(Debug, Clone, Copy, Default)] +pub struct ToolGateOpts { + pub has_vision: bool, + pub apollo_enabled: bool, + pub daily_summaries_present: bool, + pub calendar_present: bool, + pub location_history_present: bool, +} + impl InsightGenerator { pub fn new( ollama: OllamaClient, @@ -130,6 +143,45 @@ impl InsightGenerator { self.apollo_client.is_enabled() } + /// Compute the per-call tool gate options by probing each backing + /// table through its DAO's count method (for daily summaries that is a + /// full `SELECT COUNT(*)`); meant to be called once per chat turn / generation. + /// `has_vision` is supplied by the caller because it depends on the + /// model selected for this turn, not on persistent state. + pub fn current_gate_opts(&self, has_vision: bool) -> ToolGateOpts { + let cx = opentelemetry::Context::new(); + let calendar_present = { + let mut dao = self + .calendar_dao + .lock() + .expect("Unable to lock CalendarEventDao"); + dao.get_event_count(&cx).map(|n| n > 0).unwrap_or(false) + }; + let location_history_present = { + let mut dao = self + .location_dao + .lock() + .expect("Unable to lock LocationHistoryDao"); + dao.get_location_count(&cx).map(|n| n > 0).unwrap_or(false) + }; + let daily_summaries_present = { + let mut dao = self + .daily_summary_dao + .lock() + .expect("Unable to lock DailySummaryDao"); + dao.get_total_summary_count(&cx) + .map(|n| n > 0) + .unwrap_or(false) + }; + ToolGateOpts { + has_vision, + apollo_enabled: self.apollo_enabled(), + daily_summaries_present, + calendar_present, + location_history_present, + } + } + /// Resolve `rel_path` against the configured libraries, returning the /// first root under which the file exists. Insights may be generated /// for any library — the generator itself doesn't know which — so we @@ -1711,24 +1763,21 @@ Return ONLY the summary, nothing else."#, } /// Tool: search_messages — keyword / semantic / hybrid search over all - /// SMS message bodies via the Django FTS5 + embeddings pipeline.
Unlike - /// `search_rag` (daily summaries, date-weighted) this hits raw message - /// text across time and is the right choice for exact phrases, proper - /// nouns, URLs, or anything where specific wording matters. + /// SMS message bodies via the Django FTS5 + embeddings pipeline. Takes + /// optional `contact_id` (sent to SMS-API) and `start_ts`/`end_ts` (client-side post-filter) filters. async fn tool_search_messages(&self, args: &serde_json::Value) -> String { let query = match args.get("query").and_then(|v| v.as_str()) { Some(q) if !q.trim().is_empty() => q.trim(), _ => { - // Redirect when the model reached for this tool with a - // date/contact-shaped intent — get_sms_messages is the right - // call. Without this hint, small models often just retry - // search_messages again with the same args. - let has_date = args.get("date").is_some(); - let has_contact = args.get("contact").is_some(); + let has_date = args.get("date").is_some() + || args.get("start_ts").is_some() + || args.get("end_ts").is_some(); + let has_contact = args.get("contact").is_some() + || args.get("contact_id").is_some(); if has_date || has_contact { return "Error: search_messages needs a 'query' (keywords/phrase). \ - To fetch messages around a date or from a contact, call \ - get_sms_messages with { date, contact? } instead." + To fetch messages around a date or from a contact without keywords, \ + call get_sms_messages with { date, contact? } instead."
.to_string(); } return "Error: missing required parameter 'query'".to_string(); @@ -1748,51 +1797,86 @@ Return ONLY the summary, nothing else."#, mode ); } - let limit = args + let user_limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(20) .clamp(1, 50) as usize; + let contact_id = args.get("contact_id").and_then(|v| v.as_i64()); + let start_ts = args.get("start_ts").and_then(|v| v.as_i64()); + let end_ts = args.get("end_ts").and_then(|v| v.as_i64()); + let has_date_filter = start_ts.is_some() || end_ts.is_some(); + + // When a date filter is supplied, fetch a larger pool from SMS-API + // so in-window matches that ranked lower than out-of-window ones + // aren't lost. + let fetch_limit = if has_date_filter { 100 } else { user_limit }; log::info!( - "tool_search_messages: query='{}', mode={}, limit={}", - query, - mode, - limit + "tool_search_messages: query='{}', mode={}, contact_id={:?}, range=[{:?}, {:?}], user_limit={}, fetch_limit={}", + query, mode, contact_id, start_ts, end_ts, user_limit, fetch_limit ); - match self.sms_client.search_messages(query, &mode, limit).await { - Ok(hits) if hits.is_empty() => "No messages matched.".to_string(), - Ok(hits) => { - let mut out = String::new(); - out.push_str(&format!( - "Found {} messages (mode: {}):\n\n", - hits.len(), - mode - )); - let user_name = user_display_name(); - for h in hits { - let date = chrono::DateTime::from_timestamp(h.date, 0) - .map(|dt| dt.format("%Y-%m-%d").to_string()) - .unwrap_or_else(|| h.date.to_string()); - let direction: &str = if h.type_ == 2 { - &user_name - } else { - &h.contact_name - }; - let score = h - .similarity_score - .map(|s| format!(" [score {:.2}]", s)) - .unwrap_or_default(); - out.push_str(&format!( - "[{}]{} {} — {}\n\n", - date, score, direction, h.body - )); + let hits = match self + .sms_client + .search_messages_with_contact(query, &mode, fetch_limit, contact_id) + .await + { + Ok(h) => h, + Err(e) => return format!("Error searching messages: {}", e), + 
}; + + // Date-range post-filter on the client side. SMS-API's /search/ + // doesn't accept date params; mirroring Apollo's pattern here. + let filtered: Vec<_> = hits + .into_iter() + .filter(|h| { + if let Some(s) = start_ts + && h.date < s + { + return false; } - out - } - Err(e) => format!("Error searching messages: {}", e), + if let Some(e) = end_ts + && h.date > e + { + return false; + } + true + }) + .take(user_limit) + .collect(); + + if filtered.is_empty() { + return "No messages matched.".to_string(); } + + let user_name = user_display_name(); + let mut out = String::new(); + out.push_str(&format!( + "Found {} messages (mode: {}{}):\n\n", + filtered.len(), + mode, + if has_date_filter { ", date-filtered" } else { "" } + )); + for h in filtered { + let date = chrono::DateTime::from_timestamp(h.date, 0) + .map(|dt| dt.format("%Y-%m-%d").to_string()) + .unwrap_or_else(|| h.date.to_string()); + let direction: &str = if h.type_ == 2 { + &user_name + } else { + &h.contact_name + }; + let score = h + .similarity_score + .map(|s| format!(" [score {:.2}]", s)) + .unwrap_or_default(); + out.push_str(&format!( + "[{}]{} {} — {}\n\n", + date, score, direction, h.body + )); + } + out } /// Tool: get_sms_messages — fetch SMS messages near a date for a contact @@ -2485,283 +2569,238 @@ Return ONLY the summary, nothing else."#, // ── Agentic insight generation ────────────────────────────────────── - /// Build the list of tool definitions for the agentic loop - pub(crate) fn build_tool_definitions(has_vision: bool, apollo_enabled: bool) -> Vec { - let mut tools = vec![ - Tool::function( + /// Build the list of tool definitions for the agentic loop, gated by + /// `opts`. Always-on tools: `search_messages`, `get_sms_messages`, + /// `get_file_tags`, `reverse_geocode`, `get_current_datetime`, the + /// four knowledge-memory tools. 
Conditional: `describe_photo` (vision + /// model), `get_personal_place_at` (Apollo configured), `search_rag` + /// (daily_summaries populated), `get_calendar_events` (calendar + /// populated), `get_location_history` (location history populated). + pub(crate) fn build_tool_definitions(opts: ToolGateOpts) -> Vec { + let mut tools: Vec = Vec::new(); + + if opts.daily_summaries_present { + tools.push(Tool::function( "search_rag", - "Search conversation history using semantic search. Use this to find relevant past conversations about specific topics, people, or events.", + "Date-anchored semantic search over the user's daily-summary corpus. \ + Returns up to `limit` summaries most semantically similar to `query`, \ + weighted toward summaries near `date`. For raw message text across all \ + time, prefer `search_messages`. \ + Examples: `{query: \"family dinner\", date: \"2018-12-24\"}` — what \ + daily summaries near Christmas Eve mention family / dinner / gathering. \ + `{query: \"work travel\", date: \"2019-06-15\", contact: \"Alice\"}` — \ + narrowed to summaries that involve Alice.", serde_json::json!({ "type": "object", "required": ["query", "date"], "properties": { - "query": { - "type": "string", - "description": "The search query to find relevant conversations" - }, - "date": { - "type": "string", - "description": "The reference date in YYYY-MM-DD format" - }, - "contact": { - "type": "string", - "description": "Optional contact name to filter results" - }, - "limit": { - "type": "integer", - "description": "Maximum number of results to return (default: 10, max: 25)" - } - } - }), - ), - Tool::function( - "search_messages", - "CONTENT search over SMS message bodies by keywords/phrases/topics across all time. Use when you're looking for specific wording (phrases, proper nouns, URLs, topics) and DON'T have a date in mind. NOT for time-based queries — if you know the date or want messages around a date, call get_sms_messages instead. 
Modes: 'fts5' (keyword, supports \"phrase\" / prefix* / AND / NEAR(w1 w2, 5)), 'semantic' (embedding similarity), 'hybrid' (recommended — merges both via reciprocal rank fusion).", - serde_json::json!({ - "type": "object", - "required": ["query"], - "properties": { - "query": { - "type": "string", - "description": "Search query. Min 3 chars. For fts5 mode, supports phrase (\"\"), prefix (*), AND/OR/NOT, and NEAR proximity." - }, - "mode": { - "type": "string", - "enum": ["fts5", "semantic", "hybrid"], - "description": "Search strategy. Default: hybrid." - }, - "limit": { - "type": "integer", - "description": "Maximum number of results (default: 20, max: 50)" - } - } - }), - ), - Tool::function( - "get_sms_messages", - "TIME-BASED fetch of SMS/text messages around a specific date (and optionally from a specific contact). Returns the actual message conversation for that window. Use this whenever you know the date or want the context around a photo's timestamp. Omit contact to search across all conversations. For keyword/topic search without a date, use search_messages instead.", - serde_json::json!({ - "type": "object", - "required": ["date"], - "properties": { - "date": { - "type": "string", - "description": "The center date in YYYY-MM-DD format" - }, - "contact": { - "type": "string", - "description": "Optional contact name to filter messages. If omitted, searches all conversations." - }, - "days_radius": { - "type": "integer", - "description": "Number of days before and after the date to search (default: 4)" - }, - "limit": { - "type": "integer", - "description": "Maximum number of messages to return (default: 60, max: 150)" - } - } - }), - ), - Tool::function( - "get_calendar_events", - "Fetch calendar events near a specific date. 
Shows scheduled events, meetings, and activities.", - serde_json::json!({ - "type": "object", - "required": ["date"], - "properties": { - "date": { - "type": "string", - "description": "The center date in YYYY-MM-DD format" - }, - "days_radius": { - "type": "integer", - "description": "Number of days before and after the date to search (default: 7)" - }, - "limit": { - "type": "integer", - "description": "Maximum number of events to return (default: 20, max: 50)" - } - } - }), - ), - Tool::function( - "get_location_history", - "Fetch location history records near a specific date. Shows places visited and activities.", - serde_json::json!({ - "type": "object", - "required": ["date"], - "properties": { - "date": { - "type": "string", - "description": "The center date in YYYY-MM-DD format" - }, - "days_radius": { - "type": "integer", - "description": "Number of days before and after the date to search (default: 14)" - } - } - }), - ), - Tool::function( - "get_file_tags", - "Get tags/labels that have been applied to a specific photo file.", - serde_json::json!({ - "type": "object", - "required": ["file_path"], - "properties": { - "file_path": { - "type": "string", - "description": "The file path of the photo to get tags for" - } - } - }), - ), - ]; - - tools.push(Tool::function( - "reverse_geocode", - "Convert GPS latitude/longitude coordinates to a human-readable place name (city, state). Use this when GPS coordinates are available in the photo metadata, or to resolve coordinates returned by get_location_history.", - serde_json::json!({ - "type": "object", - "required": ["latitude", "longitude"], - "properties": { - "latitude": { - "type": "number", - "description": "GPS latitude in decimal degrees" - }, - "longitude": { - "type": "number", - "description": "GPS longitude in decimal degrees" - } - } - }), - )); - - // Personal place lookup. Only registered when the integration is - // enabled — otherwise the LLM gets a tool that always errors. 
- if apollo_enabled { - tools.push(Tool::function( - "get_personal_place_at", - "Get the user's personal, named place (e.g. Home, Work, Cabin) at a GPS coordinate, if any. Returns the place name, category, free-text description (the user's own notes about the location), and radius. More specific than reverse_geocode — prefer this when both apply.", - serde_json::json!({ - "type": "object", - "required": ["latitude", "longitude"], - "properties": { - "latitude": { "type": "number", "description": "GPS latitude in decimal degrees" }, - "longitude": { "type": "number", "description": "GPS longitude in decimal degrees" } + "query": { "type": "string", "description": "Free-text query, semantically matched." }, + "date": { "type": "string", "description": "Anchor date, YYYY-MM-DD. Summaries near this date rank higher." }, + "contact": { "type": "string", "description": "Optional contact name to bias toward conversations with that person." }, + "limit": { "type": "integer", "description": "Max summaries to return (default 10, max 25)." } + } + }), + )); + } + + tools.push(Tool::function( + "search_messages", + "Search SMS/MMS message bodies. Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \ + `semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \ + degrades to fts5 when embeddings absent). Optional `start_ts` / `end_ts` (real-UTC unix seconds) and \ + `contact_id` filters. For pure date / contact browsing without keywords, prefer `get_sms_messages`. \ + Examples: `{query: \"trader joe's\"}` — phrase across all time. \ + `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week. \ + `{query: \"NEAR(meeting work, 5)\"}` — proximity search.", + serde_json::json!({ + "type": "object", + "required": ["query"], + "properties": { + "query": { "type": "string", "description": "Search query. Min 3 chars. 
fts5 supports phrase (\"\"), prefix (*), AND/OR/NOT, and NEAR proximity." }, + "mode": { "type": "string", "enum": ["fts5", "semantic", "hybrid"], "description": "Search strategy. Default: hybrid." }, + "limit": { "type": "integer", "description": "Max results (default 20, max 50)." }, + "contact_id": { "type": "integer", "description": "Optional numeric contact id to scope the search." }, + "start_ts": { "type": "integer", "description": "Optional inclusive lower bound, real-UTC unix seconds." }, + "end_ts": { "type": "integer", "description": "Optional inclusive upper bound, real-UTC unix seconds." } + } + }), + )); + + tools.push(Tool::function( + "get_sms_messages", + "Fetch SMS/MMS messages near a date (and optionally from a specific contact). Use when you know the date \ + or want context around a photo's timestamp. For keyword search without a date, use `search_messages`. \ + Returns up to `limit` messages within `±days_radius` of `date`, sorted by proximity. \ + Example: `{date: \"2018-08-12\", contact: \"Mom\", days_radius: 2}` — messages from Mom within ±2 days of Aug 12 2018.", + serde_json::json!({ + "type": "object", + "required": ["date"], + "properties": { + "date": { "type": "string", "description": "Center date, YYYY-MM-DD." }, + "contact": { "type": "string", "description": "Optional contact name (case-insensitive). Falls back to all contacts on no match." }, + "days_radius": { "type": "integer", "description": "Days before and after to include (default 4)." }, + "limit": { "type": "integer", "description": "Max messages to return (default 60, max 150)." } + } + }), + )); + + if opts.calendar_present { + tools.push(Tool::function( + "get_calendar_events", + "Fetch calendar events near a date — meetings, scheduled activities, all-day events. \ + Returns events within `±days_radius` of `date`. 
\ + Example: `{date: \"2019-03-22\", days_radius: 3}` — events within a week of March 22 2019.", + serde_json::json!({ + "type": "object", + "required": ["date"], + "properties": { + "date": { "type": "string", "description": "Center date, YYYY-MM-DD." }, + "days_radius": { "type": "integer", "description": "Days before and after to include (default 7)." }, + "limit": { "type": "integer", "description": "Max events to return (default 20, max 50)." } + } + }), + )); + } + + if opts.location_history_present { + tools.push(Tool::function( + "get_location_history", + "Fetch raw location records (lat/lon/timestamp/activity) near a date. The default 14-day radius is \ + wide because location density varies; tighten to ±1 day for a single-trip query. For a coordinate's \ + named place, use `reverse_geocode` (or `get_personal_place_at` when Apollo is enabled).", + serde_json::json!({ + "type": "object", + "required": ["date"], + "properties": { + "date": { "type": "string", "description": "Center date, YYYY-MM-DD." }, + "days_radius": { "type": "integer", "description": "Days before and after to include (default 14)." } + } + }), + )); + } + + tools.push(Tool::function( + "get_file_tags", + "Get user-applied tags for a specific photo file path. Tags are user-curated, not auto-detected.", + serde_json::json!({ + "type": "object", + "required": ["file_path"], + "properties": { + "file_path": { "type": "string", "description": "File path of the photo." } + } + }), + )); + + tools.push(Tool::function( + "reverse_geocode", + "Convert GPS lat/lon to a human-readable place name (city, state). Use for any coordinate the LLM has \ + obtained from EXIF or `get_location_history`. When Apollo is configured, prefer `get_personal_place_at` \ + — it returns the user's named places (Home / Work / etc.) 
which are more specific.", + serde_json::json!({ + "type": "object", + "required": ["latitude", "longitude"], + "properties": { + "latitude": { "type": "number", "description": "Decimal degrees." }, + "longitude": { "type": "number", "description": "Decimal degrees." } + } + }), + )); + + if opts.apollo_enabled { + tools.push(Tool::function( + "get_personal_place_at", + "Return any of the user's named Places (e.g. Home, Work, Cabin) whose radius contains (latitude, longitude). \ + Smallest radius first — most specific match wins. More specific than `reverse_geocode`; prefer this when \ + both apply. Returns place name, category, free-text description, and radius.", + serde_json::json!({ + "type": "object", + "required": ["latitude", "longitude"], + "properties": { + "latitude": { "type": "number", "description": "Decimal degrees." }, + "longitude": { "type": "number", "description": "Decimal degrees." } } }), )); } - // Knowledge memory tools tools.push(Tool::function( "recall_entities", - "Search the knowledge memory for people, places, events, or things previously learned from other photos. Use this to retrieve context about subjects appearing in this photo.", + "Search the persistent knowledge memory for previously learned people, places, events, or things. \ + Use BEFORE writing the insight to ground the model on what's already known.", serde_json::json!({ "type": "object", "properties": { - "name": { - "type": "string", - "description": "Name or partial name to search for (case-insensitive substring match)" - }, - "entity_type": { - "type": "string", - "enum": ["person", "place", "event", "thing"], - "description": "Filter by entity type (optional)" - }, - "limit": { - "type": "integer", - "description": "Maximum number of results to return (default: 20, max: 50)" - } + "name": { "type": "string", "description": "Name or partial name (case-insensitive substring match)." 
}, + "entity_type": { "type": "string", "enum": ["person", "place", "event", "thing"] }, + "limit": { "type": "integer", "description": "Max results (default 20, max 50)." } } }), )); tools.push(Tool::function( "recall_facts_for_photo", - "Retrieve all known facts linked to a specific photo from the knowledge memory. Use this at the start of insight generation to load any previously stored knowledge about subjects in this photo.", + "Retrieve all stored facts linked to a specific photo. Call at the start of insight generation to load \ + prior knowledge about subjects in this photo without scanning the whole knowledge base.", serde_json::json!({ "type": "object", "required": ["file_path"], "properties": { - "file_path": { - "type": "string", - "description": "The file path of the photo to retrieve facts for" - } + "file_path": { "type": "string", "description": "File path of the photo." } } }), )); tools.push(Tool::function( "store_entity", - "Store or update a person, place, event, or thing in the knowledge memory. Call this when you identify a subject in this photo that should be remembered for future insights.", + "Upsert a person / place / event / thing into the knowledge memory. Returns the entity id (use it as \ + `subject_entity_id` or `object_entity_id` in `store_fact`). Idempotent on canonical name.", serde_json::json!({ "type": "object", "required": ["name", "entity_type"], "properties": { - "name": { - "type": "string", - "description": "The canonical name of the entity (e.g. 'John Smith', 'Banff National Park')" - }, - "entity_type": { - "type": "string", - "enum": ["person", "place", "event", "thing"], - "description": "The type of entity" - }, - "description": { - "type": "string", - "description": "A brief description of the entity" - } + "name": { "type": "string", "description": "Canonical name (e.g. \"John Smith\", \"Banff National Park\")." 
}, + "entity_type": { "type": "string", "enum": ["person", "place", "event", "thing"] }, + "description": { "type": "string", "description": "Brief description." } } }), )); tools.push(Tool::function( "store_fact", - "Record a fact about an entity in the knowledge memory. Provide EITHER object_entity_id (when the object is a known entity whose ID you have) OR object_value (for free-text attributes). The fact will be linked to the current photo automatically.", + "Record a fact about an entity in the knowledge memory. Always linked to the current photo. \ + You must provide EITHER `object_entity_id` (when the object is itself a stored entity — e.g. \ + person A is_friend_of person B) OR `object_value` (free-text attribute — e.g. role=\"software engineer\"). \ + `object_entity_id` takes precedence when both are present. \ + Examples: \ + `{subject_entity_id: 7, predicate: \"is_friend_of\", object_entity_id: 12}` — links two known entities. \ + `{subject_entity_id: 7, predicate: \"lives_in\", object_value: \"Portland, Oregon\"}` — free-text attribute.", serde_json::json!({ "type": "object", "required": ["subject_entity_id", "predicate"], "properties": { - "subject_entity_id": { - "type": "integer", - "description": "The ID of the entity this fact is about (returned by store_entity or recall_entities)" - }, - "predicate": { - "type": "string", - "description": "The relationship or attribute (e.g. 'is_friend_of', 'located_in', 'attended_event', 'is_sibling_of')" - }, - "object_entity_id": { - "type": "integer", - "description": "Use when the object is a known entity (e.g. another person's entity ID for 'is_friend_of '). Takes precedence over object_value." - }, - "object_value": { - "type": "string", - "description": "Use for free-text attributes where the object is not a stored entity (e.g. 'Portland, Oregon', 'software engineer')" - }, - "photo_role": { - "type": "string", - "description": "How this entity appears in the photo (e.g. 
'subject', 'background', 'location'). Defaults to 'subject'." - } + "subject_entity_id": { "type": "integer", "description": "Entity id this fact is about." }, + "predicate": { "type": "string", "description": "Relationship or attribute (e.g. is_friend_of, located_in, attended_event)." }, + "object_entity_id": { "type": "integer", "description": "Use when the object is itself a stored entity. Takes precedence over object_value." }, + "object_value": { "type": "string", "description": "Use for free-text attributes where the object is not a stored entity." }, + "photo_role": { "type": "string", "description": "How this entity appears in the photo (default \"subject\")." } } }), )); tools.push(Tool::function( "get_current_datetime", - "Get the current date and time. Useful for understanding how long ago the photo was taken.", + "Get the current date and time. Useful when reasoning about how long ago a photo was taken.", serde_json::json!({ "type": "object", "properties": {} }), )); - if has_vision { + if opts.has_vision { tools.push(Tool::function( "describe_photo", - "Generate a visual description of the photo. Describes people, location, and activity visible in the image.", + "Generate a visual description of the current photo — people, location, objects, activity visible \ + in the image. Only available with vision-capable models.", serde_json::json!({ "type": "object", "properties": {} @@ -3405,11 +3444,12 @@ Return ONLY the summary, nothing else."#, date = date_taken.format("%B %d, %Y"), ); - // 10. Define tools. Hybrid mode omits `describe_photo` since the - // chat model receives the visual description inline. - let offer_describe_tool = has_vision && !is_hybrid; - let tools = - Self::build_tool_definitions(offer_describe_tool, self.apollo_client.is_enabled()); + // 10. Define tools. 
Gate flags computed from current data presence; + // hybrid mode omits describe_photo since the chat model receives + // the visual description inline (so we pass `false` for has_vision + // in hybrid mode regardless of the model's actual capability). + let gate_opts = self.current_gate_opts(has_vision && !is_hybrid); + let tools = Self::build_tool_definitions(gate_opts); // 11. Build initial messages. In hybrid mode images are never // attached to the wire message — the description is part of @@ -3690,6 +3730,55 @@ mod tests { use super::*; use crate::ai::ollama::{ToolCall, ToolCallFunction}; + #[test] + fn build_tool_definitions_drops_gated_tools() { + let opts = ToolGateOpts { + has_vision: false, + apollo_enabled: false, + daily_summaries_present: false, + calendar_present: false, + location_history_present: false, + }; + let tools = InsightGenerator::build_tool_definitions(opts); + let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect(); + + // Always-on tools survive. + assert!(names.contains(&"search_messages")); + assert!(names.contains(&"get_sms_messages")); + assert!(names.contains(&"get_file_tags")); + assert!(names.contains(&"reverse_geocode")); + assert!(names.contains(&"get_current_datetime")); + assert!(names.contains(&"recall_entities")); + assert!(names.contains(&"recall_facts_for_photo")); + assert!(names.contains(&"store_entity")); + assert!(names.contains(&"store_fact")); + + // Gated tools are absent. 
+ assert!(!names.contains(&"describe_photo")); + assert!(!names.contains(&"get_personal_place_at")); + assert!(!names.contains(&"search_rag")); + assert!(!names.contains(&"get_calendar_events")); + assert!(!names.contains(&"get_location_history")); + } + + #[test] + fn build_tool_definitions_includes_gated_tools_when_present() { + let opts = ToolGateOpts { + has_vision: true, + apollo_enabled: true, + daily_summaries_present: true, + calendar_present: true, + location_history_present: true, + }; + let tools = InsightGenerator::build_tool_definitions(opts); + let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect(); + assert!(names.contains(&"describe_photo")); + assert!(names.contains(&"get_personal_place_at")); + assert!(names.contains(&"search_rag")); + assert!(names.contains(&"get_calendar_events")); + assert!(names.contains(&"get_location_history")); + } + fn place(name: &str, description: &str) -> ApolloPlace { ApolloPlace { id: 1, diff --git a/src/ai/sms_client.rs b/src/ai/sms_client.rs index a4843ee..1ead9df 100644 --- a/src/ai/sms_client.rs +++ b/src/ai/sms_client.rs @@ -295,6 +295,47 @@ impl SmsApiClient { Ok(data.results) } + /// Same shape as `search_messages` but with optional `contact_id`. The + /// SMS-API endpoint accepts contact_id natively; date filtering is the + /// caller's responsibility (post-filter on the returned rows). 
+ pub async fn search_messages_with_contact( + &self, + query: &str, + mode: &str, + limit: usize, + contact_id: Option, + ) -> Result> { + let mut url = format!( + "{}/api/messages/search/?q={}&mode={}&limit={}", + self.base_url, + urlencoding::encode(query), + urlencoding::encode(mode), + limit + ); + if let Some(cid) = contact_id { + url.push_str(&format!("&contact_id={}", cid)); + } + + let mut request = self.client.get(&url); + if let Some(token) = &self.token { + request = request.header("Authorization", format!("Bearer {}", token)); + } + + let response = request.send().await?; + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(anyhow::anyhow!( + "SMS search request failed: {} - {}", + status, + body + )); + } + + let data: SmsSearchResponse = response.json().await?; + Ok(data.results) + } + pub async fn summarize_context( &self, messages: &[SmsMessage], diff --git a/src/database/daily_summary_dao.rs b/src/database/daily_summary_dao.rs index 276a5a2..5614a06 100644 --- a/src/database/daily_summary_dao.rs +++ b/src/database/daily_summary_dao.rs @@ -75,6 +75,13 @@ pub trait DailySummaryDao: Sync + Send { context: &opentelemetry::Context, contact: &str, ) -> Result; + + /// Get total count of all summaries (across all contacts). Used by + /// `current_gate_opts` to check whether daily_summaries are present. 
+ fn get_total_summary_count( + &mut self, + context: &opentelemetry::Context, + ) -> Result; } pub struct SqliteDailySummaryDao { @@ -454,6 +461,24 @@ impl DailySummaryDao for SqliteDailySummaryDao { }) .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + + fn get_total_summary_count( + &mut self, + context: &opentelemetry::Context, + ) -> Result { + trace_db_call(context, "query", "get_total_summary_count", |_span| { + let mut conn = self + .connection + .lock() + .expect("Unable to get DailySummaryDao"); + + diesel::sql_query("SELECT COUNT(*) as count FROM daily_conversation_summaries") + .get_result::(conn.deref_mut()) + .map(|r| r.count) + .map_err(|e| anyhow::anyhow!("Count query error: {:?}", e)) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } } // Helper structs for raw SQL queries