\n<body>", tolerating markdown decoration around // the title line. let (first_line, rest) = match trimmed.find('\n') { Some(pos) => (&trimmed[..pos], trimmed[pos..].trim()), None => (trimmed, ""), }; let first_line = strip_title_markdown(first_line); if let Some(t) = first_line .strip_prefix("Title:") .or_else(|| first_line.strip_prefix("title:")) { let title = strip_title_markdown(t); if !title.is_empty() && !rest.is_empty() { return (title.to_string(), rest.to_string()); } } // Fallback: first sentence (up to first `. ` or `.\n`) becomes the title if let Some(pos) = trimmed.find(". ").or_else(|| trimmed.find(".\n")) { let title = strip_title_markdown(&trimmed[..pos]); let body = trimmed[pos + 1..].trim(); if title.len() <= 100 && !body.is_empty() { return (title.to_string(), body.to_string()); } } // Last resort: truncate to 60 chars for title, full text as body let title: String = trimmed.chars().take(60).collect(); let title = strip_title_markdown(title.trim_end()).to_string(); (title, trimmed.to_string()) } /// Combine an optional personal Apollo Place with an optional Nominatim /// reverse-geocoded city, falling back to bare coordinates when neither /// resolves. Free function so we can test it cheaply without spinning up /// the whole InsightGenerator. 
fn compose_location_string(
    apollo: Option<ApolloPlace>,
    nominatim: Option<String>,
    lat: f64,
    lon: f64,
) -> String {
    // Collapse the personal place into a single label first: either
    // "Name" or "Name (description)" depending on whether a description
    // was supplied.
    let place_label = apollo.map(|place| {
        if place.description.is_empty() {
            place.name
        } else {
            format!("{} ({})", place.name, place.description)
        }
    });

    match (place_label, nominatim) {
        (Some(label), Some(city)) => format!("{} — near {}", label, city),
        (Some(label), None) => label,
        (None, Some(city)) => city,
        // Neither source resolved: bare coordinates, four decimal places.
        (None, None) => format!("{lat:.4}, {lon:.4}"),
    }
}

/// Fields deserialized from a Nominatim response; both are optional.
#[derive(Deserialize)]
struct NominatimResponse {
    display_name: Option<String>,
    address: Option<NominatimAddress>,
}

/// Address components deserialized from a Nominatim response. Every
/// component is optional.
#[derive(Deserialize)]
struct NominatimAddress {
    city: Option<String>,
    town: Option<String>,
    village: Option<String>,
    state: Option<String>,
}

/// Holds the LLM clients, external API clients, DAO handles and library
/// list used for insight generation.
#[derive(Clone)]
pub struct InsightGenerator {
    ollama: OllamaClient,
    /// OpenRouter client for requests made with `backend=hybrid`.
    /// `None` when `OPENROUTER_API_KEY` is not configured.
    openrouter: Option<Arc<OpenRouterClient>>,
    /// llama-swap client for requests made with `backend=llamacpp`.
    /// `None` when `LLAMA_SWAP_URL` is not configured.
    llamacpp: Option<Arc<LlamaCppClient>>,
    sms_client: SmsApiClient,
    /// Client for Apollo's user-defined Places. With the integration
    /// disabled (`APOLLO_API_BASE_URL` unset) every query comes back
    /// empty and the legacy Nominatim path is used as-is.
    apollo_client: ApolloClient,
    insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
    exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
    daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
    // Data sources imported from Google Takeout
    calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>>,
    location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>>,
    search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
    tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
    // Face detections (backs the get_faces_in_photo agentic tool)
    face_dao: Arc<Mutex<Box<dyn crate::faces::FaceDao>>>,
    // Knowledge memory
    knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
    // Persona settings (recall_facts_for_photo reads these to honour the
    // per-persona "reviewed-only" strict-mode toggle).
    persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>>,
    libraries: Vec<Library>,
}

/// Gating flags passed to `build_tool_definitions` on each call. A tool
/// whose backing data is empty (or whose env-var guard is unset) is left
/// out of the catalog, so the LLM doesn't spend iteration budget on a tool
/// that would always answer "No results found."
#[derive(Debug, Clone, Copy, Default)]
pub struct ToolGateOpts {
    pub has_vision: bool,
    pub apollo_enabled: bool,
    pub daily_summaries_present: bool,
    pub calendar_present: bool,
    pub location_history_present: bool,
    pub faces_present: bool,
    /// Per-persona toggle introduced by migration 2026-05-10-000500.
    /// While false, the agent's update_fact / supersede_fact tools are
    /// absent from the catalog entirely — defense-in-depth so that a
    /// hallucinated tool name still 404s, and so the agent doesn't burn
    /// iterations on corrections it isn't permitted to make.
    pub allow_agent_corrections: bool,
}

impl InsightGenerator {
    /// Assemble a generator from already-constructed clients, DAO handles
    /// and the list of configured libraries. Every argument is stored
    /// as-is in the field of the same name.
    pub fn new(
        ollama: OllamaClient,
        openrouter: Option<Arc<OpenRouterClient>>,
        llamacpp: Option<Arc<LlamaCppClient>>,
        sms_client: SmsApiClient,
        apollo_client: ApolloClient,
        insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
        exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
        daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
        calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>>,
        location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>>,
        search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
        tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
        face_dao: Arc<Mutex<Box<dyn crate::faces::FaceDao>>>,
        knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
        persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>>,
        libraries: Vec<Library>,
    ) -> Self {
        Self {
            ollama,
            openrouter,
            llamacpp,
            sms_client,
            apollo_client,
            insight_dao,
            exif_dao,
            daily_summary_dao,
            calendar_dao,
            location_dao,
            search_dao,
            tag_dao,
            face_dao,
            knowledge_dao,
            persona_dao,
            libraries,
        }
    }

    /// Borrow the shared insight DAO handle (async job completion uses it
    /// to look up the stored insight id).
    pub fn insight_dao(&self) -> &Arc<Mutex<Box<dyn InsightDao>>> {
        &self.insight_dao
    }

    /// Reports whether the optional Apollo Places integration is wired
    /// up, as answered by the Apollo client itself. Drives
    /// tool-definition gating (no point offering `get_personal_place_at`
    /// when Apollo is unreachable) and is public so `insight_chat` can
    /// call `build_tool_definitions` with the same flag.
    pub fn apollo_enabled(&self) -> bool {
        self.apollo_client.is_enabled()
    }

    /// Compute the per-call tool gate options by probing each backing
    /// table for presence. `daily_summaries_present` uses a `LIMIT 1`
    /// existence probe; `calendar_present` and `location_history_present`
    /// use the existing `get_event_count` / `get_location_count`
    /// methods (small enough that a full `COUNT(*)` is fine). Meant to
    /// be called once per chat turn / generation.
`has_vision` is /// supplied by the caller because it depends on the model selected /// for this turn, not on persistent state. /// /// Also resolves the per-persona `allow_agent_corrections` flag. /// Pass `Some((user_id, persona_id))` when generating in a persona /// context (every chat turn and agentic generation does); pass /// `None` for callers that don't have one, which defaults the gate /// to closed — the conservative posture. pub fn current_gate_opts_for_persona( &self, has_vision: bool, persona: Option<(i32, &str)>, ) -> ToolGateOpts { let cx = opentelemetry::Context::new(); let calendar_present = { let mut dao = self .calendar_dao .lock() .expect("Unable to lock CalendarEventDao"); dao.get_event_count(&cx).map(|n| n > 0).unwrap_or(false) }; let location_history_present = { let mut dao = self .location_dao .lock() .expect("Unable to lock LocationHistoryDao"); dao.get_location_count(&cx).map(|n| n > 0).unwrap_or(false) }; let daily_summaries_present = { let mut dao = self .daily_summary_dao .lock() .expect("Unable to lock DailySummaryDao"); dao.has_any_summaries(&cx).unwrap_or(false) }; let faces_present = { let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao"); dao.has_any_faces(&cx).unwrap_or(false) }; let allow_agent_corrections = persona .and_then(|(uid, pid)| { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(&cx, uid, pid).ok().flatten() }) .map(|p| p.allow_agent_corrections) .unwrap_or(false); ToolGateOpts { has_vision, apollo_enabled: self.apollo_enabled(), daily_summaries_present, calendar_present, location_history_present, faces_present, allow_agent_corrections, } } /// Resolve the stored system prompt for `(user_id, persona_id)` from /// the persona store. Returns `None` when the persona row doesn't /// exist or its prompt is blank — callers fall back to their own /// default. 
Used for server-side persona resolution: a request that /// carries `persona_id` but no explicit `system_prompt` should speak /// in the persona's stored voice, not the neutral default. pub(crate) fn persona_system_prompt(&self, user_id: i32, persona_id: &str) -> Option<String> { let cx = opentelemetry::Context::new(); let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(&cx, user_id, persona_id) .ok() .flatten() .map(|p| p.system_prompt) .filter(|s| !s.trim().is_empty()) } /// Resolve `rel_path` against the configured libraries, returning the /// first root under which the file exists. Insights may be generated /// for any library — the generator itself doesn't know which — so we /// probe each root rather than trust a single `base_path`. pub(crate) fn resolve_full_path(&self, rel_path: &str) -> Option<std::path::PathBuf> { use std::path::Path; for lib in &self.libraries { let candidate = Path::new(&lib.root_path).join(rel_path); if candidate.exists() { return Some(candidate); } } None } /// Extract contact name from file path /// e.g., "Sarah/img.jpeg" -> Some("Sarah") /// e.g., "img.jpeg" -> None fn extract_contact_from_path(file_path: &str) -> Option<String> { use std::path::Path; let path = Path::new(file_path); let components: Vec<_> = path.components().collect(); // If path has at least 2 components (directory + file), extract first directory if components.len() >= 2 && let Some(component) = components.first() && let Some(os_str) = component.as_os_str().to_str() { return Some(os_str.to_string()); } None } /// Look up the EXIF row for a photo. Returns `None` when no row /// exists yet (file watcher hasn't reached it) or the DAO call /// fails. Used by callers — including the chat-bootstrap path — /// that need a few specific fields (date_taken, GPS) without /// duplicating DAO plumbing. 
pub(crate) fn fetch_exif(&self, file_path: &str) -> Option<crate::database::models::ImageExif> { let cx = opentelemetry::Context::current(); let mut dao = self.exif_dao.lock().expect("Unable to lock ExifDao"); dao.get_exif(&cx, file_path).ok().flatten() } /// Load image file, resize it, and encode as base64 for vision models /// Resizes to max 1024px on longest edge to reduce context usage pub(crate) fn load_image_as_base64(&self, file_path: &str) -> Result<String> { use image::imageops::FilterType; let full_path = self.resolve_full_path(file_path).ok_or_else(|| { anyhow::anyhow!( "File '{}' not found under any configured library", file_path ) })?; log::debug!("Loading image for vision model: {:?}", full_path); // Open and decode the image let img = image::open(&full_path) .map_err(|e| anyhow::anyhow!("Failed to open image file: {}", e))?; let (original_width, original_height) = (img.width(), img.height()); // Resize to max 1024px on longest edge let resized = img.resize(1024, 1024, FilterType::Lanczos3); log::debug!( "Resized image from {}x{} to {}x{}", original_width, original_height, resized.width(), resized.height() ); // Encode as JPEG at 85% quality let mut buffer = Vec::new(); let mut cursor = Cursor::new(&mut buffer); resized .write_to(&mut cursor, ImageFormat::Jpeg) .map_err(|e| anyhow::anyhow!("Failed to encode image as JPEG: {}", e))?; let base64_string = base64::engine::general_purpose::STANDARD.encode(&buffer); log::debug!( "Encoded image as base64 ({} bytes -> {} chars)", buffer.len(), base64_string.len() ); Ok(base64_string) } /// Find relevant messages using RAG, excluding recent messages (>30 days ago) /// This prevents RAG from returning messages already in the immediate time window async fn find_relevant_messages_rag_historical( &self, parent_cx: &opentelemetry::Context, date: chrono::NaiveDate, location: Option<&str>, contact: Option<&str>, topics: Option<&[String]>, limit: usize, extra_context: Option<&str>, ) -> Result<Vec<String>> { let 
tracer = global_tracer(); let span = tracer.start_with_context("ai.rag.filter_historical", parent_cx); let filter_cx = parent_cx.with_span(span); filter_cx .span() .set_attribute(KeyValue::new("date", date.to_string())); filter_cx .span() .set_attribute(KeyValue::new("limit", limit as i64)); filter_cx .span() .set_attribute(KeyValue::new("exclusion_window_days", 30)); if let Some(t) = topics { filter_cx .span() .set_attribute(KeyValue::new("topics", t.join(", "))); } let query_results = self .find_relevant_messages_rag(date, location, contact, topics, limit * 2, extra_context) .await?; filter_cx.span().set_attribute(KeyValue::new( "rag_results_count", query_results.len() as i64, )); // Filter out messages from within 30 days of the photo date let photo_timestamp = date .and_hms_opt(12, 0, 0) .ok_or_else(|| anyhow::anyhow!("Invalid date"))? .and_utc() .timestamp(); let exclusion_window = 30 * 86400; // 30 days in seconds let historical_only: Vec<String> = query_results .into_iter() .filter(|msg| { // Extract date from formatted daily summary "[2024-08-15] Contact ..." 
if let Some(bracket_end) = msg.find(']') && let Some(date_str) = msg.get(1..bracket_end) { // Parse just the date (daily summaries don't have time) if let Ok(msg_date) = chrono::NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { let msg_timestamp = msg_date .and_hms_opt(12, 0, 0) .unwrap() .and_utc() .timestamp(); let time_diff = (photo_timestamp - msg_timestamp).abs(); return time_diff > exclusion_window; } } false }) .take(limit) .collect(); log::info!( "Found {} historical messages (>30 days from photo date)", historical_only.len() ); filter_cx.span().set_attribute(KeyValue::new( "historical_results_count", historical_only.len() as i64, )); filter_cx.span().set_status(Status::Ok); Ok(historical_only) } /// Find relevant daily summaries using RAG (semantic search) /// Returns formatted daily summary strings for LLM context async fn find_relevant_messages_rag( &self, date: chrono::NaiveDate, location: Option<&str>, contact: Option<&str>, topics: Option<&[String]>, limit: usize, extra_context: Option<&str>, ) -> Result<Vec<String>> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); let mut span = tracer.start_with_context("ai.rag.search_daily_summaries", ¤t_cx); span.set_attribute(KeyValue::new("date", date.to_string())); span.set_attribute(KeyValue::new("limit", limit as i64)); if let Some(loc) = location { span.set_attribute(KeyValue::new("location", loc.to_string())); } if let Some(c) = contact { span.set_attribute(KeyValue::new("contact", c.to_string())); } // Build query string - prioritize topics if available (semantically meaningful) let base_query = if let Some(topics) = topics { if !topics.is_empty() { // Use topics for semantic search - these are actual content keywords let topic_str = topics.join(", "); if let Some(c) = contact { format!("Conversations about {} with {}", topic_str, c) } else { format!("Conversations about {}", topic_str) } } else { // Fallback to metadata-based query Self::build_metadata_query(date, 
location, contact) } } else { // Fallback to metadata-based query Self::build_metadata_query(date, location, contact) }; let query = if let Some(extra) = extra_context { format!("{}. {}", base_query, extra) } else { base_query }; span.set_attribute(KeyValue::new("query", query.clone())); // Create context with this span for child operations let search_cx = current_cx.with_span(span); log::info!("========================================"); log::info!("RAG QUERY: {}", query); log::info!("========================================"); // Generate embedding for the query via the configured local backend // (`LLM_BACKEND` switch). Must match the backend that populated the // daily-summary embeddings or similarity search will be garbage. let query_embedding = crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query).await?; // Search for similar daily summaries with time-based weighting // This prioritizes summaries temporally close to the query date let mut summary_dao = self .daily_summary_dao .lock() .expect("Unable to lock DailySummaryDao"); let date_str = date.format("%Y-%m-%d").to_string(); let similar_summaries = summary_dao .find_similar_summaries_with_time_weight(&search_cx, &query_embedding, &date_str, limit) .map_err(|e| anyhow::anyhow!("Failed to find similar summaries: {:?}", e))?; log::info!( "Found {} relevant daily summaries via RAG", similar_summaries.len() ); search_cx.span().set_attribute(KeyValue::new( "results_count", similar_summaries.len() as i64, )); // Format daily summaries for LLM context let formatted = similar_summaries .into_iter() .map(|s| { format!( "[{}] {} ({} messages):\n{}", s.date, s.contact, s.message_count, s.summary ) }) .collect(); search_cx.span().set_status(Status::Ok); Ok(formatted) } /// Semantic search over daily summaries for the agentic `search_rag` /// tool. 
Embeds the caller's query as-is (no metadata boilerplate) and /// only applies time weighting when an anchor date is provided — /// without one, results rank purely by similarity across all time. async fn search_summaries_semantic( &self, query: &str, date: Option<chrono::NaiveDate>, limit: usize, ) -> Result<Vec<String>> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); let mut span = tracer.start_with_context("ai.rag.search_daily_summaries", ¤t_cx); span.set_attribute(KeyValue::new("query", query.to_string())); span.set_attribute(KeyValue::new("limit", limit as i64)); span.set_attribute(KeyValue::new("time_weighted", date.is_some())); if let Some(d) = date { span.set_attribute(KeyValue::new("date", d.to_string())); } let search_cx = current_cx.with_span(span); log::info!("RAG QUERY: {} (anchor date: {:?})", query, date); // Must use the same backend that populated the daily-summary // embeddings or similarity search is garbage (see embed_one docs). let query_embedding = crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), query).await?; let mut summary_dao = self .daily_summary_dao .lock() .expect("Unable to lock DailySummaryDao"); let similar_summaries = match date { Some(d) => summary_dao.find_similar_summaries_with_time_weight( &search_cx, &query_embedding, &d.format("%Y-%m-%d").to_string(), limit, ), None => summary_dao.find_similar_summaries(&search_cx, &query_embedding, limit), } .map_err(|e| anyhow::anyhow!("Failed to find similar summaries: {:?}", e))?; search_cx.span().set_attribute(KeyValue::new( "results_count", similar_summaries.len() as i64, )); search_cx.span().set_status(Status::Ok); Ok(similar_summaries .into_iter() .map(|s| { format!( "[{}] {} ({} messages):\n{}", s.date, s.contact, s.message_count, s.summary ) }) .collect()) } /// Build a metadata-based query (fallback when no topics available) fn build_metadata_query( date: chrono::NaiveDate, location: Option<&str>, contact: Option<&str>, ) -> String 
{ let mut query_parts = Vec::new(); // Add temporal context query_parts.push(format!("On {}", date.format("%B %d, %Y"))); // Add location if available if let Some(loc) = location { query_parts.push(format!("at {}", loc)); } // Add contact context if available if let Some(c) = contact { query_parts.push(format!("conversation with {}", c)); } // Add day of week for temporal context let weekday = date.format("%A"); query_parts.push(format!("it was a {}", weekday)); query_parts.join(", ") } /// Haversine distance calculation for GPS proximity (in kilometers) fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 { const R: f64 = 6371.0; // Earth radius in km let d_lat = (lat2 - lat1).to_radians(); let d_lon = (lon2 - lon1).to_radians(); let a = (d_lat / 2.0).sin().powi(2) + lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2); R * 2.0 * a.sqrt().atan2((1.0 - a).sqrt()) } /// Gather calendar context for photo timestamp /// Uses hybrid time + semantic search (±7 days, ranked by relevance) async fn gather_calendar_context( &self, parent_cx: &opentelemetry::Context, timestamp: i64, location: Option<&str>, ) -> Result<Option<String>> { let tracer = global_tracer(); let span = tracer.start_with_context("ai.context.calendar", parent_cx); let calendar_cx = parent_cx.with_span(span); let query_embedding = if let Some(loc) = location { match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), loc).await { Ok(emb) => Some(emb), Err(e) => { log::warn!("Failed to generate embedding for location '{}': {}", loc, e); None } } } else { None }; let events = { let mut dao = self .calendar_dao .lock() .expect("Unable to lock CalendarEventDao"); dao.find_relevant_events_hybrid( &calendar_cx, timestamp, 7, // ±7 days window query_embedding.as_deref(), 5, // Top 5 events ) .ok() }; calendar_cx.span().set_status(Status::Ok); if let Some(events) = events { if events.is_empty() { return Ok(None); } let formatted = events .iter() .map(|e| { 
let date = DateTime::from_timestamp(e.start_time, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); let attendees = e .attendees .as_ref() .and_then(|a| serde_json::from_str::<Vec<String>>(a).ok()) .map(|list| format!(" (with {})", list.join(", "))) .unwrap_or_default(); format!("[{}] {}{}", date, e.summary, attendees) }) .collect::<Vec<_>>() .join("\n"); Ok(Some(format!("Calendar events:\n{}", formatted))) } else { Ok(None) } } /// Gather location context for photo timestamp /// Finds nearest location record (±30 minutes) async fn gather_location_context( &self, parent_cx: &opentelemetry::Context, timestamp: i64, exif_gps: Option<(f64, f64)>, ) -> Result<Option<String>> { let tracer = global_tracer(); let span = tracer.start_with_context("ai.context.location", parent_cx); let location_cx = parent_cx.with_span(span); let nearest = { let mut dao = self .location_dao .lock() .expect("Unable to lock LocationHistoryDao"); dao.find_nearest_location( &location_cx, timestamp, 10800, // ±3 hours (more realistic for photo timing) ) .ok() .flatten() }; location_cx.span().set_status(Status::Ok); if let Some(loc) = nearest { // Check if this adds NEW information compared to EXIF if let Some((exif_lat, exif_lon)) = exif_gps { let distance = Self::haversine_distance(exif_lat, exif_lon, loc.latitude, loc.longitude); // Skip only if very close AND no useful activity/place info // Allow activity context even if coordinates match if distance < 0.5 && loc.activity.is_none() && loc.place_name.is_none() { log::debug!( "Location history matches EXIF GPS ({}m) with no extra context, skipping", (distance * 1000.0) as i32 ); return Ok(None); } else if distance < 0.5 { log::debug!( "Location history close to EXIF ({}m) but has activity/place info", (distance * 1000.0) as i32 ); } } let activity = loc .activity .as_ref() .map(|a| format!(" ({})", a)) .unwrap_or_default(); let place = loc .place_name .as_ref() .map(|p| 
format!(" at {}", p)) .unwrap_or_default(); Ok(Some(format!( "Location history: You were{}{}{}", activity, place, if activity.is_empty() && place.is_empty() { format!(" near {:.4}, {:.4}", loc.latitude, loc.longitude) } else { String::new() } ))) } else { Ok(None) } } /// Gather search context for photo date /// Uses semantic search on queries (±30 days, top 5 relevant) async fn gather_search_context( &self, parent_cx: &opentelemetry::Context, timestamp: i64, location: Option<&str>, contact: Option<&str>, enrichment: Option<&str>, ) -> Result<Option<String>> { let tracer = global_tracer(); let span = tracer.start_with_context("ai.context.search", parent_cx); let search_cx = parent_cx.with_span(span); // Use enrichment (topics + photo description + tags) if available; // fall back to generic temporal query. let query_text = if let Some(enriched) = enrichment { enriched.to_string() } else { // Fallback: generic temporal query format!( "searches about {} {} {}", DateTime::from_timestamp(timestamp, 0) .map(|dt| dt.format("%B %Y").to_string()) .unwrap_or_default(), location.unwrap_or(""), contact .map(|c| format!("involving {}", c)) .unwrap_or_default() ) }; let query_embedding = match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query_text).await { Ok(emb) => emb, Err(e) => { log::warn!("Failed to generate search embedding: {}", e); search_cx.span().set_status(Status::Error { description: e.to_string().into(), }); return Ok(None); } }; let searches = { let mut dao = self .search_dao .lock() .expect("Unable to lock SearchHistoryDao"); dao.find_relevant_searches_hybrid( &search_cx, timestamp, 30, // ±30 days (wider window than calendar) Some(&query_embedding), 5, // Top 5 searches ) .ok() }; search_cx.span().set_status(Status::Ok); if let Some(searches) = searches { if searches.is_empty() { log::warn!( "No relevant searches found for photo timestamp {}", timestamp ); return Ok(None); } let formatted = searches .iter() .map(|s| { let date = 
DateTime::from_timestamp(s.timestamp, 0) .map(|dt| dt.format("%Y-%m-%d").to_string()) .unwrap_or_else(|| "unknown".to_string()); format!("[{}] \"{}\"", date, s.query) }) .collect::<Vec<_>>() .join("\n"); Ok(Some(format!("Search history:\n{}", formatted))) } else { Ok(None) } } /// Combine all context sources with equal weight fn combine_contexts( sms: Option<String>, calendar: Option<String>, location: Option<String>, search: Option<String>, tags: Option<String>, ) -> String { let mut parts = Vec::new(); if let Some(s) = sms { parts.push(format!("## Messages\n{}", s)); } if let Some(c) = calendar { parts.push(format!("## Calendar\n{}", c)); } if let Some(l) = location { parts.push(format!("## Location\n{}", l)); } if let Some(s) = search { parts.push(format!("## Searches\n{}", s)); } if let Some(t) = tags { parts.push(format!("## Tags\n{}", t)); } if parts.is_empty() { "No additional context available".to_string() } else { parts.join("\n\n") } } /// Generate AI insight for a single photo with custom configuration pub async fn generate_insight_for_photo_with_config( &self, file_path: &str, custom_model: Option<String>, custom_system_prompt: Option<String>, num_ctx: Option<i32>, temperature: Option<f32>, top_p: Option<f32>, top_k: Option<i32>, min_p: Option<f32>, ) -> Result<()> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); let mut span = tracer.start_with_context("ai.insight.generate", ¤t_cx); // Normalize path to ensure consistent forward slashes in database let file_path = normalize_path(file_path); log::info!("Generating insight for photo: {}", file_path); span.set_attribute(KeyValue::new("file_path", file_path.clone())); // Create custom Ollama client if model is specified let mut ollama_client = if let Some(model) = custom_model { log::info!("Using custom model: {}", model); span.set_attribute(KeyValue::new("custom_model", model.clone())); OllamaClient::new( self.ollama.primary_url.clone(), self.ollama.fallback_url.clone(), 
model.clone(), Some(model), // Use the same custom model for fallback server ) } else { span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone())); self.ollama.clone() }; // Set context size if specified if let Some(ctx) = num_ctx { log::info!("Using custom context size: {}", ctx); span.set_attribute(KeyValue::new("num_ctx", ctx as i64)); ollama_client.set_num_ctx(Some(ctx)); } // Apply sampling parameters if any were provided if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() { log::info!( "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}", temperature, top_p, top_k, min_p ); if let Some(t) = temperature { span.set_attribute(KeyValue::new("temperature", t as f64)); } if let Some(p) = top_p { span.set_attribute(KeyValue::new("top_p", p as f64)); } if let Some(k) = top_k { span.set_attribute(KeyValue::new("top_k", k as i64)); } if let Some(m) = min_p { span.set_attribute(KeyValue::new("min_p", m as f64)); } ollama_client.set_sampling_params(temperature, top_p, top_k, min_p); } // Create context with this span for child operations let insight_cx = current_cx.with_span(span); // 1. Get EXIF data for the photo let exif = { let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao"); exif_dao .get_exif(&insight_cx, &file_path) .map_err(|e| anyhow::anyhow!("Failed to get EXIF: {:?}", e))? 
}; // Get full timestamp for proximity-based message filtering let timestamp = if let Some(ts) = exif.as_ref().and_then(|e| e.date_taken) { ts } else { log::warn!("No date_taken in EXIF for {}, trying filename", file_path); extract_date_from_filename(&file_path) .map(|dt| dt.timestamp()) .or_else(|| { let full_path = self.resolve_full_path(&file_path)?; File::open(&full_path) .and_then(|f| f.metadata()) .inspect_err(|e| { log::warn!( "Failed to get file timestamp for insight {}: {}", file_path, e ) }) .ok() .and_then(|m| earliest_fs_time(&m)) .map(|t| DateTime::<Utc>::from(t).timestamp()) }) .unwrap_or_else(|| Utc::now().timestamp()) }; let date_taken = DateTime::from_timestamp(timestamp, 0) .map(|dt| dt.date_naive()) .unwrap_or_else(|| Utc::now().date_naive()); // 3. Extract contact name from file path let contact = Self::extract_contact_from_path(&file_path); log::info!("Extracted contact from path: {:?}", contact); insight_cx .span() .set_attribute(KeyValue::new("date_taken", date_taken.to_string())); if let Some(ref c) = contact { insight_cx .span() .set_attribute(KeyValue::new("contact", c.clone())); } // Fetch file tags (used to enrich RAG and final context) let tag_names: Vec<String> = { let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao"); dao.get_tags_for_path(&insight_cx, &file_path) .unwrap_or_else(|e| { log::warn!("Failed to fetch tags for insight {}: {}", file_path, e); Vec::new() }) .into_iter() .map(|t| t.name) .collect() }; log::info!( "Fetched {} tags for photo: {:?}", tag_names.len(), tag_names ); // 4. Get location name from GPS coordinates (needed for RAG query). // Personal Apollo Place wins over Nominatim when both apply — // "Home (My house in Cambridge) — near Cambridge, MA" is more // grounding for the LLM than the city name alone. 
let location = match exif { Some(ref exif) => { if let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude) { let lat = lat as f64; let lon = lon as f64; let nominatim = self.reverse_geocode(lat, lon).await; let apollo_primary = self .apollo_client .places_containing(lat, lon) .await .into_iter() .next(); let combined = compose_location_string(apollo_primary, nominatim, lat, lon); insight_cx .span() .set_attribute(KeyValue::new("location", combined.clone())); Some(combined) } else { None } } None => None, }; // Check if the model has vision capabilities let model_to_check = ollama_client.primary_model.clone(); let has_vision = match OllamaClient::check_model_capabilities( &ollama_client.primary_url, &model_to_check, ) .await { Ok(capabilities) => { log::info!( "Model '{}' vision capability: {}", model_to_check, capabilities.has_vision ); capabilities.has_vision } Err(e) => { log::warn!( "Failed to check vision capabilities for model '{}', assuming no vision support: {}", model_to_check, e ); false } }; insight_cx .span() .set_attribute(KeyValue::new("model_has_vision", has_vision)); // Load image and encode as base64 only if model supports vision let image_base64 = if has_vision { match self.load_image_as_base64(&file_path) { Ok(b64) => { log::info!( "Successfully loaded image for vision-capable model '{}'", model_to_check ); Some(b64) } Err(e) => { log::warn!("Failed to load image for vision model: {}", e); None } } } else { log::info!( "Model '{}' does not support vision, skipping image processing", model_to_check ); None }; // Generate brief photo description for RAG enrichment (vision models only) let photo_description: Option<String> = if let Some(ref img_b64) = image_base64 { match ollama_client.generate_photo_description(img_b64).await { Ok(desc) => { log::info!("Photo description for RAG enrichment: {}", desc); Some(desc) } Err(e) => { log::warn!( "Failed to generate photo description for RAG enrichment: {}", e ); None } } } else { None }; // 
Build enriched context string for RAG: photo description + tags // (SMS topics are passed separately to RAG functions) let enriched_query: Option<String> = { let mut parts: Vec<String> = Vec::new(); if let Some(ref desc) = photo_description { parts.push(desc.clone()); } if !tag_names.is_empty() { parts.push(format!("tags: {}", tag_names.join(", "))); } if parts.is_empty() { None } else { Some(parts.join(". ")) } }; let mut search_enrichment: Option<String> = enriched_query.clone(); // 5. Intelligent retrieval: Hybrid approach for better context let mut sms_summary = None; let mut used_rag = false; // TEMPORARY: Set to true to disable RAG and use only time-based retrieval for testing let disable_rag_for_testing = false; if disable_rag_for_testing { log::warn!("RAG DISABLED FOR TESTING - Using only time-based retrieval (±4 days)"); // Skip directly to fallback } else { // ALWAYS use Strategy B: Expanded immediate context + historical RAG // This is more reliable than pure semantic search which can match irrelevant messages log::info!("Using expanded immediate context + historical RAG approach"); // Step 1: Get FULL immediate temporal context (±4 days, ALL messages) let immediate_messages = self .sms_client .fetch_messages_for_contact(contact.as_deref(), timestamp, 4) .await .unwrap_or_else(|e| { log::error!("Failed to fetch immediate messages: {}", e); Vec::new() }); log::info!( "Fetched {} messages from ±4 days window (using ALL for immediate context)", immediate_messages.len() ); if !immediate_messages.is_empty() { // Step 2: Extract topics from immediate messages to enrich RAG query let topics = self .extract_topics_from_messages(&immediate_messages, &ollama_client) .await; log::info!("Extracted topics for query enrichment: {:?}", topics); // Build full search enrichment: SMS topics + photo description + tag names search_enrichment = { let mut parts: Vec<String> = Vec::new(); if !topics.is_empty() { parts.push(topics.join(", ")); } if let Some(ref desc) = 
photo_description { parts.push(desc.clone()); } if !tag_names.is_empty() { parts.push(format!("tags: {}", tag_names.join(", "))); } if parts.is_empty() { None } else { Some(parts.join(". ")) } }; // Step 3: Try historical RAG (>30 days ago) using extracted topics let topics_slice = if topics.is_empty() { None } else { Some(topics.as_slice()) }; match self .find_relevant_messages_rag_historical( &insight_cx, date_taken, None, contact.as_deref(), topics_slice, 10, // Top 10 historical matches enriched_query.as_deref(), ) .await { Ok(historical_messages) if !historical_messages.is_empty() => { log::info!( "Two-context approach: {} immediate (full conversation) + {} historical (similar past moments)", immediate_messages.len(), historical_messages.len() ); used_rag = true; // Step 4: Summarize contexts separately, then combine let immediate_summary = self .summarize_context_from_messages( &immediate_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await .unwrap_or_else(|| String::from("No immediate context")); let historical_summary = self .summarize_messages( &historical_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await .unwrap_or_else(|| String::from("No historical context")); // Combine summaries sms_summary = Some(format!( "Immediate context (±4 days): {}\n\nSimilar moments from the past: {}", immediate_summary, historical_summary )); } Ok(_) => { // RAG found no historical matches, just use immediate context log::info!("No historical RAG matches, using immediate context only"); sms_summary = self .summarize_context_from_messages( &immediate_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await; } Err(e) => { log::warn!("Historical RAG failed, using immediate context only: {}", e); sms_summary = self .summarize_context_from_messages( &immediate_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await; } } } else { log::info!("No immediate messages found, trying basic RAG as fallback"); // Fallback to basic RAG 
even without strong query match self .find_relevant_messages_rag( date_taken, None, contact.as_deref(), None, 20, enriched_query.as_deref(), ) .await { Ok(rag_messages) if !rag_messages.is_empty() => { used_rag = true; sms_summary = self .summarize_messages( &rag_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await; } _ => {} } } } // 6. Fallback to traditional time-based message retrieval if RAG didn't work if !used_rag { log::info!("Using traditional time-based message retrieval (±4 days)"); let sms_messages = self .sms_client .fetch_messages_for_contact(contact.as_deref(), timestamp, 4) .await .unwrap_or_else(|e| { log::error!("Failed to fetch SMS messages: {}", e); Vec::new() }); log::info!( "Fetched {} SMS messages closest to {}", sms_messages.len(), DateTime::from_timestamp(timestamp, 0) .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string()) .unwrap_or_else(|| "unknown time".to_string()) ); // Summarize time-based messages if !sms_messages.is_empty() { match self .sms_client .summarize_context(&sms_messages, &ollama_client) .await { Ok(summary) => { sms_summary = Some(summary); } Err(e) => { log::warn!("Failed to summarize SMS context: {}", e); } } } } let retrieval_method = if used_rag { "RAG" } else { "time-based" }; insight_cx .span() .set_attribute(KeyValue::new("retrieval_method", retrieval_method)); insight_cx .span() .set_attribute(KeyValue::new("has_sms_context", sms_summary.is_some())); log::info!( "Photo context: date={}, location={:?}, retrieval_method={}", date_taken, location, retrieval_method ); // 6. 
Gather Google Takeout context from all sources let calendar_context = self .gather_calendar_context(&insight_cx, timestamp, location.as_deref()) .await .ok() .flatten(); let exif_gps = exif.as_ref().and_then(|e| { if let (Some(lat), Some(lon)) = (e.gps_latitude, e.gps_longitude) { Some((lat as f64, lon as f64)) } else { None } }); let location_context = self .gather_location_context(&insight_cx, timestamp, exif_gps) .await .ok() .flatten(); let search_context = self .gather_search_context( &insight_cx, timestamp, location.as_deref(), contact.as_deref(), search_enrichment.as_deref(), ) .await .ok() .flatten(); // 7. Combine all context sources with equal weight let tags_context = if tag_names.is_empty() { None } else { Some(tag_names.join(", ")) }; let combined_context = Self::combine_contexts( sms_summary, calendar_context, location_context, search_context, tags_context, ); log::info!( "Combined context from all sources ({} chars)", combined_context.len() ); // 10. Generate summary first, then derive title from the summary let summary = ollama_client .generate_photo_summary( date_taken, location.as_deref(), contact.as_deref(), Some(&combined_context), custom_system_prompt.as_deref(), image_base64.clone(), ) .await?; let title = ollama_client .generate_photo_title(&summary, custom_system_prompt.as_deref()) .await?; log::info!("Generated title: {}", title); log::info!("Generated summary: {}", summary); insight_cx .span() .set_attribute(KeyValue::new("title_length", title.len() as i64)); insight_cx .span() .set_attribute(KeyValue::new("summary_length", summary.len() as i64)); // 11. Store in database. content_hash is None here — store_insight // looks it up from image_exif before persisting; reconciliation // backfills if the hash isn't known yet. 
let insight = InsertPhotoInsight { library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), title, summary, generated_at: Utc::now().timestamp(), model_version: ollama_client.primary_model.clone(), is_current: true, training_messages: None, backend: "local".to_string(), fewshot_source_ids: None, content_hash: None, num_ctx, temperature, top_p, top_k, min_p, system_prompt: custom_system_prompt.clone(), persona_id: None, prompt_eval_count: None, eval_count: None, }; let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); let result = dao .store_insight(&insight_cx, insight) .map_err(|e| anyhow::anyhow!("Failed to store insight: {:?}", e)); match &result { Ok(_) => { log::info!("Successfully stored insight for {}", file_path); insight_cx.span().set_status(Status::Ok); } Err(e) => { log::error!("Failed to store insight: {:?}", e); insight_cx.span().set_status(Status::error(e.to_string())); } } result?; Ok(()) } /// Extract key topics/entities from messages using LLM for query enrichment async fn extract_topics_from_messages( &self, messages: &[crate::ai::SmsMessage], ollama: &OllamaClient, ) -> Vec<String> { if messages.is_empty() { return Vec::new(); } // Format a sample of messages for topic extraction let sample_size = messages.len().min(20); let user_name = user_display_name(); let sample_text: Vec<String> = messages .iter() .take(sample_size) .map(|m| { let sender: &str = if m.is_sent { &user_name } else { &m.contact }; format!("{}: {}", sender, m.body) }) .collect(); let prompt = format!( r#"Extract important entities from these messages that provide context about what was happening. Focus on: 1. **People**: Names of specific people mentioned (first names, nicknames) 2. **Places**: Locations, cities, buildings, workplaces, parks, restaurants, venues 3. **Activities**: Specific events, hobbies, groups, organizations (e.g., "drum corps", "auditions") 4. 
**Unique terms**: Domain-specific words or phrases that might need explanation (e.g., "Hyland", "Vanguard", "DCI") Messages: {} Return a comma-separated list of 3-7 specific entities (people, places, activities, unique terms). Focus on proper nouns and specific terms that provide context. Return ONLY the comma-separated list, nothing else."#, sample_text.join("\n") ); match ollama .generate(&prompt, Some("You are an entity extraction assistant. Extract proper nouns, people, places, and domain-specific terms that provide context.")) .await { Ok(response) => { log::debug!("Topic extraction raw response: {}", response); // Parse comma-separated topics let topics: Vec<String> = response .split(',') .map(|s| s.trim().to_string()) .filter(|s| !s.is_empty() && s.len() > 1) // Filter out single chars .take(7) // Increased from 5 to 7 .collect(); if topics.is_empty() { log::warn!("Topic extraction returned empty list from {} messages", messages.len()); } else { log::info!("Extracted {} topics from {} messages: {}", topics.len(), messages.len(), topics.join(", ")); } topics } Err(e) => { log::warn!("Failed to extract topics from messages: {}", e); Vec::new() } } } /// Summarize pre-formatted message strings using LLM (concise version for historical context) async fn summarize_messages( &self, messages: &[String], ollama: &OllamaClient, custom_system: Option<&str>, ) -> Option<String> { if messages.is_empty() { return None; } let messages_text = messages.join("\n"); let prompt = format!( r#"Summarize the context from these messages in 2-3 sentences. Focus on activities, locations, events, and relationships mentioned. Messages: {} Return ONLY the summary, nothing else."#, messages_text ); let system = custom_system .unwrap_or("You are a context summarization assistant. 
Be concise and factual."); match ollama.generate(&prompt, Some(system)).await { Ok(summary) => Some(summary), Err(e) => { log::warn!("Failed to summarize messages: {}", e); None } } } /// Convert SmsMessage objects to formatted strings and summarize with more detail /// This is used for immediate context (±2 days) to preserve conversation details async fn summarize_context_from_messages( &self, messages: &[crate::ai::SmsMessage], ollama: &OllamaClient, custom_system: Option<&str>, ) -> Option<String> { if messages.is_empty() { return None; } // Format messages let user_name = user_display_name(); let formatted: Vec<String> = messages .iter() .map(|m| { let sender: &str = if m.is_sent { &user_name } else { &m.contact }; let timestamp = chrono::DateTime::from_timestamp(m.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown time".to_string()); format!("[{}] {}: {}", timestamp, sender, m.body) }) .collect(); let messages_text = formatted.join("\n"); // Use a more detailed prompt for immediate context let prompt = format!( r#"Provide a detailed summary of the conversation context from these messages. Include: - Key activities, events, and plans discussed - Important locations or places mentioned - Emotional tone and relationship dynamics - Any significant details that provide context about what was happening Be thorough but organized. Messages: {} Return ONLY the summary, nothing else."#, messages_text ); let system = custom_system.unwrap_or( "You are a context summarization assistant. Be detailed and factual, preserving important context.", ); match ollama.generate(&prompt, Some(system)).await { Ok(summary) => Some(summary), Err(e) => { log::warn!("Failed to summarize immediate context: {}", e); None } } } // ── Tool executors for agentic loop ──────────────────────────────── /// Dispatch a tool call to the appropriate executor. 
/// /// `(user_id, persona_id)` identifies the author this loop is /// generating for — `store_fact` tags new facts with both, /// `recall_facts_for_photo` filters reads to both (always Single /// in the agentic loop, even when the persona has /// `include_all_memories=true`; the hive-mind toggle is for human /// browsing of `/knowledge/*`, where mixing voices is the explicit /// goal — during generation the persona's own voice must stay /// clean). The composite (user_id, persona_id) is required for the /// FK to personas to hold (migration 2026-05-10-000000). pub(crate) async fn execute_tool( &self, tool_name: &str, arguments: &serde_json::Value, backend: &ResolvedBackend, image_base64: &Option<String>, file_path: &str, user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { let model = backend.model(); let backend_label = backend.kind.as_str(); let result = match tool_name { "search_rag" => self.tool_search_rag(arguments, backend.local(), cx).await, "search_messages" => self.tool_search_messages(arguments, cx).await, "get_sms_messages" => self.tool_get_sms_messages(arguments, cx).await, "get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await, "get_location_history" => self.tool_get_location_history(arguments, cx).await, "get_file_tags" => self.tool_get_file_tags(arguments, cx).await, "get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await, "describe_photo" => { self.tool_describe_photo(backend.local(), image_base64) .await } "reverse_geocode" => self.tool_reverse_geocode(arguments).await, "get_personal_place_at" => self.tool_get_personal_place_at(arguments).await, "recall_entities" => self.tool_recall_entities(arguments, cx).await, "recall_facts_for_photo" => { self.tool_recall_facts_for_photo(arguments, user_id, persona_id, cx) .await } "recall_facts_for_entity" => { self.tool_recall_facts_for_entity(arguments, user_id, persona_id, cx) .await } "store_entity" => self.tool_store_entity(arguments, 
cx).await, "store_fact" => { self.tool_store_fact( arguments, file_path, user_id, persona_id, model, backend_label, cx, ) .await } "update_fact" => { self.tool_update_fact(arguments, user_id, persona_id, model, backend_label, cx) .await } "supersede_fact" => { self.tool_supersede_fact(arguments, user_id, persona_id, model, backend_label, cx) .await } "get_current_datetime" => Self::tool_get_current_datetime(), unknown => format!("Unknown tool: {}", unknown), }; if result.starts_with("Error") || result.starts_with("No ") { log::warn!("Tool '{}' result: {}", tool_name, result); } else { log::info!("Tool '{}' result: {} chars", tool_name, result.len()); } result } /// Tool: search_rag — semantic search over daily summaries async fn tool_search_rag( &self, args: &serde_json::Value, local: &dyn LlmClient, _cx: &opentelemetry::Context, ) -> String { let query = match args.get("query").and_then(|v| v.as_str()) { Some(q) => q.to_string(), None => return "Error: missing required parameter 'query'".to_string(), }; let date = match args.get("date").and_then(|v| v.as_str()) { Some(d) => match NaiveDate::parse_from_str(d, "%Y-%m-%d") { Ok(d) => Some(d), Err(e) => return format!("Error: failed to parse date '{}': {}", d, e), }, None => None, }; let contact = args .get("contact") .and_then(|v| v.as_str()) .map(|s| s.to_string()); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(10) .clamp(1, 25) as usize; log::info!( "tool_search_rag: query='{}', date={:?}, contact={:?}, limit={}", query, date, contact, limit ); // Pull a wider candidate pool than the final limit so the LLM // reranker has room to promote less-obvious hits. Candidates_factor // is capped so a big `limit` doesn't blow past what the reranker // can sensibly judge in one prompt. 
let rerank_enabled = std::env::var("SEARCH_RAG_RERANK") .ok() .map(|v| v.to_lowercase() != "off" && v != "0") .unwrap_or(true); let candidate_limit = if rerank_enabled { (limit * 3).min(40) } else { limit }; // Embed the model's query verbatim — a soft contact bias is the // only decoration. The metadata boilerplate ("On <date>, it was a // <weekday>") that find_relevant_messages_rag prepends drowns the // semantic signal, so the tool path deliberately bypasses it. let search_query = match contact.as_deref() { Some(c) => format!("{} (conversation with {})", query, c), None => query.clone(), }; let results = match self .search_summaries_semantic(&search_query, date, candidate_limit) .await { Ok(results) if !results.is_empty() => results, Ok(_) => return "No relevant messages found.".to_string(), Err(e) => return format!("Error searching RAG: {}", e), }; let final_results = if rerank_enabled && results.len() > limit { match self.rerank_with_llm(&query, &results, limit, local).await { Ok(reordered) => reordered, Err(e) => { log::warn!("rerank failed, using vector order: {}", e); results.into_iter().take(limit).collect() } } } else { results.into_iter().take(limit).collect::<Vec<_>>() }; final_results.join("\n\n") } /// LLM-based reranker: ask the local model to pick the top-`limit` /// passages from `candidates` that are most relevant to `query`. /// Returns the reordered subset. /// /// Cheap-ish because the reranker prompt and output live outside the /// agent's visible context — only the final selection lands in the /// tool_result. On parse failure we fall back to the input order. async fn rerank_with_llm( &self, query: &str, candidates: &[String], limit: usize, local: &dyn LlmClient, ) -> Result<Vec<String>> { let query_preview: String = query.chars().take(60).collect(); log::info!( "rerank: {} candidates -> top {} (query=\"{}\")", candidates.len(), limit, query_preview ); // Build numbered list (1-based for readability). 
Cap each passage // at ~1000 chars so very long summaries don't eat the prompt. let numbered: String = candidates .iter() .enumerate() .map(|(i, c)| { let trimmed = if c.chars().count() > 1000 { format!("{}…", c.chars().take(1000).collect::<String>()) } else { c.clone() }; format!("[{}] {}", i + 1, trimmed) }) .collect::<Vec<_>>() .join("\n\n"); let prompt = format!( "You are ranking search results. From the numbered passages below, \ select the {} most relevant to the query. Respond with ONLY a \ comma-separated list of passage numbers in order from most to \ least relevant. No explanation, no other text.\n\n\ Query: {}\n\n\ Passages:\n{}\n\n\ Top {} passage numbers:", limit, query, numbered, limit ); let started = std::time::Instant::now(); let system = Some("You are a terse relevance ranker. You output only numbers separated by commas."); let response = local.generate(&prompt, system, None).await?; log::info!( "rerank: finished in {} ms (prompt={} chars)", started.elapsed().as_millis(), prompt.len() ); // Extract indices from the response. Accept "3, 1, 7" and also // tolerate "[3, 1, 7]" or "3,1,7,..." with trailing junk. let picks: Vec<usize> = response .split(|c: char| !c.is_ascii_digit()) .filter_map(|s| s.parse::<usize>().ok()) .filter(|&n| n >= 1 && n <= candidates.len()) .collect(); if picks.is_empty() { return Err(anyhow::anyhow!( "reranker returned no usable indices (raw: {})", response.chars().take(120).collect::<String>() )); } let mut seen = std::collections::HashSet::new(); let mut reordered: Vec<String> = Vec::with_capacity(limit); let mut final_indices: Vec<usize> = Vec::with_capacity(limit); for n in picks { if seen.insert(n) { reordered.push(candidates[n - 1].clone()); final_indices.push(n); if reordered.len() >= limit { break; } } } // Top-up from original order if the reranker returned fewer than // `limit` distinct entries. 
if reordered.len() < limit { for (i, c) in candidates.iter().enumerate() { if !seen.contains(&(i + 1)) { reordered.push(c.clone()); final_indices.push(i + 1); if reordered.len() >= limit { break; } } } } // Debug snapshot: show what the reranker changed. Position p holds // the 1-based index of the candidate that now sits at position p. // A value that equals its position means "no change at that slot". let swapped = final_indices .iter() .enumerate() .filter(|(pos, idx)| **idx != pos + 1) .count(); log::info!( "rerank: final indices (1-based): {:?} — {} of top {} swapped from vector order", final_indices, swapped, final_indices.len() ); let show = final_indices.len().min(5); log::debug!("rerank: vector-order top {}:", show); for (i, c) in candidates.iter().enumerate().take(show) { let preview: String = c.chars().take(100).collect(); log::debug!("rerank: [{}] {}", i + 1, preview); } log::debug!("rerank: reranked top {}:", show); for (pos, idx) in final_indices.iter().enumerate().take(show) { let preview: String = candidates[*idx - 1].chars().take(100).collect(); log::debug!("rerank: [{}] (orig #{}) {}", pos + 1, idx, preview); } Ok(reordered) } /// Tool: search_messages — keyword / semantic / hybrid search over all /// SMS message bodies via the Django FTS5 + embeddings pipeline. Now /// supports optional `contact_id`, `start_ts`, `end_ts` filters. async fn tool_search_messages( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let query = match args.get("query").and_then(|v| v.as_str()) { Some(q) if !q.trim().is_empty() => q.trim(), _ => { // Common LLM mistake: calling search_messages with // { date, ... } as if it were date-browsing. The two // tools share the "messages" word, and search_messages // sounds like the natural verb. Instead of returning // an error and burning a turn, transparently route to // get_sms_messages when there's a `date` (and a // contact-name string, optional). 
The LLM gets real // data on its first try; the result is logged with a // routing note so a human reading the trace can see // what happened. let has_date_str = args .get("date") .and_then(|v| v.as_str()) .map(|s| !s.trim().is_empty()) .unwrap_or(false); let has_numeric_contact = args.get("contact_id").is_some(); let has_ts_window = args.get("start_ts").is_some() || args.get("end_ts").is_some(); if has_date_str && !has_numeric_contact && !has_ts_window { log::info!( "search_messages with `date` and no `query` — routing to get_sms_messages" ); let routed = self.tool_get_sms_messages(args, cx).await; return format!( "(Note: routed to get_sms_messages — search_messages requires a \ `query`; date-only browsing belongs on get_sms_messages. Prefer \ get_sms_messages directly next time.)\n\n{}", routed ); } let has_contact_name = args.get("contact").and_then(|v| v.as_str()).is_some(); if has_ts_window || has_numeric_contact || has_contact_name { return "Error: search_messages needs a 'query' (keywords/phrase). \ To fetch messages around a date or from a contact without keywords, \ call get_sms_messages with { date, contact? } instead." 
.to_string(); } return "Error: missing required parameter 'query'".to_string(); } }; if query.len() < 3 { return "Error: query must be at least 3 characters".to_string(); } let mode = args .get("mode") .and_then(|v| v.as_str()) .map(|s| s.to_lowercase()) .unwrap_or_else(|| "hybrid".to_string()); if !matches!(mode.as_str(), "fts5" | "semantic" | "hybrid") { return format!( "Error: unknown mode '{}'; expected one of: fts5, semantic, hybrid", mode ); } let user_limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(20) .clamp(1, 50) as usize; let contact_id = args.get("contact_id").and_then(|v| v.as_i64()); let contact = args .get("contact") .and_then(|v| v.as_str()) .map(|s| s.trim().to_string()) .filter(|s| !s.is_empty()) .filter(|_| contact_id.is_none()); let start_ts = args.get("start_ts").and_then(|v| v.as_i64()); let end_ts = args.get("end_ts").and_then(|v| v.as_i64()); let is_mms = args.get("is_mms").and_then(|v| v.as_bool()); let has_media = args.get("has_media").and_then(|v| v.as_bool()); let has_date_filter = start_ts.is_some() || end_ts.is_some(); log::info!( "tool_search_messages: query='{}', mode={}, contact_id={:?}, contact={:?}, range=[{:?}, {:?}], is_mms={:?}, has_media={:?}, limit={}", query, mode, contact_id, contact, start_ts, end_ts, is_mms, has_media, user_limit ); // SMS-API applies all of date/contact/mms/media filtering and // pagination server-side, so the response is already exactly the // page we want — no over-fetch, no client-side post-filter. let params = SmsSearchParams { mode: mode.as_str(), limit: user_limit, contact_id, contact, date_from: start_ts, date_to: end_ts, is_mms, has_media, offset: None, }; let hits = match self.sms_client.search_messages(query, ¶ms).await { Ok(h) => h, Err(e) => return format!("Error searching messages: {}", e), }; if hits.is_empty() { return "No messages matched.".to_string(); } Self::format_search_hits(&hits, &mode, has_date_filter) } /// Render a list of [`SmsSearchHit`] for the LLM. 
/// Strip the `<mark>` / `</mark>` highlight tags that SMS-API wraps
/// matched terms in. The tags carry no signal beyond what the query
/// already conveys to the LLM, and leaving them in adds prompt noise.
///
/// Only these two exact tag spellings are removed; all other text,
/// including any other markup, is returned unchanged.
fn strip_mark_tags(s: &str) -> String {
    s.replace("<mark>", "").replace("</mark>", "")
}
fn strip_mark_tags(s: &str) -> String { s.replace("", "").replace("", "") } /// Tool: get_sms_messages — fetch SMS messages near a date for a contact async fn tool_get_sms_messages( &self, args: &serde_json::Value, _cx: &opentelemetry::Context, ) -> String { let date_str = match args.get("date").and_then(|v| v.as_str()) { Some(d) => d, None => return "Error: missing required parameter 'date'".to_string(), }; let contact = args .get("contact") .and_then(|v| v.as_str()) .map(|s| s.to_string()); let days_radius = args .get("days_radius") .and_then(|v| v.as_i64()) .unwrap_or(4) .clamp(1, 30); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(60) .clamp(1, 150) as usize; let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { Ok(d) => d, Err(e) => return format!("Error: failed to parse date '{}': {}", date_str, e), }; let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp(); log::info!( "tool_get_sms_messages: date={}, contact={:?}, days_radius={}, limit={}", date, contact, days_radius, limit ); match self .sms_client .fetch_messages_for_contact(contact.as_deref(), timestamp, days_radius) .await { Ok(messages) if !messages.is_empty() => { let user_name = user_display_name(); // Name both parties — without a contact filter the window // spans every conversation, and a sender-only label leaves // sent messages unattributable to a thread. 
let formatted: Vec<String> = messages .iter() .take(limit) .map(|m| { let direction = if m.is_sent { format!("{} → {}", user_name, m.contact) } else { format!("{} → {}", m.contact, user_name) }; let ts = DateTime::from_timestamp(m.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); format!("[{}] {}: {}", ts, direction, m.body) }) .collect(); format!( "{}\n{}", Self::found_header(messages.len(), formatted.len(), "messages"), formatted.join("\n") ) } Ok(_) => "No messages found.".to_string(), Err(e) => { log::warn!("tool_get_sms_messages failed: {}", e); format!("Error fetching SMS messages: {}", e) } } } /// Tool: get_calendar_events — fetch calendar events near a date async fn tool_get_calendar_events( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let date_str = match args.get("date").and_then(|v| v.as_str()) { Some(d) => d, None => return "Error: missing required parameter 'date'".to_string(), }; let days_radius = args .get("days_radius") .and_then(|v| v.as_i64()) .unwrap_or(7) .clamp(1, 30); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(20) .clamp(1, 50) as usize; let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { Ok(d) => d, Err(e) => return format!("Error: failed to parse date '{}': {}", date_str, e), }; let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp(); log::info!( "tool_get_calendar_events: date={}, days_radius={}, limit={}", date, days_radius, limit ); let events = { let mut dao = self .calendar_dao .lock() .expect("Unable to lock CalendarEventDao"); dao.find_relevant_events_hybrid(cx, timestamp, days_radius, None, limit) .ok() }; match events { Some(evts) if !evts.is_empty() => { let formatted: Vec<String> = evts .iter() .map(|e| { let dt = DateTime::from_timestamp(e.start_time, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| 
"unknown".to_string()); let loc = e .location .as_ref() .map(|l| format!(" at {}", l)) .unwrap_or_default(); let attendees = e .attendees .as_ref() .and_then(|a| serde_json::from_str::<Vec<String>>(a).ok()) .map(|list| format!(" (with {})", list.join(", "))) .unwrap_or_default(); format!("[{}] {}{}{}", dt, e.summary, loc, attendees) }) .collect(); format!( "Found {} calendar events:\n{}", evts.len(), formatted.join("\n") ) } Some(_) => "No calendar events found.".to_string(), None => "No calendar events found.".to_string(), } } /// Tool: get_location_history — fetch location records near a date async fn tool_get_location_history( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let date_str = match args.get("date").and_then(|v| v.as_str()) { Some(d) => d, None => return "Error: missing required parameter 'date'".to_string(), }; let days_radius = args .get("days_radius") .and_then(|v| v.as_i64()) .unwrap_or(14) .clamp(1, 30); let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { Ok(d) => d, Err(e) => return format!("Error: failed to parse date '{}': {}", date_str, e), }; let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp(); log::info!( "tool_get_location_history: date={}, days_radius={}", date, days_radius ); let start_ts = timestamp - (days_radius * 86400); let end_ts = timestamp + (days_radius * 86400); let locations = { let mut dao = self .location_dao .lock() .expect("Unable to lock LocationHistoryDao"); dao.find_locations_in_range(cx, start_ts, end_ts).ok() }; match locations { Some(locs) if !locs.is_empty() => { let formatted: Vec<String> = locs .iter() .take(LOCATION_HISTORY_DISPLAY_LIMIT) .map(|loc| { let dt = DateTime::from_timestamp(loc.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); let activity = loc .activity .as_ref() .map(|a| format!(" ({})", a)) .unwrap_or_default(); let place = loc .place_name .as_ref() 
.map(|p| format!(" at {}", p)) .unwrap_or_default(); format!( "[{}] {:.4}, {:.4}{}{}", dt, loc.latitude, loc.longitude, place, activity ) }) .collect(); format!( "{}\n{}", Self::found_header(locs.len(), formatted.len(), "location records"), formatted.join("\n") ) } Some(_) => "No location history found.".to_string(), None => "No location history found.".to_string(), } } /// Render a `Found N <noun>:` tool-result header, annotating when only /// the first `shown` of `total` items are listed below it. Without the /// annotation the model believes it saw everything ("Found 312 /// messages:" followed by 60 lines) and reasons from a silently /// truncated list. `summarize_tool_result` keeps parsing the leading /// count either way. fn found_header(total: usize, shown: usize, noun: &str) -> String { if shown < total { format!("Found {} {} (showing first {}):", total, noun, shown) } else { format!("Found {} {}:", total, noun) } } /// Tool: get_file_tags — fetch tags for a file path async fn tool_get_file_tags( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let file_path = match args.get("file_path").and_then(|v| v.as_str()) { Some(p) => p.to_string(), None => return "Error: missing required parameter 'file_path'".to_string(), }; log::info!("tool_get_file_tags: file_path='{}'", file_path); let tags = { let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao"); dao.get_tags_for_path(cx, &file_path).ok() }; match tags { Some(t) if !t.is_empty() => { let names: Vec<String> = t.into_iter().map(|tag| tag.name).collect(); names.join(", ") } Some(_) => "No tags found.".to_string(), None => "No tags found.".to_string(), } } /// Tool: get_faces_in_photo — list face detections + person names for /// the given file path. Resolves rel_path → content_hash via FaceDao, /// then queries face_detections joined with persons (status='detected' /// only). Returns a compact bullet list keyed for human-LLM readability. 
async fn tool_get_faces_in_photo( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let file_path = match args.get("file_path").and_then(|v| v.as_str()) { Some(p) if !p.trim().is_empty() => p.trim().to_string(), _ => return "Error: missing required parameter 'file_path'".to_string(), }; log::info!("tool_get_faces_in_photo: file_path='{}'", file_path); // Resolve content_hash from any library that has this rel_path. // Hold the FaceDao lock once across all libraries — resolve_content_hash // is synchronous and there's no await in the loop body. let content_hash: Option<String> = { let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao"); self.libraries.iter().find_map(|lib| { dao.resolve_content_hash(cx, lib.id, &file_path) .ok() .flatten() }) }; let Some(content_hash) = content_hash else { return "No content_hash found for that file path (the photo may not be indexed yet, \ or the path doesn't match any library)." .to_string(); }; let faces = { let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao"); match dao.list_for_content_hash(cx, &content_hash) { Ok(rows) => rows, Err(e) => return format!("Error querying faces: {}", e), } }; if faces.is_empty() { return "No faces detected in this photo.".to_string(); } // Render: bound faces grouped by person first, then unbound. The // model uses the bound names directly; the unbound count + bbox // helps it count people without naming them. let bound: Vec<&_> = faces.iter().filter(|f| f.person_name.is_some()).collect(); let unbound: Vec<&_> = faces.iter().filter(|f| f.person_name.is_none()).collect(); let mut out = format!("Found {} face(s) in this photo:\n", faces.len()); for f in &bound { // Invariant: `bound` is filtered on `person_name.is_some()` above. 
let name = f .person_name .as_deref() .expect("bound face must have a name"); out.push_str(&format!( "- {} (confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {})\n", name, f.confidence, f.bbox_x, f.bbox_y, f.bbox_w, f.bbox_h, f.source, )); } for f in &unbound { out.push_str(&format!( "- (unidentified) confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {}\n", f.confidence, f.bbox_x, f.bbox_y, f.bbox_w, f.bbox_h, f.source, )); } out } async fn tool_describe_photo( &self, local: &dyn LlmClient, image_base64: &Option<String>, ) -> String { log::info!("tool_describe_photo: generating visual description"); match image_base64 { Some(img) => match local.describe_image(img).await { Ok(desc) => desc, Err(e) => format!("Error describing photo: {}", e), }, None => "No image available for description.".to_string(), } } /// Tool: reverse_geocode — convert GPS coordinates to a human-readable place name async fn tool_reverse_geocode(&self, args: &serde_json::Value) -> String { let lat = match args.get("latitude").and_then(|v| v.as_f64()) { Some(v) => v, None => return "Error: missing required parameter 'latitude'".to_string(), }; let lon = match args.get("longitude").and_then(|v| v.as_f64()) { Some(v) => v, None => return "Error: missing required parameter 'longitude'".to_string(), }; log::info!("tool_reverse_geocode: lat={}, lon={}", lat, lon); match self.reverse_geocode(lat, lon).await { Some(place) => place, None => "Could not resolve coordinates to a place name.".to_string(), } } /// Tool: get_personal_place_at — look up the user's named places (Home, /// Work, Cabin) at a coordinate. Server-side filter; results sorted /// smallest-radius first. 
async fn tool_get_personal_place_at(&self, args: &serde_json::Value) -> String { if !self.apollo_client.is_enabled() { return "Personal place lookup is disabled.".to_string(); } let lat = match args.get("latitude").and_then(|v| v.as_f64()) { Some(v) => v, None => return "Error: missing required parameter 'latitude'".to_string(), }; let lon = match args.get("longitude").and_then(|v| v.as_f64()) { Some(v) => v, None => return "Error: missing required parameter 'longitude'".to_string(), }; log::info!("tool_get_personal_place_at: lat={}, lon={}", lat, lon); let places = self.apollo_client.places_containing(lat, lon).await; if places.is_empty() { return "No personal place contains this coordinate.".to_string(); } places .iter() .map(|p| { let category = p .category .as_deref() .map(|c| format!(" [{c}]")) .unwrap_or_default(); let desc = if p.description.is_empty() { "(no description)".to_string() } else { p.description.clone() }; format!( "- {}{}: {} (radius {} m)", p.name, category, desc, p.radius_m ) }) .collect::<Vec<_>>() .join("\n") } /// Tool: recall_entities — search the knowledge memory for known entities async fn tool_recall_entities( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { use crate::database::EntityFilter; let name_search = args .get("name") .and_then(|v| v.as_str()) .map(|s| s.to_string()); let entity_type = args .get("entity_type") .and_then(|v| v.as_str()) .map(|s| s.to_string()); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(20) .clamp(1, 50); log::info!( "tool_recall_entities: name={:?}, type={:?}, limit={}", name_search, entity_type, limit ); // Status scope mirrors recall_facts_for_photo: by default // include both 'active' (agent-generated, not yet reviewed) // and 'reviewed' (human-verified). 
The DAO list_entities only // takes a single status string, so use 'all' here and filter // out 'rejected' below — slightly broader but fine for the // recall use case (entities are global, persona-agnostic). let filter = EntityFilter { entity_type, status: Some("all".to_string()), search: name_search, limit, offset: 0, }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); match kdao.list_entities(cx, filter) { Ok((entities, _total)) if entities.iter().all(|e| e.status == "rejected") => { "No known entities found matching the query.".to_string() } Ok((entities, _total)) => { let lines: Vec<String> = entities .iter() .filter(|e| e.status != "rejected") .map(|e| { format!( "ID:{} | {} | {} | {} | confidence:{:.2}", e.id, e.entity_type, e.name, e.description, e.confidence ) }) .collect(); if lines.is_empty() { "No known entities found matching the query.".to_string() } else { format!("Known entities:\n{}", lines.join("\n")) } } Err(e) => format!("Error recalling entities: {:?}", e), } } /// Tool: recall_facts_for_photo — retrieve facts linked to a specific photo async fn tool_recall_facts_for_photo( &self, args: &serde_json::Value, user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::PersonaFilter; let persona_filter = PersonaFilter::Single { user_id, persona_id: persona_id.to_string(), }; let file_path = match args.get("file_path").and_then(|v| v.as_str()) { Some(p) => p.to_string(), None => return "Error: missing required parameter 'file_path'".to_string(), }; log::info!("tool_recall_facts_for_photo: file_path={}", file_path); // Resolve the persona's reviewed-only-mode flag once. If the // persona row is missing (shouldn't happen — composite FK // enforces existence on writes), fall back to the permissive // default of active+reviewed. 
let reviewed_only = { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(cx, user_id, persona_id) .ok() .flatten() .map(|p| p.reviewed_only_facts) .unwrap_or(false) }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); // Fetch photo links to find which entities appear in this photo let links = match kdao.get_links_for_photo(cx, &file_path) { Ok(l) => l, Err(e) => return format!("Error fetching photo links: {:?}", e), }; if links.is_empty() { return "No knowledge facts found for this photo.".to_string(); } let mut output_lines = Vec::new(); let entity_ids: Vec<i32> = links.iter().map(|l| l.entity_id).collect(); // For each linked entity, fetch its facts for entity_id in entity_ids { if let Ok(entity) = kdao.get_entity_by_id(cx, entity_id) && let Some(e) = entity { let role = links .iter() .find(|l| l.entity_id == entity_id) .map(|l| l.role.as_str()) .unwrap_or("subject"); output_lines.push(format!( "Entity: {} ({}, role: {})", e.name, e.entity_type, role )); if let Ok(facts) = kdao.get_facts_for_entity(cx, entity_id, &persona_filter) { // Default scope: active + reviewed (everything not // rejected / superseded). Strict mode trims to // reviewed only — the persona has opted into seeing // exclusively human-verified facts. 
let allow = |s: &str| -> bool { if reviewed_only { s == "reviewed" } else { s == "active" || s == "reviewed" } }; for f in facts.iter().filter(|f| allow(&f.status)) { let obj = if let Some(ref v) = f.object_value { v.clone() } else if let Some(oid) = f.object_entity_id { kdao.get_entity_by_id(cx, oid) .ok() .flatten() .map(|e| format!("{} (entity ID: {})", e.name, e.id)) .unwrap_or_else(|| format!("entity:{}", oid)) } else { "(unknown)".to_string() }; output_lines.push(format!(" - {} {}", f.predicate, obj)); } } } } if output_lines.is_empty() { "No active knowledge facts found for this photo.".to_string() } else { format!("Knowledge for this photo:\n{}", output_lines.join("\n")) } } /// Tool: recall_facts_for_entity — retrieve facts for one entity by id. /// Persona scoping and the reviewed-only-facts filter mirror /// `tool_recall_facts_for_photo` exactly: reads are always Single /// (user_id, persona_id), and strict-mode personas see only /// human-reviewed facts. async fn tool_recall_facts_for_entity( &self, args: &serde_json::Value, user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::PersonaFilter; let persona_filter = PersonaFilter::Single { user_id, persona_id: persona_id.to_string(), }; let entity_id = match args.get("entity_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'entity_id'".to_string(), }; let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(50) .clamp(1, 100) as usize; log::info!( "tool_recall_facts_for_entity: entity_id={}, limit={}", entity_id, limit ); // Resolve the persona's reviewed-only-mode flag once — identical // fallback semantics to recall_facts_for_photo (missing persona // row → permissive active+reviewed default). 
let reviewed_only = { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(cx, user_id, persona_id) .ok() .flatten() .map(|p| p.reviewed_only_facts) .unwrap_or(false) }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); let entity = match kdao.get_entity_by_id(cx, entity_id) { Ok(Some(e)) => e, Ok(None) => return format!("No entity found with ID {}.", entity_id), Err(e) => return format!("Error fetching entity: {:?}", e), }; let mut output_lines = vec![format!("Entity: {} ({})", entity.name, entity.entity_type)]; match kdao.get_facts_for_entity(cx, entity_id, &persona_filter) { Ok(facts) => { // Default scope: active + reviewed. Strict mode trims to // reviewed only — same allow rule as recall_facts_for_photo. let allow = |s: &str| -> bool { if reviewed_only { s == "reviewed" } else { s == "active" || s == "reviewed" } }; for f in facts.iter().filter(|f| allow(&f.status)).take(limit) { let obj = if let Some(ref v) = f.object_value { v.clone() } else if let Some(oid) = f.object_entity_id { kdao.get_entity_by_id(cx, oid) .ok() .flatten() .map(|e| format!("{} (entity ID: {})", e.name, e.id)) .unwrap_or_else(|| format!("entity:{}", oid)) } else { "(unknown)".to_string() }; output_lines.push(format!(" - {} {}", f.predicate, obj)); } } Err(e) => return format!("Error fetching facts: {:?}", e), } if output_lines.len() == 1 { format!( "No active knowledge facts found for entity {} (ID: {}).", entity.name, entity_id ) } else { format!("Knowledge for this entity:\n{}", output_lines.join("\n")) } } /// Tool: store_entity — upsert an entity into the knowledge memory. /// Embeddings go through the configured local backend (`LLM_BACKEND`), /// independent of the per-request chat backend in the caller. 
async fn tool_store_entity( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { use crate::database::models::InsertEntity; let name = match args.get("name").and_then(|v| v.as_str()) { Some(n) => n.to_string(), None => return "Error: missing required parameter 'name'".to_string(), }; let entity_type = match args.get("entity_type").and_then(|v| v.as_str()) { Some(t) => t.to_string(), None => return "Error: missing required parameter 'entity_type'".to_string(), }; let description = args .get("description") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); log::info!( "tool_store_entity: name='{}', type='{}', description='{}'", name, entity_type, description ); // Pre-flight similarity check — surface near-duplicates to the model // before it commits to a new entity. Uses the first name token as the // search term so "Sarah" matches when storing "Sarah Johnson" and vice // versa. Exact-name matches are excluded (upsert_entity deduplicates // those already). Results are appended to the tool response so the // model can choose to use an existing entity's ID instead. 
let similar_entities: Vec<String> = { use crate::database::EntityFilter; use crate::database::knowledge_dao::normalize_entity_type; let normalised_type = normalize_entity_type(&entity_type); let first_token = name.split_whitespace().next().unwrap_or(&name).to_string(); let filter = EntityFilter { entity_type: None, // search all types, filter client-side to avoid case issues status: Some("active".to_string()), search: Some(first_token), limit: 10, offset: 0, }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); kdao.list_entities(cx, filter) .unwrap_or_default() .0 .into_iter() .filter(|e| { normalize_entity_type(&e.entity_type) == normalised_type && e.name.to_lowercase() != name.to_lowercase() }) .map(|e| format!(" ID:{} | {} | {}", e.id, e.name, e.description)) .collect() }; // Generate embedding for name + description (best-effort) via the // configured local backend. let embed_text = format!("{} {}", name, description); let embedding: Option<Vec<u8>> = match crate::ai::embed_document( &self.ollama, self.llamacpp.as_deref(), &embed_text, ) .await { // The entities table has no dim check at the DAO layer, and a // wrong-dim vector silently kills dedup/recall (cosine over // mismatched lengths is 0) — guard here, store None instead. 
Ok(vec) if vec.len() == crate::ai::embedding_dim() => { let bytes: Vec<u8> = vec.iter().flat_map(|f| f.to_le_bytes()).collect(); Some(bytes) } Ok(vec) => { log::warn!( "Entity '{}' embedding has {} dims (expected {}) — storing without embedding", name, vec.len(), crate::ai::embedding_dim() ); None } Err(e) => { log::warn!("Embedding generation failed for entity '{}': {}", name, e); None } }; let now = chrono::Utc::now().timestamp(); let insert = InsertEntity { name, entity_type, description, embedding, confidence: 0.6, status: "active".to_string(), created_at: now, updated_at: now, }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); match kdao.upsert_entity(cx, insert) { Ok(entity) => { let mut response = format!( "Entity stored: ID:{} | {} | {} | confidence:{:.2}", entity.id, entity.entity_type, entity.name, entity.confidence ); if !similar_entities.is_empty() { response.push_str( "\nSimilar existing entities found — verify this is not a duplicate:\n", ); response.push_str(&similar_entities.join("\n")); response.push_str( "\nIf one of these is the same entity, use their existing ID in store_fact instead of the newly created one.", ); } response } Err(e) => format!("Error storing entity: {:?}", e), } } /// Tool: store_fact — record a fact about an entity, linked to the current photo async fn tool_store_fact( &self, args: &serde_json::Value, file_path: &str, user_id: i32, persona_id: &str, model: &str, backend: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::models::{InsertEntityFact, InsertEntityPhotoLink}; let subject_entity_id = match args.get("subject_entity_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'subject_entity_id'".to_string(), }; let predicate = match args.get("predicate").and_then(|v| v.as_str()) { Some(p) => p.to_string(), None => return "Error: missing required parameter 'predicate'".to_string(), }; let object_entity_id = args 
.get("object_entity_id") .and_then(|v| v.as_i64()) .map(|id| id as i32); let object_value = args .get("object_value") .and_then(|v| v.as_str()) .map(|s| s.to_string()); if object_entity_id.is_none() && object_value.is_none() { return "Error: provide either object_entity_id or object_value".to_string(); } let photo_role = args .get("photo_role") .and_then(|v| v.as_str()) .unwrap_or("subject") .to_string(); log::info!( "tool_store_fact: entity_id={}, predicate='{}', object_entity_id={:?}, object_value={:?}, photo='{}'", subject_entity_id, predicate, object_entity_id, object_value, file_path ); // Anchor the fact in valid-time using the source photo's // `date_taken` (Apollo's naive-as-UTC convention is fine // here — we only care about calendar ordering, not absolute // UTC). The semantic stretch: a photo *evidences* the fact at // that date — the fact may have started earlier — so this is // best read as "no later than this it started being true", // not a strict lower bound. Still useful: gives the curator a // calendar anchor and lets supersession (next slice) close // intervals cleanly when a newer fact arrives. valid_until // stays NULL — a single photo can't tell us when something // *stopped* being true. let valid_from = self.fetch_exif(file_path).and_then(|e| e.date_taken); let fact = InsertEntityFact { subject_entity_id, predicate, object_entity_id, object_value, source_photo: Some(file_path.to_string()), source_insight_id: None, // will be backfilled after store_insight confidence: 0.6, status: "active".to_string(), created_at: chrono::Utc::now().timestamp(), persona_id: persona_id.to_string(), user_id, valid_from, valid_until: None, superseded_by: None, created_by_model: Some(model.to_string()), created_by_backend: Some(backend.to_string()), // Initial write — no modification yet; last_modified_* // intentionally NULL so the audit feed only shows real // post-creation changes. 
last_modified_by_model: None, last_modified_by_backend: None, last_modified_at: None, }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); // Upsert the fact (corroboration bumps confidence if duplicate) let (stored_fact, is_new) = match kdao.upsert_fact(cx, fact) { Ok(r) => r, Err(e) => return format!("Error storing fact: {:?}", e), }; // Upsert a photo link so this entity is associated with this photo let link = InsertEntityPhotoLink { entity_id: subject_entity_id, library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), role: photo_role, }; if let Err(e) = kdao.upsert_photo_link(cx, link) { log::warn!( "Failed to upsert photo link for entity {}: {:?}", subject_entity_id, e ); } let action = if is_new { "Stored new fact" } else { "Corroborated existing fact" }; format!( "{}: ID:{} | confidence:{:.2}", action, stored_fact.id, stored_fact.confidence ) } /// Tool: update_fact — patch a fact's mutable fields. Gated by the /// active persona's `allow_agent_corrections` flag at the schema / /// catalog layer (build_tool_definitions); rechecked here as a /// defense in depth in case a hallucinated tool call slips /// through. async fn tool_update_fact( &self, args: &serde_json::Value, user_id: i32, persona_id: &str, model: &str, backend: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::FactPatch; // Defense-in-depth gate check. let allowed = { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(cx, user_id, persona_id) .ok() .flatten() .map(|p| p.allow_agent_corrections) .unwrap_or(false) }; if !allowed { return "Error: agent corrections are disabled for this persona. Ask the operator to flip allow_agent_corrections.".to_string(); } let fact_id = match args.get("fact_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'fact_id'".to_string(), }; // Build the patch from any fields present on `args`. 
valid_* // are tri-state — JSON null → Some(None) → clear back to NULL, // omitted → None → leave alone, value → Some(Some(value)) → // set. The match-on-presence pattern below mirrors the HTTP // PATCH path's serde-helper behaviour. let parse_optional_i64 = |v: Option<&serde_json::Value>| -> Option<Option<i64>> { v.map(|val| if val.is_null() { None } else { val.as_i64() }) }; let patch = FactPatch { predicate: args .get("predicate") .and_then(|v| v.as_str()) .map(|s| s.to_string()), object_value: args .get("object_value") .and_then(|v| v.as_str()) .map(|s| s.to_string()), status: args .get("status") .and_then(|v| v.as_str()) .filter(|s| matches!(*s, "active" | "reviewed" | "rejected")) .map(|s| s.to_string()), confidence: args .get("confidence") .and_then(|v| v.as_f64()) .map(|f| f.clamp(0.0, 0.95) as f32), valid_from: parse_optional_i64(args.get("valid_from")), valid_until: parse_optional_i64(args.get("valid_until")), }; log::info!("tool_update_fact: fact_id={}", fact_id); let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); match kdao.update_fact(cx, fact_id, patch, Some((model, backend))) { Ok(Some(f)) => format!( "Updated fact ID:{} (status={}, confidence={:.2})", f.id, f.status, f.confidence ), Ok(None) => format!("Error: fact ID:{} not found", fact_id), Err(e) => format!("Error updating fact: {:?}", e), } } /// Tool: supersede_fact — replace one fact with another. Same /// gating as update_fact. 
async fn tool_supersede_fact( &self, args: &serde_json::Value, user_id: i32, persona_id: &str, model: &str, backend: &str, cx: &opentelemetry::Context, ) -> String { let allowed = { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(cx, user_id, persona_id) .ok() .flatten() .map(|p| p.allow_agent_corrections) .unwrap_or(false) }; if !allowed { return "Error: agent corrections are disabled for this persona.".to_string(); } let old_fact_id = match args.get("old_fact_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'old_fact_id'".to_string(), }; let new_fact_id = match args.get("new_fact_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'new_fact_id'".to_string(), }; if old_fact_id == new_fact_id { return "Error: old_fact_id and new_fact_id must differ".to_string(); } log::info!( "tool_supersede_fact: old={}, new={}", old_fact_id, new_fact_id ); let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); match kdao.supersede_fact(cx, old_fact_id, new_fact_id, Some((model, backend))) { Ok(Some(f)) => format!( "Superseded fact ID:{} (now status={}, valid_until={:?})", f.id, f.status, f.valid_until ), Ok(None) => "Error: old or new fact not found".to_string(), Err(e) => format!("Error superseding fact: {:?}", e), } } /// Tool: get_current_datetime — returns the current local date and time fn tool_get_current_datetime() -> String { let now = Local::now(); format!( "Current date/time: {} ({})", now.format("%Y-%m-%d %H:%M:%S %Z"), now.format("%A") ) } // ── Agentic insight generation ────────────────────────────────────── /// Build the list of tool definitions for the agentic loop, gated by /// `opts`. Always-on tools: `search_messages`, `get_sms_messages`, /// `get_file_tags`, `reverse_geocode`, `get_current_datetime`, the /// five knowledge-memory tools. 
Conditional: `describe_photo` (vision /// model), `get_personal_place_at` (Apollo configured), `search_rag` /// (daily_summaries populated), `get_calendar_events` (calendar /// populated), `get_location_history` (location history populated). pub(crate) fn build_tool_definitions(opts: ToolGateOpts) -> Vec<Tool> { let mut tools: Vec<Tool> = Vec::new(); if opts.daily_summaries_present { tools.push(Tool::function( "search_rag", "Semantic search over the user's daily-summary corpus. Returns up to \ `limit` summaries most semantically similar to `query`. Pass `date` \ to anchor in time: summaries near that date rank higher and matches \ months away decay sharply. Omit `date` to rank purely by semantic \ similarity across all time — do this for \"when did X happen?\" \ questions where the date is unknown. For raw message text, prefer \ `search_messages`. \ Examples: `{query: \"family dinner\"}` — best matches across all \ time. `{query: \"family dinner\", date: \"2018-12-24\"}` — what \ daily summaries near Christmas Eve mention family / dinner / gathering. \ `{query: \"work travel\", date: \"2019-06-15\", contact: \"Alice\"}` — \ biased toward summaries that involve Alice.", serde_json::json!({ "type": "object", "required": ["query"], "properties": { "query": { "type": "string", "description": "Free-text query, semantically matched." }, "date": { "type": "string", "description": "Optional anchor date, YYYY-MM-DD. When set, summaries near this date rank higher; omit to search all time evenly." }, "contact": { "type": "string", "description": "Optional contact name to bias toward conversations with that person (soft semantic bias, not a hard filter)." }, "limit": { "type": "integer", "description": "Max summaries to return (default 10, max 25)." } } }), )); } tools.push(Tool::function( "search_messages", "Search SMS/MMS messages — bodies and (for MMS) attachment text + filenames. 
\ Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \ `semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \ degrades to fts5 when embeddings absent). Optional filters: `start_ts` / `end_ts` (real-UTC unix \ seconds), `contact` (contact name, case-insensitive), `contact_id` (numeric), `is_mms` \ (true = MMS only, false = SMS only), `has_media` (true = messages with image/video/audio \ attachments only). Prefer `contact` over `contact_id` — the name is resolved server-side. \ If both are provided, `contact_id` takes precedence. \ For pure date / contact browsing without keywords, prefer `get_sms_messages`. \ \n\nFTS5 query syntax (works in fts5 + hybrid modes):\n\ - Phrase: `\"trader joe's\"` — exact word sequence (use double quotes).\n\ - Prefix: `restaur*` — matches restaurant, restaurants, restauranteur, ….\n\ - Boolean: `dinner AND tahoe`, `wedding OR reception OR ceremony`, `vacation NOT work` (operators must be UPPERCASE).\n\ - Proximity: `NEAR(meeting work, 5)` — both terms within 5 tokens of each other.\n\ - Combine: `(reception OR ceremony) AND tahoe*` — group with parens.\n\ Unquoted multi-word queries are treated as implicit AND. Apostrophes / hyphens / colons are safe — they no longer downgrade to a slow LIKE scan. 
Use `mode: \"fts5\"` when you want the operators above to be authoritative; `hybrid` still respects them but may surface semantically-similar non-keyword hits alongside.\n\n\ Examples:\n\ - `{query: \"trader joe's\"}` — phrase across all time.\n\ - `{query: \"dinner\", contact: \"Mom\"}` — keyword scoped to Mom's messages.\n\ - `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week.\n\ - `{query: \"vacation\", has_media: true}` — only matches that include photos / videos.\n\ - `{query: \"wedding OR reception OR ceremony\", mode: \"fts5\"}` — any of several synonyms.\n\ - `{query: \"restaur*\", mode: \"fts5\"}` — prefix expansion for varying word forms.\n\ - `{query: \"NEAR(birthday cake, 5)\", mode: \"fts5\"}` — terms close together but in any order.", serde_json::json!({ "type": "object", "required": ["query"], "properties": { "query": { "type": "string", "description": "Search query. Min 3 chars. fts5 supports phrase (\"\"), prefix (*), AND/OR/NOT, and NEAR proximity. Matches both message body and MMS attachment text/filename." }, "mode": { "type": "string", "enum": ["fts5", "semantic", "hybrid"], "description": "Search strategy. Default: hybrid." }, "limit": { "type": "integer", "description": "Max results (default 20, max 50)." }, "contact_id": { "type": "integer", "description": "Optional numeric contact id to scope the search." }, "contact": { "type": "string", "description": "Optional contact name (case-insensitive). Resolved to contact_id server-side. Use this when you know the name but not the ID." }, "start_ts": { "type": "integer", "description": "Optional inclusive lower bound, real-UTC unix seconds." }, "end_ts": { "type": "integer", "description": "Optional inclusive upper bound, real-UTC unix seconds." }, "is_mms": { "type": "boolean", "description": "Optional: true to restrict to MMS, false to restrict to SMS." 
}, "has_media":{ "type": "boolean", "description": "Optional: true to restrict to messages with image / video / audio attachments." } } }), )); tools.push(Tool::function( "get_sms_messages", "Fetch SMS/MMS messages near a date (and optionally from a specific contact). Use when you know the date \ or want context around a photo's timestamp. For keyword search without a date, use `search_messages`. \ Returns up to `limit` messages within `±days_radius` of `date`, sorted by proximity. \ Example: `{date: \"2018-08-12\", contact: \"Mom\", days_radius: 2}` — messages from Mom within ±2 days of Aug 12 2018.", serde_json::json!({ "type": "object", "required": ["date"], "properties": { "date": { "type": "string", "description": "Center date, YYYY-MM-DD." }, "contact": { "type": "string", "description": "Optional contact name (case-insensitive). Falls back to all contacts on no match." }, "days_radius": { "type": "integer", "description": "Days before and after to include (default 4)." }, "limit": { "type": "integer", "description": "Max messages to return (default 60, max 150)." } } }), )); if opts.calendar_present { tools.push(Tool::function( "get_calendar_events", "Fetch calendar events near a date — meetings, scheduled activities, all-day events. \ Returns events within `±days_radius` of `date`. \ Example: `{date: \"2019-03-22\", days_radius: 3}` — events within a week of March 22 2019.", serde_json::json!({ "type": "object", "required": ["date"], "properties": { "date": { "type": "string", "description": "Center date, YYYY-MM-DD." }, "days_radius": { "type": "integer", "description": "Days before and after to include (default 7)." }, "limit": { "type": "integer", "description": "Max events to return (default 20, max 50)." } } }), )); } if opts.location_history_present { tools.push(Tool::function( "get_location_history", "Fetch raw location records (lat/lon/timestamp/activity) near a date. 
The default 14-day radius is \ wide because location density varies; tighten to ±1 day for a single-trip query. For a coordinate's \ named place, use `reverse_geocode` (or `get_personal_place_at` when Apollo is enabled).", serde_json::json!({ "type": "object", "required": ["date"], "properties": { "date": { "type": "string", "description": "Center date, YYYY-MM-DD." }, "days_radius": { "type": "integer", "description": "Days before and after to include (default 14)." } } }), )); } tools.push(Tool::function( "get_file_tags", "Get user-applied tags for a specific photo file path. Tags are user-curated, not auto-detected.", serde_json::json!({ "type": "object", "required": ["file_path"], "properties": { "file_path": { "type": "string", "description": "File path of the photo." } } }), )); tools.push(Tool::function( "reverse_geocode", "Convert GPS lat/lon to a human-readable place name (city, state). Use for any coordinate the LLM has \ obtained from EXIF or `get_location_history`. When Apollo is configured, prefer `get_personal_place_at` \ — it returns the user's named places (Home / Work / etc.) which are more specific.", serde_json::json!({ "type": "object", "required": ["latitude", "longitude"], "properties": { "latitude": { "type": "number", "description": "Decimal degrees." }, "longitude": { "type": "number", "description": "Decimal degrees." } } }), )); if opts.apollo_enabled { tools.push(Tool::function( "get_personal_place_at", "Return any of the user's named Places (e.g. Home, Work, Cabin) whose radius contains (latitude, longitude). \ Smallest radius first — most specific match wins. More specific than `reverse_geocode`; prefer this when \ both apply. Returns place name, category, free-text description, and radius.", serde_json::json!({ "type": "object", "required": ["latitude", "longitude"], "properties": { "latitude": { "type": "number", "description": "Decimal degrees." }, "longitude": { "type": "number", "description": "Decimal degrees." 
} } }), )); } if opts.faces_present { tools.push(Tool::function( "get_faces_in_photo", "Return the faces detected in this photo with their bounding boxes and assigned person names \ (when bound). Each face carries `person_name` (string or null), `bbox` ({x, y, w, h} normalized 0–1), \ `confidence` (0–1), and `source` ('auto' from detector or 'manual' from a user-drawn bbox). \ More authoritative than `get_file_tags` for counting people in a photo or naming who is present, \ since it returns detected-but-unbound faces too. \ Example: `{file_path: \"2019/06/IMG_4242.jpg\"}`.", serde_json::json!({ "type": "object", "required": ["file_path"], "properties": { "file_path": { "type": "string", "description": "File path of the photo." } } }), )); } tools.push(Tool::function( "recall_entities", "Search the persistent knowledge memory for previously learned people, places, events, or things. \ Use BEFORE writing the insight to ground the model on what's already known.", serde_json::json!({ "type": "object", "properties": { "name": { "type": "string", "description": "Name or partial name (case-insensitive substring match)." }, "entity_type": { "type": "string", "enum": ["person", "place", "event", "thing"] }, "limit": { "type": "integer", "description": "Max results (default 20, max 50)." } } }), )); tools.push(Tool::function( "recall_facts_for_photo", "Retrieve all stored facts linked to a specific photo. Call at the start of insight generation to load \ prior knowledge about subjects in this photo without scanning the whole knowledge base.", serde_json::json!({ "type": "object", "required": ["file_path"], "properties": { "file_path": { "type": "string", "description": "File path of the photo." } } }), )); tools.push(Tool::function( "recall_facts_for_entity", "Retrieve all stored facts about one specific entity by its ID. 
Use to follow up on an entity \ surfaced by `recall_entities` (or referenced by another fact's object) that is NOT linked to the \ current photo — `recall_facts_for_photo` only covers photo-linked entities. \ Example: `{entity_id: 7}` — everything known about entity 7.", serde_json::json!({ "type": "object", "required": ["entity_id"], "properties": { "entity_id": { "type": "integer", "description": "ID of the entity to fetch facts for (from recall_entities, store_entity, or a fact's object reference)." }, "limit": { "type": "integer", "description": "Max facts to return (default 50, max 100)." } } }), )); tools.push(Tool::function( "store_entity", "Upsert a person / place / event / thing into the knowledge memory. Returns the entity id (use it as \ `subject_entity_id` or `object_entity_id` in `store_fact`). Idempotent on canonical name.", serde_json::json!({ "type": "object", "required": ["name", "entity_type"], "properties": { "name": { "type": "string", "description": "Canonical name (e.g. \"John Smith\", \"Banff National Park\")." }, "entity_type": { "type": "string", "enum": ["person", "place", "event", "thing"] }, "description": { "type": "string", "description": "Brief description." } } }), )); tools.push(Tool::function( "store_fact", "Record a fact about an entity in the knowledge memory. Always linked to the current photo. \ You must provide EITHER `object_entity_id` (when the object is itself a stored entity — e.g. \ person A is_friend_of person B) OR `object_value` (free-text attribute — e.g. role=\"software engineer\"). \ `object_entity_id` takes precedence when both are present. \ Examples: \ `{subject_entity_id: 7, predicate: \"is_friend_of\", object_entity_id: 12}` — links two known entities. 
\ `{subject_entity_id: 7, predicate: \"lives_in\", object_value: \"Portland, Oregon\"}` — free-text attribute.", serde_json::json!({ "type": "object", "required": ["subject_entity_id", "predicate"], "properties": { "subject_entity_id": { "type": "integer", "description": "Entity id this fact is about." }, "predicate": { "type": "string", "description": "Relationship or attribute (e.g. is_friend_of, located_in, attended_event)." }, "object_entity_id": { "type": "integer", "description": "Use when the object is itself a stored entity. Takes precedence over object_value." }, "object_value": { "type": "string", "description": "Use for free-text attributes where the object is not a stored entity." }, "photo_role": { "type": "string", "description": "How this entity appears in the photo (default \"subject\")." } } }), )); // Self-correction tools — only exposed when the active persona // has allow_agent_corrections=true. Gating happens here AND in // the tool method itself (the runtime check is the load-bearing // one; this just keeps the tool out of the model's catalog so // it doesn't waste iterations trying calls it can't make). if opts.allow_agent_corrections { tools.push(Tool::function( "update_fact", "Correct an existing fact in the knowledge memory. Use sparingly — only when you have \ stronger evidence than the original write justified (e.g. a clearer photo, a \ contradicting timestamp on a related fact, or explicit user correction). Common \ patches: tighten `valid_from` / `valid_until` to a known interval, downgrade \ `confidence` after seeing contradicting evidence, or flip `status` to 'reviewed' if \ you've verified the fact independently. Cannot change subject / object — supersede \ instead. Pass `fact_id` plus any subset of patchable fields.", serde_json::json!({ "type": "object", "required": ["fact_id"], "properties": { "fact_id": { "type": "integer", "description": "ID of the fact to patch (from recall_facts_for_photo or list)." 
}, "predicate": { "type": "string", "description": "New predicate string. Rare." }, "object_value": { "type": "string", "description": "New free-text object. Use for typed-fact corrections." }, "status": { "type": "string", "description": "'active' | 'reviewed' | 'rejected'. 'superseded' is for the supersede_fact tool." }, "confidence": { "type": "number", "description": "0.0–0.95. Lower when you've seen contradicting evidence." }, "valid_from": { "type": "integer", "description": "Unix-seconds lower bound on when the fact began being true. Null clears." }, "valid_until": { "type": "integer", "description": "Unix-seconds upper bound on when the fact stopped being true. Null clears." } } }), )); tools.push(Tool::function( "supersede_fact", "Mark an old fact as replaced by a newer one. Use when the new fact contradicts the \ old AND the contradiction is a *time-bounded change* (relationship changed, address \ changed, role changed), not a correction of a mistake — for mistakes, set the old \ fact's status to 'rejected' via update_fact. Atomically: flips old.status to \ 'superseded', points old.superseded_by at the new fact, and stamps old.valid_until \ from new.valid_from (when not already set) so the two intervals are disjoint.", serde_json::json!({ "type": "object", "required": ["old_fact_id", "new_fact_id"], "properties": { "old_fact_id": { "type": "integer", "description": "The fact being replaced." }, "new_fact_id": { "type": "integer", "description": "The fact that replaces it. Must already exist (use store_fact first if you're recording a new one)." } } }), )); } tools.push(Tool::function( "get_current_datetime", "Get the current date and time. Useful when reasoning about how long ago a photo was taken.", serde_json::json!({ "type": "object", "properties": {} }), )); if opts.has_vision { tools.push(Tool::function( "describe_photo", "Generate a visual description of the current photo — people, location, objects, activity visible \ in the image. 
Only available with vision-capable models.", serde_json::json!({ "type": "object", "properties": {} }), )); } tools } /// Generate an AI insight for a photo using an agentic tool-calling loop. /// The model decides which tools to call to gather context before writing the final insight. /// /// `backend` selects the chat provider: `"local"` (default) routes the /// agentic loop through the configured Ollama server with the image /// attached to the first user message; `"hybrid"` asks the local Ollama /// vision model to describe the image once, inlines the description as /// text, and runs the loop through OpenRouter (chat only — embeddings /// and describe calls stay local in either mode). #[allow(clippy::too_many_arguments)] /// Render a set of prior-conversation transcripts into a compact /// trajectory block for inclusion in the system prompt. Tool results /// are summarised to one line each so the prompt stays small. fn render_fewshot_examples(examples: &[Vec<ChatMessage>]) -> String { if examples.is_empty() { return String::new(); } let mut out = String::from("## Examples of strong context-gathering\n\n"); out.push_str( "The following are compressed trajectories from prior high-quality insights. 
\ They show the *pattern* of tool use, not answers to copy.\n\n", ); for (i, msgs) in examples.iter().enumerate() { out.push_str(&format!("### Example {}\n\n", i + 1)); out.push_str(&Self::render_single_trajectory(msgs)); out.push('\n'); } out.push_str("---\n\n"); out } fn render_single_trajectory(msgs: &[ChatMessage]) -> String { let mut out = String::new(); if let Some(first_user) = msgs.iter().find(|m| m.role == "user") { let trimmed = first_user .content .lines() .filter(|l| !l.trim().is_empty()) .take(8) .collect::<Vec<_>>() .join("\n"); out.push_str(&format!("Input:\n{}\n\n", trimmed)); } out.push_str("Trajectory:\n"); let mut step = 1; let mut final_content: Option<String> = None; for (i, m) in msgs.iter().enumerate() { if m.role != "assistant" { continue; } if let Some(ref calls) = m.tool_calls { for call in calls { let args_brief = Self::brief_json_args(&call.function.arguments); let result_summary = msgs .get(i + 1) .filter(|r| r.role == "tool") .map(|r| Self::summarize_tool_result(&call.function.name, &r.content)) .unwrap_or_else(|| "(no result)".to_string()); out.push_str(&format!( "{}. {}({}) -> {}\n", step, call.function.name, args_brief, result_summary )); step += 1; } } else if !m.content.is_empty() { final_content = Some(m.content.clone()); } } if let Some(content) = final_content { let short: String = content.chars().take(240).collect(); out.push_str(&format!("\nFinal insight: {}...\n", short)); } out } fn brief_json_args(v: &serde_json::Value) -> String { let Some(obj) = v.as_object() else { return v.to_string(); }; obj.iter() .map(|(k, v)| { let rendered = match v { serde_json::Value::String(s) if s.chars().count() > 40 => { format!("\"{}...\"", s.chars().take(40).collect::<String>()) } _ => v.to_string(), }; format!("{}={}", k, rendered) }) .collect::<Vec<_>>() .join(", ") } /// Collapse a raw tool-result string (the text the model saw) into a /// short phrase suitable for a few-shot trajectory. 
/// Collapse a raw tool-result string (the text the model saw) into a
/// short phrase suitable for a few-shot trajectory.
///
/// Checked in order:
/// 1. Anything starting with `Error` (both the `Error: …` and the
///    `Error storing fact: …` shapes) becomes `"error"`.
/// 2. Anything starting with `No ` or `Could not ` becomes
///    `"empty (pivoted)"`.
/// 3. `Found <N> …` becomes `"<N> <kind>"`, where the kind depends on
///    `tool_name`.
/// 4. Otherwise a per-tool summary is produced.
///
/// Unknown shapes fall back to a character count, which is deliberately
/// visible so drift shows up in output.
fn summarize_tool_result(tool_name: &str, raw: &str) -> String {
    // Match the bare word so "Error: missing required parameter …" is
    // recognised too; requiring a trailing space let those fall through
    // and, for store_* tools, be reported as "stored".
    if raw.starts_with("Error") {
        return "error".to_string();
    }
    if raw.starts_with("No ") || raw.starts_with("Could not ") {
        return "empty (pivoted)".to_string();
    }

    let found_count = raw
        .strip_prefix("Found ")
        .and_then(|rest| rest.split_whitespace().next())
        .and_then(|tok| tok.parse::<usize>().ok());
    if let Some(n) = found_count {
        let kind = match tool_name {
            "search_messages" | "get_sms_messages" => "messages",
            "get_calendar_events" => "events",
            "get_location_history" => "location records",
            _ => "results",
        };
        return format!("{} {}", n, kind);
    }

    // Write-style tools echo the affected row as an `ID:<n>` token; surface
    // it when present, otherwise fall back to the bare verb.
    let with_id = |verb: &str| -> String {
        raw.split_whitespace()
            .find_map(|tok| tok.strip_prefix("ID:"))
            .map(|id| format!("{} id={}", verb, id.trim_end_matches(',')))
            .unwrap_or_else(|| verb.to_string())
    };

    match tool_name {
        "search_rag" => {
            // Hits are counted as blank-line-separated chunks.
            let n = raw.split("\n\n").filter(|s| !s.trim().is_empty()).count();
            format!("{} rag hits", n)
        }
        "get_file_tags" => {
            let n = raw.split(',').filter(|s| !s.trim().is_empty()).count();
            format!("{} tags", n)
        }
        "describe_photo" => {
            let short: String = raw.chars().take(80).collect();
            format!("described: \"{}...\"", short)
        }
        "reverse_geocode" => {
            let short: String = raw.chars().take(60).collect();
            format!("place: {}", short)
        }
        "get_personal_place_at" => {
            if raw.starts_with("No personal place") || raw.starts_with("Personal place lookup") {
                "no personal place".to_string()
            } else {
                // One "- " bullet per place; report at least one.
                let n = raw.lines().filter(|l| l.starts_with("- ")).count().max(1);
                format!("{} personal place(s)", n)
            }
        }
        "recall_entities" | "recall_facts_for_photo" | "recall_facts_for_entity" => {
            // The first line is skipped and the remaining non-blank lines
            // are counted as rows.
            let n = raw.lines().skip(1).filter(|l| !l.trim().is_empty()).count();
            let kind = if tool_name == "recall_entities" {
                "entities"
            } else {
                "facts"
            };
            format!("{} {}", n, kind)
        }
        "store_entity" | "store_fact" => with_id("stored"),
        "update_fact" => with_id("updated"),
        "supersede_fact" => with_id("superseded"),
        "get_current_datetime" => "time noted".to_string(),
        // Count characters, not bytes, so the "chars" label is accurate
        // for non-ASCII results.
        _ => format!("{} chars", raw.chars().count()),
    }
}
/// Assemble the chat system prompt from two named blocks:
///
/// 1. **Identity / voice / format** — the trimmed `custom_system_prompt`
///    when it contains anything other than whitespace, otherwise a neutral
///    default that asserts no persona of its own.
/// 2. **Procedural scaffolding** — tool-use guidance, the iteration budget
///    (`max_iterations`), and the contact-filter rule. Identity-free.
///
/// Layout of the result: identity, then `owner_id_note` (if any), then a
/// blank line, then `fewshot_block`, then the procedural block.
/// `owner_id_note` and `fewshot_block` are appended verbatim — they are
/// expected to carry their own headers / blank lines. Pass `None` / `""`
/// to skip them.
pub(crate) fn build_system_content(
    custom_system_prompt: Option<&str>,
    owner_id_note: Option<&str>,
    fewshot_block: &str,
    max_iterations: usize,
) -> String {
    const DEFAULT_IDENTITY: &str = "You are reconstructing a memory from a photo. Use the gathered \
        context to write a thoughtful summary; you decide voice, length, and shape.";

    // Identity block: caller-supplied text wins unless it is blank.
    let mut prompt = custom_system_prompt
        .map(str::trim)
        .filter(|text| !text.is_empty())
        .unwrap_or(DEFAULT_IDENTITY)
        .to_string();

    // Appending an empty note is a no-op, so no emptiness check is needed.
    if let Some(note) = owner_id_note {
        prompt.push_str(note);
    }
    prompt.push_str("\n\n");
    prompt.push_str(fewshot_block);

    // Procedural block: the only interpolated value is the iteration budget.
    prompt.push_str(&format!(
        "Tool-use guidance:\n\
         - You have a budget of {max_iterations} tool-calling iterations.\n\
         - Call tools to gather context BEFORE writing your final answer; don't answer after one or two calls.\n\
         - When calling get_sms_messages or search_rag, make at least one call WITHOUT a contact filter \
         — surrounding events matter even when a contact is known.\n\
         - Use recall_facts_for_photo + recall_entities to load any prior knowledge about subjects in the photo.\n\
         - When you identify people / places / events / things, use store_entity + store_fact to grow the persistent memory.\n\
         - Before store_entity, call recall_entities to check whether a similar name already exists; reuse the existing entity_id rather than creating a near-duplicate (e.g. \"Sara\" vs \"Sarah J.\"). The DAO will collapse obvious cosine matches, but choosing the existing id keeps facts and photo links consolidated.\n\
         - Predicates should be relationship-shaped verbs that encode a queryable claim — `lives_in`, `works_at`, `attended`, `is_friend_of`, `is_parent_of`, `interested_in`, `married_to`, `owns`. DO NOT use vague speech-act predicates like `expressed`, `said`, `mentioned`, `stated`, `quoted`, `noted`, `discussed`, `thought`, `wondered`. DO NOT store quotations or sentence fragments as `object_value` — paraphrase into a structured claim. Bad: `(Cameron, expressed, \"I'm tempted to get a part-time job there\")`. Good: `(Cameron, considered_employment_at, <Place>)` or `(Cameron, interested_in, \"part-time work\")`.\n\
         - A tool returning no results is informative; continue with the others.\n\
         - When writing your final answer, start with \"Title: <short title>\" (max 8 words) on the first line, then a blank line, then the body.",
    ));
    prompt
}
/// Consolidate client construction for the agentic insight loop.
///
/// Returns a [`ResolvedBackend`] containing the **chat** client (the model
/// that drives the agent loop), the **local** client (always the configured
/// local backend — Ollama or llama-swap — for utility calls like
/// describe_image, rerank, embeddings), the backend kind, and whether the
/// chat model receives images inline.
///
/// `overrides.model`, the sampling parameters and `overrides.num_ctx` are
/// applied to a clone of the selected chat client; the clients stored on
/// `self` are never mutated.
///
/// # Errors
///
/// - `kind` is hybrid but no OpenRouter client is configured.
/// - Pure-Ollama path with a model override that is not available on the
///   primary or the fallback server.
/// - Pure-Ollama path where the capability check fails on the primary
///   server and there is no fallback URL, or it also fails on the fallback.
/// - Pure-Ollama path where the chosen model reports no tool-calling
///   support.
pub async fn resolve_backend(
    &self,
    kind: BackendKind,
    overrides: &SamplingOverrides,
) -> Result<ResolvedBackend> {
    // True only when the llama.cpp backend is selected AND a client exists,
    // which is what makes the `unwrap()` on `self.llamacpp` below safe.
    let local_via_llamacpp = crate::ai::local_backend_is_llamacpp() && self.llamacpp.is_some();
    let is_hybrid = kind == BackendKind::Hybrid;

    // ── chat client ────────────────────────────────────────────────
    let chat: Box<dyn LlmClient> = if is_hybrid {
        // Hybrid: chat through OpenRouter.
        let arc = self.openrouter.as_ref().ok_or_else(|| {
            anyhow::anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured")
        })?;
        // Clone out of the Arc so the overrides stay local to this call.
        let mut c: OpenRouterClient = (**arc).clone();
        if let Some(ref m) = overrides.model {
            c.primary_model = m.clone();
        }
        if overrides.has_sampling() {
            c.set_sampling_params(
                overrides.temperature,
                overrides.top_p,
                overrides.top_k,
                overrides.min_p,
            );
        }
        if let Some(ctx) = overrides.num_ctx {
            c.set_num_ctx(Some(ctx));
        }
        Box::new(c)
    } else if local_via_llamacpp {
        // Local via llama-swap.
        // NOTE(review): this error arm cannot fire — `local_via_llamacpp`
        // already requires `self.llamacpp.is_some()`. A llama.cpp selection
        // without a configured client therefore falls through to the Ollama
        // branch instead of reporting this message; confirm that is intended.
        let arc = self.llamacpp.as_ref().ok_or_else(|| {
            anyhow::anyhow!("LLM_BACKEND=llamacpp but LLAMA_SWAP_URL not configured")
        })?;
        let mut c: LlamaCppClient = (**arc).clone();
        if let Some(ref m) = overrides.model {
            c.primary_model = m.clone();
        }
        if overrides.has_sampling() {
            c.set_sampling_params(
                overrides.temperature,
                overrides.top_p,
                overrides.top_k,
                overrides.min_p,
            );
        }
        if let Some(ctx) = overrides.num_ctx {
            c.set_num_ctx(Some(ctx));
        }
        Box::new(c)
    } else {
        // Pure Ollama local.
        // With a model override a fresh client is built on the same URLs,
        // passing the override both as the model and as the optional fourth
        // argument; otherwise the configured client is cloned.
        let mut ollama_client = if let Some(ref model) = overrides.model {
            OllamaClient::new(
                self.ollama.primary_url.clone(),
                self.ollama.fallback_url.clone(),
                model.clone(),
                Some(model.clone()),
            )
        } else {
            self.ollama.clone()
        };
        if overrides.has_sampling() {
            ollama_client.set_sampling_params(
                overrides.temperature,
                overrides.top_p,
                overrides.top_k,
                overrides.min_p,
            );
        }
        if let Some(ctx) = overrides.num_ctx {
            ollama_client.set_num_ctx(Some(ctx));
        }
        Box::new(ollama_client)
    };

    // ── local client (utility calls: rerank, describe_image, etc.) ─
    // For llamacpp in local mode: mirror the chat model selection so
    // rerank / describe_image hit the same model that's already
    // loaded — avoids a mid-turn model swap in llama-swap exclusive
    // mode. In hybrid mode the override is an OpenRouter model id
    // (e.g. "google/gemini-3-flash-preview") which llama-swap can't
    // serve — keep the configured local slots.
    let local: Box<dyn LlmClient> = if local_via_llamacpp {
        let mut lc = self.llamacpp.as_ref().unwrap().as_ref().clone();
        if !is_hybrid && let Some(ref m) = overrides.model {
            lc.primary_model = m.clone();
            lc.set_vision_model(m.clone());
        }
        Box::new(lc)
    } else {
        // Sampling / model overrides are not applied to the Ollama
        // utility client.
        Box::new(self.ollama.clone())
    };

    // ── images_inline ──────────────────────────────────────────────
    let images_inline = if is_hybrid {
        // Hybrid: chat model never sees images — describe-then-inject.
        false
    } else if local_via_llamacpp {
        // llama-swap models receive images directly via OpenAI content
        // parts. Capability probing isn't available (no `/api/show`),
        // so assume vision support; a misconfigured model surfaces as
        // a chat-call error.
        true
    } else {
        // Pure Ollama: probe model capabilities.
        let ollama_for_caps = if let Some(ref model) = overrides.model {
            // Verify custom model is available on at least one server.
            // A failed availability probe is treated as "not available".
            let available_on_primary =
                OllamaClient::is_model_available(&self.ollama.primary_url, model)
                    .await
                    .unwrap_or(false);
            let available_on_fallback = if let Some(ref fallback_url) = self.ollama.fallback_url {
                OllamaClient::is_model_available(fallback_url, model)
                    .await
                    .unwrap_or(false)
            } else {
                false
            };
            if !available_on_primary && !available_on_fallback {
                anyhow::bail!(
                    "model not available: '{}' not found on any configured server",
                    model
                );
            }
            model.as_str()
        } else {
            self.ollama.primary_model.as_str()
        };

        // Ask the primary server first; on any error retry once against
        // the fallback server, if one is configured.
        let capabilities = match OllamaClient::check_model_capabilities(
            &self.ollama.primary_url,
            ollama_for_caps,
        )
        .await
        {
            Ok(caps) => caps,
            Err(_) => {
                let fallback_url = self.ollama.fallback_url.as_deref().ok_or_else(|| {
                    anyhow::anyhow!(
                        "Failed to check model capabilities for '{}': model not found on primary server and no fallback configured",
                        ollama_for_caps
                    )
                })?;
                OllamaClient::check_model_capabilities(fallback_url, ollama_for_caps)
                    .await
                    .map_err(|e| {
                        anyhow::anyhow!(
                            "Failed to check model capabilities for '{}': {}",
                            ollama_for_caps,
                            e
                        )
                    })?
            }
        };

        // The agentic loop depends on tool calls, so a model without them
        // is rejected here.
        if !capabilities.has_tool_calling {
            anyhow::bail!("tool calling not supported by model '{}'", ollama_for_caps);
        }
        capabilities.has_vision
    };

    Ok(ResolvedBackend::new(chat, local, kind, images_inline))
}
Extract timestamp and contact let timestamp = if let Some(ts) = exif.as_ref().and_then(|e| e.date_taken) { ts } else { log::warn!("No date_taken in EXIF for {}, trying filename", file_path); extract_date_from_filename(&file_path) .map(|dt| dt.timestamp()) .or_else(|| { let full_path = self.resolve_full_path(&file_path)?; File::open(&full_path) .and_then(|f| f.metadata()) .inspect_err(|e| { log::warn!( "Failed to get file timestamp for agentic insight {}: {}", file_path, e ) }) .ok() .and_then(|m| earliest_fs_time(&m)) .map(|t| DateTime::<Utc>::from(t).timestamp()) }) .unwrap_or_else(|| Utc::now().timestamp()) }; let date_taken = DateTime::from_timestamp(timestamp, 0) .map(|dt| dt.date_naive()) .unwrap_or_else(|| Utc::now().date_naive()); let contact = Self::extract_contact_from_path(&file_path); log::info!("Agentic: date_taken={}, contact={:?}", date_taken, contact); // 5. Fetch tags let tag_names: Vec<String> = { let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao"); dao.get_tags_for_path(&insight_cx, &file_path) .unwrap_or_else(|e| { log::warn!("Failed to fetch tags for agentic {}: {}", file_path, e); Vec::new() }) .into_iter() .map(|t| t.name) .collect() }; // 6. Ensure the owner entity exists so the agent can reference it. // Prior entity_photo_links for this file are intentionally preserved // across regenerations — clearing them made `recall_facts_for_photo` // always return empty and discarded hard-won knowledge. Re-linking // the same entity is a no-op (INSERT OR IGNORE). let owner_name = user_display_name(); let owner_entity_id: Option<i32> = { let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); // Upsert the owner entity so the agent always has a stable entity ID to reference. let owner = crate::database::models::InsertEntity { name: owner_name.clone(), entity_type: "person".to_string(), description: format!( "The owner of this photo collection. 
All memories are written from {}'s perspective.", owner_name ), embedding: None, confidence: 1.0, status: "active".to_string(), created_at: Utc::now().timestamp(), updated_at: Utc::now().timestamp(), }; match kdao.upsert_entity(&insight_cx, owner) { Ok(e) => { log::info!("Owner entity '{}' ID: {}", owner_name, e.id); Some(e.id) } Err(e) => { log::warn!("Failed to upsert owner entity '{}': {:?}", owner_name, e); None } } }; // 7. Load image. Always attempted — vision-capable models get the // base64 inline; hybrid mode describes it locally and injects text. let image_base64 = match self.load_image_as_base64(&file_path) { Ok(b64) => { log::info!("Loaded image for agentic model"); Some(b64) } Err(e) => { log::warn!("Failed to load image for agentic: {}", e); None } }; // Describe-then-inline (hybrid only). Vision describe routes through // the local backend so non-text work stays off OpenRouter. let inlined_visual_description: Option<String> = if !backend.images_inline { match image_base64.as_deref() { Some(b64) => match backend.local().describe_image(b64).await { Ok(desc) => { log::info!("{}: vision describe succeeded ({} chars)", kind, desc.len()); Some(desc) } Err(e) => { log::warn!( "{}: vision describe failed, continuing without: {}", kind, e ); None } }, None => None, } } else { None }; // 8. Build system message via the two-block helper. Custom prompt // (when supplied) is the authoritative identity — the framework // never appends a competing "you are a personal photo memory // assistant" line. The procedural block stays identity-free. let owner_id_note = owner_entity_id.map(|id| { format!( "\n\nYour identity in the knowledge store: {name} (entity ID: {id}). \ When storing facts where you ({name}) are the object — for example, someone is your friend, \ sibling, or colleague — use subject_entity_id for the other person and set object_value to \ \"{name}\" (or use store_fact with the other person as subject). 
When storing facts about \ {name} directly, use {id} as the subject_entity_id.", name = owner_name, id = id ) }); let fewshot_block = Self::render_fewshot_examples(&fewshot_examples); let system_content = Self::build_system_content( custom_system_prompt.as_deref(), owner_id_note.as_deref(), &fewshot_block, max_iterations, ); // 9. Build user message let gps_info = exif .as_ref() .and_then(|e| { if let (Some(lat), Some(lon)) = (e.gps_latitude, e.gps_longitude) { Some(format!("GPS: {:.4}, {:.4}", lat, lon)) } else { None } }) .unwrap_or_else(|| "GPS: unknown".to_string()); let tags_info = if tag_names.is_empty() { "Tags: none".to_string() } else { format!("Tags: {}", tag_names.join(", ")) }; let contact_info = contact .as_ref() .map(|c| format!("Contact/Person: {}", c)) .unwrap_or_else(|| "Contact/Person: unknown".to_string()); let visual_block = inlined_visual_description .as_deref() .map(|d| format!("Visual description (from local vision model):\n{}\n\n", d)) .unwrap_or_default(); // Context-only payload — no output-shape prescription. The persona / // custom_system_prompt owns voice, length, and structure. The "title // and summary" claim that used to live here was unused (the title is // regenerated post-hoc from the summary by generate_photo_title). let user_content = format!( "{visual_block}Photo file path: {file_path}\n\ Date taken: {date}\n\ {contact_info}\n\ {gps_info}\n\ {tags_info}\n\n\ Gather context with the available tools, then respond.", date = date_taken.format("%B %d, %Y"), ); // 10. Define tools. describe_photo offered only when the chat model // sees images directly (images_inline); in hybrid mode the visual // description is already inlined as text. Persona-aware so the // persona's allow_agent_corrections gate opens here exactly like // it does for chat turns (insight_chat does the same). 
let gate_opts = self.current_gate_opts_for_persona(backend.images_inline, Some((user_id, &persona_id))); let tools = Self::build_tool_definitions(gate_opts); // 11. Build initial messages. images_inline → attach base64 to the // user message; describe-then-inline → text was already injected. let system_msg = ChatMessage::system(system_content); let mut user_msg = ChatMessage::user(user_content); if backend.images_inline && let Some(ref img) = image_base64 { user_msg.images = Some(vec![img.clone()]); } let mut messages = vec![system_msg, user_msg]; let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx); let loop_cx = insight_cx.with_span(loop_span); let mut final_content = String::new(); let mut iterations_used = 0usize; let mut last_prompt_eval_count: Option<i32> = None; let mut last_eval_count: Option<i32> = None; for iteration in 0..max_iterations { iterations_used = iteration + 1; log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations); let (response, prompt_tokens, eval_tokens) = backend .chat() .chat_with_tools(messages.clone(), tools.clone()) .await?; last_prompt_eval_count = prompt_tokens; last_eval_count = eval_tokens; // Sanitize tool call arguments before pushing back into history. // Some models occasionally return non-object arguments (bool, string, null) // which Ollama rejects when they are re-sent in a subsequent request. 
let mut response = response; if let Some(ref mut tool_calls) = response.tool_calls { for tc in tool_calls.iter_mut() { if !tc.function.arguments.is_object() { log::warn!( "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}", tc.function.name, tc.function.arguments ); tc.function.arguments = serde_json::Value::Object(Default::default()); } } } messages.push(response.clone()); if let Some(ref tool_calls) = response.tool_calls && !tool_calls.is_empty() { for tool_call in tool_calls { log::info!( "Agentic tool call [{}]: {} {}", iteration, tool_call.function.name, tool_call.function.arguments ); let result = self .execute_tool( &tool_call.function.name, &tool_call.function.arguments, &backend, &image_base64, &file_path, user_id, &persona_id, &loop_cx, ) .await; messages.push(ChatMessage::tool_result(result)); } continue; } // No tool calls — this is the final answer final_content = response.content; break; } // If loop exhausted without final answer, ask for one if final_content.is_empty() { log::info!( "Agentic loop exhausted after {} iterations, requesting final answer", iterations_used ); messages.push(ChatMessage::user(format!( "Based on the context gathered, please write the final photo insight. Start with \"Title: <short title>\" on the first line (max 8 words), then a blank line, then the detailed personal summary. Write in first person as {}.", user_display_name() ))); let (final_response, prompt_tokens, eval_tokens) = backend .chat() .chat_with_tools(messages.clone(), vec![]) .await?; last_prompt_eval_count = prompt_tokens; last_eval_count = eval_tokens; final_content = final_response.content.clone(); messages.push(final_response); } loop_cx .span() .set_attribute(KeyValue::new("iterations_used", iterations_used as i64)); loop_cx.span().set_status(Status::Ok); // 13. Strip any leaked <think>…</think> reasoning block (thinking // models emit it ahead of the answer; the raw transcript in // training_messages keeps it), then parse the title. 
final_content = crate::ai::llm_client::strip_think_blocks(&final_content); let (title, body) = parse_title_body(&final_content); final_content = body; log::info!("Agentic parsed title: {}", title); let summary_preview: String = final_content.chars().take(200).collect(); log::info!( "Agentic generated summary ({} chars): {}", final_content.len(), summary_preview ); // 14. Serialize the full message history for training data let training_messages = match serde_json::to_string(&messages) { Ok(json) => Some(json), Err(e) => { log::warn!("Failed to serialize training messages: {}", e); None } }; // 15. Store insight (returns the persisted row including its new id) let model_version = backend.model().to_string(); let fewshot_source_ids_json = if fewshot_source_ids.is_empty() { None } else { Some(serde_json::to_string(&fewshot_source_ids).unwrap_or_else(|_| "[]".to_string())) }; let insight = InsertPhotoInsight { library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), title, summary: final_content, generated_at: Utc::now().timestamp(), model_version, is_current: true, training_messages, backend: kind.as_str().to_string(), fewshot_source_ids: fewshot_source_ids_json, content_hash: None, num_ctx, temperature, top_p, top_k, min_p, system_prompt: custom_system_prompt.clone(), persona_id: Some(persona_id.clone()), prompt_eval_count: last_prompt_eval_count, eval_count: last_eval_count, }; let stored = { let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); dao.store_insight(&insight_cx, insight) .map_err(|e| anyhow::anyhow!("Failed to store agentic insight: {:?}", e)) }; match &stored { Ok(_) => { log::info!("Successfully stored agentic insight for {}", file_path); insight_cx.span().set_status(Status::Ok); } Err(e) => { log::error!("Failed to store agentic insight: {:?}", e); insight_cx.span().set_status(Status::error(e.to_string())); } } let stored_insight = stored?; // 16. 
Backfill source_insight_id on all facts recorded for this photo during the loop { let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); if let Err(e) = kdao.update_facts_insight_id(&insight_cx, &file_path, stored_insight.id) { log::warn!( "Failed to backfill source_insight_id for {}: {:?}", file_path, e ); } } Ok(( Some(stored_insight.id), last_prompt_eval_count, last_eval_count, )) } /// A read-only agentic tool loop: chat with tools until the model stops /// calling them, then return the final content. /// /// This is the loop body extracted from /// `generate_agentic_insight_for_photo` (lines 4316-4377) so it can be /// reused by the reel-scripter without the photo-specific context /// (image_base64, file_path, persona_id). The photo insight loop still /// has its own copy because it threads image/file context through /// `execute_tool`. /// /// Calls `execute_tool` with empty file/image context; enabled tools /// never read those fields. #[allow(dead_code)] pub(crate) async fn run_readonly_tool_loop( &self, backend: &ResolvedBackend, mut messages: Vec<ChatMessage>, tools: Vec<Tool>, max_iter: usize, ) -> Result<String> { let mut final_content = String::new(); for iteration in 0..max_iter { log::info!("Agentic iteration {}/{}", iteration + 1, max_iter); let (response, _prompt_tokens, _eval_tokens) = backend .chat() .chat_with_tools(messages.clone(), tools.clone()) .await?; // Sanitize tool call arguments before pushing back into history. // Some models occasionally return non-object arguments (bool, // string, null) which Ollama rejects when they are re-sent in // a subsequent request. 
let mut response = response; if let Some(ref mut tool_calls) = response.tool_calls { for tc in tool_calls.iter_mut() { if !tc.function.arguments.is_object() { log::warn!( "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}", tc.function.name, tc.function.arguments ); tc.function.arguments = serde_json::Value::Object(Default::default()); } } } messages.push(response.clone()); if let Some(ref tool_calls) = response.tool_calls && !tool_calls.is_empty() { for tool_call in tool_calls { log::info!( "Agentic tool call [{}]: {} {}", iteration, tool_call.function.name, tool_call.function.arguments ); let result = self .execute_tool( &tool_call.function.name, &tool_call.function.arguments, backend, &None, "", 0, "", &opentelemetry::Context::new(), ) .await; messages.push(ChatMessage::tool_result(result)); } continue; } // No tool calls — this is the final answer final_content = response.content; break; } // If loop exhausted without final answer, ask for one if final_content.is_empty() { log::info!( "Agentic loop exhausted after {} iterations, requesting final answer", max_iter ); messages.push(ChatMessage::user( "Based on the context gathered, please write the final answer. 
Return ONLY the JSON object, no prose or code fences.", )); let (final_response, _, _) = backend .chat() .chat_with_tools(messages.clone(), vec![]) .await?; final_content = final_response.content.clone(); messages.push(final_response); } Ok(final_content) } /// Reverse geocode GPS coordinates to human-readable place names async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option<String> { let url = format!( "https://nominatim.openstreetmap.org/reverse?format=json&lat={}&lon={}", lat, lon ); log::debug!("Reverse geocoding {}, {} via Nominatim", lat, lon); let client = reqwest::Client::new(); let response = match client .get(&url) .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent .send() .await { Ok(resp) => resp, Err(e) => { log::warn!("Geocoding network error for {}, {}: {}", lat, lon, e); return None; } }; if !response.status().is_success() { log::warn!( "Geocoding HTTP error for {}, {}: {}", lat, lon, response.status() ); return None; } let data: NominatimResponse = match response.json().await { Ok(d) => d, Err(e) => { log::warn!("Geocoding JSON parse error for {}, {}: {}", lat, lon, e); return None; } }; // Try to build a concise location name if let Some(addr) = data.address { let mut parts = Vec::new(); // Prefer city/town/village if let Some(city) = addr.city.or(addr.town).or(addr.village) { parts.push(city); } // Add state if available if let Some(state) = addr.state { parts.push(state); } if !parts.is_empty() { log::info!("Reverse geocoded {}, {} -> {}", lat, lon, parts.join(", ")); return Some(parts.join(", ")); } } // Fallback to display_name if structured address not available if let Some(ref display_name) = data.display_name { log::info!( "Reverse geocoded {}, {} -> {} (display_name)", lat, lon, display_name ); } else { log::warn!("Geocoding returned no address data for {}, {}", lat, lon); } data.display_name } } #[cfg(test)] mod tests { use super::*; use crate::ai::ollama::{ToolCall, ToolCallFunction}; #[test] fn 
build_tool_definitions_drops_gated_tools() { let opts = ToolGateOpts { has_vision: false, apollo_enabled: false, daily_summaries_present: false, calendar_present: false, location_history_present: false, faces_present: false, allow_agent_corrections: false, }; let tools = InsightGenerator::build_tool_definitions(opts); let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect(); // Always-on tools survive. assert!(names.contains(&"search_messages")); assert!(names.contains(&"get_sms_messages")); assert!(names.contains(&"get_file_tags")); assert!(names.contains(&"reverse_geocode")); assert!(names.contains(&"get_current_datetime")); assert!(names.contains(&"recall_entities")); assert!(names.contains(&"recall_facts_for_photo")); assert!(names.contains(&"recall_facts_for_entity")); assert!(names.contains(&"store_entity")); assert!(names.contains(&"store_fact")); // Gated tools are absent. assert!(!names.contains(&"describe_photo")); assert!(!names.contains(&"get_personal_place_at")); assert!(!names.contains(&"search_rag")); assert!(!names.contains(&"get_calendar_events")); assert!(!names.contains(&"get_location_history")); assert!(!names.contains(&"get_faces_in_photo")); // Agent-correction tools are absent without the gate. 
assert!(!names.contains(&"update_fact")); assert!(!names.contains(&"supersede_fact")); } #[test] fn build_tool_definitions_includes_gated_tools_when_present() { let opts = ToolGateOpts { has_vision: true, apollo_enabled: true, daily_summaries_present: true, calendar_present: true, location_history_present: true, faces_present: true, allow_agent_corrections: true, }; let tools = InsightGenerator::build_tool_definitions(opts); let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect(); assert!(names.contains(&"describe_photo")); assert!(names.contains(&"get_personal_place_at")); assert!(names.contains(&"search_rag")); assert!(names.contains(&"get_calendar_events")); assert!(names.contains(&"get_location_history")); assert!(names.contains(&"get_faces_in_photo")); assert!(names.contains(&"update_fact")); assert!(names.contains(&"supersede_fact")); } fn place(name: &str, description: &str) -> ApolloPlace { ApolloPlace { id: 1, name: name.to_string(), description: description.to_string(), lat: 0.0, lon: 0.0, radius_m: 200, category: None, } } #[test] fn compose_location_with_apollo_and_nominatim_and_description() { let s = compose_location_string( Some(place("Home", "House in Cambridge")), Some("Cambridge, MA".to_string()), 42.36, -71.06, ); assert_eq!(s, "Home (House in Cambridge) — near Cambridge, MA"); } #[test] fn compose_location_apollo_without_description_drops_parenthetical() { let s = compose_location_string( Some(place("Home", "")), Some("Cambridge, MA".to_string()), 42.36, -71.06, ); assert_eq!(s, "Home — near Cambridge, MA"); } #[test] fn compose_location_nominatim_only() { let s = compose_location_string(None, Some("Cambridge, MA".to_string()), 42.36, -71.06); assert_eq!(s, "Cambridge, MA"); } #[test] fn compose_location_apollo_only_no_description() { let s = compose_location_string(Some(place("Home", "")), None, 42.36, -71.06); assert_eq!(s, "Home"); } #[test] fn compose_location_falls_back_to_coordinates() { let s = 
compose_location_string(None, None, 42.3601, -71.0589); assert_eq!(s, "42.3601, -71.0589"); } #[test] fn combine_contexts_includes_tags_section_when_tags_present() { let result = InsightGenerator::combine_contexts( None, None, None, None, Some("vacation, hiking, mountains".to_string()), ); assert!(result.contains("## Tags"), "Should include Tags section"); assert!( result.contains("vacation, hiking, mountains"), "Should include tag names" ); } #[test] fn combine_contexts_omits_tags_section_when_no_tags() { let result = InsightGenerator::combine_contexts( Some("some messages".to_string()), None, None, None, None, // no tags ); assert!( !result.contains("## Tags"), "Should not include Tags section when None" ); assert!( result.contains("## Messages"), "Should still include Messages" ); } #[test] fn combine_contexts_returns_no_context_message_when_all_none() { let result = InsightGenerator::combine_contexts(None, None, None, None, None); assert_eq!(result, "No additional context available"); } // These tests assert the shape of the strings returned by the tool // implementations above. If a tool's output format changes, update the // tool AND the corresponding arm of `summarize_tool_result` — these // tests exist to make that coupling loud. #[test] fn summarize_errors_uniformly() { assert_eq!( InsightGenerator::summarize_tool_result("search_rag", "Error searching RAG: boom"), "error" ); assert_eq!( InsightGenerator::summarize_tool_result( "get_sms_messages", "Error fetching SMS messages: timeout" ), "error" ); } #[test] fn summarize_empty_results_uniformly() { assert_eq!( InsightGenerator::summarize_tool_result("search_rag", "No relevant messages found."), "empty (pivoted)" ); assert_eq!( InsightGenerator::summarize_tool_result("get_sms_messages", "No messages found."), "empty (pivoted)" ); assert_eq!( InsightGenerator::summarize_tool_result( "reverse_geocode", "Could not resolve coordinates to a place name." 
), "empty (pivoted)" ); assert_eq!( InsightGenerator::summarize_tool_result( "recall_facts_for_photo", "No knowledge facts found for this photo." ), "empty (pivoted)" ); } #[test] fn summarize_found_count_per_tool() { assert_eq!( InsightGenerator::summarize_tool_result( "get_sms_messages", "Found 7 messages:\n[2023-08-15 10:00] Sarah: hi" ), "7 messages" ); assert_eq!( InsightGenerator::summarize_tool_result( "search_messages", "Found 3 messages (mode: hybrid):\n\n[2023-08-15] Sarah — hi" ), "3 messages" ); assert_eq!( InsightGenerator::summarize_tool_result( "get_calendar_events", "Found 2 calendar events:\n[2023-08-15 10:00] Wedding" ), "2 events" ); assert_eq!( InsightGenerator::summarize_tool_result( "get_location_history", "Found 5 location records:\n[2023-08-15 10:00] 39.0, -120.0" ), "5 location records" ); } #[test] fn found_header_labels_truncation_honestly() { // Truncated: total exceeds what's listed below the header. assert_eq!( InsightGenerator::found_header(312, 60, "messages"), "Found 312 messages (showing first 60):" ); // Not truncated: plain header, no annotation noise. assert_eq!( InsightGenerator::found_header(7, 7, "messages"), "Found 7 messages:" ); assert_eq!( InsightGenerator::found_header(40, 20, "location records"), "Found 40 location records (showing first 20):" ); } #[test] fn summarize_parses_truncated_found_header() { // The "(showing first K)" annotation must not break the few-shot // trajectory summarizer's "Found N" count parsing. 
assert_eq!( InsightGenerator::summarize_tool_result( "get_sms_messages", "Found 312 messages (showing first 60):\n[2023-08-15 10:00] Sarah: hi" ), "312 messages" ); } fn make_search_hit( id: i64, contact: &str, body: &str, snippet: Option<&str>, type_: i32, ) -> SmsSearchHit { SmsSearchHit { message_id: id, contact_name: contact.to_string(), contact_address: "+15551234567".to_string(), body: body.to_string(), date: 1_700_000_000 + id * 86_400, type_, similarity_score: None, snippet: snippet.map(|s| s.to_string()), } } #[test] fn format_search_hits_falls_back_to_body_when_no_snippet() { // fts5 mode often returns no snippet for messages that didn't need // excerpting (whole body short, or semantic-only hit in hybrid). let hit = make_search_hit(1, "Sarah", "see you at the lake tomorrow", None, 1); let out = InsightGenerator::format_search_hits(&[hit], "fts5", false); assert!(out.starts_with("Found 1 messages (mode: fts5")); assert!(out.contains("see you at the lake tomorrow")); // Received message: contact is the sender. assert!(out.contains("Sarah →")); assert!(!out.contains("date-filtered")); } #[test] fn format_search_hits_labels_sent_direction() { // Sent messages must name the recipient — results can span multiple // conversations, and a sender-only label left them unattributable. let hit = make_search_hit(5, "Sarah", "on my way", None, 2); let out = InsightGenerator::format_search_hits(&[hit], "fts5", false); assert!(out.contains("→ Sarah —")); } #[test] fn format_search_hits_prefers_snippet_over_body_and_strips_marks() { let hit = make_search_hit( 2, "Sarah", "long body text that should be ignored once the server returns a focused snippet", Some("…at the lake tomorrow…"), 1, ); let out = InsightGenerator::format_search_hits(&[hit], "hybrid", true); // Snippet wins over body, tags stripped. 
assert!(out.contains("at the lake tomorrow")); assert!(!out.contains("")); assert!(!out.contains("")); assert!(!out.contains("long body text")); // Date-filter flag flows through to the header. assert!(out.contains("date-filtered")); } #[test] fn format_search_hits_surfaces_mms_attachment_only_matches() { // Regression guard: pre-snippet, MMS rows that matched via // message_parts_fts (filename / part text) had an empty `body` // and rendered as a blank preview to the LLM. let hit = make_search_hit(3, "Mom", "", Some("birthday_cake.jpg"), 1); let out = InsightGenerator::format_search_hits(&[hit], "fts5", false); assert!(out.contains("birthday_cake.jpg")); assert!(!out.contains("")); assert!(out.contains("Mom →")); } #[test] fn format_search_hits_empty_snippet_falls_back_to_body() { let hit = make_search_hit(4, "Dad", "fallback body", Some(""), 1); let out = InsightGenerator::format_search_hits(&[hit], "fts5", false); assert!(out.contains("fallback body")); } #[test] fn strip_mark_tags_handles_common_patterns() { assert_eq!( InsightGenerator::strip_mark_tags("plain text"), "plain text" ); assert_eq!( InsightGenerator::strip_mark_tags("…the lake…"), "…the lake…" ); // FTS5 highlights every match — multiple marks per snippet are normal. 
assert_eq!( InsightGenerator::strip_mark_tags("dinner at tahoe"), "dinner at tahoe" ); } #[test] fn summarize_search_rag_counts_hits() { let raw = "[2023-08-15] Sarah: venue confirmed\n\n[2023-08-14] Mom: travel plans\n\n[2023-08-13] Dad: weather"; assert_eq!( InsightGenerator::summarize_tool_result("search_rag", raw), "3 rag hits" ); } #[test] fn summarize_get_file_tags() { assert_eq!( InsightGenerator::summarize_tool_result("get_file_tags", "wedding, tahoe, 2023"), "3 tags" ); } #[test] fn summarize_describe_photo_truncates() { let raw = "A wedding ceremony at Lake Tahoe with about 40 guests seated in rows facing a lakeside arch decorated with white flowers."; let out = InsightGenerator::summarize_tool_result("describe_photo", raw); assert!(out.starts_with("described: \"")); assert!(out.contains("A wedding ceremony at Lake Tahoe")); assert!(out.ends_with("...\"")); } #[test] fn summarize_reverse_geocode_returns_place() { let out = InsightGenerator::summarize_tool_result("reverse_geocode", "South Lake Tahoe, CA, USA"); assert_eq!(out, "place: South Lake Tahoe, CA, USA"); } #[test] fn summarize_recall_entities_counts_lines() { let raw = "Known entities:\n- Sarah (person)\n- Tahoe (place)\n- Wedding 2023 (event)"; assert_eq!( InsightGenerator::summarize_tool_result("recall_entities", raw), "3 entities" ); } #[test] fn summarize_recall_facts_counts_lines() { let raw = "Knowledge for this photo:\n- Sarah: college friend\n- Tahoe: vacation spot"; assert_eq!( InsightGenerator::summarize_tool_result("recall_facts_for_photo", raw), "2 facts" ); } #[test] fn summarize_store_entity_extracts_id() { assert_eq!( InsightGenerator::summarize_tool_result( "store_entity", "Entity stored: ID:42 | person | Sarah | confidence:0.80" ), "stored id=42" ); } #[test] fn summarize_store_fact_extracts_id() { assert_eq!( InsightGenerator::summarize_tool_result( "store_fact", "Stored new fact: ID:17 | confidence:0.60" ), "stored id=17" ); assert_eq!( InsightGenerator::summarize_tool_result( 
"store_fact", "Corroborated existing fact: ID:17 | confidence:0.85" ), "stored id=17" ); } #[test] fn summarize_current_datetime() { assert_eq!( InsightGenerator::summarize_tool_result( "get_current_datetime", "Current date/time: 2024-01-15 12:00:00 PST (Monday)" ), "time noted" ); } #[test] fn summarize_unknown_tool_falls_back_to_char_count() { let out = InsightGenerator::summarize_tool_result("never_heard_of_it", "some output"); assert_eq!(out, "11 chars"); } #[test] fn build_system_content_uses_custom_prompt_verbatim_for_identity() { let out = InsightGenerator::build_system_content( Some("You are a journal writer in first person, warm and reflective."), None, "", 6, ); assert!( out.starts_with("You are a journal writer in first person, warm and reflective."), "custom prompt must lead the system content; got: {}", out.chars().take(200).collect::<String>(), ); assert!( !out.contains("personal photo memory assistant"), "framework identity must not leak when custom prompt is supplied" ); assert!(out.contains("Tool-use guidance")); assert!(out.contains("budget of 6")); } #[test] fn build_system_content_uses_neutral_default_when_no_custom() { let out = InsightGenerator::build_system_content(None, None, "", 6); assert!(out.contains("reconstructing a memory from a photo")); assert!(!out.contains("personal photo memory assistant")); assert!(out.contains("Tool-use guidance")); } #[test] fn build_system_content_includes_fewshot_and_owner_id() { let owner = "\n\nYour identity in the knowledge store: Alice (entity ID: 7)."; let fewshot = "## Examples\n\n### Example 1\n...\n\n---\n\n"; let out = InsightGenerator::build_system_content(None, Some(owner), fewshot, 6); assert!(out.contains("Alice (entity ID: 7)")); assert!(out.contains("## Examples")); } #[test] fn render_fewshot_empty_returns_empty_string() { assert!(InsightGenerator::render_fewshot_examples(&[]).is_empty()); } #[test] fn render_single_trajectory_walks_tool_calls_in_order() { let arguments = serde_json::json!({ 
"query": "wedding", "date": "2023-08-15" }); let msgs = vec![ ChatMessage::system("ignored"), ChatMessage::user("Photo file path: /photos/img.jpg\nDate taken: August 15, 2023"), ChatMessage { role: "assistant".to_string(), content: String::new(), tool_calls: Some(vec![ToolCall { function: ToolCallFunction { name: "search_rag".to_string(), arguments, }, id: None, }]), images: None, }, ChatMessage::tool_result("No relevant messages found."), ChatMessage { role: "assistant".to_string(), content: "Final title\n\nFinal body.".to_string(), tool_calls: None, images: None, }, ]; let out = InsightGenerator::render_single_trajectory(&msgs); assert!(out.contains("Input:")); assert!(out.contains("/photos/img.jpg")); assert!(out.contains("1. search_rag(")); assert!(out.contains("query=\"wedding\"")); assert!(out.contains("-> empty (pivoted)")); assert!(out.contains("Final insight: Final title")); } #[test] fn parse_title_body_standard_format() { let (t, b) = parse_title_body("Title: Summer at the Lake\n\nWe spent the afternoon..."); assert_eq!(t, "Summer at the Lake"); assert_eq!(b, "We spent the afternoon..."); } #[test] fn parse_title_body_single_newline() { let (t, b) = parse_title_body("Title: Morning Walk\nThe sun was rising..."); assert_eq!(t, "Morning Walk"); assert_eq!(b, "The sun was rising..."); } #[test] fn parse_title_body_lowercase_prefix() { let (t, b) = parse_title_body("title: Garden Party\n\nEveryone gathered..."); assert_eq!(t, "Garden Party"); assert_eq!(b, "Everyone gathered..."); } #[test] fn parse_title_body_strips_bold_wrapper() { let (t, b) = parse_title_body("**Title: A Day in the Woods**\n\nWe hiked the ridge trail."); assert_eq!(t, "A Day in the Woods"); assert_eq!(b, "We hiked the ridge trail."); } #[test] fn parse_title_body_strips_bold_label_only() { // Bold around just the label: "**Title:** X" let (t, b) = parse_title_body("**Title:** Garden Party\n\nEveryone gathered..."); assert_eq!(t, "Garden Party"); assert_eq!(b, "Everyone gathered..."); } 
#[test] fn parse_title_body_strips_heading_hashes() { let (t, b) = parse_title_body("## Title: Morning Walk\nThe sun was rising..."); assert_eq!(t, "Morning Walk"); assert_eq!(b, "The sun was rising..."); } #[test] fn parse_title_body_fallback_first_sentence() { let (t, b) = parse_title_body("A warm summer day. We gathered at the park for a picnic."); assert_eq!(t, "A warm summer day"); assert_eq!(b, "We gathered at the park for a picnic."); } #[test] fn parse_title_body_fallback_truncate() { let input = "A single long paragraph with no periods or title prefix that just keeps going on and on"; let (t, b) = parse_title_body(input); assert!(t.len() <= 60); assert_eq!(b, input); } /// Regression: hybrid mode was leaking the OpenRouter model override /// into the local llamacpp client, causing describe_image to send /// e.g. "google/gemini-3-flash-preview" to llama-swap (which 400s). #[test] fn resolve_backend_hybrid_does_not_leak_model_to_local_llamacpp() { use crate::ai::llamacpp::LlamaCppClient; let mut base = LlamaCppClient::new(Some("http://localhost:9292/v1".into()), Some("chat".into())); base.set_vision_model("vision".into()); base.set_embedding_model("embed".into()); let openrouter_model = "google/gemini-3-flash-preview"; let overrides_model: Option<String> = Some(openrouter_model.into()); let is_hybrid = true; // Replicate the resolve_backend local-client construction // (lines ~3686-3695 of this file). let mut lc = base.clone(); if !is_hybrid && let Some(ref m) = overrides_model { lc.primary_model = m.clone(); lc.set_vision_model(m.clone()); } // In hybrid mode the local client must keep its configured slots. assert_eq!( lc.vision_model, "vision", "hybrid mode must not override vision_model with OpenRouter model" ); assert_eq!( lc.primary_model, "chat", "hybrid mode must not override primary_model with OpenRouter model" ); assert_eq!(lc.embedding

use anyhow::Result;
use base64::Engine as _;
use chrono::{DateTime, Local, NaiveDate, Utc};
use image::ImageFormat;
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
use serde::Deserialize;
use std::fs::File;
use std::io::Cursor;
use std::sync::{Arc, Mutex};

use crate::ai::apollo_client::{ApolloClient, ApolloPlace};
use crate::ai::backend::{BackendKind, ResolvedBackend, SamplingOverrides};
use crate::ai::llamacpp::LlamaCppClient;
use crate::ai::llm_client::LlmClient;
use crate::ai::ollama::{ChatMessage, OllamaClient, Tool};
use crate::ai::openrouter::OpenRouterClient;
use crate::ai::sms_client::{SmsApiClient, SmsSearchHit, SmsSearchParams};
use crate::ai::user_display_name;
use crate::database::models::InsertPhotoInsight;
use crate::database::{
    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
    SearchHistoryDao,
};
use crate::libraries::Library;
use crate::memories::extract_date_from_filename;
use crate::otel::global_tracer;
use crate::tags::TagDao;
use crate::utils::{earliest_fs_time, normalize_path};

/// Max location records rendered by `tool_get_location_history`. The DAO
/// query is range-bounded, not limited, so the tool caps the rendered list
/// and labels the truncation via `found_header`.
const LOCATION_HISTORY_DISPLAY_LIMIT: usize = 20;

/// Trim markdown decoration from both ends of a model-emitted title.
///
/// Removed characters: `*`, `_`, backtick, `#`, `"` and any whitespace.
/// Only the ends are trimmed; decoration inside the title is left alone.
/// Models wrap the line despite the prompt: `**Title: A Day in the Woods**`,
/// `## Title: ...`, `"..."`.
pub(crate) fn strip_title_markdown(s: &str) -> &str {
    let is_decoration = |c: char| c.is_whitespace() || matches!(c, '*' | '_' | '`' | '#' | '"');
    s.trim_matches(is_decoration)
}

/// Parse a "Title: {title}\n\n{body}" response into (title, body).
/// Falls back to the first sentence as the title if the model didn't
/// follow the format.
pub(crate) fn parse_title_body(raw: &str) -> (String, String) { let trimmed = raw.trim(); // Try "Title: \n<body>", tolerating markdown decoration around // the title line. let (first_line, rest) = match trimmed.find('\n') { Some(pos) => (&trimmed[..pos], trimmed[pos..].trim()), None => (trimmed, ""), }; let first_line = strip_title_markdown(first_line); if let Some(t) = first_line .strip_prefix("Title:") .or_else(|| first_line.strip_prefix("title:")) { let title = strip_title_markdown(t); if !title.is_empty() && !rest.is_empty() { return (title.to_string(), rest.to_string()); } } // Fallback: first sentence (up to first `. ` or `.\n`) becomes the title if let Some(pos) = trimmed.find(". ").or_else(|| trimmed.find(".\n")) { let title = strip_title_markdown(&trimmed[..pos]); let body = trimmed[pos + 1..].trim(); if title.len() <= 100 && !body.is_empty() { return (title.to_string(), body.to_string()); } } // Last resort: truncate to 60 chars for title, full text as body let title: String = trimmed.chars().take(60).collect(); let title = strip_title_markdown(title.trim_end()).to_string(); (title, trimmed.to_string()) } /// Combine an optional personal Apollo Place with an optional Nominatim /// reverse-geocoded city, falling back to bare coordinates when neither /// resolves. Free function so we can test it cheaply without spinning up /// the whole InsightGenerator. 
fn compose_location_string(
    apollo: Option<ApolloPlace>,
    nominatim: Option<String>,
    lat: f64,
    lon: f64,
) -> String {
    // No personal place: use the reverse-geocoded name, else bare coordinates.
    let Some(place) = apollo else {
        return nominatim.unwrap_or_else(|| format!("{lat:.4}, {lon:.4}"));
    };

    // "Name" or "Name (description)", depending on whether a description exists.
    let label = if place.description.is_empty() {
        place.name
    } else {
        format!("{} ({})", place.name, place.description)
    };

    match nominatim {
        Some(city) => format!("{} — near {}", label, city),
        None => label,
    }
}

/// Deserialization target for a Nominatim response; only the fields read
/// by this module are declared.
#[derive(Deserialize)]
struct NominatimResponse {
    display_name: Option<String>,
    address: Option<NominatimAddress>,
}

/// Address portion of a [`NominatimResponse`]; every field is optional.
#[derive(Deserialize)]
struct NominatimAddress {
    city: Option<String>,
    town: Option<String>,
    village: Option<String>,
    state: Option<String>,
}

/// Bundles the LLM clients, external service clients and DAOs used to
/// generate photo insights. Cloning copies the client values and bumps the
/// `Arc` reference counts of the shared DAOs.
#[derive(Clone)]
pub struct InsightGenerator {
    ollama: OllamaClient,
    /// Optional OpenRouter client, used when `backend=hybrid` is requested.
    /// `None` when `OPENROUTER_API_KEY` is not configured.
    openrouter: Option<Arc<OpenRouterClient>>,
    /// Optional llama-swap client, used when `backend=llamacpp` is requested.
    /// `None` when `LLAMA_SWAP_URL` is not configured.
    llamacpp: Option<Arc<LlamaCppClient>>,
    sms_client: SmsApiClient,
    /// Optional integration with Apollo's user-defined Places. When the
    /// integration is disabled (`APOLLO_API_BASE_URL` unset), every
    /// query returns empty and the legacy Nominatim path is used as-is.
    apollo_client: ApolloClient,
    insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
    exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
    daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
    // Google Takeout data sources
    calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>>,
    location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>>,
    search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
    tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
    // Face detections (used by the get_faces_in_photo agentic tool)
    face_dao: Arc<Mutex<Box<dyn crate::faces::FaceDao>>>,
    // Knowledge memory
    knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
    // Persona settings (looked up by recall_facts_for_photo to honour
    // the per-persona "reviewed-only" strict-mode toggle).
    persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>>,
    libraries: Vec<Library>,
}

/// Per-call gating flags for `build_tool_definitions`. Tools whose backing
/// data is empty (or whose env-var guard is unset) are dropped from the
/// catalog so the LLM doesn't reach for a tool that always returns "No
/// results found." — that wastes iteration budget.
#[derive(Debug, Clone, Copy, Default)]
pub struct ToolGateOpts {
    pub has_vision: bool,
    pub apollo_enabled: bool,
    pub daily_summaries_present: bool,
    pub calendar_present: bool,
    pub location_history_present: bool,
    pub faces_present: bool,
    /// Per-persona toggle from migration 2026-05-10-000500. When
    /// false the agent's update_fact / supersede_fact tools aren't
    /// in the catalog at all — defense-in-depth so a hallucinated
    /// tool name still 404s, and the agent doesn't waste iterations
    /// trying corrections it isn't allowed to do.
    pub allow_agent_corrections: bool,
}

impl InsightGenerator {
    /// Assemble a generator from already-constructed clients and DAOs.
    /// Every argument is stored as-is in the field of the same name.
    pub fn new(
        ollama: OllamaClient,
        openrouter: Option<Arc<OpenRouterClient>>,
        llamacpp: Option<Arc<LlamaCppClient>>,
        sms_client: SmsApiClient,
        apollo_client: ApolloClient,
        insight_dao: Arc<Mutex<Box<dyn InsightDao>>>,
        exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
        daily_summary_dao: Arc<Mutex<Box<dyn DailySummaryDao>>>,
        calendar_dao: Arc<Mutex<Box<dyn CalendarEventDao>>>,
        location_dao: Arc<Mutex<Box<dyn LocationHistoryDao>>>,
        search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
        tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
        face_dao: Arc<Mutex<Box<dyn crate::faces::FaceDao>>>,
        knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
        persona_dao: Arc<Mutex<Box<dyn crate::database::PersonaDao>>>,
        libraries: Vec<Library>,
    ) -> Self {
        Self {
            ollama,
            openrouter,
            llamacpp,
            sms_client,
            apollo_client,
            insight_dao,
            exif_dao,
            daily_summary_dao,
            calendar_dao,
            location_dao,
            search_dao,
            tag_dao,
            face_dao,
            knowledge_dao,
            persona_dao,
            libraries,
        }
    }

    /// Accessor for the insight DAO (used by async job completion to
    /// look up the stored insight id).
    pub fn insight_dao(&self) -> &Arc<Mutex<Box<dyn InsightDao>>> {
        &self.insight_dao
    }

    /// Whether the optional Apollo Places integration is wired up. Drives
    /// tool-definition gating (no point offering `get_personal_place_at`
    /// when Apollo is unreachable) — exposed publicly so `insight_chat`
    /// can call `build_tool_definitions` with the same flag.
    pub fn apollo_enabled(&self) -> bool {
        self.apollo_client.is_enabled()
    }

    /// Compute the per-call tool gate options by probing each backing
    /// table for presence. `daily_summaries_present` uses a `LIMIT 1`
    /// existence probe; `calendar_present` and `location_history_present`
    /// use the existing `get_event_count` / `get_location_count`
    /// methods (small enough that a full `COUNT(*)` is fine). Meant to
    /// be called once per chat turn / generation.
`has_vision` is /// supplied by the caller because it depends on the model selected /// for this turn, not on persistent state. /// /// Also resolves the per-persona `allow_agent_corrections` flag. /// Pass `Some((user_id, persona_id))` when generating in a persona /// context (every chat turn and agentic generation does); pass /// `None` for callers that don't have one, which defaults the gate /// to closed — the conservative posture. pub fn current_gate_opts_for_persona( &self, has_vision: bool, persona: Option<(i32, &str)>, ) -> ToolGateOpts { let cx = opentelemetry::Context::new(); let calendar_present = { let mut dao = self .calendar_dao .lock() .expect("Unable to lock CalendarEventDao"); dao.get_event_count(&cx).map(|n| n > 0).unwrap_or(false) }; let location_history_present = { let mut dao = self .location_dao .lock() .expect("Unable to lock LocationHistoryDao"); dao.get_location_count(&cx).map(|n| n > 0).unwrap_or(false) }; let daily_summaries_present = { let mut dao = self .daily_summary_dao .lock() .expect("Unable to lock DailySummaryDao"); dao.has_any_summaries(&cx).unwrap_or(false) }; let faces_present = { let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao"); dao.has_any_faces(&cx).unwrap_or(false) }; let allow_agent_corrections = persona .and_then(|(uid, pid)| { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(&cx, uid, pid).ok().flatten() }) .map(|p| p.allow_agent_corrections) .unwrap_or(false); ToolGateOpts { has_vision, apollo_enabled: self.apollo_enabled(), daily_summaries_present, calendar_present, location_history_present, faces_present, allow_agent_corrections, } } /// Resolve the stored system prompt for `(user_id, persona_id)` from /// the persona store. Returns `None` when the persona row doesn't /// exist or its prompt is blank — callers fall back to their own /// default. 
Used for server-side persona resolution: a request that /// carries `persona_id` but no explicit `system_prompt` should speak /// in the persona's stored voice, not the neutral default. pub(crate) fn persona_system_prompt(&self, user_id: i32, persona_id: &str) -> Option<String> { let cx = opentelemetry::Context::new(); let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(&cx, user_id, persona_id) .ok() .flatten() .map(|p| p.system_prompt) .filter(|s| !s.trim().is_empty()) } /// Resolve `rel_path` against the configured libraries, returning the /// first root under which the file exists. Insights may be generated /// for any library — the generator itself doesn't know which — so we /// probe each root rather than trust a single `base_path`. pub(crate) fn resolve_full_path(&self, rel_path: &str) -> Option<std::path::PathBuf> { use std::path::Path; for lib in &self.libraries { let candidate = Path::new(&lib.root_path).join(rel_path); if candidate.exists() { return Some(candidate); } } None } /// Extract contact name from file path /// e.g., "Sarah/img.jpeg" -> Some("Sarah") /// e.g., "img.jpeg" -> None fn extract_contact_from_path(file_path: &str) -> Option<String> { use std::path::Path; let path = Path::new(file_path); let components: Vec<_> = path.components().collect(); // If path has at least 2 components (directory + file), extract first directory if components.len() >= 2 && let Some(component) = components.first() && let Some(os_str) = component.as_os_str().to_str() { return Some(os_str.to_string()); } None } /// Look up the EXIF row for a photo. Returns `None` when no row /// exists yet (file watcher hasn't reached it) or the DAO call /// fails. Used by callers — including the chat-bootstrap path — /// that need a few specific fields (date_taken, GPS) without /// duplicating DAO plumbing. 
pub(crate) fn fetch_exif(&self, file_path: &str) -> Option<crate::database::models::ImageExif> { let cx = opentelemetry::Context::current(); let mut dao = self.exif_dao.lock().expect("Unable to lock ExifDao"); dao.get_exif(&cx, file_path).ok().flatten() } /// Load image file, resize it, and encode as base64 for vision models /// Resizes to max 1024px on longest edge to reduce context usage pub(crate) fn load_image_as_base64(&self, file_path: &str) -> Result<String> { use image::imageops::FilterType; let full_path = self.resolve_full_path(file_path).ok_or_else(|| { anyhow::anyhow!( "File '{}' not found under any configured library", file_path ) })?; log::debug!("Loading image for vision model: {:?}", full_path); // Open and decode the image let img = image::open(&full_path) .map_err(|e| anyhow::anyhow!("Failed to open image file: {}", e))?; let (original_width, original_height) = (img.width(), img.height()); // Resize to max 1024px on longest edge let resized = img.resize(1024, 1024, FilterType::Lanczos3); log::debug!( "Resized image from {}x{} to {}x{}", original_width, original_height, resized.width(), resized.height() ); // Encode as JPEG at 85% quality let mut buffer = Vec::new(); let mut cursor = Cursor::new(&mut buffer); resized .write_to(&mut cursor, ImageFormat::Jpeg) .map_err(|e| anyhow::anyhow!("Failed to encode image as JPEG: {}", e))?; let base64_string = base64::engine::general_purpose::STANDARD.encode(&buffer); log::debug!( "Encoded image as base64 ({} bytes -> {} chars)", buffer.len(), base64_string.len() ); Ok(base64_string) } /// Find relevant messages using RAG, excluding recent messages (>30 days ago) /// This prevents RAG from returning messages already in the immediate time window async fn find_relevant_messages_rag_historical( &self, parent_cx: &opentelemetry::Context, date: chrono::NaiveDate, location: Option<&str>, contact: Option<&str>, topics: Option<&[String]>, limit: usize, extra_context: Option<&str>, ) -> Result<Vec<String>> { let 
tracer = global_tracer(); let span = tracer.start_with_context("ai.rag.filter_historical", parent_cx); let filter_cx = parent_cx.with_span(span); filter_cx .span() .set_attribute(KeyValue::new("date", date.to_string())); filter_cx .span() .set_attribute(KeyValue::new("limit", limit as i64)); filter_cx .span() .set_attribute(KeyValue::new("exclusion_window_days", 30)); if let Some(t) = topics { filter_cx .span() .set_attribute(KeyValue::new("topics", t.join(", "))); } let query_results = self .find_relevant_messages_rag(date, location, contact, topics, limit * 2, extra_context) .await?; filter_cx.span().set_attribute(KeyValue::new( "rag_results_count", query_results.len() as i64, )); // Filter out messages from within 30 days of the photo date let photo_timestamp = date .and_hms_opt(12, 0, 0) .ok_or_else(|| anyhow::anyhow!("Invalid date"))? .and_utc() .timestamp(); let exclusion_window = 30 * 86400; // 30 days in seconds let historical_only: Vec<String> = query_results .into_iter() .filter(|msg| { // Extract date from formatted daily summary "[2024-08-15] Contact ..." 
if let Some(bracket_end) = msg.find(']') && let Some(date_str) = msg.get(1..bracket_end) { // Parse just the date (daily summaries don't have time) if let Ok(msg_date) = chrono::NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { let msg_timestamp = msg_date .and_hms_opt(12, 0, 0) .unwrap() .and_utc() .timestamp(); let time_diff = (photo_timestamp - msg_timestamp).abs(); return time_diff > exclusion_window; } } false }) .take(limit) .collect(); log::info!( "Found {} historical messages (>30 days from photo date)", historical_only.len() ); filter_cx.span().set_attribute(KeyValue::new( "historical_results_count", historical_only.len() as i64, )); filter_cx.span().set_status(Status::Ok); Ok(historical_only) } /// Find relevant daily summaries using RAG (semantic search) /// Returns formatted daily summary strings for LLM context async fn find_relevant_messages_rag( &self, date: chrono::NaiveDate, location: Option<&str>, contact: Option<&str>, topics: Option<&[String]>, limit: usize, extra_context: Option<&str>, ) -> Result<Vec<String>> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); let mut span = tracer.start_with_context("ai.rag.search_daily_summaries", ¤t_cx); span.set_attribute(KeyValue::new("date", date.to_string())); span.set_attribute(KeyValue::new("limit", limit as i64)); if let Some(loc) = location { span.set_attribute(KeyValue::new("location", loc.to_string())); } if let Some(c) = contact { span.set_attribute(KeyValue::new("contact", c.to_string())); } // Build query string - prioritize topics if available (semantically meaningful) let base_query = if let Some(topics) = topics { if !topics.is_empty() { // Use topics for semantic search - these are actual content keywords let topic_str = topics.join(", "); if let Some(c) = contact { format!("Conversations about {} with {}", topic_str, c) } else { format!("Conversations about {}", topic_str) } } else { // Fallback to metadata-based query Self::build_metadata_query(date, 
location, contact) } } else { // Fallback to metadata-based query Self::build_metadata_query(date, location, contact) }; let query = if let Some(extra) = extra_context { format!("{}. {}", base_query, extra) } else { base_query }; span.set_attribute(KeyValue::new("query", query.clone())); // Create context with this span for child operations let search_cx = current_cx.with_span(span); log::info!("========================================"); log::info!("RAG QUERY: {}", query); log::info!("========================================"); // Generate embedding for the query via the configured local backend // (`LLM_BACKEND` switch). Must match the backend that populated the // daily-summary embeddings or similarity search will be garbage. let query_embedding = crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query).await?; // Search for similar daily summaries with time-based weighting // This prioritizes summaries temporally close to the query date let mut summary_dao = self .daily_summary_dao .lock() .expect("Unable to lock DailySummaryDao"); let date_str = date.format("%Y-%m-%d").to_string(); let similar_summaries = summary_dao .find_similar_summaries_with_time_weight(&search_cx, &query_embedding, &date_str, limit) .map_err(|e| anyhow::anyhow!("Failed to find similar summaries: {:?}", e))?; log::info!( "Found {} relevant daily summaries via RAG", similar_summaries.len() ); search_cx.span().set_attribute(KeyValue::new( "results_count", similar_summaries.len() as i64, )); // Format daily summaries for LLM context let formatted = similar_summaries .into_iter() .map(|s| { format!( "[{}] {} ({} messages):\n{}", s.date, s.contact, s.message_count, s.summary ) }) .collect(); search_cx.span().set_status(Status::Ok); Ok(formatted) } /// Semantic search over daily summaries for the agentic `search_rag` /// tool. 
Embeds the caller's query as-is (no metadata boilerplate) and /// only applies time weighting when an anchor date is provided — /// without one, results rank purely by similarity across all time. async fn search_summaries_semantic( &self, query: &str, date: Option<chrono::NaiveDate>, limit: usize, ) -> Result<Vec<String>> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); let mut span = tracer.start_with_context("ai.rag.search_daily_summaries", ¤t_cx); span.set_attribute(KeyValue::new("query", query.to_string())); span.set_attribute(KeyValue::new("limit", limit as i64)); span.set_attribute(KeyValue::new("time_weighted", date.is_some())); if let Some(d) = date { span.set_attribute(KeyValue::new("date", d.to_string())); } let search_cx = current_cx.with_span(span); log::info!("RAG QUERY: {} (anchor date: {:?})", query, date); // Must use the same backend that populated the daily-summary // embeddings or similarity search is garbage (see embed_one docs). let query_embedding = crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), query).await?; let mut summary_dao = self .daily_summary_dao .lock() .expect("Unable to lock DailySummaryDao"); let similar_summaries = match date { Some(d) => summary_dao.find_similar_summaries_with_time_weight( &search_cx, &query_embedding, &d.format("%Y-%m-%d").to_string(), limit, ), None => summary_dao.find_similar_summaries(&search_cx, &query_embedding, limit), } .map_err(|e| anyhow::anyhow!("Failed to find similar summaries: {:?}", e))?; search_cx.span().set_attribute(KeyValue::new( "results_count", similar_summaries.len() as i64, )); search_cx.span().set_status(Status::Ok); Ok(similar_summaries .into_iter() .map(|s| { format!( "[{}] {} ({} messages):\n{}", s.date, s.contact, s.message_count, s.summary ) }) .collect()) } /// Build a metadata-based query (fallback when no topics available) fn build_metadata_query( date: chrono::NaiveDate, location: Option<&str>, contact: Option<&str>, ) -> String 
{ let mut query_parts = Vec::new(); // Add temporal context query_parts.push(format!("On {}", date.format("%B %d, %Y"))); // Add location if available if let Some(loc) = location { query_parts.push(format!("at {}", loc)); } // Add contact context if available if let Some(c) = contact { query_parts.push(format!("conversation with {}", c)); } // Add day of week for temporal context let weekday = date.format("%A"); query_parts.push(format!("it was a {}", weekday)); query_parts.join(", ") } /// Haversine distance calculation for GPS proximity (in kilometers) fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 { const R: f64 = 6371.0; // Earth radius in km let d_lat = (lat2 - lat1).to_radians(); let d_lon = (lon2 - lon1).to_radians(); let a = (d_lat / 2.0).sin().powi(2) + lat1.to_radians().cos() * lat2.to_radians().cos() * (d_lon / 2.0).sin().powi(2); R * 2.0 * a.sqrt().atan2((1.0 - a).sqrt()) } /// Gather calendar context for photo timestamp /// Uses hybrid time + semantic search (±7 days, ranked by relevance) async fn gather_calendar_context( &self, parent_cx: &opentelemetry::Context, timestamp: i64, location: Option<&str>, ) -> Result<Option<String>> { let tracer = global_tracer(); let span = tracer.start_with_context("ai.context.calendar", parent_cx); let calendar_cx = parent_cx.with_span(span); let query_embedding = if let Some(loc) = location { match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), loc).await { Ok(emb) => Some(emb), Err(e) => { log::warn!("Failed to generate embedding for location '{}': {}", loc, e); None } } } else { None }; let events = { let mut dao = self .calendar_dao .lock() .expect("Unable to lock CalendarEventDao"); dao.find_relevant_events_hybrid( &calendar_cx, timestamp, 7, // ±7 days window query_embedding.as_deref(), 5, // Top 5 events ) .ok() }; calendar_cx.span().set_status(Status::Ok); if let Some(events) = events { if events.is_empty() { return Ok(None); } let formatted = events .iter() .map(|e| { 
let date = DateTime::from_timestamp(e.start_time, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); let attendees = e .attendees .as_ref() .and_then(|a| serde_json::from_str::<Vec<String>>(a).ok()) .map(|list| format!(" (with {})", list.join(", "))) .unwrap_or_default(); format!("[{}] {}{}", date, e.summary, attendees) }) .collect::<Vec<_>>() .join("\n"); Ok(Some(format!("Calendar events:\n{}", formatted))) } else { Ok(None) } } /// Gather location context for photo timestamp /// Finds nearest location record (±30 minutes) async fn gather_location_context( &self, parent_cx: &opentelemetry::Context, timestamp: i64, exif_gps: Option<(f64, f64)>, ) -> Result<Option<String>> { let tracer = global_tracer(); let span = tracer.start_with_context("ai.context.location", parent_cx); let location_cx = parent_cx.with_span(span); let nearest = { let mut dao = self .location_dao .lock() .expect("Unable to lock LocationHistoryDao"); dao.find_nearest_location( &location_cx, timestamp, 10800, // ±3 hours (more realistic for photo timing) ) .ok() .flatten() }; location_cx.span().set_status(Status::Ok); if let Some(loc) = nearest { // Check if this adds NEW information compared to EXIF if let Some((exif_lat, exif_lon)) = exif_gps { let distance = Self::haversine_distance(exif_lat, exif_lon, loc.latitude, loc.longitude); // Skip only if very close AND no useful activity/place info // Allow activity context even if coordinates match if distance < 0.5 && loc.activity.is_none() && loc.place_name.is_none() { log::debug!( "Location history matches EXIF GPS ({}m) with no extra context, skipping", (distance * 1000.0) as i32 ); return Ok(None); } else if distance < 0.5 { log::debug!( "Location history close to EXIF ({}m) but has activity/place info", (distance * 1000.0) as i32 ); } } let activity = loc .activity .as_ref() .map(|a| format!(" ({})", a)) .unwrap_or_default(); let place = loc .place_name .as_ref() .map(|p| 
format!(" at {}", p)) .unwrap_or_default(); Ok(Some(format!( "Location history: You were{}{}{}", activity, place, if activity.is_empty() && place.is_empty() { format!(" near {:.4}, {:.4}", loc.latitude, loc.longitude) } else { String::new() } ))) } else { Ok(None) } } /// Gather search context for photo date /// Uses semantic search on queries (±30 days, top 5 relevant) async fn gather_search_context( &self, parent_cx: &opentelemetry::Context, timestamp: i64, location: Option<&str>, contact: Option<&str>, enrichment: Option<&str>, ) -> Result<Option<String>> { let tracer = global_tracer(); let span = tracer.start_with_context("ai.context.search", parent_cx); let search_cx = parent_cx.with_span(span); // Use enrichment (topics + photo description + tags) if available; // fall back to generic temporal query. let query_text = if let Some(enriched) = enrichment { enriched.to_string() } else { // Fallback: generic temporal query format!( "searches about {} {} {}", DateTime::from_timestamp(timestamp, 0) .map(|dt| dt.format("%B %Y").to_string()) .unwrap_or_default(), location.unwrap_or(""), contact .map(|c| format!("involving {}", c)) .unwrap_or_default() ) }; let query_embedding = match crate::ai::embed_query(&self.ollama, self.llamacpp.as_deref(), &query_text).await { Ok(emb) => emb, Err(e) => { log::warn!("Failed to generate search embedding: {}", e); search_cx.span().set_status(Status::Error { description: e.to_string().into(), }); return Ok(None); } }; let searches = { let mut dao = self .search_dao .lock() .expect("Unable to lock SearchHistoryDao"); dao.find_relevant_searches_hybrid( &search_cx, timestamp, 30, // ±30 days (wider window than calendar) Some(&query_embedding), 5, // Top 5 searches ) .ok() }; search_cx.span().set_status(Status::Ok); if let Some(searches) = searches { if searches.is_empty() { log::warn!( "No relevant searches found for photo timestamp {}", timestamp ); return Ok(None); } let formatted = searches .iter() .map(|s| { let date = 
DateTime::from_timestamp(s.timestamp, 0) .map(|dt| dt.format("%Y-%m-%d").to_string()) .unwrap_or_else(|| "unknown".to_string()); format!("[{}] \"{}\"", date, s.query) }) .collect::<Vec<_>>() .join("\n"); Ok(Some(format!("Search history:\n{}", formatted))) } else { Ok(None) } } /// Combine all context sources with equal weight fn combine_contexts( sms: Option<String>, calendar: Option<String>, location: Option<String>, search: Option<String>, tags: Option<String>, ) -> String { let mut parts = Vec::new(); if let Some(s) = sms { parts.push(format!("## Messages\n{}", s)); } if let Some(c) = calendar { parts.push(format!("## Calendar\n{}", c)); } if let Some(l) = location { parts.push(format!("## Location\n{}", l)); } if let Some(s) = search { parts.push(format!("## Searches\n{}", s)); } if let Some(t) = tags { parts.push(format!("## Tags\n{}", t)); } if parts.is_empty() { "No additional context available".to_string() } else { parts.join("\n\n") } } /// Generate AI insight for a single photo with custom configuration pub async fn generate_insight_for_photo_with_config( &self, file_path: &str, custom_model: Option<String>, custom_system_prompt: Option<String>, num_ctx: Option<i32>, temperature: Option<f32>, top_p: Option<f32>, top_k: Option<i32>, min_p: Option<f32>, ) -> Result<()> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); let mut span = tracer.start_with_context("ai.insight.generate", ¤t_cx); // Normalize path to ensure consistent forward slashes in database let file_path = normalize_path(file_path); log::info!("Generating insight for photo: {}", file_path); span.set_attribute(KeyValue::new("file_path", file_path.clone())); // Create custom Ollama client if model is specified let mut ollama_client = if let Some(model) = custom_model { log::info!("Using custom model: {}", model); span.set_attribute(KeyValue::new("custom_model", model.clone())); OllamaClient::new( self.ollama.primary_url.clone(), self.ollama.fallback_url.clone(), 
model.clone(), Some(model), // Use the same custom model for fallback server ) } else { span.set_attribute(KeyValue::new("model", self.ollama.primary_model.clone())); self.ollama.clone() }; // Set context size if specified if let Some(ctx) = num_ctx { log::info!("Using custom context size: {}", ctx); span.set_attribute(KeyValue::new("num_ctx", ctx as i64)); ollama_client.set_num_ctx(Some(ctx)); } // Apply sampling parameters if any were provided if temperature.is_some() || top_p.is_some() || top_k.is_some() || min_p.is_some() { log::info!( "Using sampling params — temperature: {:?}, top_p: {:?}, top_k: {:?}, min_p: {:?}", temperature, top_p, top_k, min_p ); if let Some(t) = temperature { span.set_attribute(KeyValue::new("temperature", t as f64)); } if let Some(p) = top_p { span.set_attribute(KeyValue::new("top_p", p as f64)); } if let Some(k) = top_k { span.set_attribute(KeyValue::new("top_k", k as i64)); } if let Some(m) = min_p { span.set_attribute(KeyValue::new("min_p", m as f64)); } ollama_client.set_sampling_params(temperature, top_p, top_k, min_p); } // Create context with this span for child operations let insight_cx = current_cx.with_span(span); // 1. Get EXIF data for the photo let exif = { let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao"); exif_dao .get_exif(&insight_cx, &file_path) .map_err(|e| anyhow::anyhow!("Failed to get EXIF: {:?}", e))? 
}; // Get full timestamp for proximity-based message filtering let timestamp = if let Some(ts) = exif.as_ref().and_then(|e| e.date_taken) { ts } else { log::warn!("No date_taken in EXIF for {}, trying filename", file_path); extract_date_from_filename(&file_path) .map(|dt| dt.timestamp()) .or_else(|| { let full_path = self.resolve_full_path(&file_path)?; File::open(&full_path) .and_then(|f| f.metadata()) .inspect_err(|e| { log::warn!( "Failed to get file timestamp for insight {}: {}", file_path, e ) }) .ok() .and_then(|m| earliest_fs_time(&m)) .map(|t| DateTime::<Utc>::from(t).timestamp()) }) .unwrap_or_else(|| Utc::now().timestamp()) }; let date_taken = DateTime::from_timestamp(timestamp, 0) .map(|dt| dt.date_naive()) .unwrap_or_else(|| Utc::now().date_naive()); // 3. Extract contact name from file path let contact = Self::extract_contact_from_path(&file_path); log::info!("Extracted contact from path: {:?}", contact); insight_cx .span() .set_attribute(KeyValue::new("date_taken", date_taken.to_string())); if let Some(ref c) = contact { insight_cx .span() .set_attribute(KeyValue::new("contact", c.clone())); } // Fetch file tags (used to enrich RAG and final context) let tag_names: Vec<String> = { let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao"); dao.get_tags_for_path(&insight_cx, &file_path) .unwrap_or_else(|e| { log::warn!("Failed to fetch tags for insight {}: {}", file_path, e); Vec::new() }) .into_iter() .map(|t| t.name) .collect() }; log::info!( "Fetched {} tags for photo: {:?}", tag_names.len(), tag_names ); // 4. Get location name from GPS coordinates (needed for RAG query). // Personal Apollo Place wins over Nominatim when both apply — // "Home (My house in Cambridge) — near Cambridge, MA" is more // grounding for the LLM than the city name alone. 
let location = match exif { Some(ref exif) => { if let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude) { let lat = lat as f64; let lon = lon as f64; let nominatim = self.reverse_geocode(lat, lon).await; let apollo_primary = self .apollo_client .places_containing(lat, lon) .await .into_iter() .next(); let combined = compose_location_string(apollo_primary, nominatim, lat, lon); insight_cx .span() .set_attribute(KeyValue::new("location", combined.clone())); Some(combined) } else { None } } None => None, }; // Check if the model has vision capabilities let model_to_check = ollama_client.primary_model.clone(); let has_vision = match OllamaClient::check_model_capabilities( &ollama_client.primary_url, &model_to_check, ) .await { Ok(capabilities) => { log::info!( "Model '{}' vision capability: {}", model_to_check, capabilities.has_vision ); capabilities.has_vision } Err(e) => { log::warn!( "Failed to check vision capabilities for model '{}', assuming no vision support: {}", model_to_check, e ); false } }; insight_cx .span() .set_attribute(KeyValue::new("model_has_vision", has_vision)); // Load image and encode as base64 only if model supports vision let image_base64 = if has_vision { match self.load_image_as_base64(&file_path) { Ok(b64) => { log::info!( "Successfully loaded image for vision-capable model '{}'", model_to_check ); Some(b64) } Err(e) => { log::warn!("Failed to load image for vision model: {}", e); None } } } else { log::info!( "Model '{}' does not support vision, skipping image processing", model_to_check ); None }; // Generate brief photo description for RAG enrichment (vision models only) let photo_description: Option<String> = if let Some(ref img_b64) = image_base64 { match ollama_client.generate_photo_description(img_b64).await { Ok(desc) => { log::info!("Photo description for RAG enrichment: {}", desc); Some(desc) } Err(e) => { log::warn!( "Failed to generate photo description for RAG enrichment: {}", e ); None } } } else { None }; // 
Build enriched context string for RAG: photo description + tags // (SMS topics are passed separately to RAG functions) let enriched_query: Option<String> = { let mut parts: Vec<String> = Vec::new(); if let Some(ref desc) = photo_description { parts.push(desc.clone()); } if !tag_names.is_empty() { parts.push(format!("tags: {}", tag_names.join(", "))); } if parts.is_empty() { None } else { Some(parts.join(". ")) } }; let mut search_enrichment: Option<String> = enriched_query.clone(); // 5. Intelligent retrieval: Hybrid approach for better context let mut sms_summary = None; let mut used_rag = false; // TEMPORARY: Set to true to disable RAG and use only time-based retrieval for testing let disable_rag_for_testing = false; if disable_rag_for_testing { log::warn!("RAG DISABLED FOR TESTING - Using only time-based retrieval (±4 days)"); // Skip directly to fallback } else { // ALWAYS use Strategy B: Expanded immediate context + historical RAG // This is more reliable than pure semantic search which can match irrelevant messages log::info!("Using expanded immediate context + historical RAG approach"); // Step 1: Get FULL immediate temporal context (±4 days, ALL messages) let immediate_messages = self .sms_client .fetch_messages_for_contact(contact.as_deref(), timestamp, 4) .await .unwrap_or_else(|e| { log::error!("Failed to fetch immediate messages: {}", e); Vec::new() }); log::info!( "Fetched {} messages from ±4 days window (using ALL for immediate context)", immediate_messages.len() ); if !immediate_messages.is_empty() { // Step 2: Extract topics from immediate messages to enrich RAG query let topics = self .extract_topics_from_messages(&immediate_messages, &ollama_client) .await; log::info!("Extracted topics for query enrichment: {:?}", topics); // Build full search enrichment: SMS topics + photo description + tag names search_enrichment = { let mut parts: Vec<String> = Vec::new(); if !topics.is_empty() { parts.push(topics.join(", ")); } if let Some(ref desc) = 
photo_description { parts.push(desc.clone()); } if !tag_names.is_empty() { parts.push(format!("tags: {}", tag_names.join(", "))); } if parts.is_empty() { None } else { Some(parts.join(". ")) } }; // Step 3: Try historical RAG (>30 days ago) using extracted topics let topics_slice = if topics.is_empty() { None } else { Some(topics.as_slice()) }; match self .find_relevant_messages_rag_historical( &insight_cx, date_taken, None, contact.as_deref(), topics_slice, 10, // Top 10 historical matches enriched_query.as_deref(), ) .await { Ok(historical_messages) if !historical_messages.is_empty() => { log::info!( "Two-context approach: {} immediate (full conversation) + {} historical (similar past moments)", immediate_messages.len(), historical_messages.len() ); used_rag = true; // Step 4: Summarize contexts separately, then combine let immediate_summary = self .summarize_context_from_messages( &immediate_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await .unwrap_or_else(|| String::from("No immediate context")); let historical_summary = self .summarize_messages( &historical_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await .unwrap_or_else(|| String::from("No historical context")); // Combine summaries sms_summary = Some(format!( "Immediate context (±4 days): {}\n\nSimilar moments from the past: {}", immediate_summary, historical_summary )); } Ok(_) => { // RAG found no historical matches, just use immediate context log::info!("No historical RAG matches, using immediate context only"); sms_summary = self .summarize_context_from_messages( &immediate_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await; } Err(e) => { log::warn!("Historical RAG failed, using immediate context only: {}", e); sms_summary = self .summarize_context_from_messages( &immediate_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await; } } } else { log::info!("No immediate messages found, trying basic RAG as fallback"); // Fallback to basic RAG 
even without strong query match self .find_relevant_messages_rag( date_taken, None, contact.as_deref(), None, 20, enriched_query.as_deref(), ) .await { Ok(rag_messages) if !rag_messages.is_empty() => { used_rag = true; sms_summary = self .summarize_messages( &rag_messages, &ollama_client, custom_system_prompt.as_deref(), ) .await; } _ => {} } } } // 6. Fallback to traditional time-based message retrieval if RAG didn't work if !used_rag { log::info!("Using traditional time-based message retrieval (±4 days)"); let sms_messages = self .sms_client .fetch_messages_for_contact(contact.as_deref(), timestamp, 4) .await .unwrap_or_else(|e| { log::error!("Failed to fetch SMS messages: {}", e); Vec::new() }); log::info!( "Fetched {} SMS messages closest to {}", sms_messages.len(), DateTime::from_timestamp(timestamp, 0) .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string()) .unwrap_or_else(|| "unknown time".to_string()) ); // Summarize time-based messages if !sms_messages.is_empty() { match self .sms_client .summarize_context(&sms_messages, &ollama_client) .await { Ok(summary) => { sms_summary = Some(summary); } Err(e) => { log::warn!("Failed to summarize SMS context: {}", e); } } } } let retrieval_method = if used_rag { "RAG" } else { "time-based" }; insight_cx .span() .set_attribute(KeyValue::new("retrieval_method", retrieval_method)); insight_cx .span() .set_attribute(KeyValue::new("has_sms_context", sms_summary.is_some())); log::info!( "Photo context: date={}, location={:?}, retrieval_method={}", date_taken, location, retrieval_method ); // 6. 
Gather Google Takeout context from all sources let calendar_context = self .gather_calendar_context(&insight_cx, timestamp, location.as_deref()) .await .ok() .flatten(); let exif_gps = exif.as_ref().and_then(|e| { if let (Some(lat), Some(lon)) = (e.gps_latitude, e.gps_longitude) { Some((lat as f64, lon as f64)) } else { None } }); let location_context = self .gather_location_context(&insight_cx, timestamp, exif_gps) .await .ok() .flatten(); let search_context = self .gather_search_context( &insight_cx, timestamp, location.as_deref(), contact.as_deref(), search_enrichment.as_deref(), ) .await .ok() .flatten(); // 7. Combine all context sources with equal weight let tags_context = if tag_names.is_empty() { None } else { Some(tag_names.join(", ")) }; let combined_context = Self::combine_contexts( sms_summary, calendar_context, location_context, search_context, tags_context, ); log::info!( "Combined context from all sources ({} chars)", combined_context.len() ); // 10. Generate summary first, then derive title from the summary let summary = ollama_client .generate_photo_summary( date_taken, location.as_deref(), contact.as_deref(), Some(&combined_context), custom_system_prompt.as_deref(), image_base64.clone(), ) .await?; let title = ollama_client .generate_photo_title(&summary, custom_system_prompt.as_deref()) .await?; log::info!("Generated title: {}", title); log::info!("Generated summary: {}", summary); insight_cx .span() .set_attribute(KeyValue::new("title_length", title.len() as i64)); insight_cx .span() .set_attribute(KeyValue::new("summary_length", summary.len() as i64)); // 11. Store in database. content_hash is None here — store_insight // looks it up from image_exif before persisting; reconciliation // backfills if the hash isn't known yet. 
let insight = InsertPhotoInsight { library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), title, summary, generated_at: Utc::now().timestamp(), model_version: ollama_client.primary_model.clone(), is_current: true, training_messages: None, backend: "local".to_string(), fewshot_source_ids: None, content_hash: None, num_ctx, temperature, top_p, top_k, min_p, system_prompt: custom_system_prompt.clone(), persona_id: None, prompt_eval_count: None, eval_count: None, }; let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); let result = dao .store_insight(&insight_cx, insight) .map_err(|e| anyhow::anyhow!("Failed to store insight: {:?}", e)); match &result { Ok(_) => { log::info!("Successfully stored insight for {}", file_path); insight_cx.span().set_status(Status::Ok); } Err(e) => { log::error!("Failed to store insight: {:?}", e); insight_cx.span().set_status(Status::error(e.to_string())); } } result?; Ok(()) } /// Extract key topics/entities from messages using LLM for query enrichment async fn extract_topics_from_messages( &self, messages: &[crate::ai::SmsMessage], ollama: &OllamaClient, ) -> Vec<String> { if messages.is_empty() { return Vec::new(); } // Format a sample of messages for topic extraction let sample_size = messages.len().min(20); let user_name = user_display_name(); let sample_text: Vec<String> = messages .iter() .take(sample_size) .map(|m| { let sender: &str = if m.is_sent { &user_name } else { &m.contact }; format!("{}: {}", sender, m.body) }) .collect(); let prompt = format!( r#"Extract important entities from these messages that provide context about what was happening. Focus on: 1. **People**: Names of specific people mentioned (first names, nicknames) 2. **Places**: Locations, cities, buildings, workplaces, parks, restaurants, venues 3. **Activities**: Specific events, hobbies, groups, organizations (e.g., "drum corps", "auditions") 4. 
**Unique terms**: Domain-specific words or phrases that might need explanation (e.g., "Hyland", "Vanguard", "DCI") Messages: {} Return a comma-separated list of 3-7 specific entities (people, places, activities, unique terms). Focus on proper nouns and specific terms that provide context. Return ONLY the comma-separated list, nothing else."#, sample_text.join("\n") ); match ollama .generate(&prompt, Some("You are an entity extraction assistant. Extract proper nouns, people, places, and domain-specific terms that provide context.")) .await { Ok(response) => { log::debug!("Topic extraction raw response: {}", response); // Parse comma-separated topics let topics: Vec<String> = response .split(',') .map(|s| s.trim().to_string()) .filter(|s| !s.is_empty() && s.len() > 1) // Filter out single chars .take(7) // Increased from 5 to 7 .collect(); if topics.is_empty() { log::warn!("Topic extraction returned empty list from {} messages", messages.len()); } else { log::info!("Extracted {} topics from {} messages: {}", topics.len(), messages.len(), topics.join(", ")); } topics } Err(e) => { log::warn!("Failed to extract topics from messages: {}", e); Vec::new() } } } /// Summarize pre-formatted message strings using LLM (concise version for historical context) async fn summarize_messages( &self, messages: &[String], ollama: &OllamaClient, custom_system: Option<&str>, ) -> Option<String> { if messages.is_empty() { return None; } let messages_text = messages.join("\n"); let prompt = format!( r#"Summarize the context from these messages in 2-3 sentences. Focus on activities, locations, events, and relationships mentioned. Messages: {} Return ONLY the summary, nothing else."#, messages_text ); let system = custom_system .unwrap_or("You are a context summarization assistant. 
Be concise and factual."); match ollama.generate(&prompt, Some(system)).await { Ok(summary) => Some(summary), Err(e) => { log::warn!("Failed to summarize messages: {}", e); None } } } /// Convert SmsMessage objects to formatted strings and summarize with more detail /// This is used for immediate context (±2 days) to preserve conversation details async fn summarize_context_from_messages( &self, messages: &[crate::ai::SmsMessage], ollama: &OllamaClient, custom_system: Option<&str>, ) -> Option<String> { if messages.is_empty() { return None; } // Format messages let user_name = user_display_name(); let formatted: Vec<String> = messages .iter() .map(|m| { let sender: &str = if m.is_sent { &user_name } else { &m.contact }; let timestamp = chrono::DateTime::from_timestamp(m.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown time".to_string()); format!("[{}] {}: {}", timestamp, sender, m.body) }) .collect(); let messages_text = formatted.join("\n"); // Use a more detailed prompt for immediate context let prompt = format!( r#"Provide a detailed summary of the conversation context from these messages. Include: - Key activities, events, and plans discussed - Important locations or places mentioned - Emotional tone and relationship dynamics - Any significant details that provide context about what was happening Be thorough but organized. Messages: {} Return ONLY the summary, nothing else."#, messages_text ); let system = custom_system.unwrap_or( "You are a context summarization assistant. Be detailed and factual, preserving important context.", ); match ollama.generate(&prompt, Some(system)).await { Ok(summary) => Some(summary), Err(e) => { log::warn!("Failed to summarize immediate context: {}", e); None } } } // ── Tool executors for agentic loop ──────────────────────────────── /// Dispatch a tool call to the appropriate executor. 
/// /// `(user_id, persona_id)` identifies the author this loop is /// generating for — `store_fact` tags new facts with both, /// `recall_facts_for_photo` filters reads to both (always Single /// in the agentic loop, even when the persona has /// `include_all_memories=true`; the hive-mind toggle is for human /// browsing of `/knowledge/*`, where mixing voices is the explicit /// goal — during generation the persona's own voice must stay /// clean). The composite (user_id, persona_id) is required for the /// FK to personas to hold (migration 2026-05-10-000000). pub(crate) async fn execute_tool( &self, tool_name: &str, arguments: &serde_json::Value, backend: &ResolvedBackend, image_base64: &Option<String>, file_path: &str, user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { let model = backend.model(); let backend_label = backend.kind.as_str(); let result = match tool_name { "search_rag" => self.tool_search_rag(arguments, backend.local(), cx).await, "search_messages" => self.tool_search_messages(arguments, cx).await, "get_sms_messages" => self.tool_get_sms_messages(arguments, cx).await, "get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await, "get_location_history" => self.tool_get_location_history(arguments, cx).await, "get_file_tags" => self.tool_get_file_tags(arguments, cx).await, "get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await, "describe_photo" => { self.tool_describe_photo(backend.local(), image_base64) .await } "reverse_geocode" => self.tool_reverse_geocode(arguments).await, "get_personal_place_at" => self.tool_get_personal_place_at(arguments).await, "recall_entities" => self.tool_recall_entities(arguments, cx).await, "recall_facts_for_photo" => { self.tool_recall_facts_for_photo(arguments, user_id, persona_id, cx) .await } "recall_facts_for_entity" => { self.tool_recall_facts_for_entity(arguments, user_id, persona_id, cx) .await } "store_entity" => self.tool_store_entity(arguments, 
cx).await, "store_fact" => { self.tool_store_fact( arguments, file_path, user_id, persona_id, model, backend_label, cx, ) .await } "update_fact" => { self.tool_update_fact(arguments, user_id, persona_id, model, backend_label, cx) .await } "supersede_fact" => { self.tool_supersede_fact(arguments, user_id, persona_id, model, backend_label, cx) .await } "get_current_datetime" => Self::tool_get_current_datetime(), unknown => format!("Unknown tool: {}", unknown), }; if result.starts_with("Error") || result.starts_with("No ") { log::warn!("Tool '{}' result: {}", tool_name, result); } else { log::info!("Tool '{}' result: {} chars", tool_name, result.len()); } result } /// Tool: search_rag — semantic search over daily summaries async fn tool_search_rag( &self, args: &serde_json::Value, local: &dyn LlmClient, _cx: &opentelemetry::Context, ) -> String { let query = match args.get("query").and_then(|v| v.as_str()) { Some(q) => q.to_string(), None => return "Error: missing required parameter 'query'".to_string(), }; let date = match args.get("date").and_then(|v| v.as_str()) { Some(d) => match NaiveDate::parse_from_str(d, "%Y-%m-%d") { Ok(d) => Some(d), Err(e) => return format!("Error: failed to parse date '{}': {}", d, e), }, None => None, }; let contact = args .get("contact") .and_then(|v| v.as_str()) .map(|s| s.to_string()); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(10) .clamp(1, 25) as usize; log::info!( "tool_search_rag: query='{}', date={:?}, contact={:?}, limit={}", query, date, contact, limit ); // Pull a wider candidate pool than the final limit so the LLM // reranker has room to promote less-obvious hits. Candidates_factor // is capped so a big `limit` doesn't blow past what the reranker // can sensibly judge in one prompt. 
/// LLM-based reranker: ask the local model to pick the top-`limit`
/// passages from `candidates` that are most relevant to `query`.
/// Returns the reordered subset (at most `limit` entries).
///
/// Cheap-ish because the reranker prompt and output live outside the
/// agent's visible context — only the final selection lands in the
/// tool_result.
///
/// # Errors
///
/// Returns `Err` when the LLM call fails or when the response contains
/// no in-range passage numbers. This function does not fall back by
/// itself; `tool_search_rag` handles the error by using vector order.
async fn rerank_with_llm(
    &self,
    query: &str,
    candidates: &[String],
    limit: usize,
    local: &dyn LlmClient,
) -> Result<Vec<String>> {
    // Log only a short prefix of the query to keep the log line bounded.
    let query_preview: String = query.chars().take(60).collect();
    log::info!(
        "rerank: {} candidates -> top {} (query=\"{}\")",
        candidates.len(),
        limit,
        query_preview
    );

    // Build numbered list (1-based for readability). Cap each passage
    // at ~1000 chars so very long summaries don't eat the prompt.
    // Counting and truncating by `chars()` keeps the cut on a character
    // boundary.
    let numbered: String = candidates
        .iter()
        .enumerate()
        .map(|(i, c)| {
            let trimmed = if c.chars().count() > 1000 {
                format!("{}…", c.chars().take(1000).collect::<String>())
            } else {
                c.clone()
            };
            format!("[{}] {}", i + 1, trimmed)
        })
        .collect::<Vec<_>>()
        .join("\n\n");

    let prompt = format!(
        "You are ranking search results. From the numbered passages below, \
         select the {} most relevant to the query. Respond with ONLY a \
         comma-separated list of passage numbers in order from most to \
         least relevant. No explanation, no other text.\n\n\
         Query: {}\n\n\
         Passages:\n{}\n\n\
         Top {} passage numbers:",
        limit, query, numbered, limit
    );

    let started = std::time::Instant::now();
    let system = Some("You are a terse relevance ranker. You output only numbers separated by commas.");
    let response = local.generate(&prompt, system, None).await?;
    log::info!(
        "rerank: finished in {} ms (prompt={} chars)",
        started.elapsed().as_millis(),
        prompt.len()
    );

    // Extract indices from the response. Accept "3, 1, 7" and also
    // tolerate "[3, 1, 7]" or "3,1,7,..." with trailing junk.
    // Splitting on every non-digit yields empty fragments, which fail to
    // parse and are dropped; out-of-range numbers are dropped too.
    let picks: Vec<usize> = response
        .split(|c: char| !c.is_ascii_digit())
        .filter_map(|s| s.parse::<usize>().ok())
        .filter(|&n| n >= 1 && n <= candidates.len())
        .collect();

    if picks.is_empty() {
        return Err(anyhow::anyhow!(
            "reranker returned no usable indices (raw: {})",
            response.chars().take(120).collect::<String>()
        ));
    }

    // `seen` holds 1-based indices already emitted, so a number the model
    // repeats is only used once.
    let mut seen = std::collections::HashSet::new();
    let mut reordered: Vec<String> = Vec::with_capacity(limit);
    let mut final_indices: Vec<usize> = Vec::with_capacity(limit);
    for n in picks {
        if seen.insert(n) {
            reordered.push(candidates[n - 1].clone());
            final_indices.push(n);
            if reordered.len() >= limit {
                break;
            }
        }
    }

    // Top-up from original order if the reranker returned fewer than
    // `limit` distinct entries.
    if reordered.len() < limit {
        for (i, c) in candidates.iter().enumerate() {
            if !seen.contains(&(i + 1)) {
                reordered.push(c.clone());
                final_indices.push(i + 1);
                if reordered.len() >= limit {
                    break;
                }
            }
        }
    }

    // Debug snapshot: show what the reranker changed. Position p holds
    // the 1-based index of the candidate that now sits at position p.
    // A value that equals its position means "no change at that slot".
    let swapped = final_indices
        .iter()
        .enumerate()
        .filter(|(pos, idx)| **idx != pos + 1)
        .count();
    log::info!(
        "rerank: final indices (1-based): {:?} — {} of top {} swapped from vector order",
        final_indices,
        swapped,
        final_indices.len()
    );
    // Before/after previews of at most five passages, at debug level only.
    let show = final_indices.len().min(5);
    log::debug!("rerank: vector-order top {}:", show);
    for (i, c) in candidates.iter().enumerate().take(show) {
        let preview: String = c.chars().take(100).collect();
        log::debug!("rerank:   [{}] {}", i + 1, preview);
    }
    log::debug!("rerank: reranked top {}:", show);
    for (pos, idx) in final_indices.iter().enumerate().take(show) {
        let preview: String = candidates[*idx - 1].chars().take(100).collect();
        log::debug!("rerank:   [{}] (orig #{}) {}", pos + 1, idx, preview);
    }

    Ok(reordered)
}
if reordered.len() < limit { for (i, c) in candidates.iter().enumerate() { if !seen.contains(&(i + 1)) { reordered.push(c.clone()); final_indices.push(i + 1); if reordered.len() >= limit { break; } } } } // Debug snapshot: show what the reranker changed. Position p holds // the 1-based index of the candidate that now sits at position p. // A value that equals its position means "no change at that slot". let swapped = final_indices .iter() .enumerate() .filter(|(pos, idx)| **idx != pos + 1) .count(); log::info!( "rerank: final indices (1-based): {:?} — {} of top {} swapped from vector order", final_indices, swapped, final_indices.len() ); let show = final_indices.len().min(5); log::debug!("rerank: vector-order top {}:", show); for (i, c) in candidates.iter().enumerate().take(show) { let preview: String = c.chars().take(100).collect(); log::debug!("rerank: [{}] {}", i + 1, preview); } log::debug!("rerank: reranked top {}:", show); for (pos, idx) in final_indices.iter().enumerate().take(show) { let preview: String = candidates[*idx - 1].chars().take(100).collect(); log::debug!("rerank: [{}] (orig #{}) {}", pos + 1, idx, preview); } Ok(reordered) } /// Tool: search_messages — keyword / semantic / hybrid search over all /// SMS message bodies via the Django FTS5 + embeddings pipeline. Now /// supports optional `contact_id`, `start_ts`, `end_ts` filters. async fn tool_search_messages( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let query = match args.get("query").and_then(|v| v.as_str()) { Some(q) if !q.trim().is_empty() => q.trim(), _ => { // Common LLM mistake: calling search_messages with // { date, ... } as if it were date-browsing. The two // tools share the "messages" word, and search_messages // sounds like the natural verb. Instead of returning // an error and burning a turn, transparently route to // get_sms_messages when there's a `date` (and a // contact-name string, optional). 
The LLM gets real // data on its first try; the result is logged with a // routing note so a human reading the trace can see // what happened. let has_date_str = args .get("date") .and_then(|v| v.as_str()) .map(|s| !s.trim().is_empty()) .unwrap_or(false); let has_numeric_contact = args.get("contact_id").is_some(); let has_ts_window = args.get("start_ts").is_some() || args.get("end_ts").is_some(); if has_date_str && !has_numeric_contact && !has_ts_window { log::info!( "search_messages with `date` and no `query` — routing to get_sms_messages" ); let routed = self.tool_get_sms_messages(args, cx).await; return format!( "(Note: routed to get_sms_messages — search_messages requires a \ `query`; date-only browsing belongs on get_sms_messages. Prefer \ get_sms_messages directly next time.)\n\n{}", routed ); } let has_contact_name = args.get("contact").and_then(|v| v.as_str()).is_some(); if has_ts_window || has_numeric_contact || has_contact_name { return "Error: search_messages needs a 'query' (keywords/phrase). \ To fetch messages around a date or from a contact without keywords, \ call get_sms_messages with { date, contact? } instead." 
.to_string(); } return "Error: missing required parameter 'query'".to_string(); } }; if query.len() < 3 { return "Error: query must be at least 3 characters".to_string(); } let mode = args .get("mode") .and_then(|v| v.as_str()) .map(|s| s.to_lowercase()) .unwrap_or_else(|| "hybrid".to_string()); if !matches!(mode.as_str(), "fts5" | "semantic" | "hybrid") { return format!( "Error: unknown mode '{}'; expected one of: fts5, semantic, hybrid", mode ); } let user_limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(20) .clamp(1, 50) as usize; let contact_id = args.get("contact_id").and_then(|v| v.as_i64()); let contact = args .get("contact") .and_then(|v| v.as_str()) .map(|s| s.trim().to_string()) .filter(|s| !s.is_empty()) .filter(|_| contact_id.is_none()); let start_ts = args.get("start_ts").and_then(|v| v.as_i64()); let end_ts = args.get("end_ts").and_then(|v| v.as_i64()); let is_mms = args.get("is_mms").and_then(|v| v.as_bool()); let has_media = args.get("has_media").and_then(|v| v.as_bool()); let has_date_filter = start_ts.is_some() || end_ts.is_some(); log::info!( "tool_search_messages: query='{}', mode={}, contact_id={:?}, contact={:?}, range=[{:?}, {:?}], is_mms={:?}, has_media={:?}, limit={}", query, mode, contact_id, contact, start_ts, end_ts, is_mms, has_media, user_limit ); // SMS-API applies all of date/contact/mms/media filtering and // pagination server-side, so the response is already exactly the // page we want — no over-fetch, no client-side post-filter. let params = SmsSearchParams { mode: mode.as_str(), limit: user_limit, contact_id, contact, date_from: start_ts, date_to: end_ts, is_mms, has_media, offset: None, }; let hits = match self.sms_client.search_messages(query, ¶ms).await { Ok(h) => h, Err(e) => return format!("Error searching messages: {}", e), }; if hits.is_empty() { return "No messages matched.".to_string(); } Self::format_search_hits(&hits, &mode, has_date_filter) } /// Render a list of [`SmsSearchHit`] for the LLM. 
Prefers the SMS-API /// snippet (which already excerpts the matched span and is the only /// preview MMS-attachment-only matches have) over the full body, and /// strips the `` tags the snippet ships with. Each line names /// both parties (`sender → recipient`) — results can span multiple /// conversations, and a sender-only label leaves sent messages /// unattributable to a thread. fn format_search_hits(hits: &[SmsSearchHit], mode: &str, date_filtered: bool) -> String { let user_name = user_display_name(); let mut out = String::new(); out.push_str(&format!( "Found {} messages (mode: {}{}, sender → recipient):\n\n", hits.len(), mode, if date_filtered { ", date-filtered" } else { "" } )); for h in hits { let date = chrono::DateTime::from_timestamp(h.date, 0) .map(|dt| dt.format("%Y-%m-%d").to_string()) .unwrap_or_else(|| h.date.to_string()); let direction = if h.type_ == 2 { format!("{} → {}", user_name, h.contact_name) } else { format!("{} → {}", h.contact_name, user_name) }; let score = h .similarity_score .map(|s| format!(" [score {:.2}]", s)) .unwrap_or_default(); let preview = match h.snippet.as_deref() { Some(s) if !s.is_empty() => Self::strip_mark_tags(s), _ => h.body.clone(), }; out.push_str(&format!( "[{}]{} {} — {}\n\n", date, score, direction, preview )); } out } /// Strip the `` / `` highlight tags that SMS-API wraps /// matched terms in. The tags carry no signal beyond what the query /// already conveys to the LLM, and leaving them in adds prompt noise. 
/// Strip the `<mark>` / `</mark>` highlight tags that SMS-API wraps
/// matched terms in. The tags carry no signal beyond what the query
/// already conveys to the LLM, and leaving them in adds prompt noise.
///
/// Only these two exact tag spellings are removed; all other text,
/// including any other markup, is returned unchanged.
fn strip_mark_tags(s: &str) -> String {
    // The tag literals had been reduced to empty strings, which made both
    // `replace` calls no-ops.
    s.replace("<mark>", "").replace("</mark>", "")
}
let formatted: Vec<String> = messages .iter() .take(limit) .map(|m| { let direction = if m.is_sent { format!("{} → {}", user_name, m.contact) } else { format!("{} → {}", m.contact, user_name) }; let ts = DateTime::from_timestamp(m.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); format!("[{}] {}: {}", ts, direction, m.body) }) .collect(); format!( "{}\n{}", Self::found_header(messages.len(), formatted.len(), "messages"), formatted.join("\n") ) } Ok(_) => "No messages found.".to_string(), Err(e) => { log::warn!("tool_get_sms_messages failed: {}", e); format!("Error fetching SMS messages: {}", e) } } } /// Tool: get_calendar_events — fetch calendar events near a date async fn tool_get_calendar_events( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let date_str = match args.get("date").and_then(|v| v.as_str()) { Some(d) => d, None => return "Error: missing required parameter 'date'".to_string(), }; let days_radius = args .get("days_radius") .and_then(|v| v.as_i64()) .unwrap_or(7) .clamp(1, 30); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(20) .clamp(1, 50) as usize; let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { Ok(d) => d, Err(e) => return format!("Error: failed to parse date '{}': {}", date_str, e), }; let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp(); log::info!( "tool_get_calendar_events: date={}, days_radius={}, limit={}", date, days_radius, limit ); let events = { let mut dao = self .calendar_dao .lock() .expect("Unable to lock CalendarEventDao"); dao.find_relevant_events_hybrid(cx, timestamp, days_radius, None, limit) .ok() }; match events { Some(evts) if !evts.is_empty() => { let formatted: Vec<String> = evts .iter() .map(|e| { let dt = DateTime::from_timestamp(e.start_time, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| 
"unknown".to_string()); let loc = e .location .as_ref() .map(|l| format!(" at {}", l)) .unwrap_or_default(); let attendees = e .attendees .as_ref() .and_then(|a| serde_json::from_str::<Vec<String>>(a).ok()) .map(|list| format!(" (with {})", list.join(", "))) .unwrap_or_default(); format!("[{}] {}{}{}", dt, e.summary, loc, attendees) }) .collect(); format!( "Found {} calendar events:\n{}", evts.len(), formatted.join("\n") ) } Some(_) => "No calendar events found.".to_string(), None => "No calendar events found.".to_string(), } } /// Tool: get_location_history — fetch location records near a date async fn tool_get_location_history( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let date_str = match args.get("date").and_then(|v| v.as_str()) { Some(d) => d, None => return "Error: missing required parameter 'date'".to_string(), }; let days_radius = args .get("days_radius") .and_then(|v| v.as_i64()) .unwrap_or(14) .clamp(1, 30); let date = match NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { Ok(d) => d, Err(e) => return format!("Error: failed to parse date '{}': {}", date_str, e), }; let timestamp = date.and_hms_opt(12, 0, 0).unwrap().and_utc().timestamp(); log::info!( "tool_get_location_history: date={}, days_radius={}", date, days_radius ); let start_ts = timestamp - (days_radius * 86400); let end_ts = timestamp + (days_radius * 86400); let locations = { let mut dao = self .location_dao .lock() .expect("Unable to lock LocationHistoryDao"); dao.find_locations_in_range(cx, start_ts, end_ts).ok() }; match locations { Some(locs) if !locs.is_empty() => { let formatted: Vec<String> = locs .iter() .take(LOCATION_HISTORY_DISPLAY_LIMIT) .map(|loc| { let dt = DateTime::from_timestamp(loc.timestamp, 0) .map(|dt| { dt.with_timezone(&Local) .format("%Y-%m-%d %H:%M") .to_string() }) .unwrap_or_else(|| "unknown".to_string()); let activity = loc .activity .as_ref() .map(|a| format!(" ({})", a)) .unwrap_or_default(); let place = loc .place_name .as_ref() 
.map(|p| format!(" at {}", p)) .unwrap_or_default(); format!( "[{}] {:.4}, {:.4}{}{}", dt, loc.latitude, loc.longitude, place, activity ) }) .collect(); format!( "{}\n{}", Self::found_header(locs.len(), formatted.len(), "location records"), formatted.join("\n") ) } Some(_) => "No location history found.".to_string(), None => "No location history found.".to_string(), } } /// Render a `Found N <noun>:` tool-result header, annotating when only /// the first `shown` of `total` items are listed below it. Without the /// annotation the model believes it saw everything ("Found 312 /// messages:" followed by 60 lines) and reasons from a silently /// truncated list. `summarize_tool_result` keeps parsing the leading /// count either way. fn found_header(total: usize, shown: usize, noun: &str) -> String { if shown < total { format!("Found {} {} (showing first {}):", total, noun, shown) } else { format!("Found {} {}:", total, noun) } } /// Tool: get_file_tags — fetch tags for a file path async fn tool_get_file_tags( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let file_path = match args.get("file_path").and_then(|v| v.as_str()) { Some(p) => p.to_string(), None => return "Error: missing required parameter 'file_path'".to_string(), }; log::info!("tool_get_file_tags: file_path='{}'", file_path); let tags = { let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao"); dao.get_tags_for_path(cx, &file_path).ok() }; match tags { Some(t) if !t.is_empty() => { let names: Vec<String> = t.into_iter().map(|tag| tag.name).collect(); names.join(", ") } Some(_) => "No tags found.".to_string(), None => "No tags found.".to_string(), } } /// Tool: get_faces_in_photo — list face detections + person names for /// the given file path. Resolves rel_path → content_hash via FaceDao, /// then queries face_detections joined with persons (status='detected' /// only). Returns a compact bullet list keyed for human-LLM readability. 
async fn tool_get_faces_in_photo( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { let file_path = match args.get("file_path").and_then(|v| v.as_str()) { Some(p) if !p.trim().is_empty() => p.trim().to_string(), _ => return "Error: missing required parameter 'file_path'".to_string(), }; log::info!("tool_get_faces_in_photo: file_path='{}'", file_path); // Resolve content_hash from any library that has this rel_path. // Hold the FaceDao lock once across all libraries — resolve_content_hash // is synchronous and there's no await in the loop body. let content_hash: Option<String> = { let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao"); self.libraries.iter().find_map(|lib| { dao.resolve_content_hash(cx, lib.id, &file_path) .ok() .flatten() }) }; let Some(content_hash) = content_hash else { return "No content_hash found for that file path (the photo may not be indexed yet, \ or the path doesn't match any library)." .to_string(); }; let faces = { let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao"); match dao.list_for_content_hash(cx, &content_hash) { Ok(rows) => rows, Err(e) => return format!("Error querying faces: {}", e), } }; if faces.is_empty() { return "No faces detected in this photo.".to_string(); } // Render: bound faces grouped by person first, then unbound. The // model uses the bound names directly; the unbound count + bbox // helps it count people without naming them. let bound: Vec<&_> = faces.iter().filter(|f| f.person_name.is_some()).collect(); let unbound: Vec<&_> = faces.iter().filter(|f| f.person_name.is_none()).collect(); let mut out = format!("Found {} face(s) in this photo:\n", faces.len()); for f in &bound { // Invariant: `bound` is filtered on `person_name.is_some()` above. 
let name = f .person_name .as_deref() .expect("bound face must have a name"); out.push_str(&format!( "- {} (confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {})\n", name, f.confidence, f.bbox_x, f.bbox_y, f.bbox_w, f.bbox_h, f.source, )); } for f in &unbound { out.push_str(&format!( "- (unidentified) confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {}\n", f.confidence, f.bbox_x, f.bbox_y, f.bbox_w, f.bbox_h, f.source, )); } out } async fn tool_describe_photo( &self, local: &dyn LlmClient, image_base64: &Option<String>, ) -> String { log::info!("tool_describe_photo: generating visual description"); match image_base64 { Some(img) => match local.describe_image(img).await { Ok(desc) => desc, Err(e) => format!("Error describing photo: {}", e), }, None => "No image available for description.".to_string(), } } /// Tool: reverse_geocode — convert GPS coordinates to a human-readable place name async fn tool_reverse_geocode(&self, args: &serde_json::Value) -> String { let lat = match args.get("latitude").and_then(|v| v.as_f64()) { Some(v) => v, None => return "Error: missing required parameter 'latitude'".to_string(), }; let lon = match args.get("longitude").and_then(|v| v.as_f64()) { Some(v) => v, None => return "Error: missing required parameter 'longitude'".to_string(), }; log::info!("tool_reverse_geocode: lat={}, lon={}", lat, lon); match self.reverse_geocode(lat, lon).await { Some(place) => place, None => "Could not resolve coordinates to a place name.".to_string(), } } /// Tool: get_personal_place_at — look up the user's named places (Home, /// Work, Cabin) at a coordinate. Server-side filter; results sorted /// smallest-radius first. 
async fn tool_get_personal_place_at(&self, args: &serde_json::Value) -> String { if !self.apollo_client.is_enabled() { return "Personal place lookup is disabled.".to_string(); } let lat = match args.get("latitude").and_then(|v| v.as_f64()) { Some(v) => v, None => return "Error: missing required parameter 'latitude'".to_string(), }; let lon = match args.get("longitude").and_then(|v| v.as_f64()) { Some(v) => v, None => return "Error: missing required parameter 'longitude'".to_string(), }; log::info!("tool_get_personal_place_at: lat={}, lon={}", lat, lon); let places = self.apollo_client.places_containing(lat, lon).await; if places.is_empty() { return "No personal place contains this coordinate.".to_string(); } places .iter() .map(|p| { let category = p .category .as_deref() .map(|c| format!(" [{c}]")) .unwrap_or_default(); let desc = if p.description.is_empty() { "(no description)".to_string() } else { p.description.clone() }; format!( "- {}{}: {} (radius {} m)", p.name, category, desc, p.radius_m ) }) .collect::<Vec<_>>() .join("\n") } /// Tool: recall_entities — search the knowledge memory for known entities async fn tool_recall_entities( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { use crate::database::EntityFilter; let name_search = args .get("name") .and_then(|v| v.as_str()) .map(|s| s.to_string()); let entity_type = args .get("entity_type") .and_then(|v| v.as_str()) .map(|s| s.to_string()); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(20) .clamp(1, 50); log::info!( "tool_recall_entities: name={:?}, type={:?}, limit={}", name_search, entity_type, limit ); // Status scope mirrors recall_facts_for_photo: by default // include both 'active' (agent-generated, not yet reviewed) // and 'reviewed' (human-verified). 
The DAO list_entities only // takes a single status string, so use 'all' here and filter // out 'rejected' below — slightly broader but fine for the // recall use case (entities are global, persona-agnostic). let filter = EntityFilter { entity_type, status: Some("all".to_string()), search: name_search, limit, offset: 0, }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); match kdao.list_entities(cx, filter) { Ok((entities, _total)) if entities.iter().all(|e| e.status == "rejected") => { "No known entities found matching the query.".to_string() } Ok((entities, _total)) => { let lines: Vec<String> = entities .iter() .filter(|e| e.status != "rejected") .map(|e| { format!( "ID:{} | {} | {} | {} | confidence:{:.2}", e.id, e.entity_type, e.name, e.description, e.confidence ) }) .collect(); if lines.is_empty() { "No known entities found matching the query.".to_string() } else { format!("Known entities:\n{}", lines.join("\n")) } } Err(e) => format!("Error recalling entities: {:?}", e), } } /// Tool: recall_facts_for_photo — retrieve facts linked to a specific photo async fn tool_recall_facts_for_photo( &self, args: &serde_json::Value, user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::PersonaFilter; let persona_filter = PersonaFilter::Single { user_id, persona_id: persona_id.to_string(), }; let file_path = match args.get("file_path").and_then(|v| v.as_str()) { Some(p) => p.to_string(), None => return "Error: missing required parameter 'file_path'".to_string(), }; log::info!("tool_recall_facts_for_photo: file_path={}", file_path); // Resolve the persona's reviewed-only-mode flag once. If the // persona row is missing (shouldn't happen — composite FK // enforces existence on writes), fall back to the permissive // default of active+reviewed. 
let reviewed_only = { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(cx, user_id, persona_id) .ok() .flatten() .map(|p| p.reviewed_only_facts) .unwrap_or(false) }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); // Fetch photo links to find which entities appear in this photo let links = match kdao.get_links_for_photo(cx, &file_path) { Ok(l) => l, Err(e) => return format!("Error fetching photo links: {:?}", e), }; if links.is_empty() { return "No knowledge facts found for this photo.".to_string(); } let mut output_lines = Vec::new(); let entity_ids: Vec<i32> = links.iter().map(|l| l.entity_id).collect(); // For each linked entity, fetch its facts for entity_id in entity_ids { if let Ok(entity) = kdao.get_entity_by_id(cx, entity_id) && let Some(e) = entity { let role = links .iter() .find(|l| l.entity_id == entity_id) .map(|l| l.role.as_str()) .unwrap_or("subject"); output_lines.push(format!( "Entity: {} ({}, role: {})", e.name, e.entity_type, role )); if let Ok(facts) = kdao.get_facts_for_entity(cx, entity_id, &persona_filter) { // Default scope: active + reviewed (everything not // rejected / superseded). Strict mode trims to // reviewed only — the persona has opted into seeing // exclusively human-verified facts. 
let allow = |s: &str| -> bool { if reviewed_only { s == "reviewed" } else { s == "active" || s == "reviewed" } }; for f in facts.iter().filter(|f| allow(&f.status)) { let obj = if let Some(ref v) = f.object_value { v.clone() } else if let Some(oid) = f.object_entity_id { kdao.get_entity_by_id(cx, oid) .ok() .flatten() .map(|e| format!("{} (entity ID: {})", e.name, e.id)) .unwrap_or_else(|| format!("entity:{}", oid)) } else { "(unknown)".to_string() }; output_lines.push(format!(" - {} {}", f.predicate, obj)); } } } } if output_lines.is_empty() { "No active knowledge facts found for this photo.".to_string() } else { format!("Knowledge for this photo:\n{}", output_lines.join("\n")) } } /// Tool: recall_facts_for_entity — retrieve facts for one entity by id. /// Persona scoping and the reviewed-only-facts filter mirror /// `tool_recall_facts_for_photo` exactly: reads are always Single /// (user_id, persona_id), and strict-mode personas see only /// human-reviewed facts. async fn tool_recall_facts_for_entity( &self, args: &serde_json::Value, user_id: i32, persona_id: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::PersonaFilter; let persona_filter = PersonaFilter::Single { user_id, persona_id: persona_id.to_string(), }; let entity_id = match args.get("entity_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'entity_id'".to_string(), }; let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(50) .clamp(1, 100) as usize; log::info!( "tool_recall_facts_for_entity: entity_id={}, limit={}", entity_id, limit ); // Resolve the persona's reviewed-only-mode flag once — identical // fallback semantics to recall_facts_for_photo (missing persona // row → permissive active+reviewed default). 
let reviewed_only = { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(cx, user_id, persona_id) .ok() .flatten() .map(|p| p.reviewed_only_facts) .unwrap_or(false) }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); let entity = match kdao.get_entity_by_id(cx, entity_id) { Ok(Some(e)) => e, Ok(None) => return format!("No entity found with ID {}.", entity_id), Err(e) => return format!("Error fetching entity: {:?}", e), }; let mut output_lines = vec![format!("Entity: {} ({})", entity.name, entity.entity_type)]; match kdao.get_facts_for_entity(cx, entity_id, &persona_filter) { Ok(facts) => { // Default scope: active + reviewed. Strict mode trims to // reviewed only — same allow rule as recall_facts_for_photo. let allow = |s: &str| -> bool { if reviewed_only { s == "reviewed" } else { s == "active" || s == "reviewed" } }; for f in facts.iter().filter(|f| allow(&f.status)).take(limit) { let obj = if let Some(ref v) = f.object_value { v.clone() } else if let Some(oid) = f.object_entity_id { kdao.get_entity_by_id(cx, oid) .ok() .flatten() .map(|e| format!("{} (entity ID: {})", e.name, e.id)) .unwrap_or_else(|| format!("entity:{}", oid)) } else { "(unknown)".to_string() }; output_lines.push(format!(" - {} {}", f.predicate, obj)); } } Err(e) => return format!("Error fetching facts: {:?}", e), } if output_lines.len() == 1 { format!( "No active knowledge facts found for entity {} (ID: {}).", entity.name, entity_id ) } else { format!("Knowledge for this entity:\n{}", output_lines.join("\n")) } } /// Tool: store_entity — upsert an entity into the knowledge memory. /// Embeddings go through the configured local backend (`LLM_BACKEND`), /// independent of the per-request chat backend in the caller. 
async fn tool_store_entity( &self, args: &serde_json::Value, cx: &opentelemetry::Context, ) -> String { use crate::database::models::InsertEntity; let name = match args.get("name").and_then(|v| v.as_str()) { Some(n) => n.to_string(), None => return "Error: missing required parameter 'name'".to_string(), }; let entity_type = match args.get("entity_type").and_then(|v| v.as_str()) { Some(t) => t.to_string(), None => return "Error: missing required parameter 'entity_type'".to_string(), }; let description = args .get("description") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); log::info!( "tool_store_entity: name='{}', type='{}', description='{}'", name, entity_type, description ); // Pre-flight similarity check — surface near-duplicates to the model // before it commits to a new entity. Uses the first name token as the // search term so "Sarah" matches when storing "Sarah Johnson" and vice // versa. Exact-name matches are excluded (upsert_entity deduplicates // those already). Results are appended to the tool response so the // model can choose to use an existing entity's ID instead. 
let similar_entities: Vec<String> = { use crate::database::EntityFilter; use crate::database::knowledge_dao::normalize_entity_type; let normalised_type = normalize_entity_type(&entity_type); let first_token = name.split_whitespace().next().unwrap_or(&name).to_string(); let filter = EntityFilter { entity_type: None, // search all types, filter client-side to avoid case issues status: Some("active".to_string()), search: Some(first_token), limit: 10, offset: 0, }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); kdao.list_entities(cx, filter) .unwrap_or_default() .0 .into_iter() .filter(|e| { normalize_entity_type(&e.entity_type) == normalised_type && e.name.to_lowercase() != name.to_lowercase() }) .map(|e| format!(" ID:{} | {} | {}", e.id, e.name, e.description)) .collect() }; // Generate embedding for name + description (best-effort) via the // configured local backend. let embed_text = format!("{} {}", name, description); let embedding: Option<Vec<u8>> = match crate::ai::embed_document( &self.ollama, self.llamacpp.as_deref(), &embed_text, ) .await { // The entities table has no dim check at the DAO layer, and a // wrong-dim vector silently kills dedup/recall (cosine over // mismatched lengths is 0) — guard here, store None instead. 
Ok(vec) if vec.len() == crate::ai::embedding_dim() => { let bytes: Vec<u8> = vec.iter().flat_map(|f| f.to_le_bytes()).collect(); Some(bytes) } Ok(vec) => { log::warn!( "Entity '{}' embedding has {} dims (expected {}) — storing without embedding", name, vec.len(), crate::ai::embedding_dim() ); None } Err(e) => { log::warn!("Embedding generation failed for entity '{}': {}", name, e); None } }; let now = chrono::Utc::now().timestamp(); let insert = InsertEntity { name, entity_type, description, embedding, confidence: 0.6, status: "active".to_string(), created_at: now, updated_at: now, }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); match kdao.upsert_entity(cx, insert) { Ok(entity) => { let mut response = format!( "Entity stored: ID:{} | {} | {} | confidence:{:.2}", entity.id, entity.entity_type, entity.name, entity.confidence ); if !similar_entities.is_empty() { response.push_str( "\nSimilar existing entities found — verify this is not a duplicate:\n", ); response.push_str(&similar_entities.join("\n")); response.push_str( "\nIf one of these is the same entity, use their existing ID in store_fact instead of the newly created one.", ); } response } Err(e) => format!("Error storing entity: {:?}", e), } } /// Tool: store_fact — record a fact about an entity, linked to the current photo async fn tool_store_fact( &self, args: &serde_json::Value, file_path: &str, user_id: i32, persona_id: &str, model: &str, backend: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::models::{InsertEntityFact, InsertEntityPhotoLink}; let subject_entity_id = match args.get("subject_entity_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'subject_entity_id'".to_string(), }; let predicate = match args.get("predicate").and_then(|v| v.as_str()) { Some(p) => p.to_string(), None => return "Error: missing required parameter 'predicate'".to_string(), }; let object_entity_id = args 
.get("object_entity_id") .and_then(|v| v.as_i64()) .map(|id| id as i32); let object_value = args .get("object_value") .and_then(|v| v.as_str()) .map(|s| s.to_string()); if object_entity_id.is_none() && object_value.is_none() { return "Error: provide either object_entity_id or object_value".to_string(); } let photo_role = args .get("photo_role") .and_then(|v| v.as_str()) .unwrap_or("subject") .to_string(); log::info!( "tool_store_fact: entity_id={}, predicate='{}', object_entity_id={:?}, object_value={:?}, photo='{}'", subject_entity_id, predicate, object_entity_id, object_value, file_path ); // Anchor the fact in valid-time using the source photo's // `date_taken` (Apollo's naive-as-UTC convention is fine // here — we only care about calendar ordering, not absolute // UTC). The semantic stretch: a photo *evidences* the fact at // that date — the fact may have started earlier — so this is // best read as "no later than this it started being true", // not a strict lower bound. Still useful: gives the curator a // calendar anchor and lets supersession (next slice) close // intervals cleanly when a newer fact arrives. valid_until // stays NULL — a single photo can't tell us when something // *stopped* being true. let valid_from = self.fetch_exif(file_path).and_then(|e| e.date_taken); let fact = InsertEntityFact { subject_entity_id, predicate, object_entity_id, object_value, source_photo: Some(file_path.to_string()), source_insight_id: None, // will be backfilled after store_insight confidence: 0.6, status: "active".to_string(), created_at: chrono::Utc::now().timestamp(), persona_id: persona_id.to_string(), user_id, valid_from, valid_until: None, superseded_by: None, created_by_model: Some(model.to_string()), created_by_backend: Some(backend.to_string()), // Initial write — no modification yet; last_modified_* // intentionally NULL so the audit feed only shows real // post-creation changes. 
last_modified_by_model: None, last_modified_by_backend: None, last_modified_at: None, }; let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); // Upsert the fact (corroboration bumps confidence if duplicate) let (stored_fact, is_new) = match kdao.upsert_fact(cx, fact) { Ok(r) => r, Err(e) => return format!("Error storing fact: {:?}", e), }; // Upsert a photo link so this entity is associated with this photo let link = InsertEntityPhotoLink { entity_id: subject_entity_id, library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), role: photo_role, }; if let Err(e) = kdao.upsert_photo_link(cx, link) { log::warn!( "Failed to upsert photo link for entity {}: {:?}", subject_entity_id, e ); } let action = if is_new { "Stored new fact" } else { "Corroborated existing fact" }; format!( "{}: ID:{} | confidence:{:.2}", action, stored_fact.id, stored_fact.confidence ) } /// Tool: update_fact — patch a fact's mutable fields. Gated by the /// active persona's `allow_agent_corrections` flag at the schema / /// catalog layer (build_tool_definitions); rechecked here as a /// defense in depth in case a hallucinated tool call slips /// through. async fn tool_update_fact( &self, args: &serde_json::Value, user_id: i32, persona_id: &str, model: &str, backend: &str, cx: &opentelemetry::Context, ) -> String { use crate::database::FactPatch; // Defense-in-depth gate check. let allowed = { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(cx, user_id, persona_id) .ok() .flatten() .map(|p| p.allow_agent_corrections) .unwrap_or(false) }; if !allowed { return "Error: agent corrections are disabled for this persona. Ask the operator to flip allow_agent_corrections.".to_string(); } let fact_id = match args.get("fact_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'fact_id'".to_string(), }; // Build the patch from any fields present on `args`. 
valid_* // are tri-state — JSON null → Some(None) → clear back to NULL, // omitted → None → leave alone, value → Some(Some(value)) → // set. The match-on-presence pattern below mirrors the HTTP // PATCH path's serde-helper behaviour. let parse_optional_i64 = |v: Option<&serde_json::Value>| -> Option<Option<i64>> { v.map(|val| if val.is_null() { None } else { val.as_i64() }) }; let patch = FactPatch { predicate: args .get("predicate") .and_then(|v| v.as_str()) .map(|s| s.to_string()), object_value: args .get("object_value") .and_then(|v| v.as_str()) .map(|s| s.to_string()), status: args .get("status") .and_then(|v| v.as_str()) .filter(|s| matches!(*s, "active" | "reviewed" | "rejected")) .map(|s| s.to_string()), confidence: args .get("confidence") .and_then(|v| v.as_f64()) .map(|f| f.clamp(0.0, 0.95) as f32), valid_from: parse_optional_i64(args.get("valid_from")), valid_until: parse_optional_i64(args.get("valid_until")), }; log::info!("tool_update_fact: fact_id={}", fact_id); let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); match kdao.update_fact(cx, fact_id, patch, Some((model, backend))) { Ok(Some(f)) => format!( "Updated fact ID:{} (status={}, confidence={:.2})", f.id, f.status, f.confidence ), Ok(None) => format!("Error: fact ID:{} not found", fact_id), Err(e) => format!("Error updating fact: {:?}", e), } } /// Tool: supersede_fact — replace one fact with another. Same /// gating as update_fact. 
async fn tool_supersede_fact( &self, args: &serde_json::Value, user_id: i32, persona_id: &str, model: &str, backend: &str, cx: &opentelemetry::Context, ) -> String { let allowed = { let mut pdao = self.persona_dao.lock().expect("Unable to lock PersonaDao"); pdao.get_persona(cx, user_id, persona_id) .ok() .flatten() .map(|p| p.allow_agent_corrections) .unwrap_or(false) }; if !allowed { return "Error: agent corrections are disabled for this persona.".to_string(); } let old_fact_id = match args.get("old_fact_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'old_fact_id'".to_string(), }; let new_fact_id = match args.get("new_fact_id").and_then(|v| v.as_i64()) { Some(id) => id as i32, None => return "Error: missing required parameter 'new_fact_id'".to_string(), }; if old_fact_id == new_fact_id { return "Error: old_fact_id and new_fact_id must differ".to_string(); } log::info!( "tool_supersede_fact: old={}, new={}", old_fact_id, new_fact_id ); let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); match kdao.supersede_fact(cx, old_fact_id, new_fact_id, Some((model, backend))) { Ok(Some(f)) => format!( "Superseded fact ID:{} (now status={}, valid_until={:?})", f.id, f.status, f.valid_until ), Ok(None) => "Error: old or new fact not found".to_string(), Err(e) => format!("Error superseding fact: {:?}", e), } } /// Tool: get_current_datetime — returns the current local date and time fn tool_get_current_datetime() -> String { let now = Local::now(); format!( "Current date/time: {} ({})", now.format("%Y-%m-%d %H:%M:%S %Z"), now.format("%A") ) } // ── Agentic insight generation ────────────────────────────────────── /// Build the list of tool definitions for the agentic loop, gated by /// `opts`. Always-on tools: `search_messages`, `get_sms_messages`, /// `get_file_tags`, `reverse_geocode`, `get_current_datetime`, the /// five knowledge-memory tools. 
/// Build the list of tool definitions for the agentic loop, gated by
/// `opts`.
///
/// Always-on tools: `search_messages`, `get_sms_messages`,
/// `get_file_tags`, `reverse_geocode`, `get_current_datetime`, and the
/// five knowledge-memory tools (`recall_entities`,
/// `recall_facts_for_photo`, `recall_facts_for_entity`, `store_entity`,
/// `store_fact`).
///
/// Conditional tools and the `opts` flag that enables each:
/// - `search_rag` — `daily_summaries_present`
/// - `get_calendar_events` — `calendar_present`
/// - `get_location_history` — `location_history_present`
/// - `get_personal_place_at` — `apollo_enabled`
/// - `get_faces_in_photo` — `faces_present`
/// - `update_fact`, `supersede_fact` — `allow_agent_corrections`
/// - `describe_photo` — `has_vision`
///
/// Tools are pushed in a fixed order, so the returned catalog is
/// deterministic for a given `opts`.
pub(crate) fn build_tool_definitions(opts: ToolGateOpts) -> Vec<Tool> {
    let mut tools: Vec<Tool> = Vec::new();

    // ── Retrieval over summaries / messages / calendar / location ──
    if opts.daily_summaries_present {
        tools.push(Tool::function(
            "search_rag",
            "Semantic search over the user's daily-summary corpus. Returns up to \
             `limit` summaries most semantically similar to `query`. Pass `date` \
             to anchor in time: summaries near that date rank higher and matches \
             months away decay sharply. Omit `date` to rank purely by semantic \
             similarity across all time — do this for \"when did X happen?\" \
             questions where the date is unknown. For raw message text, prefer \
             `search_messages`. \
             Examples: `{query: \"family dinner\"}` — best matches across all \
             time. `{query: \"family dinner\", date: \"2018-12-24\"}` — what \
             daily summaries near Christmas Eve mention family / dinner / gathering. \
             `{query: \"work travel\", date: \"2019-06-15\", contact: \"Alice\"}` — \
             biased toward summaries that involve Alice.",
            serde_json::json!({
                "type": "object",
                "required": ["query"],
                "properties": {
                    "query": { "type": "string", "description": "Free-text query, semantically matched." },
                    "date": { "type": "string", "description": "Optional anchor date, YYYY-MM-DD. When set, summaries near this date rank higher; omit to search all time evenly." },
                    "contact": { "type": "string", "description": "Optional contact name to bias toward conversations with that person (soft semantic bias, not a hard filter)." },
                    "limit": { "type": "integer", "description": "Max summaries to return (default 10, max 25)." }
                }
            }),
        ));
    }

    tools.push(Tool::function(
        "search_messages",
        "Search SMS/MMS messages — bodies and (for MMS) attachment text + filenames. \
         Modes: `fts5` (keyword + phrase + prefix + AND/OR/NOT + NEAR proximity), \
         `semantic` (embedding similarity, requires generated embeddings), `hybrid` (RRF merge, recommended; \
         degrades to fts5 when embeddings absent). Optional filters: `start_ts` / `end_ts` (real-UTC unix \
         seconds), `contact` (contact name, case-insensitive), `contact_id` (numeric), `is_mms` \
         (true = MMS only, false = SMS only), `has_media` (true = messages with image/video/audio \
         attachments only). Prefer `contact` over `contact_id` — the name is resolved server-side. \
         If both are provided, `contact_id` takes precedence. \
         For pure date / contact browsing without keywords, prefer `get_sms_messages`. \
         \n\nFTS5 query syntax (works in fts5 + hybrid modes):\n\
         - Phrase: `\"trader joe's\"` — exact word sequence (use double quotes).\n\
         - Prefix: `restaur*` — matches restaurant, restaurants, restauranteur, ….\n\
         - Boolean: `dinner AND tahoe`, `wedding OR reception OR ceremony`, `vacation NOT work` (operators must be UPPERCASE).\n\
         - Proximity: `NEAR(meeting work, 5)` — both terms within 5 tokens of each other.\n\
         - Combine: `(reception OR ceremony) AND tahoe*` — group with parens.\n\
         Unquoted multi-word queries are treated as implicit AND. Apostrophes / hyphens / colons are safe — they no longer downgrade to a slow LIKE scan. Use `mode: \"fts5\"` when you want the operators above to be authoritative; `hybrid` still respects them but may surface semantically-similar non-keyword hits alongside.\n\n\
         Examples:\n\
         - `{query: \"trader joe's\"}` — phrase across all time.\n\
         - `{query: \"dinner\", contact: \"Mom\"}` — keyword scoped to Mom's messages.\n\
         - `{query: \"dinner\", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}` — keyword within a contact and a week.\n\
         - `{query: \"vacation\", has_media: true}` — only matches that include photos / videos.\n\
         - `{query: \"wedding OR reception OR ceremony\", mode: \"fts5\"}` — any of several synonyms.\n\
         - `{query: \"restaur*\", mode: \"fts5\"}` — prefix expansion for varying word forms.\n\
         - `{query: \"NEAR(birthday cake, 5)\", mode: \"fts5\"}` — terms close together but in any order.",
        serde_json::json!({
            "type": "object",
            "required": ["query"],
            "properties": {
                "query": { "type": "string", "description": "Search query. Min 3 chars. fts5 supports phrase (\"\"), prefix (*), AND/OR/NOT, and NEAR proximity. Matches both message body and MMS attachment text/filename." },
                "mode": { "type": "string", "enum": ["fts5", "semantic", "hybrid"], "description": "Search strategy. Default: hybrid." },
                "limit": { "type": "integer", "description": "Max results (default 20, max 50)." },
                "contact_id": { "type": "integer", "description": "Optional numeric contact id to scope the search." },
                "contact": { "type": "string", "description": "Optional contact name (case-insensitive). Resolved to contact_id server-side. Use this when you know the name but not the ID." },
                "start_ts": { "type": "integer", "description": "Optional inclusive lower bound, real-UTC unix seconds." },
                "end_ts": { "type": "integer", "description": "Optional inclusive upper bound, real-UTC unix seconds." },
                "is_mms": { "type": "boolean", "description": "Optional: true to restrict to MMS, false to restrict to SMS." },
                "has_media":{ "type": "boolean", "description": "Optional: true to restrict to messages with image / video / audio attachments." }
            }
        }),
    ));

    tools.push(Tool::function(
        "get_sms_messages",
        "Fetch SMS/MMS messages near a date (and optionally from a specific contact). Use when you know the date \
         or want context around a photo's timestamp. For keyword search without a date, use `search_messages`. \
         Returns up to `limit` messages within `±days_radius` of `date`, sorted by proximity. \
         Example: `{date: \"2018-08-12\", contact: \"Mom\", days_radius: 2}` — messages from Mom within ±2 days of Aug 12 2018.",
        serde_json::json!({
            "type": "object",
            "required": ["date"],
            "properties": {
                "date": { "type": "string", "description": "Center date, YYYY-MM-DD." },
                "contact": { "type": "string", "description": "Optional contact name (case-insensitive). Falls back to all contacts on no match." },
                "days_radius": { "type": "integer", "description": "Days before and after to include (default 4)." },
                "limit": { "type": "integer", "description": "Max messages to return (default 60, max 150)." }
            }
        }),
    ));

    if opts.calendar_present {
        tools.push(Tool::function(
            "get_calendar_events",
            "Fetch calendar events near a date — meetings, scheduled activities, all-day events. \
             Returns events within `±days_radius` of `date`. \
             Example: `{date: \"2019-03-22\", days_radius: 3}` — events within a week of March 22 2019.",
            serde_json::json!({
                "type": "object",
                "required": ["date"],
                "properties": {
                    "date": { "type": "string", "description": "Center date, YYYY-MM-DD." },
                    "days_radius": { "type": "integer", "description": "Days before and after to include (default 7)." },
                    "limit": { "type": "integer", "description": "Max events to return (default 20, max 50)." }
                }
            }),
        ));
    }

    if opts.location_history_present {
        tools.push(Tool::function(
            "get_location_history",
            "Fetch raw location records (lat/lon/timestamp/activity) near a date. The default 14-day radius is \
             wide because location density varies; tighten to ±1 day for a single-trip query. For a coordinate's \
             named place, use `reverse_geocode` (or `get_personal_place_at` when Apollo is enabled).",
            serde_json::json!({
                "type": "object",
                "required": ["date"],
                "properties": {
                    "date": { "type": "string", "description": "Center date, YYYY-MM-DD." },
                    "days_radius": { "type": "integer", "description": "Days before and after to include (default 14)." }
                }
            }),
        ));
    }

    // ── Photo metadata and place lookup ──
    tools.push(Tool::function(
        "get_file_tags",
        "Get user-applied tags for a specific photo file path. Tags are user-curated, not auto-detected.",
        serde_json::json!({
            "type": "object",
            "required": ["file_path"],
            "properties": {
                "file_path": { "type": "string", "description": "File path of the photo." }
            }
        }),
    ));

    tools.push(Tool::function(
        "reverse_geocode",
        "Convert GPS lat/lon to a human-readable place name (city, state). Use for any coordinate the LLM has \
         obtained from EXIF or `get_location_history`. When Apollo is configured, prefer `get_personal_place_at` \
         — it returns the user's named places (Home / Work / etc.) which are more specific.",
        serde_json::json!({
            "type": "object",
            "required": ["latitude", "longitude"],
            "properties": {
                "latitude": { "type": "number", "description": "Decimal degrees." },
                "longitude": { "type": "number", "description": "Decimal degrees." }
            }
        }),
    ));

    if opts.apollo_enabled {
        tools.push(Tool::function(
            "get_personal_place_at",
            "Return any of the user's named Places (e.g. Home, Work, Cabin) whose radius contains (latitude, longitude). \
             Smallest radius first — most specific match wins. More specific than `reverse_geocode`; prefer this when \
             both apply. Returns place name, category, free-text description, and radius.",
            serde_json::json!({
                "type": "object",
                "required": ["latitude", "longitude"],
                "properties": {
                    "latitude": { "type": "number", "description": "Decimal degrees." },
                    "longitude": { "type": "number", "description": "Decimal degrees." }
                }
            }),
        ));
    }

    if opts.faces_present {
        tools.push(Tool::function(
            "get_faces_in_photo",
            "Return the faces detected in this photo with their bounding boxes and assigned person names \
             (when bound). Each face carries `person_name` (string or null), `bbox` ({x, y, w, h} normalized 0–1), \
             `confidence` (0–1), and `source` ('auto' from detector or 'manual' from a user-drawn bbox). \
             More authoritative than `get_file_tags` for counting people in a photo or naming who is present, \
             since it returns detected-but-unbound faces too. \
             Example: `{file_path: \"2019/06/IMG_4242.jpg\"}`.",
            serde_json::json!({
                "type": "object",
                "required": ["file_path"],
                "properties": {
                    "file_path": { "type": "string", "description": "File path of the photo." }
                }
            }),
        ));
    }

    // ── Knowledge memory: three recall tools, two store tools ──
    tools.push(Tool::function(
        "recall_entities",
        "Search the persistent knowledge memory for previously learned people, places, events, or things. \
         Use BEFORE writing the insight to ground the model on what's already known.",
        serde_json::json!({
            "type": "object",
            "properties": {
                "name": { "type": "string", "description": "Name or partial name (case-insensitive substring match)." },
                "entity_type": { "type": "string", "enum": ["person", "place", "event", "thing"] },
                "limit": { "type": "integer", "description": "Max results (default 20, max 50)." }
            }
        }),
    ));

    tools.push(Tool::function(
        "recall_facts_for_photo",
        "Retrieve all stored facts linked to a specific photo. Call at the start of insight generation to load \
         prior knowledge about subjects in this photo without scanning the whole knowledge base.",
        serde_json::json!({
            "type": "object",
            "required": ["file_path"],
            "properties": {
                "file_path": { "type": "string", "description": "File path of the photo." }
            }
        }),
    ));

    tools.push(Tool::function(
        "recall_facts_for_entity",
        "Retrieve all stored facts about one specific entity by its ID. Use to follow up on an entity \
         surfaced by `recall_entities` (or referenced by another fact's object) that is NOT linked to the \
         current photo — `recall_facts_for_photo` only covers photo-linked entities. \
         Example: `{entity_id: 7}` — everything known about entity 7.",
        serde_json::json!({
            "type": "object",
            "required": ["entity_id"],
            "properties": {
                "entity_id": { "type": "integer", "description": "ID of the entity to fetch facts for (from recall_entities, store_entity, or a fact's object reference)." },
                "limit": { "type": "integer", "description": "Max facts to return (default 50, max 100)." }
            }
        }),
    ));

    tools.push(Tool::function(
        "store_entity",
        "Upsert a person / place / event / thing into the knowledge memory. Returns the entity id (use it as \
         `subject_entity_id` or `object_entity_id` in `store_fact`). Idempotent on canonical name.",
        serde_json::json!({
            "type": "object",
            "required": ["name", "entity_type"],
            "properties": {
                "name": { "type": "string", "description": "Canonical name (e.g. \"John Smith\", \"Banff National Park\")." },
                "entity_type": { "type": "string", "enum": ["person", "place", "event", "thing"] },
                "description": { "type": "string", "description": "Brief description." }
            }
        }),
    ));

    tools.push(Tool::function(
        "store_fact",
        "Record a fact about an entity in the knowledge memory. Always linked to the current photo. \
         You must provide EITHER `object_entity_id` (when the object is itself a stored entity — e.g. \
         person A is_friend_of person B) OR `object_value` (free-text attribute — e.g. role=\"software engineer\"). \
         `object_entity_id` takes precedence when both are present. \
         Examples: \
         `{subject_entity_id: 7, predicate: \"is_friend_of\", object_entity_id: 12}` — links two known entities. \
         `{subject_entity_id: 7, predicate: \"lives_in\", object_value: \"Portland, Oregon\"}` — free-text attribute.",
        serde_json::json!({
            "type": "object",
            "required": ["subject_entity_id", "predicate"],
            "properties": {
                "subject_entity_id": { "type": "integer", "description": "Entity id this fact is about." },
                "predicate": { "type": "string", "description": "Relationship or attribute (e.g. is_friend_of, located_in, attended_event)." },
                "object_entity_id": { "type": "integer", "description": "Use when the object is itself a stored entity. Takes precedence over object_value." },
                "object_value": { "type": "string", "description": "Use for free-text attributes where the object is not a stored entity." },
                "photo_role": { "type": "string", "description": "How this entity appears in the photo (default \"subject\")." }
            }
        }),
    ));

    // Self-correction tools — only exposed when the active persona
    // has allow_agent_corrections=true. Gating happens here AND in
    // the tool method itself (the runtime check is the load-bearing
    // one; this just keeps the tool out of the model's catalog so
    // it doesn't waste iterations trying calls it can't make).
    if opts.allow_agent_corrections {
        tools.push(Tool::function(
            "update_fact",
            "Correct an existing fact in the knowledge memory. Use sparingly — only when you have \
             stronger evidence than the original write justified (e.g. a clearer photo, a \
             contradicting timestamp on a related fact, or explicit user correction). Common \
             patches: tighten `valid_from` / `valid_until` to a known interval, downgrade \
             `confidence` after seeing contradicting evidence, or flip `status` to 'reviewed' if \
             you've verified the fact independently. Cannot change subject / object — supersede \
             instead. Pass `fact_id` plus any subset of patchable fields.",
            serde_json::json!({
                "type": "object",
                "required": ["fact_id"],
                "properties": {
                    "fact_id": { "type": "integer", "description": "ID of the fact to patch (from recall_facts_for_photo or list)." },
                    "predicate": { "type": "string", "description": "New predicate string. Rare." },
                    "object_value": { "type": "string", "description": "New free-text object. Use for typed-fact corrections." },
                    "status": { "type": "string", "description": "'active' | 'reviewed' | 'rejected'. 'superseded' is for the supersede_fact tool." },
                    "confidence": { "type": "number", "description": "0.0–0.95. Lower when you've seen contradicting evidence." },
                    "valid_from": { "type": "integer", "description": "Unix-seconds lower bound on when the fact began being true. Null clears." },
                    "valid_until": { "type": "integer", "description": "Unix-seconds upper bound on when the fact stopped being true. Null clears." }
                }
            }),
        ));

        tools.push(Tool::function(
            "supersede_fact",
            "Mark an old fact as replaced by a newer one. Use when the new fact contradicts the \
             old AND the contradiction is a *time-bounded change* (relationship changed, address \
             changed, role changed), not a correction of a mistake — for mistakes, set the old \
             fact's status to 'rejected' via update_fact. Atomically: flips old.status to \
             'superseded', points old.superseded_by at the new fact, and stamps old.valid_until \
             from new.valid_from (when not already set) so the two intervals are disjoint.",
            serde_json::json!({
                "type": "object",
                "required": ["old_fact_id", "new_fact_id"],
                "properties": {
                    "old_fact_id": { "type": "integer", "description": "The fact being replaced." },
                    "new_fact_id": { "type": "integer", "description": "The fact that replaces it. Must already exist (use store_fact first if you're recording a new one)." }
                }
            }),
        ));
    }

    // ── Utilities ──
    tools.push(Tool::function(
        "get_current_datetime",
        "Get the current date and time. Useful when reasoning about how long ago a photo was taken.",
        serde_json::json!({
            "type": "object",
            "properties": {}
        }),
    ));

    // Pushed last, and only when `opts.has_vision` is set.
    if opts.has_vision {
        tools.push(Tool::function(
            "describe_photo",
            "Generate a visual description of the current photo — people, location, objects, activity visible \
             in the image. Only available with vision-capable models.",
            serde_json::json!({
                "type": "object",
                "properties": {}
            }),
        ));
    }

    tools
}
Only available with vision-capable models.", serde_json::json!({ "type": "object", "properties": {} }), )); } tools } /// Generate an AI insight for a photo using an agentic tool-calling loop. /// The model decides which tools to call to gather context before writing the final insight. /// /// `backend` selects the chat provider: `"local"` (default) routes the /// agentic loop through the configured Ollama server with the image /// attached to the first user message; `"hybrid"` asks the local Ollama /// vision model to describe the image once, inlines the description as /// text, and runs the loop through OpenRouter (chat only — embeddings /// and describe calls stay local in either mode). #[allow(clippy::too_many_arguments)] /// Render a set of prior-conversation transcripts into a compact /// trajectory block for inclusion in the system prompt. Tool results /// are summarised to one line each so the prompt stays small. fn render_fewshot_examples(examples: &[Vec<ChatMessage>]) -> String { if examples.is_empty() { return String::new(); } let mut out = String::from("## Examples of strong context-gathering\n\n"); out.push_str( "The following are compressed trajectories from prior high-quality insights. 
\ They show the *pattern* of tool use, not answers to copy.\n\n", ); for (i, msgs) in examples.iter().enumerate() { out.push_str(&format!("### Example {}\n\n", i + 1)); out.push_str(&Self::render_single_trajectory(msgs)); out.push('\n'); } out.push_str("---\n\n"); out } fn render_single_trajectory(msgs: &[ChatMessage]) -> String { let mut out = String::new(); if let Some(first_user) = msgs.iter().find(|m| m.role == "user") { let trimmed = first_user .content .lines() .filter(|l| !l.trim().is_empty()) .take(8) .collect::<Vec<_>>() .join("\n"); out.push_str(&format!("Input:\n{}\n\n", trimmed)); } out.push_str("Trajectory:\n"); let mut step = 1; let mut final_content: Option<String> = None; for (i, m) in msgs.iter().enumerate() { if m.role != "assistant" { continue; } if let Some(ref calls) = m.tool_calls { for call in calls { let args_brief = Self::brief_json_args(&call.function.arguments); let result_summary = msgs .get(i + 1) .filter(|r| r.role == "tool") .map(|r| Self::summarize_tool_result(&call.function.name, &r.content)) .unwrap_or_else(|| "(no result)".to_string()); out.push_str(&format!( "{}. {}({}) -> {}\n", step, call.function.name, args_brief, result_summary )); step += 1; } } else if !m.content.is_empty() { final_content = Some(m.content.clone()); } } if let Some(content) = final_content { let short: String = content.chars().take(240).collect(); out.push_str(&format!("\nFinal insight: {}...\n", short)); } out } fn brief_json_args(v: &serde_json::Value) -> String { let Some(obj) = v.as_object() else { return v.to_string(); }; obj.iter() .map(|(k, v)| { let rendered = match v { serde_json::Value::String(s) if s.chars().count() > 40 => { format!("\"{}...\"", s.chars().take(40).collect::<String>()) } _ => v.to_string(), }; format!("{}={}", k, rendered) }) .collect::<Vec<_>>() .join(", ") } /// Collapse a raw tool-result string (the text the model saw) into a /// short phrase suitable for a few-shot trajectory. 
/// Collapse a raw tool-result string (the text the model saw) into a
/// short phrase suitable for a few-shot trajectory.
///
/// Detection order:
/// 1. `"Error …"` / `"Error: …"` → `"error"`.
/// 2. `"No …"` / `"Could not …"` → `"empty (pivoted)"`.
/// 3. `"Found N …"` → `"N <kind>"`, where the kind depends on the tool.
/// 4. Tool-specific shapes (rag hits, tags, stored / updated ids, …).
/// 5. Anything else → `"<n> chars"`, a character count that is
///    deliberately visible so drift in tool output shows up.
fn summarize_tool_result(tool_name: &str, raw: &str) -> String {
    // Tools report failures both as "Error <doing x>: …" and as
    // "Error: <message>". Both must be caught before the per-tool arms,
    // or a failed store would be summarised as "stored".
    if raw.starts_with("Error ") || raw.starts_with("Error:") {
        return "error".to_string();
    }
    if raw.starts_with("No ") || raw.starts_with("Could not ") {
        return "empty (pivoted)".to_string();
    }

    // "Found N …": the first token after the prefix must parse as a count.
    let found_count = raw
        .strip_prefix("Found ")
        .and_then(|rest| rest.split_whitespace().next())
        .and_then(|tok| tok.parse::<usize>().ok());
    if let Some(n) = found_count {
        let kind = match tool_name {
            "search_messages" | "get_sms_messages" => "messages",
            "get_calendar_events" => "events",
            "get_location_history" => "location records",
            _ => "results",
        };
        return format!("{} {}", n, kind);
    }

    match tool_name {
        "search_rag" => {
            let n = raw.split("\n\n").filter(|s| !s.trim().is_empty()).count();
            format!("{} rag hits", n)
        }
        "get_file_tags" => {
            let n = raw.split(',').filter(|s| !s.trim().is_empty()).count();
            format!("{} tags", n)
        }
        "describe_photo" => {
            let short: String = raw.chars().take(80).collect();
            format!("described: \"{}...\"", short)
        }
        "reverse_geocode" => {
            let short: String = raw.chars().take(60).collect();
            format!("place: {}", short)
        }
        "get_personal_place_at" => {
            if raw.starts_with("No personal place") || raw.starts_with("Personal place lookup") {
                "no personal place".to_string()
            } else {
                let n = raw.lines().filter(|l| l.starts_with("- ")).count().max(1);
                format!("{} personal place(s)", n)
            }
        }
        "recall_entities" | "recall_facts_for_photo" | "recall_facts_for_entity" => {
            // The first line is skipped as a header; the rest are counted.
            let n = raw.lines().skip(1).filter(|l| !l.trim().is_empty()).count();
            let kind = if tool_name == "recall_entities" {
                "entities"
            } else {
                "facts"
            };
            format!("{} {}", n, kind)
        }
        "store_entity" | "store_fact" | "update_fact" | "supersede_fact" => {
            let verb = match tool_name {
                "update_fact" => "updated",
                "supersede_fact" => "superseded",
                _ => "stored",
            };
            // Success strings carry the affected id as an `ID:<n>` token.
            match raw
                .split_whitespace()
                .find_map(|tok| tok.strip_prefix("ID:"))
            {
                Some(id) => format!("{} id={}", verb, id.trim_end_matches(',')),
                None => verb.to_string(),
            }
        }
        "get_current_datetime" => "time noted".to_string(),
        // Characters, not bytes, so non-ASCII output is not over-counted.
        _ => format!("{} chars", raw.chars().count()),
    }
}
/// Compose the chat system prompt for the agentic loop.
///
/// The result is, in order:
///
/// 1. **Identity / voice / format** — the trimmed `custom_system_prompt`
///    when it has non-whitespace content, otherwise a neutral default
///    that asserts no persona.
/// 2. `owner_id_note`, appended verbatim when present.
/// 3. A blank line, then `fewshot_block` verbatim.
/// 4. **Procedural scaffolding** — tool-use guidance, the
///    `max_iterations` budget, and the answer-format rule. This part is
///    identity-free.
///
/// `owner_id_note` and `fewshot_block` are expected to be pre-rendered
/// (carrying their own headers / blank lines); pass `None` / `""` to
/// skip them.
pub(crate) fn build_system_content(
    custom_system_prompt: Option<&str>,
    owner_id_note: Option<&str>,
    fewshot_block: &str,
    max_iterations: usize,
) -> String {
    // Block 1: persona text if usable, else the neutral fallback.
    let mut prompt = custom_system_prompt
        .map(str::trim)
        .filter(|text| !text.is_empty())
        .map(String::from)
        .unwrap_or_else(|| {
            String::from(
                "You are reconstructing a memory from a photo. Use the gathered \
                 context to write a thoughtful summary; you decide voice, length, and shape.",
            )
        });

    // Block 2: identity-free tool-use rules with the budget substituted.
    let guidance = format!(
        "Tool-use guidance:\n\
         - You have a budget of {budget} tool-calling iterations.\n\
         - Call tools to gather context BEFORE writing your final answer; don't answer after one or two calls.\n\
         - When calling get_sms_messages or search_rag, make at least one call WITHOUT a contact filter \
         — surrounding events matter even when a contact is known.\n\
         - Use recall_facts_for_photo + recall_entities to load any prior knowledge about subjects in the photo.\n\
         - When you identify people / places / events / things, use store_entity + store_fact to grow the persistent memory.\n\
         - Before store_entity, call recall_entities to check whether a similar name already exists; reuse the existing entity_id rather than creating a near-duplicate (e.g. \"Sara\" vs \"Sarah J.\"). The DAO will collapse obvious cosine matches, but choosing the existing id keeps facts and photo links consolidated.\n\
         - Predicates should be relationship-shaped verbs that encode a queryable claim — `lives_in`, `works_at`, `attended`, `is_friend_of`, `is_parent_of`, `interested_in`, `married_to`, `owns`. DO NOT use vague speech-act predicates like `expressed`, `said`, `mentioned`, `stated`, `quoted`, `noted`, `discussed`, `thought`, `wondered`. DO NOT store quotations or sentence fragments as `object_value` — paraphrase into a structured claim. Bad: `(Cameron, expressed, \"I'm tempted to get a part-time job there\")`. Good: `(Cameron, considered_employment_at, <Place>)` or `(Cameron, interested_in, \"part-time work\")`.\n\
         - A tool returning no results is informative; continue with the others.\n\
         - When writing your final answer, start with \"Title: <short title>\" (max 8 words) on the first line, then a blank line, then the body.",
        budget = max_iterations,
    );

    // Appending an empty note / few-shot block is a no-op, so no
    // emptiness checks are needed here.
    if let Some(note) = owner_id_note {
        prompt.push_str(note);
    }
    prompt.push_str("\n\n");
    prompt.push_str(fewshot_block);
    prompt.push_str(&guidance);
    prompt
}
let arc = self.openrouter.as_ref().ok_or_else(|| { anyhow::anyhow!("hybrid backend unavailable: OPENROUTER_API_KEY not configured") })?; let mut c: OpenRouterClient = (**arc).clone(); if let Some(ref m) = overrides.model { c.primary_model = m.clone(); } if overrides.has_sampling() { c.set_sampling_params( overrides.temperature, overrides.top_p, overrides.top_k, overrides.min_p, ); } if let Some(ctx) = overrides.num_ctx { c.set_num_ctx(Some(ctx)); } Box::new(c) } else if local_via_llamacpp { // Local via llama-swap. let arc = self.llamacpp.as_ref().ok_or_else(|| { anyhow::anyhow!("LLM_BACKEND=llamacpp but LLAMA_SWAP_URL not configured") })?; let mut c: LlamaCppClient = (**arc).clone(); if let Some(ref m) = overrides.model { c.primary_model = m.clone(); } if overrides.has_sampling() { c.set_sampling_params( overrides.temperature, overrides.top_p, overrides.top_k, overrides.min_p, ); } if let Some(ctx) = overrides.num_ctx { c.set_num_ctx(Some(ctx)); } Box::new(c) } else { // Pure Ollama local. let mut ollama_client = if let Some(ref model) = overrides.model { OllamaClient::new( self.ollama.primary_url.clone(), self.ollama.fallback_url.clone(), model.clone(), Some(model.clone()), ) } else { self.ollama.clone() }; if overrides.has_sampling() { ollama_client.set_sampling_params( overrides.temperature, overrides.top_p, overrides.top_k, overrides.min_p, ); } if let Some(ctx) = overrides.num_ctx { ollama_client.set_num_ctx(Some(ctx)); } Box::new(ollama_client) }; // ── local client (utility calls: rerank, describe_image, etc.) ─ // For llamacpp in local mode: mirror the chat model selection so // rerank / describe_image hit the same model that's already // loaded — avoids a mid-turn model swap in llama-swap exclusive // mode. In hybrid mode the override is an OpenRouter model id // (e.g. "google/gemini-3-flash-preview") which llama-swap can't // serve — keep the configured local slots. 
let local: Box<dyn LlmClient> = if local_via_llamacpp { let mut lc = self.llamacpp.as_ref().unwrap().as_ref().clone(); if !is_hybrid && let Some(ref m) = overrides.model { lc.primary_model = m.clone(); lc.set_vision_model(m.clone()); } Box::new(lc) } else { Box::new(self.ollama.clone()) }; // ── images_inline ────────────────────────────────────────────── let images_inline = if is_hybrid { // Hybrid: chat model never sees images — describe-then-inject. false } else if local_via_llamacpp { // llama-swap models receive images directly via OpenAI content // parts. Capability probing isn't available (no `/api/show`), // so assume vision support; a misconfigured model surfaces as // a chat-call error. true } else { // Pure Ollama: probe model capabilities. let ollama_for_caps = if let Some(ref model) = overrides.model { // Verify custom model is available on at least one server. let available_on_primary = OllamaClient::is_model_available(&self.ollama.primary_url, model) .await .unwrap_or(false); let available_on_fallback = if let Some(ref fallback_url) = self.ollama.fallback_url { OllamaClient::is_model_available(fallback_url, model) .await .unwrap_or(false) } else { false }; if !available_on_primary && !available_on_fallback { anyhow::bail!( "model not available: '{}' not found on any configured server", model ); } model.as_str() } else { self.ollama.primary_model.as_str() }; let capabilities = match OllamaClient::check_model_capabilities( &self.ollama.primary_url, ollama_for_caps, ) .await { Ok(caps) => caps, Err(_) => { let fallback_url = self.ollama.fallback_url.as_deref().ok_or_else(|| { anyhow::anyhow!( "Failed to check model capabilities for '{}': model not found on primary server and no fallback configured", ollama_for_caps ) })?; OllamaClient::check_model_capabilities(fallback_url, ollama_for_caps) .await .map_err(|e| { anyhow::anyhow!( "Failed to check model capabilities for '{}': {}", ollama_for_caps, e ) })? 
} }; if !capabilities.has_tool_calling { anyhow::bail!("tool calling not supported by model '{}'", ollama_for_caps); } capabilities.has_vision }; Ok(ResolvedBackend::new(chat, local, kind, images_inline)) } pub async fn generate_agentic_insight_for_photo( &self, file_path: &str, custom_model: Option<String>, custom_system_prompt: Option<String>, num_ctx: Option<i32>, temperature: Option<f32>, top_p: Option<f32>, top_k: Option<i32>, min_p: Option<f32>, max_iterations: usize, backend: Option<String>, fewshot_examples: Vec<Vec<ChatMessage>>, fewshot_source_ids: Vec<i32>, user_id: i32, persona_id: String, ) -> Result<(Option<i32>, Option<i32>, Option<i32>)> { let tracer = global_tracer(); let current_cx = opentelemetry::Context::current(); let mut span = tracer.start_with_context("ai.insight.generate_agentic", ¤t_cx); let file_path = normalize_path(file_path); log::info!("Generating agentic insight for photo: {}", file_path); span.set_attribute(KeyValue::new("file_path", file_path.clone())); span.set_attribute(KeyValue::new("max_iterations", max_iterations as i64)); // 1. Resolve backend + build clients. let kind = BackendKind::parse(backend.as_deref().unwrap_or("local"))?; span.set_attribute(KeyValue::new("backend", kind.as_str())); let overrides = SamplingOverrides { model: custom_model, num_ctx, temperature, top_p, top_k, min_p, }; let backend = self.resolve_backend(kind, &overrides).await?; span.set_attribute(KeyValue::new("model", backend.model().to_string())); span.set_attribute(KeyValue::new("images_inline", backend.images_inline)); let insight_cx = current_cx.with_span(span); // 3. Fetch EXIF let exif = { let mut exif_dao = self.exif_dao.lock().expect("Unable to lock ExifDao"); exif_dao .get_exif(&insight_cx, &file_path) .map_err(|e| anyhow::anyhow!("Failed to get EXIF: {:?}", e))? }; // 4. 
Extract timestamp and contact let timestamp = if let Some(ts) = exif.as_ref().and_then(|e| e.date_taken) { ts } else { log::warn!("No date_taken in EXIF for {}, trying filename", file_path); extract_date_from_filename(&file_path) .map(|dt| dt.timestamp()) .or_else(|| { let full_path = self.resolve_full_path(&file_path)?; File::open(&full_path) .and_then(|f| f.metadata()) .inspect_err(|e| { log::warn!( "Failed to get file timestamp for agentic insight {}: {}", file_path, e ) }) .ok() .and_then(|m| earliest_fs_time(&m)) .map(|t| DateTime::<Utc>::from(t).timestamp()) }) .unwrap_or_else(|| Utc::now().timestamp()) }; let date_taken = DateTime::from_timestamp(timestamp, 0) .map(|dt| dt.date_naive()) .unwrap_or_else(|| Utc::now().date_naive()); let contact = Self::extract_contact_from_path(&file_path); log::info!("Agentic: date_taken={}, contact={:?}", date_taken, contact); // 5. Fetch tags let tag_names: Vec<String> = { let mut dao = self.tag_dao.lock().expect("Unable to lock TagDao"); dao.get_tags_for_path(&insight_cx, &file_path) .unwrap_or_else(|e| { log::warn!("Failed to fetch tags for agentic {}: {}", file_path, e); Vec::new() }) .into_iter() .map(|t| t.name) .collect() }; // 6. Ensure the owner entity exists so the agent can reference it. // Prior entity_photo_links for this file are intentionally preserved // across regenerations — clearing them made `recall_facts_for_photo` // always return empty and discarded hard-won knowledge. Re-linking // the same entity is a no-op (INSERT OR IGNORE). let owner_name = user_display_name(); let owner_entity_id: Option<i32> = { let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); // Upsert the owner entity so the agent always has a stable entity ID to reference. let owner = crate::database::models::InsertEntity { name: owner_name.clone(), entity_type: "person".to_string(), description: format!( "The owner of this photo collection. 
All memories are written from {}'s perspective.", owner_name ), embedding: None, confidence: 1.0, status: "active".to_string(), created_at: Utc::now().timestamp(), updated_at: Utc::now().timestamp(), }; match kdao.upsert_entity(&insight_cx, owner) { Ok(e) => { log::info!("Owner entity '{}' ID: {}", owner_name, e.id); Some(e.id) } Err(e) => { log::warn!("Failed to upsert owner entity '{}': {:?}", owner_name, e); None } } }; // 7. Load image. Always attempted — vision-capable models get the // base64 inline; hybrid mode describes it locally and injects text. let image_base64 = match self.load_image_as_base64(&file_path) { Ok(b64) => { log::info!("Loaded image for agentic model"); Some(b64) } Err(e) => { log::warn!("Failed to load image for agentic: {}", e); None } }; // Describe-then-inline (hybrid only). Vision describe routes through // the local backend so non-text work stays off OpenRouter. let inlined_visual_description: Option<String> = if !backend.images_inline { match image_base64.as_deref() { Some(b64) => match backend.local().describe_image(b64).await { Ok(desc) => { log::info!("{}: vision describe succeeded ({} chars)", kind, desc.len()); Some(desc) } Err(e) => { log::warn!( "{}: vision describe failed, continuing without: {}", kind, e ); None } }, None => None, } } else { None }; // 8. Build system message via the two-block helper. Custom prompt // (when supplied) is the authoritative identity — the framework // never appends a competing "you are a personal photo memory // assistant" line. The procedural block stays identity-free. let owner_id_note = owner_entity_id.map(|id| { format!( "\n\nYour identity in the knowledge store: {name} (entity ID: {id}). \ When storing facts where you ({name}) are the object — for example, someone is your friend, \ sibling, or colleague — use subject_entity_id for the other person and set object_value to \ \"{name}\" (or use store_fact with the other person as subject). 
When storing facts about \ {name} directly, use {id} as the subject_entity_id.", name = owner_name, id = id ) }); let fewshot_block = Self::render_fewshot_examples(&fewshot_examples); let system_content = Self::build_system_content( custom_system_prompt.as_deref(), owner_id_note.as_deref(), &fewshot_block, max_iterations, ); // 9. Build user message let gps_info = exif .as_ref() .and_then(|e| { if let (Some(lat), Some(lon)) = (e.gps_latitude, e.gps_longitude) { Some(format!("GPS: {:.4}, {:.4}", lat, lon)) } else { None } }) .unwrap_or_else(|| "GPS: unknown".to_string()); let tags_info = if tag_names.is_empty() { "Tags: none".to_string() } else { format!("Tags: {}", tag_names.join(", ")) }; let contact_info = contact .as_ref() .map(|c| format!("Contact/Person: {}", c)) .unwrap_or_else(|| "Contact/Person: unknown".to_string()); let visual_block = inlined_visual_description .as_deref() .map(|d| format!("Visual description (from local vision model):\n{}\n\n", d)) .unwrap_or_default(); // Context-only payload — no output-shape prescription. The persona / // custom_system_prompt owns voice, length, and structure. The "title // and summary" claim that used to live here was unused (the title is // regenerated post-hoc from the summary by generate_photo_title). let user_content = format!( "{visual_block}Photo file path: {file_path}\n\ Date taken: {date}\n\ {contact_info}\n\ {gps_info}\n\ {tags_info}\n\n\ Gather context with the available tools, then respond.", date = date_taken.format("%B %d, %Y"), ); // 10. Define tools. describe_photo offered only when the chat model // sees images directly (images_inline); in hybrid mode the visual // description is already inlined as text. Persona-aware so the // persona's allow_agent_corrections gate opens here exactly like // it does for chat turns (insight_chat does the same). 
let gate_opts = self.current_gate_opts_for_persona(backend.images_inline, Some((user_id, &persona_id))); let tools = Self::build_tool_definitions(gate_opts); // 11. Build initial messages. images_inline → attach base64 to the // user message; describe-then-inline → text was already injected. let system_msg = ChatMessage::system(system_content); let mut user_msg = ChatMessage::user(user_content); if backend.images_inline && let Some(ref img) = image_base64 { user_msg.images = Some(vec![img.clone()]); } let mut messages = vec![system_msg, user_msg]; let loop_span = tracer.start_with_context("ai.agentic.loop", &insight_cx); let loop_cx = insight_cx.with_span(loop_span); let mut final_content = String::new(); let mut iterations_used = 0usize; let mut last_prompt_eval_count: Option<i32> = None; let mut last_eval_count: Option<i32> = None; for iteration in 0..max_iterations { iterations_used = iteration + 1; log::info!("Agentic iteration {}/{}", iteration + 1, max_iterations); let (response, prompt_tokens, eval_tokens) = backend .chat() .chat_with_tools(messages.clone(), tools.clone()) .await?; last_prompt_eval_count = prompt_tokens; last_eval_count = eval_tokens; // Sanitize tool call arguments before pushing back into history. // Some models occasionally return non-object arguments (bool, string, null) // which Ollama rejects when they are re-sent in a subsequent request. 
let mut response = response; if let Some(ref mut tool_calls) = response.tool_calls { for tc in tool_calls.iter_mut() { if !tc.function.arguments.is_object() { log::warn!( "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}", tc.function.name, tc.function.arguments ); tc.function.arguments = serde_json::Value::Object(Default::default()); } } } messages.push(response.clone()); if let Some(ref tool_calls) = response.tool_calls && !tool_calls.is_empty() { for tool_call in tool_calls { log::info!( "Agentic tool call [{}]: {} {}", iteration, tool_call.function.name, tool_call.function.arguments ); let result = self .execute_tool( &tool_call.function.name, &tool_call.function.arguments, &backend, &image_base64, &file_path, user_id, &persona_id, &loop_cx, ) .await; messages.push(ChatMessage::tool_result(result)); } continue; } // No tool calls — this is the final answer final_content = response.content; break; } // If loop exhausted without final answer, ask for one if final_content.is_empty() { log::info!( "Agentic loop exhausted after {} iterations, requesting final answer", iterations_used ); messages.push(ChatMessage::user(format!( "Based on the context gathered, please write the final photo insight. Start with \"Title: <short title>\" on the first line (max 8 words), then a blank line, then the detailed personal summary. Write in first person as {}.", user_display_name() ))); let (final_response, prompt_tokens, eval_tokens) = backend .chat() .chat_with_tools(messages.clone(), vec![]) .await?; last_prompt_eval_count = prompt_tokens; last_eval_count = eval_tokens; final_content = final_response.content.clone(); messages.push(final_response); } loop_cx .span() .set_attribute(KeyValue::new("iterations_used", iterations_used as i64)); loop_cx.span().set_status(Status::Ok); // 13. Strip any leaked <think>…</think> reasoning block (thinking // models emit it ahead of the answer; the raw transcript in // training_messages keeps it), then parse the title. 
final_content = crate::ai::llm_client::strip_think_blocks(&final_content); let (title, body) = parse_title_body(&final_content); final_content = body; log::info!("Agentic parsed title: {}", title); let summary_preview: String = final_content.chars().take(200).collect(); log::info!( "Agentic generated summary ({} chars): {}", final_content.len(), summary_preview ); // 14. Serialize the full message history for training data let training_messages = match serde_json::to_string(&messages) { Ok(json) => Some(json), Err(e) => { log::warn!("Failed to serialize training messages: {}", e); None } }; // 15. Store insight (returns the persisted row including its new id) let model_version = backend.model().to_string(); let fewshot_source_ids_json = if fewshot_source_ids.is_empty() { None } else { Some(serde_json::to_string(&fewshot_source_ids).unwrap_or_else(|_| "[]".to_string())) }; let insight = InsertPhotoInsight { library_id: crate::libraries::PRIMARY_LIBRARY_ID, file_path: file_path.to_string(), title, summary: final_content, generated_at: Utc::now().timestamp(), model_version, is_current: true, training_messages, backend: kind.as_str().to_string(), fewshot_source_ids: fewshot_source_ids_json, content_hash: None, num_ctx, temperature, top_p, top_k, min_p, system_prompt: custom_system_prompt.clone(), persona_id: Some(persona_id.clone()), prompt_eval_count: last_prompt_eval_count, eval_count: last_eval_count, }; let stored = { let mut dao = self.insight_dao.lock().expect("Unable to lock InsightDao"); dao.store_insight(&insight_cx, insight) .map_err(|e| anyhow::anyhow!("Failed to store agentic insight: {:?}", e)) }; match &stored { Ok(_) => { log::info!("Successfully stored agentic insight for {}", file_path); insight_cx.span().set_status(Status::Ok); } Err(e) => { log::error!("Failed to store agentic insight: {:?}", e); insight_cx.span().set_status(Status::error(e.to_string())); } } let stored_insight = stored?; // 16. 
Backfill source_insight_id on all facts recorded for this photo during the loop { let mut kdao = self .knowledge_dao .lock() .expect("Unable to lock KnowledgeDao"); if let Err(e) = kdao.update_facts_insight_id(&insight_cx, &file_path, stored_insight.id) { log::warn!( "Failed to backfill source_insight_id for {}: {:?}", file_path, e ); } } Ok(( Some(stored_insight.id), last_prompt_eval_count, last_eval_count, )) } /// A read-only agentic tool loop: chat with tools until the model stops /// calling them, then return the final content. /// /// This is the loop body extracted from /// `generate_agentic_insight_for_photo` (lines 4316-4377) so it can be /// reused by the reel-scripter without the photo-specific context /// (image_base64, file_path, persona_id). The photo insight loop still /// has its own copy because it threads image/file context through /// `execute_tool`. /// /// Calls `execute_tool` with empty file/image context; enabled tools /// never read those fields. #[allow(dead_code)] pub(crate) async fn run_readonly_tool_loop( &self, backend: &ResolvedBackend, mut messages: Vec<ChatMessage>, tools: Vec<Tool>, max_iter: usize, ) -> Result<String> { let mut final_content = String::new(); for iteration in 0..max_iter { log::info!("Agentic iteration {}/{}", iteration + 1, max_iter); let (response, _prompt_tokens, _eval_tokens) = backend .chat() .chat_with_tools(messages.clone(), tools.clone()) .await?; // Sanitize tool call arguments before pushing back into history. // Some models occasionally return non-object arguments (bool, // string, null) which Ollama rejects when they are re-sent in // a subsequent request. 
let mut response = response; if let Some(ref mut tool_calls) = response.tool_calls { for tc in tool_calls.iter_mut() { if !tc.function.arguments.is_object() { log::warn!( "Tool '{}' returned non-object arguments ({:?}), normalising to {{}}", tc.function.name, tc.function.arguments ); tc.function.arguments = serde_json::Value::Object(Default::default()); } } } messages.push(response.clone()); if let Some(ref tool_calls) = response.tool_calls && !tool_calls.is_empty() { for tool_call in tool_calls { log::info!( "Agentic tool call [{}]: {} {}", iteration, tool_call.function.name, tool_call.function.arguments ); let result = self .execute_tool( &tool_call.function.name, &tool_call.function.arguments, backend, &None, "", 0, "", &opentelemetry::Context::new(), ) .await; messages.push(ChatMessage::tool_result(result)); } continue; } // No tool calls — this is the final answer final_content = response.content; break; } // If loop exhausted without final answer, ask for one if final_content.is_empty() { log::info!( "Agentic loop exhausted after {} iterations, requesting final answer", max_iter ); messages.push(ChatMessage::user( "Based on the context gathered, please write the final answer. 
Return ONLY the JSON object, no prose or code fences.", )); let (final_response, _, _) = backend .chat() .chat_with_tools(messages.clone(), vec![]) .await?; final_content = final_response.content.clone(); messages.push(final_response); } Ok(final_content) } /// Reverse geocode GPS coordinates to human-readable place names async fn reverse_geocode(&self, lat: f64, lon: f64) -> Option<String> { let url = format!( "https://nominatim.openstreetmap.org/reverse?format=json&lat={}&lon={}", lat, lon ); log::debug!("Reverse geocoding {}, {} via Nominatim", lat, lon); let client = reqwest::Client::new(); let response = match client .get(&url) .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent .send() .await { Ok(resp) => resp, Err(e) => { log::warn!("Geocoding network error for {}, {}: {}", lat, lon, e); return None; } }; if !response.status().is_success() { log::warn!( "Geocoding HTTP error for {}, {}: {}", lat, lon, response.status() ); return None; } let data: NominatimResponse = match response.json().await { Ok(d) => d, Err(e) => { log::warn!("Geocoding JSON parse error for {}, {}: {}", lat, lon, e); return None; } }; // Try to build a concise location name if let Some(addr) = data.address { let mut parts = Vec::new(); // Prefer city/town/village if let Some(city) = addr.city.or(addr.town).or(addr.village) { parts.push(city); } // Add state if available if let Some(state) = addr.state { parts.push(state); } if !parts.is_empty() { log::info!("Reverse geocoded {}, {} -> {}", lat, lon, parts.join(", ")); return Some(parts.join(", ")); } } // Fallback to display_name if structured address not available if let Some(ref display_name) = data.display_name { log::info!( "Reverse geocoded {}, {} -> {} (display_name)", lat, lon, display_name ); } else { log::warn!("Geocoding returned no address data for {}, {}", lat, lon); } data.display_name } } #[cfg(test)] mod tests { use super::*; use crate::ai::ollama::{ToolCall, ToolCallFunction}; #[test] fn 
build_tool_definitions_drops_gated_tools() { let opts = ToolGateOpts { has_vision: false, apollo_enabled: false, daily_summaries_present: false, calendar_present: false, location_history_present: false, faces_present: false, allow_agent_corrections: false, }; let tools = InsightGenerator::build_tool_definitions(opts); let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect(); // Always-on tools survive. assert!(names.contains(&"search_messages")); assert!(names.contains(&"get_sms_messages")); assert!(names.contains(&"get_file_tags")); assert!(names.contains(&"reverse_geocode")); assert!(names.contains(&"get_current_datetime")); assert!(names.contains(&"recall_entities")); assert!(names.contains(&"recall_facts_for_photo")); assert!(names.contains(&"recall_facts_for_entity")); assert!(names.contains(&"store_entity")); assert!(names.contains(&"store_fact")); // Gated tools are absent. assert!(!names.contains(&"describe_photo")); assert!(!names.contains(&"get_personal_place_at")); assert!(!names.contains(&"search_rag")); assert!(!names.contains(&"get_calendar_events")); assert!(!names.contains(&"get_location_history")); assert!(!names.contains(&"get_faces_in_photo")); // Agent-correction tools are absent without the gate. 
assert!(!names.contains(&"update_fact")); assert!(!names.contains(&"supersede_fact")); } #[test] fn build_tool_definitions_includes_gated_tools_when_present() { let opts = ToolGateOpts { has_vision: true, apollo_enabled: true, daily_summaries_present: true, calendar_present: true, location_history_present: true, faces_present: true, allow_agent_corrections: true, }; let tools = InsightGenerator::build_tool_definitions(opts); let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect(); assert!(names.contains(&"describe_photo")); assert!(names.contains(&"get_personal_place_at")); assert!(names.contains(&"search_rag")); assert!(names.contains(&"get_calendar_events")); assert!(names.contains(&"get_location_history")); assert!(names.contains(&"get_faces_in_photo")); assert!(names.contains(&"update_fact")); assert!(names.contains(&"supersede_fact")); } fn place(name: &str, description: &str) -> ApolloPlace { ApolloPlace { id: 1, name: name.to_string(), description: description.to_string(), lat: 0.0, lon: 0.0, radius_m: 200, category: None, } } #[test] fn compose_location_with_apollo_and_nominatim_and_description() { let s = compose_location_string( Some(place("Home", "House in Cambridge")), Some("Cambridge, MA".to_string()), 42.36, -71.06, ); assert_eq!(s, "Home (House in Cambridge) — near Cambridge, MA"); } #[test] fn compose_location_apollo_without_description_drops_parenthetical() { let s = compose_location_string( Some(place("Home", "")), Some("Cambridge, MA".to_string()), 42.36, -71.06, ); assert_eq!(s, "Home — near Cambridge, MA"); } #[test] fn compose_location_nominatim_only() { let s = compose_location_string(None, Some("Cambridge, MA".to_string()), 42.36, -71.06); assert_eq!(s, "Cambridge, MA"); } #[test] fn compose_location_apollo_only_no_description() { let s = compose_location_string(Some(place("Home", "")), None, 42.36, -71.06); assert_eq!(s, "Home"); } #[test] fn compose_location_falls_back_to_coordinates() { let s = 
compose_location_string(None, None, 42.3601, -71.0589); assert_eq!(s, "42.3601, -71.0589"); } #[test] fn combine_contexts_includes_tags_section_when_tags_present() { let result = InsightGenerator::combine_contexts( None, None, None, None, Some("vacation, hiking, mountains".to_string()), ); assert!(result.contains("## Tags"), "Should include Tags section"); assert!( result.contains("vacation, hiking, mountains"), "Should include tag names" ); } #[test] fn combine_contexts_omits_tags_section_when_no_tags() { let result = InsightGenerator::combine_contexts( Some("some messages".to_string()), None, None, None, None, // no tags ); assert!( !result.contains("## Tags"), "Should not include Tags section when None" ); assert!( result.contains("## Messages"), "Should still include Messages" ); } #[test] fn combine_contexts_returns_no_context_message_when_all_none() { let result = InsightGenerator::combine_contexts(None, None, None, None, None); assert_eq!(result, "No additional context available"); } // These tests assert the shape of the strings returned by the tool // implementations above. If a tool's output format changes, update the // tool AND the corresponding arm of `summarize_tool_result` — these // tests exist to make that coupling loud. #[test] fn summarize_errors_uniformly() { assert_eq!( InsightGenerator::summarize_tool_result("search_rag", "Error searching RAG: boom"), "error" ); assert_eq!( InsightGenerator::summarize_tool_result( "get_sms_messages", "Error fetching SMS messages: timeout" ), "error" ); } #[test] fn summarize_empty_results_uniformly() { assert_eq!( InsightGenerator::summarize_tool_result("search_rag", "No relevant messages found."), "empty (pivoted)" ); assert_eq!( InsightGenerator::summarize_tool_result("get_sms_messages", "No messages found."), "empty (pivoted)" ); assert_eq!( InsightGenerator::summarize_tool_result( "reverse_geocode", "Could not resolve coordinates to a place name." 
), "empty (pivoted)" ); assert_eq!( InsightGenerator::summarize_tool_result( "recall_facts_for_photo", "No knowledge facts found for this photo." ), "empty (pivoted)" ); } #[test] fn summarize_found_count_per_tool() { assert_eq!( InsightGenerator::summarize_tool_result( "get_sms_messages", "Found 7 messages:\n[2023-08-15 10:00] Sarah: hi" ), "7 messages" ); assert_eq!( InsightGenerator::summarize_tool_result( "search_messages", "Found 3 messages (mode: hybrid):\n\n[2023-08-15] Sarah — hi" ), "3 messages" ); assert_eq!( InsightGenerator::summarize_tool_result( "get_calendar_events", "Found 2 calendar events:\n[2023-08-15 10:00] Wedding" ), "2 events" ); assert_eq!( InsightGenerator::summarize_tool_result( "get_location_history", "Found 5 location records:\n[2023-08-15 10:00] 39.0, -120.0" ), "5 location records" ); } #[test] fn found_header_labels_truncation_honestly() { // Truncated: total exceeds what's listed below the header. assert_eq!( InsightGenerator::found_header(312, 60, "messages"), "Found 312 messages (showing first 60):" ); // Not truncated: plain header, no annotation noise. assert_eq!( InsightGenerator::found_header(7, 7, "messages"), "Found 7 messages:" ); assert_eq!( InsightGenerator::found_header(40, 20, "location records"), "Found 40 location records (showing first 20):" ); } #[test] fn summarize_parses_truncated_found_header() { // The "(showing first K)" annotation must not break the few-shot // trajectory summarizer's "Found N" count parsing. 
assert_eq!( InsightGenerator::summarize_tool_result( "get_sms_messages", "Found 312 messages (showing first 60):\n[2023-08-15 10:00] Sarah: hi" ), "312 messages" ); } fn make_search_hit( id: i64, contact: &str, body: &str, snippet: Option<&str>, type_: i32, ) -> SmsSearchHit { SmsSearchHit { message_id: id, contact_name: contact.to_string(), contact_address: "+15551234567".to_string(), body: body.to_string(), date: 1_700_000_000 + id * 86_400, type_, similarity_score: None, snippet: snippet.map(|s| s.to_string()), } } #[test] fn format_search_hits_falls_back_to_body_when_no_snippet() { // fts5 mode often returns no snippet for messages that didn't need // excerpting (whole body short, or semantic-only hit in hybrid). let hit = make_search_hit(1, "Sarah", "see you at the lake tomorrow", None, 1); let out = InsightGenerator::format_search_hits(&[hit], "fts5", false); assert!(out.starts_with("Found 1 messages (mode: fts5")); assert!(out.contains("see you at the lake tomorrow")); // Received message: contact is the sender. assert!(out.contains("Sarah →")); assert!(!out.contains("date-filtered")); } #[test] fn format_search_hits_labels_sent_direction() { // Sent messages must name the recipient — results can span multiple // conversations, and a sender-only label left them unattributable. let hit = make_search_hit(5, "Sarah", "on my way", None, 2); let out = InsightGenerator::format_search_hits(&[hit], "fts5", false); assert!(out.contains("→ Sarah —")); } #[test] fn format_search_hits_prefers_snippet_over_body_and_strips_marks() { let hit = make_search_hit( 2, "Sarah", "long body text that should be ignored once the server returns a focused snippet", Some("…at the lake tomorrow…"), 1, ); let out = InsightGenerator::format_search_hits(&[hit], "hybrid", true); // Snippet wins over body, tags stripped. 
assert!(out.contains("at the lake tomorrow")); assert!(!out.contains("")); assert!(!out.contains("")); assert!(!out.contains("long body text")); // Date-filter flag flows through to the header. assert!(out.contains("date-filtered")); } #[test] fn format_search_hits_surfaces_mms_attachment_only_matches() { // Regression guard: pre-snippet, MMS rows that matched via // message_parts_fts (filename / part text) had an empty `body` // and rendered as a blank preview to the LLM. let hit = make_search_hit(3, "Mom", "", Some("birthday_cake.jpg"), 1); let out = InsightGenerator::format_search_hits(&[hit], "fts5", false); assert!(out.contains("birthday_cake.jpg")); assert!(!out.contains("")); assert!(out.contains("Mom →")); } #[test] fn format_search_hits_empty_snippet_falls_back_to_body() { let hit = make_search_hit(4, "Dad", "fallback body", Some(""), 1); let out = InsightGenerator::format_search_hits(&[hit], "fts5", false); assert!(out.contains("fallback body")); } #[test] fn strip_mark_tags_handles_common_patterns() { assert_eq!( InsightGenerator::strip_mark_tags("plain text"), "plain text" ); assert_eq!( InsightGenerator::strip_mark_tags("…the lake…"), "…the lake…" ); // FTS5 highlights every match — multiple marks per snippet are normal. 
assert_eq!( InsightGenerator::strip_mark_tags("dinner at tahoe"), "dinner at tahoe" ); } #[test] fn summarize_search_rag_counts_hits() { let raw = "[2023-08-15] Sarah: venue confirmed\n\n[2023-08-14] Mom: travel plans\n\n[2023-08-13] Dad: weather"; assert_eq!( InsightGenerator::summarize_tool_result("search_rag", raw), "3 rag hits" ); } #[test] fn summarize_get_file_tags() { assert_eq!( InsightGenerator::summarize_tool_result("get_file_tags", "wedding, tahoe, 2023"), "3 tags" ); } #[test] fn summarize_describe_photo_truncates() { let raw = "A wedding ceremony at Lake Tahoe with about 40 guests seated in rows facing a lakeside arch decorated with white flowers."; let out = InsightGenerator::summarize_tool_result("describe_photo", raw); assert!(out.starts_with("described: \"")); assert!(out.contains("A wedding ceremony at Lake Tahoe")); assert!(out.ends_with("...\"")); } #[test] fn summarize_reverse_geocode_returns_place() { let out = InsightGenerator::summarize_tool_result("reverse_geocode", "South Lake Tahoe, CA, USA"); assert_eq!(out, "place: South Lake Tahoe, CA, USA"); } #[test] fn summarize_recall_entities_counts_lines() { let raw = "Known entities:\n- Sarah (person)\n- Tahoe (place)\n- Wedding 2023 (event)"; assert_eq!( InsightGenerator::summarize_tool_result("recall_entities", raw), "3 entities" ); } #[test] fn summarize_recall_facts_counts_lines() { let raw = "Knowledge for this photo:\n- Sarah: college friend\n- Tahoe: vacation spot"; assert_eq!( InsightGenerator::summarize_tool_result("recall_facts_for_photo", raw), "2 facts" ); } #[test] fn summarize_store_entity_extracts_id() { assert_eq!( InsightGenerator::summarize_tool_result( "store_entity", "Entity stored: ID:42 | person | Sarah | confidence:0.80" ), "stored id=42" ); } #[test] fn summarize_store_fact_extracts_id() { assert_eq!( InsightGenerator::summarize_tool_result( "store_fact", "Stored new fact: ID:17 | confidence:0.60" ), "stored id=17" ); assert_eq!( InsightGenerator::summarize_tool_result( 
"store_fact", "Corroborated existing fact: ID:17 | confidence:0.85" ), "stored id=17" ); } #[test] fn summarize_current_datetime() { assert_eq!( InsightGenerator::summarize_tool_result( "get_current_datetime", "Current date/time: 2024-01-15 12:00:00 PST (Monday)" ), "time noted" ); } #[test] fn summarize_unknown_tool_falls_back_to_char_count() { let out = InsightGenerator::summarize_tool_result("never_heard_of_it", "some output"); assert_eq!(out, "11 chars"); } #[test] fn build_system_content_uses_custom_prompt_verbatim_for_identity() { let out = InsightGenerator::build_system_content( Some("You are a journal writer in first person, warm and reflective."), None, "", 6, ); assert!( out.starts_with("You are a journal writer in first person, warm and reflective."), "custom prompt must lead the system content; got: {}", out.chars().take(200).collect::<String>(), ); assert!( !out.contains("personal photo memory assistant"), "framework identity must not leak when custom prompt is supplied" ); assert!(out.contains("Tool-use guidance")); assert!(out.contains("budget of 6")); } #[test] fn build_system_content_uses_neutral_default_when_no_custom() { let out = InsightGenerator::build_system_content(None, None, "", 6); assert!(out.contains("reconstructing a memory from a photo")); assert!(!out.contains("personal photo memory assistant")); assert!(out.contains("Tool-use guidance")); } #[test] fn build_system_content_includes_fewshot_and_owner_id() { let owner = "\n\nYour identity in the knowledge store: Alice (entity ID: 7)."; let fewshot = "## Examples\n\n### Example 1\n...\n\n---\n\n"; let out = InsightGenerator::build_system_content(None, Some(owner), fewshot, 6); assert!(out.contains("Alice (entity ID: 7)")); assert!(out.contains("## Examples")); } #[test] fn render_fewshot_empty_returns_empty_string() { assert!(InsightGenerator::render_fewshot_examples(&[]).is_empty()); } #[test] fn render_single_trajectory_walks_tool_calls_in_order() { let arguments = serde_json::json!({ 
"query": "wedding", "date": "2023-08-15" }); let msgs = vec![ ChatMessage::system("ignored"), ChatMessage::user("Photo file path: /photos/img.jpg\nDate taken: August 15, 2023"), ChatMessage { role: "assistant".to_string(), content: String::new(), tool_calls: Some(vec![ToolCall { function: ToolCallFunction { name: "search_rag".to_string(), arguments, }, id: None, }]), images: None, }, ChatMessage::tool_result("No relevant messages found."), ChatMessage { role: "assistant".to_string(), content: "Final title\n\nFinal body.".to_string(), tool_calls: None, images: None, }, ]; let out = InsightGenerator::render_single_trajectory(&msgs); assert!(out.contains("Input:")); assert!(out.contains("/photos/img.jpg")); assert!(out.contains("1. search_rag(")); assert!(out.contains("query=\"wedding\"")); assert!(out.contains("-> empty (pivoted)")); assert!(out.contains("Final insight: Final title")); } #[test] fn parse_title_body_standard_format() { let (t, b) = parse_title_body("Title: Summer at the Lake\n\nWe spent the afternoon..."); assert_eq!(t, "Summer at the Lake"); assert_eq!(b, "We spent the afternoon..."); } #[test] fn parse_title_body_single_newline() { let (t, b) = parse_title_body("Title: Morning Walk\nThe sun was rising..."); assert_eq!(t, "Morning Walk"); assert_eq!(b, "The sun was rising..."); } #[test] fn parse_title_body_lowercase_prefix() { let (t, b) = parse_title_body("title: Garden Party\n\nEveryone gathered..."); assert_eq!(t, "Garden Party"); assert_eq!(b, "Everyone gathered..."); } #[test] fn parse_title_body_strips_bold_wrapper() { let (t, b) = parse_title_body("**Title: A Day in the Woods**\n\nWe hiked the ridge trail."); assert_eq!(t, "A Day in the Woods"); assert_eq!(b, "We hiked the ridge trail."); } #[test] fn parse_title_body_strips_bold_label_only() { // Bold around just the label: "**Title:** X" let (t, b) = parse_title_body("**Title:** Garden Party\n\nEveryone gathered..."); assert_eq!(t, "Garden Party"); assert_eq!(b, "Everyone gathered..."); } 
#[test] fn parse_title_body_strips_heading_hashes() { let (t, b) = parse_title_body("## Title: Morning Walk\nThe sun was rising..."); assert_eq!(t, "Morning Walk"); assert_eq!(b, "The sun was rising..."); } #[test] fn parse_title_body_fallback_first_sentence() { let (t, b) = parse_title_body("A warm summer day. We gathered at the park for a picnic."); assert_eq!(t, "A warm summer day"); assert_eq!(b, "We gathered at the park for a picnic."); } #[test] fn parse_title_body_fallback_truncate() { let input = "A single long paragraph with no periods or title prefix that just keeps going on and on"; let (t, b) = parse_title_body(input); assert!(t.len() <= 60); assert_eq!(b, input); } /// Regression: hybrid mode was leaking the OpenRouter model override /// into the local llamacpp client, causing describe_image to send /// e.g. "google/gemini-3-flash-preview" to llama-swap (which 400s). #[test] fn resolve_backend_hybrid_does_not_leak_model_to_local_llamacpp() { use crate::ai::llamacpp::LlamaCppClient; let mut base = LlamaCppClient::new(Some("http://localhost:9292/v1".into()), Some("chat".into())); base.set_vision_model("vision".into()); base.set_embedding_model("embed".into()); let openrouter_model = "google/gemini-3-flash-preview"; let overrides_model: Option<String> = Some(openrouter_model.into()); let is_hybrid = true; // Replicate the resolve_backend local-client construction // (lines ~3686-3695 of this file). let mut lc = base.clone(); if !is_hybrid && let Some(ref m) = overrides_model { lc.primary_model = m.clone(); lc.set_vision_model(m.clone()); } // In hybrid mode the local client must keep its configured slots. assert_eq!( lc.vision_model, "vision", "hybrid mode must not override vision_model with OpenRouter model" ); assert_eq!( lc.primary_model, "chat", "hybrid mode must not override primary_model with OpenRouter model" ); assert_eq!(lc.embedding_model, "embed"); } }