diff --git a/src/ai/insight_generator.rs b/src/ai/insight_generator.rs index ebba8be..632b504 100644 --- a/src/ai/insight_generator.rs +++ b/src/ai/insight_generator.rs @@ -83,6 +83,9 @@ pub struct InsightGenerator { search_dao: Arc>>, tag_dao: Arc>>, + // Face detections (used by the get_faces_in_photo agentic tool) + face_dao: Arc>>, + // Knowledge memory knowledge_dao: Arc>>, @@ -100,6 +103,7 @@ pub struct ToolGateOpts { pub daily_summaries_present: bool, pub calendar_present: bool, pub location_history_present: bool, + pub faces_present: bool, } impl InsightGenerator { @@ -116,6 +120,7 @@ impl InsightGenerator { search_dao: Arc>>, tag_dao: Arc>>, knowledge_dao: Arc>>, + face_dao: Arc>>, libraries: Vec, ) -> Self { Self { @@ -131,6 +136,7 @@ impl InsightGenerator { search_dao, tag_dao, knowledge_dao, + face_dao, libraries, } } @@ -174,12 +180,20 @@ impl InsightGenerator { .expect("Unable to lock DailySummaryDao"); dao.has_any_summaries(&cx).unwrap_or(false) }; + let faces_present = { + let mut dao = self + .face_dao + .lock() + .expect("Unable to lock FaceDao"); + dao.has_any_faces(&cx).unwrap_or(false) + }; ToolGateOpts { has_vision, apollo_enabled: self.apollo_enabled(), daily_summaries_present, calendar_present, location_history_present, + faces_present, } } @@ -1529,6 +1543,7 @@ Return ONLY the summary, nothing else."#, "get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await, "get_location_history" => self.tool_get_location_history(arguments, cx).await, "get_file_tags" => self.tool_get_file_tags(arguments, cx).await, + "get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await, "describe_photo" => self.tool_describe_photo(ollama, image_base64).await, "reverse_geocode" => self.tool_reverse_geocode(arguments).await, "get_personal_place_at" => self.tool_get_personal_place_at(arguments).await, @@ -2149,6 +2164,82 @@ Return ONLY the summary, nothing else."#, } } + /// Tool: get_faces_in_photo — list face detections + person names for + /// the given file path. Resolves rel_path → content_hash via FaceDao, + /// then queries face_detections joined with persons (status='detected' + /// only). Returns a compact bullet list keyed for human-LLM readability. + async fn tool_get_faces_in_photo( + &self, + args: &serde_json::Value, + cx: &opentelemetry::Context, + ) -> String { + let file_path = match args.get("file_path").and_then(|v| v.as_str()) { + Some(p) if !p.trim().is_empty() => p.trim().to_string(), + _ => return "Error: missing required parameter 'file_path'".to_string(), + }; + log::info!("tool_get_faces_in_photo: file_path='{}'", file_path); + + // Resolve content_hash from any library that has this rel_path. + // Walk libraries in their declared order and take the first hit. + let mut content_hash: Option = None; + for lib in &self.libraries { + let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao"); + if let Ok(Some(h)) = dao.resolve_content_hash(cx, lib.id, &file_path) { + content_hash = Some(h); + break; + } + } + let Some(content_hash) = content_hash else { + return "No content_hash found for that file path (the photo may not be indexed yet, \ + or the path doesn't match any library)." + .to_string(); + }; + + let faces = { + let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao"); + match dao.list_for_content_hash(cx, &content_hash) { + Ok(rows) => rows, + Err(e) => return format!("Error querying faces: {}", e), + } + }; + + if faces.is_empty() { + return "No faces detected in this photo.".to_string(); + } + + // Render: bound faces grouped by person first, then unbound. The + // model uses the bound names directly; the unbound count + bbox + // helps it count people without naming them. + let bound: Vec<&_> = faces.iter().filter(|f| f.person_name.is_some()).collect(); + let unbound: Vec<&_> = faces.iter().filter(|f| f.person_name.is_none()).collect(); + + let mut out = format!("Found {} face(s) in this photo:\n", faces.len()); + for f in &bound { + out.push_str(&format!( + "- {} (confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {})\n", + f.person_name.as_deref().unwrap_or("?"), + f.confidence, + f.bbox_x, + f.bbox_y, + f.bbox_w, + f.bbox_h, + f.source, + )); + } + for f in &unbound { + out.push_str(&format!( + "- (unidentified) confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {}\n", + f.confidence, + f.bbox_x, + f.bbox_y, + f.bbox_w, + f.bbox_h, + f.source, + )); + } + out + } + /// Tool: describe_photo — generate a visual description of the photo async fn tool_describe_photo( &self, @@ -2733,6 +2824,25 @@ Return ONLY the summary, nothing else."#, )); } + if opts.faces_present { + tools.push(Tool::function( + "get_faces_in_photo", + "Return the faces detected in this photo with their bounding boxes and assigned person names \ + (when bound). Each face carries `person_name` (string or null), `bbox` ({x, y, w, h} normalized 0–1), \ + `confidence` (0–1), and `source` ('auto' from detector or 'manual' from a user-drawn bbox). \ + More authoritative than `get_file_tags` for counting people in a photo or naming who is present, \ + since it returns detected-but-unbound faces too. \ + Example: `{file_path: \"2019/06/IMG_4242.jpg\"}`.", + serde_json::json!({ + "type": "object", + "required": ["file_path"], + "properties": { + "file_path": { "type": "string", "description": "File path of the photo." } + } + }), + )); + } + tools.push(Tool::function( "recall_entities", "Search the persistent knowledge memory for previously learned people, places, events, or things. \ @@ -3748,6 +3858,7 @@ mod tests { daily_summaries_present: false, calendar_present: false, location_history_present: false, + faces_present: false, }; let tools = InsightGenerator::build_tool_definitions(opts); let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect(); @@ -3769,6 +3880,7 @@ mod tests { assert!(!names.contains(&"search_rag")); assert!(!names.contains(&"get_calendar_events")); assert!(!names.contains(&"get_location_history")); + assert!(!names.contains(&"get_faces_in_photo")); } #[test] @@ -3779,6 +3891,7 @@ mod tests { daily_summaries_present: true, calendar_present: true, location_history_present: true, + faces_present: true, }; let tools = InsightGenerator::build_tool_definitions(opts); let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect(); @@ -3787,6 +3900,7 @@ mod tests { assert!(names.contains(&"search_rag")); assert!(names.contains(&"get_calendar_events")); assert!(names.contains(&"get_location_history")); + assert!(names.contains(&"get_faces_in_photo")); } fn place(name: &str, description: &str) -> ApolloPlace { diff --git a/src/bin/populate_knowledge.rs b/src/bin/populate_knowledge.rs index 9c55e60..e72a27b 100644 --- a/src/bin/populate_knowledge.rs +++ b/src/bin/populate_knowledge.rs @@ -14,6 +14,7 @@ use image_api::database::{ SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao, connect, }; +use image_api::faces::{FaceDao, SqliteFaceDao}; use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS}; use image_api::libraries::{self, Library}; use image_api::tags::{SqliteTagDao, TagDao}; @@ -182,6 +183,8 @@ async fn main() -> anyhow::Result<()> { Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))); let knowledge_dao: Arc>> = Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); + let face_dao: Arc>> = + Arc::new(Mutex::new(Box::new(SqliteFaceDao::new()))); // Pass the full library set so `resolve_full_path` probes every root, // even when --library restricts the walk. A rel_path shared across @@ -199,6 +202,7 @@ async fn main() -> anyhow::Result<()> { search_dao, tag_dao, knowledge_dao, + face_dao, all_libs.clone(), ); diff --git a/src/faces.rs b/src/faces.rs index fb2fb87..d92b7ae 100644 --- a/src/faces.rs +++ b/src/faces.rs @@ -503,6 +503,10 @@ pub trait FaceDao: Send + Sync { into: i32, ) -> anyhow::Result; + /// Cheap presence probe — returns true iff at least one face has been + /// detected (excluding marker rows). Used by chat-tool gating. + fn has_any_faces(&mut self, ctx: &opentelemetry::Context) -> anyhow::Result; + /// Resolve `(library_id, rel_path)` → `content_hash` via image_exif. /// Returns None when the photo hasn't been EXIF-indexed yet (no row /// in image_exif) or when the row exists but content_hash is NULL. @@ -1432,6 +1436,20 @@ impl FaceDao for SqliteFaceDao { }) } + fn has_any_faces(&mut self, ctx: &opentelemetry::Context) -> anyhow::Result { + use anyhow::Context; + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "has_any_faces", |_span| { + face_detections::table + .filter(face_detections::status.eq("detected")) + .select(face_detections::id) + .first::(conn.deref_mut()) + .optional() + .map(|x| x.is_some()) + .with_context(|| "has_any_faces query") + }) + } + fn resolve_content_hash( &mut self, ctx: &opentelemetry::Context, diff --git a/src/state.rs b/src/state.rs index abbcc56..49d0b4c 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,5 +1,6 @@ use crate::ai::apollo_client::ApolloClient; use crate::ai::face_client::FaceClient; +use crate::faces; use crate::ai::insight_chat::{ChatLockMap, InsightChatService}; use crate::ai::openrouter::OpenRouterClient; use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient}; @@ -206,6 +207,8 @@ impl Default for AppState { Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))); let knowledge_dao: Arc>> = Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); + let face_dao: Arc>> = + Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new()))); // Load base path and ensure the primary library row reflects it. let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env"); @@ -232,6 +235,7 @@ impl Default for AppState { search_dao.clone(), tag_dao.clone(), knowledge_dao, + face_dao.clone(), libraries_vec.clone(), ); @@ -348,6 +352,8 @@ impl AppState { Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))); let knowledge_dao: Arc>> = Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); + let face_dao: Arc>> = + Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new()))); // Initialize test InsightGenerator with all data sources let base_path_str = base_path.to_string_lossy().to_string(); @@ -371,6 +377,7 @@ impl AppState { search_dao.clone(), tag_dao.clone(), knowledge_dao, + face_dao.clone(), vec![test_lib], );