insight-chat: add get_faces_in_photo agentic tool

The LLM had no path to see face_detections data — get_file_tags
returns user-applied tags, but a face that's been detected and bound
to a person via the embedding-cluster auto-bind path doesn't always
have a matching tag. The new tool joins face_detections with persons
by content_hash and returns bound names + bboxes, plus unidentified
faces (so smaller models can count people in the photo without
inferring from a visual description).

Gated on face_detections being non-empty via the same has_any_*
pattern as daily_summaries.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-05-07 17:43:16 -04:00
parent 388eb22cd2
commit b64a5bec28
4 changed files with 143 additions and 0 deletions

View File

@@ -83,6 +83,9 @@ pub struct InsightGenerator {
search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
// Face detections (used by the get_faces_in_photo agentic tool)
face_dao: Arc<Mutex<Box<dyn crate::faces::FaceDao>>>,
// Knowledge memory
knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
@@ -100,6 +103,7 @@ pub struct ToolGateOpts {
pub daily_summaries_present: bool,
pub calendar_present: bool,
pub location_history_present: bool,
pub faces_present: bool,
}
impl InsightGenerator {
@@ -116,6 +120,7 @@ impl InsightGenerator {
search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
face_dao: Arc<Mutex<Box<dyn crate::faces::FaceDao>>>,
libraries: Vec<Library>,
) -> Self {
Self {
@@ -131,6 +136,7 @@ impl InsightGenerator {
search_dao,
tag_dao,
knowledge_dao,
face_dao,
libraries,
}
}
@@ -174,12 +180,20 @@ impl InsightGenerator {
.expect("Unable to lock DailySummaryDao");
dao.has_any_summaries(&cx).unwrap_or(false)
};
let faces_present = {
let mut dao = self
.face_dao
.lock()
.expect("Unable to lock FaceDao");
dao.has_any_faces(&cx).unwrap_or(false)
};
ToolGateOpts {
has_vision,
apollo_enabled: self.apollo_enabled(),
daily_summaries_present,
calendar_present,
location_history_present,
faces_present,
}
}
@@ -1529,6 +1543,7 @@ Return ONLY the summary, nothing else."#,
"get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await,
"get_location_history" => self.tool_get_location_history(arguments, cx).await,
"get_file_tags" => self.tool_get_file_tags(arguments, cx).await,
"get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await,
"describe_photo" => self.tool_describe_photo(ollama, image_base64).await,
"reverse_geocode" => self.tool_reverse_geocode(arguments).await,
"get_personal_place_at" => self.tool_get_personal_place_at(arguments).await,
@@ -2149,6 +2164,82 @@ Return ONLY the summary, nothing else."#,
}
}
/// Tool: get_faces_in_photo — list face detections + person names for
/// the given file path. Resolves rel_path → content_hash via FaceDao,
/// then queries face_detections joined with persons (status='detected'
/// only). Returns a compact bullet list keyed for human-LLM readability.
///
/// Arguments (JSON): `file_path` (required, non-blank string).
/// Returns a human/LLM-readable string; all failure modes (missing arg,
/// unresolvable path, DB error) are reported as error strings rather
/// than panics, so the chat loop can surface them to the model.
async fn tool_get_faces_in_photo(
    &self,
    args: &serde_json::Value,
    cx: &opentelemetry::Context,
) -> String {
    // Validate the single required argument up front.
    let file_path = match args.get("file_path").and_then(|v| v.as_str()) {
        Some(p) if !p.trim().is_empty() => p.trim().to_string(),
        _ => return "Error: missing required parameter 'file_path'".to_string(),
    };
    log::info!("tool_get_faces_in_photo: file_path='{}'", file_path);
    // Resolve content_hash from any library that has this rel_path.
    // Walk libraries in their declared order and take the first hit.
    // Lock the DAO once for the whole probe instead of re-locking per
    // library; there is no .await inside this scope, so holding the
    // std::sync::Mutex guard here is safe.
    let content_hash = {
        let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao");
        self.libraries.iter().find_map(|lib| {
            dao.resolve_content_hash(cx, lib.id, &file_path)
                .ok()
                .flatten()
        })
    };
    let Some(content_hash) = content_hash else {
        return "No content_hash found for that file path (the photo may not be indexed yet, \
            or the path doesn't match any library)."
            .to_string();
    };
    // Fetch every detection row for the photo; DB errors become
    // tool-visible error strings.
    let faces = {
        let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao");
        match dao.list_for_content_hash(cx, &content_hash) {
            Ok(rows) => rows,
            Err(e) => return format!("Error querying faces: {}", e),
        }
    };
    if faces.is_empty() {
        return "No faces detected in this photo.".to_string();
    }
    // Render: bound faces grouped by person first, then unbound. The
    // model uses the bound names directly; the unbound count + bbox
    // helps it count people without naming them. A single partition
    // pass replaces two separate filter scans.
    let (bound, unbound): (Vec<_>, Vec<_>) =
        faces.iter().partition(|f| f.person_name.is_some());
    let mut out = format!("Found {} face(s) in this photo:\n", faces.len());
    for f in bound {
        out.push_str(&format!(
            "- {} (confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {})\n",
            f.person_name.as_deref().unwrap_or("?"),
            f.confidence,
            f.bbox_x,
            f.bbox_y,
            f.bbox_w,
            f.bbox_h,
            f.source,
        ));
    }
    for f in unbound {
        out.push_str(&format!(
            "- (unidentified) confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {}\n",
            f.confidence,
            f.bbox_x,
            f.bbox_y,
            f.bbox_w,
            f.bbox_h,
            f.source,
        ));
    }
    out
}
/// Tool: describe_photo — generate a visual description of the photo
async fn tool_describe_photo(
&self,
@@ -2733,6 +2824,25 @@ Return ONLY the summary, nothing else."#,
));
}
if opts.faces_present {
tools.push(Tool::function(
"get_faces_in_photo",
"Return the faces detected in this photo with their bounding boxes and assigned person names \
(when bound). Each face carries `person_name` (string or null), `bbox` ({x, y, w, h} normalized 0–1), \
`confidence` (0–1), and `source` ('auto' from detector or 'manual' from a user-drawn bbox). \
More authoritative than `get_file_tags` for counting people in a photo or naming who is present, \
since it returns detected-but-unbound faces too. \
Example: `{file_path: \"2019/06/IMG_4242.jpg\"}`.",
serde_json::json!({
"type": "object",
"required": ["file_path"],
"properties": {
"file_path": { "type": "string", "description": "File path of the photo." }
}
}),
));
}
tools.push(Tool::function(
"recall_entities",
"Search the persistent knowledge memory for previously learned people, places, events, or things. \
@@ -3748,6 +3858,7 @@ mod tests {
daily_summaries_present: false,
calendar_present: false,
location_history_present: false,
faces_present: false,
};
let tools = InsightGenerator::build_tool_definitions(opts);
let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect();
@@ -3769,6 +3880,7 @@ mod tests {
assert!(!names.contains(&"search_rag"));
assert!(!names.contains(&"get_calendar_events"));
assert!(!names.contains(&"get_location_history"));
assert!(!names.contains(&"get_faces_in_photo"));
}
#[test]
@@ -3779,6 +3891,7 @@ mod tests {
daily_summaries_present: true,
calendar_present: true,
location_history_present: true,
faces_present: true,
};
let tools = InsightGenerator::build_tool_definitions(opts);
let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect();
@@ -3787,6 +3900,7 @@ mod tests {
assert!(names.contains(&"search_rag"));
assert!(names.contains(&"get_calendar_events"));
assert!(names.contains(&"get_location_history"));
assert!(names.contains(&"get_faces_in_photo"));
}
fn place(name: &str, description: &str) -> ApolloPlace {

View File

@@ -14,6 +14,7 @@ use image_api::database::{
SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
connect,
};
use image_api::faces::{FaceDao, SqliteFaceDao};
use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
use image_api::libraries::{self, Library};
use image_api::tags::{SqliteTagDao, TagDao};
@@ -182,6 +183,8 @@ async fn main() -> anyhow::Result<()> {
Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let face_dao: Arc<Mutex<Box<dyn FaceDao>>> =
Arc::new(Mutex::new(Box::new(SqliteFaceDao::new())));
// Pass the full library set so `resolve_full_path` probes every root,
// even when --library restricts the walk. A rel_path shared across
@@ -199,6 +202,7 @@ async fn main() -> anyhow::Result<()> {
search_dao,
tag_dao,
knowledge_dao,
face_dao,
all_libs.clone(),
);

View File

@@ -503,6 +503,10 @@ pub trait FaceDao: Send + Sync {
into: i32,
) -> anyhow::Result<Person>;
/// Cheap presence probe — returns true iff at least one face has been
/// detected (excluding marker rows). Used by chat-tool gating.
fn has_any_faces(&mut self, ctx: &opentelemetry::Context) -> anyhow::Result<bool>;
/// Resolve `(library_id, rel_path)` → `content_hash` via image_exif.
/// Returns None when the photo hasn't been EXIF-indexed yet (no row
/// in image_exif) or when the row exists but content_hash is NULL.
@@ -1432,6 +1436,20 @@ impl FaceDao for SqliteFaceDao {
})
}
/// Cheap presence probe: true iff at least one row in face_detections
/// has status 'detected'. Selects a single id and checks for existence
/// rather than counting, so the query can stop at the first match.
fn has_any_faces(&mut self, ctx: &opentelemetry::Context) -> anyhow::Result<bool> {
    use anyhow::Context;
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "has_any_faces", |_span| {
        // LIMIT-1 existence probe; `.optional()` turns NotFound into None.
        let first_id: Option<i32> = face_detections::table
            .filter(face_detections::status.eq("detected"))
            .select(face_detections::id)
            .first(conn.deref_mut())
            .optional()
            .with_context(|| "has_any_faces query")?;
        Ok(first_id.is_some())
    })
}
fn resolve_content_hash(
&mut self,
ctx: &opentelemetry::Context,

View File

@@ -1,5 +1,6 @@
use crate::ai::apollo_client::ApolloClient;
use crate::ai::face_client::FaceClient;
use crate::faces;
use crate::ai::insight_chat::{ChatLockMap, InsightChatService};
use crate::ai::openrouter::OpenRouterClient;
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
@@ -206,6 +207,8 @@ impl Default for AppState {
Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));
// Load base path and ensure the primary library row reflects it.
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
@@ -232,6 +235,7 @@ impl Default for AppState {
search_dao.clone(),
tag_dao.clone(),
knowledge_dao,
face_dao.clone(),
libraries_vec.clone(),
);
@@ -348,6 +352,8 @@ impl AppState {
Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));
// Initialize test InsightGenerator with all data sources
let base_path_str = base_path.to_string_lossy().to_string();
@@ -371,6 +377,7 @@ impl AppState {
search_dao.clone(),
tag_dao.clone(),
knowledge_dao,
face_dao.clone(),
vec![test_lib],
);