insight-chat: add get_faces_in_photo agentic tool
The LLM had no path to see face_detections data — get_file_tags returns user-applied tags, but a face that has been detected and bound to a person via the embedding-cluster auto-bind path doesn't always have a matching tag. The new tool joins face_detections with persons by content_hash and returns bound names + bboxes, plus unidentified faces (so smaller models can count the people in a photo without inferring the count from a visual description). The tool is only offered when face_detections contains at least one detected face, using the same has_any_* gating pattern as daily_summaries.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -83,6 +83,9 @@ pub struct InsightGenerator {
|
||||
search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
|
||||
tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
|
||||
|
||||
// Face detections (used by the get_faces_in_photo agentic tool)
|
||||
face_dao: Arc<Mutex<Box<dyn crate::faces::FaceDao>>>,
|
||||
|
||||
// Knowledge memory
|
||||
knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
|
||||
|
||||
@@ -100,6 +103,7 @@ pub struct ToolGateOpts {
|
||||
pub daily_summaries_present: bool,
|
||||
pub calendar_present: bool,
|
||||
pub location_history_present: bool,
|
||||
pub faces_present: bool,
|
||||
}
|
||||
|
||||
impl InsightGenerator {
|
||||
@@ -116,6 +120,7 @@ impl InsightGenerator {
|
||||
search_dao: Arc<Mutex<Box<dyn SearchHistoryDao>>>,
|
||||
tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
|
||||
knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>>,
|
||||
face_dao: Arc<Mutex<Box<dyn crate::faces::FaceDao>>>,
|
||||
libraries: Vec<Library>,
|
||||
) -> Self {
|
||||
Self {
|
||||
@@ -131,6 +136,7 @@ impl InsightGenerator {
|
||||
search_dao,
|
||||
tag_dao,
|
||||
knowledge_dao,
|
||||
face_dao,
|
||||
libraries,
|
||||
}
|
||||
}
|
||||
@@ -174,12 +180,20 @@ impl InsightGenerator {
|
||||
.expect("Unable to lock DailySummaryDao");
|
||||
dao.has_any_summaries(&cx).unwrap_or(false)
|
||||
};
|
||||
let faces_present = {
|
||||
let mut dao = self
|
||||
.face_dao
|
||||
.lock()
|
||||
.expect("Unable to lock FaceDao");
|
||||
dao.has_any_faces(&cx).unwrap_or(false)
|
||||
};
|
||||
ToolGateOpts {
|
||||
has_vision,
|
||||
apollo_enabled: self.apollo_enabled(),
|
||||
daily_summaries_present,
|
||||
calendar_present,
|
||||
location_history_present,
|
||||
faces_present,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1529,6 +1543,7 @@ Return ONLY the summary, nothing else."#,
|
||||
"get_calendar_events" => self.tool_get_calendar_events(arguments, cx).await,
|
||||
"get_location_history" => self.tool_get_location_history(arguments, cx).await,
|
||||
"get_file_tags" => self.tool_get_file_tags(arguments, cx).await,
|
||||
"get_faces_in_photo" => self.tool_get_faces_in_photo(arguments, cx).await,
|
||||
"describe_photo" => self.tool_describe_photo(ollama, image_base64).await,
|
||||
"reverse_geocode" => self.tool_reverse_geocode(arguments).await,
|
||||
"get_personal_place_at" => self.tool_get_personal_place_at(arguments).await,
|
||||
@@ -2149,6 +2164,82 @@ Return ONLY the summary, nothing else."#,
|
||||
}
|
||||
}
|
||||
|
||||
/// Tool: get_faces_in_photo — list face detections + person names for
|
||||
/// the given file path. Resolves rel_path → content_hash via FaceDao,
|
||||
/// then queries face_detections joined with persons (status='detected'
|
||||
/// only). Returns a compact bullet list keyed for human-LLM readability.
|
||||
async fn tool_get_faces_in_photo(
|
||||
&self,
|
||||
args: &serde_json::Value,
|
||||
cx: &opentelemetry::Context,
|
||||
) -> String {
|
||||
let file_path = match args.get("file_path").and_then(|v| v.as_str()) {
|
||||
Some(p) if !p.trim().is_empty() => p.trim().to_string(),
|
||||
_ => return "Error: missing required parameter 'file_path'".to_string(),
|
||||
};
|
||||
log::info!("tool_get_faces_in_photo: file_path='{}'", file_path);
|
||||
|
||||
// Resolve content_hash from any library that has this rel_path.
|
||||
// Walk libraries in their declared order and take the first hit.
|
||||
let mut content_hash: Option<String> = None;
|
||||
for lib in &self.libraries {
|
||||
let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao");
|
||||
if let Ok(Some(h)) = dao.resolve_content_hash(cx, lib.id, &file_path) {
|
||||
content_hash = Some(h);
|
||||
break;
|
||||
}
|
||||
}
|
||||
let Some(content_hash) = content_hash else {
|
||||
return "No content_hash found for that file path (the photo may not be indexed yet, \
|
||||
or the path doesn't match any library)."
|
||||
.to_string();
|
||||
};
|
||||
|
||||
let faces = {
|
||||
let mut dao = self.face_dao.lock().expect("Unable to lock FaceDao");
|
||||
match dao.list_for_content_hash(cx, &content_hash) {
|
||||
Ok(rows) => rows,
|
||||
Err(e) => return format!("Error querying faces: {}", e),
|
||||
}
|
||||
};
|
||||
|
||||
if faces.is_empty() {
|
||||
return "No faces detected in this photo.".to_string();
|
||||
}
|
||||
|
||||
// Render: bound faces grouped by person first, then unbound. The
|
||||
// model uses the bound names directly; the unbound count + bbox
|
||||
// helps it count people without naming them.
|
||||
let bound: Vec<&_> = faces.iter().filter(|f| f.person_name.is_some()).collect();
|
||||
let unbound: Vec<&_> = faces.iter().filter(|f| f.person_name.is_none()).collect();
|
||||
|
||||
let mut out = format!("Found {} face(s) in this photo:\n", faces.len());
|
||||
for f in &bound {
|
||||
out.push_str(&format!(
|
||||
"- {} (confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {})\n",
|
||||
f.person_name.as_deref().unwrap_or("?"),
|
||||
f.confidence,
|
||||
f.bbox_x,
|
||||
f.bbox_y,
|
||||
f.bbox_w,
|
||||
f.bbox_h,
|
||||
f.source,
|
||||
));
|
||||
}
|
||||
for f in &unbound {
|
||||
out.push_str(&format!(
|
||||
"- (unidentified) confidence {:.2}, bbox x={:.2} y={:.2} w={:.2} h={:.2}, source: {}\n",
|
||||
f.confidence,
|
||||
f.bbox_x,
|
||||
f.bbox_y,
|
||||
f.bbox_w,
|
||||
f.bbox_h,
|
||||
f.source,
|
||||
));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Tool: describe_photo — generate a visual description of the photo
|
||||
async fn tool_describe_photo(
|
||||
&self,
|
||||
@@ -2733,6 +2824,25 @@ Return ONLY the summary, nothing else."#,
|
||||
));
|
||||
}
|
||||
|
||||
if opts.faces_present {
|
||||
tools.push(Tool::function(
|
||||
"get_faces_in_photo",
|
||||
"Return the faces detected in this photo with their bounding boxes and assigned person names \
|
||||
(when bound). Each face carries `person_name` (string or null), `bbox` ({x, y, w, h} normalized 0–1), \
|
||||
`confidence` (0–1), and `source` ('auto' from detector or 'manual' from a user-drawn bbox). \
|
||||
More authoritative than `get_file_tags` for counting people in a photo or naming who is present, \
|
||||
since it returns detected-but-unbound faces too. \
|
||||
Example: `{file_path: \"2019/06/IMG_4242.jpg\"}`.",
|
||||
serde_json::json!({
|
||||
"type": "object",
|
||||
"required": ["file_path"],
|
||||
"properties": {
|
||||
"file_path": { "type": "string", "description": "File path of the photo." }
|
||||
}
|
||||
}),
|
||||
));
|
||||
}
|
||||
|
||||
tools.push(Tool::function(
|
||||
"recall_entities",
|
||||
"Search the persistent knowledge memory for previously learned people, places, events, or things. \
|
||||
@@ -3748,6 +3858,7 @@ mod tests {
|
||||
daily_summaries_present: false,
|
||||
calendar_present: false,
|
||||
location_history_present: false,
|
||||
faces_present: false,
|
||||
};
|
||||
let tools = InsightGenerator::build_tool_definitions(opts);
|
||||
let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect();
|
||||
@@ -3769,6 +3880,7 @@ mod tests {
|
||||
assert!(!names.contains(&"search_rag"));
|
||||
assert!(!names.contains(&"get_calendar_events"));
|
||||
assert!(!names.contains(&"get_location_history"));
|
||||
assert!(!names.contains(&"get_faces_in_photo"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -3779,6 +3891,7 @@ mod tests {
|
||||
daily_summaries_present: true,
|
||||
calendar_present: true,
|
||||
location_history_present: true,
|
||||
faces_present: true,
|
||||
};
|
||||
let tools = InsightGenerator::build_tool_definitions(opts);
|
||||
let names: Vec<&str> = tools.iter().map(|t| t.function.name.as_str()).collect();
|
||||
@@ -3787,6 +3900,7 @@ mod tests {
|
||||
assert!(names.contains(&"search_rag"));
|
||||
assert!(names.contains(&"get_calendar_events"));
|
||||
assert!(names.contains(&"get_location_history"));
|
||||
assert!(names.contains(&"get_faces_in_photo"));
|
||||
}
|
||||
|
||||
fn place(name: &str, description: &str) -> ApolloPlace {
|
||||
|
||||
@@ -14,6 +14,7 @@ use image_api::database::{
|
||||
SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
|
||||
connect,
|
||||
};
|
||||
use image_api::faces::{FaceDao, SqliteFaceDao};
|
||||
use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
|
||||
use image_api::libraries::{self, Library};
|
||||
use image_api::tags::{SqliteTagDao, TagDao};
|
||||
@@ -182,6 +183,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
|
||||
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
|
||||
let face_dao: Arc<Mutex<Box<dyn FaceDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(SqliteFaceDao::new())));
|
||||
|
||||
// Pass the full library set so `resolve_full_path` probes every root,
|
||||
// even when --library restricts the walk. A rel_path shared across
|
||||
@@ -199,6 +202,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
search_dao,
|
||||
tag_dao,
|
||||
knowledge_dao,
|
||||
face_dao,
|
||||
all_libs.clone(),
|
||||
);
|
||||
|
||||
|
||||
18
src/faces.rs
18
src/faces.rs
@@ -503,6 +503,10 @@ pub trait FaceDao: Send + Sync {
|
||||
into: i32,
|
||||
) -> anyhow::Result<Person>;
|
||||
|
||||
/// Cheap presence probe — returns true iff at least one face has been
|
||||
/// detected (excluding marker rows). Used by chat-tool gating.
|
||||
fn has_any_faces(&mut self, ctx: &opentelemetry::Context) -> anyhow::Result<bool>;
|
||||
|
||||
/// Resolve `(library_id, rel_path)` → `content_hash` via image_exif.
|
||||
/// Returns None when the photo hasn't been EXIF-indexed yet (no row
|
||||
/// in image_exif) or when the row exists but content_hash is NULL.
|
||||
@@ -1432,6 +1436,20 @@ impl FaceDao for SqliteFaceDao {
|
||||
})
|
||||
}
|
||||
|
||||
fn has_any_faces(&mut self, ctx: &opentelemetry::Context) -> anyhow::Result<bool> {
|
||||
use anyhow::Context;
|
||||
let mut conn = self.connection.lock().expect("face dao lock");
|
||||
trace_db_call(ctx, "query", "has_any_faces", |_span| {
|
||||
face_detections::table
|
||||
.filter(face_detections::status.eq("detected"))
|
||||
.select(face_detections::id)
|
||||
.first::<i32>(conn.deref_mut())
|
||||
.optional()
|
||||
.map(|x| x.is_some())
|
||||
.with_context(|| "has_any_faces query")
|
||||
})
|
||||
}
|
||||
|
||||
fn resolve_content_hash(
|
||||
&mut self,
|
||||
ctx: &opentelemetry::Context,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::ai::apollo_client::ApolloClient;
|
||||
use crate::ai::face_client::FaceClient;
|
||||
use crate::faces;
|
||||
use crate::ai::insight_chat::{ChatLockMap, InsightChatService};
|
||||
use crate::ai::openrouter::OpenRouterClient;
|
||||
use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
|
||||
@@ -206,6 +207,8 @@ impl Default for AppState {
|
||||
Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
|
||||
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
|
||||
let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));
|
||||
|
||||
// Load base path and ensure the primary library row reflects it.
|
||||
let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
|
||||
@@ -232,6 +235,7 @@ impl Default for AppState {
|
||||
search_dao.clone(),
|
||||
tag_dao.clone(),
|
||||
knowledge_dao,
|
||||
face_dao.clone(),
|
||||
libraries_vec.clone(),
|
||||
);
|
||||
|
||||
@@ -348,6 +352,8 @@ impl AppState {
|
||||
Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
|
||||
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
|
||||
let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
|
||||
Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));
|
||||
|
||||
// Initialize test InsightGenerator with all data sources
|
||||
let base_path_str = base_path.to_string_lossy().to_string();
|
||||
@@ -371,6 +377,7 @@ impl AppState {
|
||||
search_dao.clone(),
|
||||
tag_dao.clone(),
|
||||
knowledge_dao,
|
||||
face_dao.clone(),
|
||||
vec![test_lib],
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user