diff --git a/src/face_watch.rs b/src/face_watch.rs index 87d0103..272ed8f 100644 --- a/src/face_watch.rs +++ b/src/face_watch.rs @@ -16,10 +16,11 @@ use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError}; use crate::exif; -use crate::faces::{FaceDao, InsertFaceDetectionInput}; +use crate::faces::{self, FaceDao, InsertFaceDetectionInput}; use crate::file_types; use crate::libraries::Library; use crate::memories::PathExcluder; +use crate::tags::TagDao; use log::{debug, info, warn}; use std::path::Path; use std::sync::{Arc, Mutex}; @@ -41,6 +42,7 @@ pub fn run_face_detection_pass( excluded_dirs: &[String], face_client: &FaceClient, face_dao: Arc>>, + tag_dao: Arc>>, candidates: Vec, ) { if !face_client.is_enabled() { @@ -94,13 +96,22 @@ pub fn run_face_detection_pass( let permit_sem = sem.clone(); let face_client = face_client.clone(); let face_dao = face_dao.clone(); + let tag_dao = tag_dao.clone(); let library_root = library_root.clone(); handles.push(tokio::spawn(async move { // acquire_owned would let us drop the permit explicitly // before await points; for a one-shot call into Apollo // the simpler bounded acquire is enough. let _permit = permit_sem.acquire().await.expect("face semaphore"); - process_one(library_id, &library_root, cand, &face_client, face_dao).await; + process_one( + library_id, + &library_root, + cand, + &face_client, + face_dao, + tag_dao, + ) + .await; })); } for h in handles { @@ -117,6 +128,7 @@ async fn process_one( cand: FaceCandidate, face_client: &FaceClient, face_dao: Arc>>, + tag_dao: Arc>>, ) { let abs = Path::new(library_root).join(&cand.rel_path); // Read the bytes off disk in a blocking-friendly task. Filesystem IO @@ -148,60 +160,85 @@ async fn process_one( match face_client.detect(bytes, meta).await { Ok(resp) => { - // Hold the dao lock only across the synchronous DB writes. 
- let mut dao = face_dao.lock().expect("face dao"); - if resp.faces.is_empty() { - if let Err(e) = dao.mark_status( - &ctx, - library_id, - &cand.content_hash, - &cand.rel_path, - "no_faces", - &resp.model_version, - ) { - warn!( - "face_watch: mark no_faces failed for {}: {:?}", - cand.rel_path, e - ); - } - debug!( - "face_watch: {} → no faces (model {})", - cand.rel_path, resp.model_version - ); - } else { - let face_count = resp.faces.len(); - for face in &resp.faces { - let emb = match face.decode_embedding() { - Ok(b) => b, - Err(e) => { - warn!("face_watch: bad embedding for {}: {:?}", cand.rel_path, e); - continue; - } - }; - if let Err(e) = dao.store_detection( + // Stage 1: persist detections, holding the dao lock only + // across synchronous DB writes. + let mut stored_for_autobind: Vec<(i32, Vec)> = Vec::new(); + { + let mut dao = face_dao.lock().expect("face dao"); + if resp.faces.is_empty() { + if let Err(e) = dao.mark_status( &ctx, - InsertFaceDetectionInput { - library_id, - content_hash: cand.content_hash.clone(), - rel_path: cand.rel_path.clone(), - bbox: Some((face.bbox.x, face.bbox.y, face.bbox.w, face.bbox.h)), - embedding: Some(emb), - confidence: Some(face.confidence), - source: "auto".to_string(), - person_id: None, - status: "detected".to_string(), - model_version: resp.model_version.clone(), - }, + library_id, + &cand.content_hash, + &cand.rel_path, + "no_faces", + &resp.model_version, ) { warn!( - "face_watch: store_detection failed for {}: {:?}", + "face_watch: mark no_faces failed for {}: {:?}", cand.rel_path, e ); } + debug!( + "face_watch: {} → no faces (model {})", + cand.rel_path, resp.model_version + ); + } else { + let face_count = resp.faces.len(); + for face in &resp.faces { + let emb = match face.decode_embedding() { + Ok(b) => b, + Err(e) => { + warn!("face_watch: bad embedding for {}: {:?}", cand.rel_path, e); + continue; + } + }; + // Decode the f32 vector once for auto-bind comparison. 
+ let emb_floats = faces::decode_embedding_bytes(&emb); + match dao.store_detection( + &ctx, + InsertFaceDetectionInput { + library_id, + content_hash: cand.content_hash.clone(), + rel_path: cand.rel_path.clone(), + bbox: Some((face.bbox.x, face.bbox.y, face.bbox.w, face.bbox.h)), + embedding: Some(emb), + confidence: Some(face.confidence), + source: "auto".to_string(), + person_id: None, + status: "detected".to_string(), + model_version: resp.model_version.clone(), + }, + ) { + Ok(row) => { + if let Some(floats) = emb_floats { + stored_for_autobind.push((row.id, floats)); + } + } + Err(e) => warn!( + "face_watch: store_detection failed for {}: {:?}", + cand.rel_path, e + ), + } + } + info!( + "face_watch: {} → {} face(s) ({}ms, {})", + cand.rel_path, face_count, resp.duration_ms, resp.model_version + ); } - info!( - "face_watch: {} → {} face(s) ({}ms, {})", - cand.rel_path, face_count, resp.duration_ms, resp.model_version + } + + // Stage 2: auto-bind newly-stored faces against same-named + // people-tags. Done outside the dao lock so the lookups don't + // serialize with concurrent detect tasks. + if !stored_for_autobind.is_empty() { + try_auto_bind( + &ctx, + &cand.rel_path, + &resp.model_version, + stored_for_autobind, + &tag_dao, + &face_dao, ); } } @@ -243,6 +280,137 @@ async fn process_one( } } +/// Auto-bind newly-detected faces to a same-named person, when a tag on the +/// photo unambiguously identifies one. Driven by `FACE_AUTOBIND_MIN_COS` +/// (default 0.4): the new face's embedding must reach this cosine +/// similarity against the L2-normalized mean of the person's existing +/// faces. The first face for a person binds unconditionally — there's +/// nothing to compare against, and the alternative ("never bind without +/// a reference") would mean bootstrap never kicks off. +/// +/// Multi-match (the photo carries tags for two different known persons) +/// is intentionally a no-op — we can't tell which face is which without +/// additional matching. 
Those faces stay unassigned for the cluster +/// suggester (Phase 6) to handle. +fn try_auto_bind( + ctx: &opentelemetry::Context, + rel_path: &str, + model_version: &str, + new_faces: Vec<(i32, Vec)>, // (face_id, decoded embedding) + tag_dao: &Arc>>, + face_dao: &Arc>>, +) { + // 1. Pull the photo's tags. + let tag_names: Vec = { + let mut td = tag_dao.lock().expect("tag dao"); + match td.get_tags_for_path(ctx, rel_path) { + Ok(tags) => tags.into_iter().map(|t| t.name).collect(), + Err(e) => { + warn!( + "face_watch: get_tags_for_path failed for {}: {:?}", + rel_path, e + ); + return; + } + } + }; + if tag_names.is_empty() { + return; + } + + // 2. Find tags that map to existing persons (case-insensitive). + let person_for_tag: std::collections::HashMap = { + let mut fd = face_dao.lock().expect("face dao"); + match fd.find_persons_by_names_ci(ctx, &tag_names) { + Ok(m) => m, + Err(e) => { + warn!( + "face_watch: find_persons_by_names_ci failed for {}: {:?}", + rel_path, e + ); + return; + } + } + }; + + // 3. Multi-match: ambiguous, skip. Single match: candidate person. + let unique_person_ids: std::collections::HashSet = + person_for_tag.values().copied().collect(); + if unique_person_ids.len() != 1 { + if !unique_person_ids.is_empty() { + debug!( + "face_watch: {} carries tags for {} different persons; skipping auto-bind", + rel_path, + unique_person_ids.len() + ); + } + return; + } + let person_id = *unique_person_ids.iter().next().expect("nonempty set"); + + let threshold: f32 = std::env::var("FACE_AUTOBIND_MIN_COS") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|t: &f32| *t >= 0.0 && *t <= 1.0) + .unwrap_or(0.4); + + // 4. Reference embedding (if any) under the same model_version. 
+ let reference: Option> = { + let mut fd = face_dao.lock().expect("face dao"); + match fd.person_reference_embedding(ctx, person_id, model_version) { + Ok(r) => r, + Err(e) => { + warn!( + "face_watch: person_reference_embedding failed for person {}: {:?}", + person_id, e + ); + return; + } + } + }; + + // 5. Bind each new face that meets the criterion. Hold the lock once + // for the whole batch; assign_face_to_person uses its own short + // transaction internally. + let mut fd = face_dao.lock().expect("face dao"); + for (face_id, emb) in new_faces { + let bind = match &reference { + None => { + // Person has no faces yet — first one wins so bootstrap + // can ever produce a usable reference. After this row + // commits, future faces evaluate against it. + debug!( + "face_watch: auto-binding first face {} → person {} (no reference yet)", + face_id, person_id + ); + true + } + Some(ref_vec) => { + let sim = faces::cosine_similarity(&emb, ref_vec); + if sim >= threshold { + debug!( + "face_watch: auto-binding face {} → person {} (cos={:.3} ≥ {:.3})", + face_id, person_id, sim, threshold + ); + true + } else { + debug!( + "face_watch: leaving face {} unassigned (cos={:.3} < {:.3} for person {})", + face_id, sim, threshold, person_id + ); + false + } + } + }; + if bind && let Err(e) = fd.assign_face_to_person(ctx, face_id, person_id) { + warn!( + "face_watch: assign_face_to_person failed (face={}, person={}): {:?}", + face_id, person_id, e + ); + } + } +} + /// Drop candidates whose path matches the watcher's `EXCLUDED_DIRS` rules. 
/// Pulled out for unit testing — the same `PathExcluder` /memories uses, /// just applied at the face-detect candidate set instead of the memories diff --git a/src/faces.rs b/src/faces.rs index 8400f76..1ca6eec 100644 --- a/src/faces.rs +++ b/src/faces.rs @@ -393,6 +393,41 @@ pub trait FaceDao: Send + Sync { library_id: i32, rel_path: &str, ) -> anyhow::Result>; + + // ── Auto-bind support (Phase 4) ───────────────────────────────────── + + /// Map case-insensitive person names → person id. Used by the + /// auto-bind path to look up "is this tag a known person?". Names + /// passed in are matched LOWER(persons.name); collisions resolve to + /// the person with the lowest id (stable, but the UNIQUE constraint + /// on persons.name COLLATE NOCASE prevents collisions in practice). + fn find_persons_by_names_ci( + &mut self, + ctx: &opentelemetry::Context, + names: &[String], + ) -> anyhow::Result>; + + /// Mean of a person's existing face embeddings. Returns the L2- + /// normalized 512-d reference vector, or None when the person has + /// no detected faces yet (auto-bind treats that as "first face wins + /// unconditionally"). Filters by the same model_version that produced + /// the candidate embedding so cross-model averaging never happens. + fn person_reference_embedding( + &mut self, + ctx: &opentelemetry::Context, + person_id: i32, + model_version: &str, + ) -> anyhow::Result>>; + + /// Set face_detections.person_id and, when the target person has no + /// cover_face_id yet, set it to this face. One transaction so a + /// half-bound state can't survive a SQLite write error. 
+ fn assign_face_to_person( + &mut self, + ctx: &opentelemetry::Context, + face_id: i32, + person_id: i32, + ) -> anyhow::Result<()>; } /// Free-standing input struct; the DAO copies it into [`InsertFaceDetection`] @@ -1154,6 +1189,184 @@ impl FaceDao for SqliteFaceDao { .with_context(|| "resolve content_hash") }) } + + fn find_persons_by_names_ci( + &mut self, + ctx: &opentelemetry::Context, + names: &[String], + ) -> anyhow::Result> { + if names.is_empty() { + return Ok(std::collections::HashMap::new()); + } + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "find_persons_by_names_ci", |span| { + span.set_attribute(KeyValue::new("count", names.len() as i64)); + // Lowercase comparison both sides. Use sql_query to keep the + // bind list dynamic without fighting Diesel's type system on + // the LOWER() function. + use diesel::sql_types::*; + let placeholders = std::iter::repeat_n("?", names.len()) + .collect::>() + .join(","); + let sql = format!( + "SELECT id, LOWER(name) AS lower_name FROM persons \ + WHERE LOWER(name) IN ({}) ORDER BY id ASC", + placeholders + ); + #[derive(QueryableByName)] + struct Row { + #[diesel(sql_type = Integer)] + id: i32, + #[diesel(sql_type = Text)] + lower_name: String, + } + let mut q = diesel::sql_query(sql).into_boxed(); + for n in names { + q = q.bind::(n.to_lowercase()); + } + let rows = q + .load::(conn.deref_mut()) + .with_context(|| "find_persons_by_names_ci")?; + // Lowest id wins on collision (UNIQUE COLLATE NOCASE on the + // table prevents that today, but the deduplication is a + // defensive belt-and-braces). 
+ let mut out = std::collections::HashMap::with_capacity(rows.len()); + for r in rows { + out.entry(r.lower_name).or_insert(r.id); + } + Ok(out) + }) + } + + fn person_reference_embedding( + &mut self, + ctx: &opentelemetry::Context, + person_id: i32, + model_version: &str, + ) -> anyhow::Result>> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "person_reference_embedding", |span| { + span.set_attribute(KeyValue::new("person_id", person_id as i64)); + span.set_attribute(KeyValue::new("model_version", model_version.to_string())); + // Pull only the embedding bytes; we average them in Rust. A + // SQL aggregate over 512-d vectors isn't meaningfully faster + // and would tie us to a specific embedding length. + let blobs: Vec>> = face_detections::table + .filter(face_detections::person_id.eq(person_id)) + .filter(face_detections::status.eq("detected")) + .filter(face_detections::model_version.eq(model_version)) + .select(face_detections::embedding) + .load(conn.deref_mut()) + .with_context(|| "load person embeddings")?; + let vectors: Vec> = blobs + .into_iter() + .filter_map(|b| b.and_then(|bytes| decode_embedding_bytes(&bytes))) + .collect(); + if vectors.is_empty() { + return Ok(None); + } + Ok(Some(mean_normalized(&vectors))) + }) + } + + fn assign_face_to_person( + &mut self, + ctx: &opentelemetry::Context, + face_id: i32, + person_id: i32, + ) -> anyhow::Result<()> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "update", "assign_face_to_person", |span| { + span.set_attribute(KeyValue::new("face_id", face_id as i64)); + span.set_attribute(KeyValue::new("person_id", person_id as i64)); + conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| { + diesel::update(face_detections::table.find(face_id)) + .set(face_detections::person_id.eq(person_id)) + .execute(tx) + .with_context(|| "set face person_id")?; + // If this person has no cover yet, claim this face. 
// ── Embedding helpers ───────────────────────────────────────────────────────

/// Decode a 2048-byte little-endian f32 BLOB into a Vec of length 512.
///
/// Returns `None` when `bytes` is not exactly 2048 bytes long, so callers can
/// treat "no usable embedding" the same as "no embedding at all".
pub(crate) fn decode_embedding_bytes(bytes: &[u8]) -> Option<Vec<f32>> {
    const DIM: usize = 512;
    const WIDTH: usize = std::mem::size_of::<f32>();
    if bytes.len() != DIM * WIDTH {
        return None;
    }
    Some(
        bytes
            .chunks_exact(WIDTH)
            .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
            .collect(),
    )
}

/// Component-wise mean of the input vectors, re-normalized to unit length
/// (the "average direction").
///
/// * Empty input returns an empty vector instead of panicking.
/// * The first vector fixes the dimension; vectors of any other length are
///   skipped rather than indexed out of bounds or silently truncated.
/// * A zero-magnitude mean is returned as-is (no division by zero).
fn mean_normalized(vectors: &[Vec<f32>]) -> Vec<f32> {
    let Some(first) = vectors.first() else {
        return Vec::new();
    };
    let dim = first.len();
    let mut acc = vec![0.0f32; dim];
    let mut used = 0usize;
    for v in vectors.iter().filter(|v| v.len() == dim) {
        for (slot, x) in acc.iter_mut().zip(v) {
            *slot += *x;
        }
        used += 1;
    }
    // `used` is at least 1: the first vector always matches its own length.
    let n = used as f32;
    for x in &mut acc {
        *x /= n;
    }
    let norm = acc.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        for x in &mut acc {
            *x /= norm;
        }
    }
    acc
}

/// Cosine similarity of two embeddings. Both must be the same length;
/// neither needs to be pre-normalized.
///
/// Returns 0.0 — never NaN — on length mismatch, empty input, zero-magnitude
/// input, or any NaN/infinite component; the auto-bind path interprets 0.0
/// as "no useful similarity, leave unassigned".
///
/// Sums are accumulated in `f64` so that squaring very large or very small
/// `f32` components cannot overflow to infinity or underflow to zero, and
/// the result is clamped to `[-1.0, 1.0]` against rounding drift.
pub(crate) fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let (mut dot, mut na, mut nb) = (0.0f64, 0.0f64, 0.0f64);
    for (&x, &y) in a.iter().zip(b) {
        let (x, y) = (f64::from(x), f64::from(y));
        dot += x * y;
        na += x * x;
        nb += y * y;
    }
    let denom = na.sqrt() * nb.sqrt();
    // Written positively so a NaN denominator also takes the early return.
    if !(denom > 0.0 && denom.is_finite()) {
        return 0.0;
    }
    let sim = dot / denom;
    if sim.is_finite() {
        sim.clamp(-1.0, 1.0) as f32
    } else {
        0.0
    }
}
+ pub already_exists: bool, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapCandidatesResponse { + pub candidates: Vec, +} + +#[derive(Deserialize, Debug)] +pub struct BootstrapPersonsReq { + pub names: Vec, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapPersonsResponse { + pub created: Vec, + pub skipped: Vec, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapSkipped { + pub name: String, + pub reason: String, +} + +/// Conservative "this tag *might* be a person name" heuristic. False +/// negatives are fine — the operator confirms in the UI before any row +/// is created. False positives are also fine for the same reason; the +/// goal is just to default sensible candidates to checked. +/// +/// Rules: +/// - 1–2 whitespace-separated words +/// - Each word starts with an uppercase character +/// - No digits anywhere (rejects "Trip 2018", "2024", etc.) +/// - Single-word names not on a small denylist of common non-person +/// tags (cat, christmas, beach, ...). Two-word names skip the +/// denylist because a real two-word person name is the dominant +/// case ("Sarah Smith") and false-blocking it is worse than false- +/// accepting "Sunset Walk". 
/// Heuristic: does this tag look like it could be a person's name?
///
/// Accepts one or two whitespace-separated words, each starting with an
/// uppercase character and containing no ASCII digit. A single word is
/// additionally rejected when its lowercase form is on the non-person
/// denylist; two-word tags are never checked against that list.
pub(crate) fn looks_like_person(raw: &str) -> bool {
    const NON_PERSON_TAGS: &[&str] = &[
        // Pets / animals
        "cat",
        "dog",
        "kitten",
        "puppy",
        "bird",
        "fish",
        "pet",
        "pets",
        // Events / occasions
        "birthday",
        "christmas",
        "halloween",
        "easter",
        "thanksgiving",
        "wedding",
        "anniversary",
        "vacation",
        "holiday",
        "party",
        "trip",
        "graduation",
        "concert",
        // Places (generic)
        "home",
        "work",
        "beach",
        "park",
        "hotel",
        "restaurant",
        "office",
        "house",
        "garden",
        // Subjects / styles
        "food",
        "sunset",
        "sunrise",
        "landscape",
        "portrait",
        "selfie",
        "nature",
        "flowers",
        "flower",
        "snow",
        "rain",
        "sky",
        // Buckets
        "untagged",
        "favorites",
        "favourites",
        "misc",
        "other",
        "random",
    ];

    // A word qualifies when it is capitalized and digit-free.
    let well_formed = |word: &str| {
        word.chars().next().is_some_and(char::is_uppercase)
            && !word.chars().any(|c| c.is_ascii_digit())
    };

    // Splitting on whitespace also discards leading/trailing blanks, so an
    // empty or all-blank tag yields zero words and falls into the last arm.
    let words: Vec<&str> = raw.split_whitespace().collect();
    match words.as_slice() {
        &[only] => {
            well_formed(only) && !NON_PERSON_TAGS.contains(&only.to_lowercase().as_str())
        }
        &[given, family] => well_formed(given) && well_formed(family),
        _ => false,
    }
}
+ let tags_with_counts = { + let mut td = tag_dao.lock().expect("tag dao lock"); + match crate::tags::TagDao::get_all_tags(&mut *td, &span_context, None) { + Ok(t) => t, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + } + }; + + // Group by lowercase name. Pick the most-frequent capitalization for + // the display name (ties broken by first-seen). + struct Group { + display: String, + display_freq: i64, + total_count: i64, + } + let mut groups: HashMap = HashMap::new(); + for (count, tag) in tags_with_counts { + let lower = tag.name.to_lowercase(); + let g = groups.entry(lower).or_insert_with(|| Group { + display: tag.name.clone(), + display_freq: 0, + total_count: 0, + }); + g.total_count += count; + if count > g.display_freq { + g.display = tag.name.clone(); + g.display_freq = count; + } + } + + // Cross-reference against existing persons (bulk one-query lookup). + let lower_names: Vec = groups.keys().cloned().collect(); + let existing = { + let mut fd = face_dao.lock().expect("face dao lock"); + match fd.find_persons_by_names_ci(&span_context, &lower_names) { + Ok(m) => m, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + } + }; + + let mut candidates: Vec = groups + .into_iter() + .map(|(lower, g)| BootstrapCandidate { + looks_like_person: looks_like_person(&g.display), + already_exists: existing.contains_key(&lower), + name: g.display, + normalized_name: lower, + usage_count: g.total_count, + }) + .collect(); + // Sort: persons-first heuristic by descending count, then alphabetical. + // Persons-likely candidates surface near the top so the user doesn't + // scroll past dozens of "vacation"-style tags to find them. 
+ candidates.sort_by(|a, b| { + b.looks_like_person + .cmp(&a.looks_like_person) + .then(b.usage_count.cmp(&a.usage_count)) + .then(a.normalized_name.cmp(&b.normalized_name)) + }); + + HttpResponse::Ok().json(BootstrapCandidatesResponse { candidates }) +} + +async fn bootstrap_persons_handler( + _: Claims, + request: HttpRequest, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.bootstrap_persons", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let mut created: Vec = Vec::new(); + let mut skipped: Vec = Vec::new(); + + let mut dao = face_dao.lock().expect("face dao lock"); + + // Pre-fetch the existing-name set so a duplicate request reports + // "already exists" (skipped) rather than firing N inserts that all + // 409 against the UNIQUE COLLATE NOCASE constraint. + let lower_names: Vec = body.names.iter().map(|n| n.to_lowercase()).collect(); + let existing = match dao.find_persons_by_names_ci(&span_context, &lower_names) { + Ok(m) => m, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + }; + + for name in &body.names { + let trimmed = name.trim(); + if trimmed.is_empty() { + skipped.push(BootstrapSkipped { + name: name.clone(), + reason: "empty name".into(), + }); + continue; + } + let lower = trimmed.to_lowercase(); + if existing.contains_key(&lower) { + skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: "person already exists".into(), + }); + continue; + } + match dao.create_person( + &span_context, + &CreatePersonReq { + name: trimmed.to_string(), + notes: None, + entity_id: None, + }, + /*from_tag*/ true, + ) { + Ok(p) => created.push(p), + Err(e) => { + if is_unique_violation(&e) { + // Race with a concurrent create; treat as skipped. 
+ skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: "person already exists".into(), + }); + } else { + skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: format!("{:#}", e), + }); + } + } + } + } + + HttpResponse::Ok().json(BootstrapPersonsResponse { created, skipped }) +} + // ── Stats / list ──────────────────────────────────────────────────────────── #[derive(Deserialize)] @@ -1773,6 +2280,269 @@ mod tests { ); } + // ── Phase 4: bootstrap heuristic + cosine + DAO support ───────────── + + #[test] + fn looks_like_person_accepts_typical_names() { + assert!(looks_like_person("Cameron")); + assert!(looks_like_person("Sarah Smith")); + assert!(looks_like_person("Mary Jane")); + // Non-ASCII title-cased single word still counts. + assert!(looks_like_person("Renée")); + } + + #[test] + fn looks_like_person_rejects_obvious_non_people() { + // Digits, lowercase, three-or-more words, denylist hits. + assert!(!looks_like_person("2018")); + assert!(!looks_like_person("Trip 2018")); + assert!(!looks_like_person("trip")); + assert!(!looks_like_person("Birthday Party Cake")); + assert!(!looks_like_person("cat")); + assert!(!looks_like_person("Cat")); // denied even when title-cased + assert!(!looks_like_person("Christmas")); + assert!(!looks_like_person("home")); + assert!(!looks_like_person("")); + assert!(!looks_like_person(" ")); + } + + #[test] + fn looks_like_person_two_words_skips_denylist() { + // Two-word names get a pass on the single-word denylist — + // "Sunset Walk" is much more likely a real album than a person, + // but false-accepting is fine because the operator confirms. + // What matters is we don't false-reject "Sarah Smith". + assert!(looks_like_person("Sunset Walk")); + assert!(looks_like_person("Sarah Smith")); + } + + #[test] + fn cosine_similarity_known_vectors() { + // Identical vectors → 1.0; orthogonal → 0.0; opposite → -1.0. 
+ let a = vec![1.0, 0.0, 0.0]; + let b = vec![1.0, 0.0, 0.0]; + let c = vec![0.0, 1.0, 0.0]; + let d = vec![-1.0, 0.0, 0.0]; + assert!((cosine_similarity(&a, &b) - 1.0).abs() < 1e-6); + assert!(cosine_similarity(&a, &c).abs() < 1e-6); + assert!((cosine_similarity(&a, &d) - (-1.0)).abs() < 1e-6); + // Mismatched length → 0.0 (defensive, not NaN). + assert_eq!(cosine_similarity(&a, &[1.0, 0.0]), 0.0); + // Empty input → 0.0. + assert_eq!(cosine_similarity(&[], &[]), 0.0); + // Zero vector → 0.0 (denominator guard, not NaN). + let zero = vec![0.0, 0.0, 0.0]; + assert_eq!(cosine_similarity(&a, &zero), 0.0); + } + + #[test] + fn decode_embedding_bytes_round_trip() { + // 512×f32 LE = 2048 bytes. Anything else returns None. + let v: Vec = (0..512).map(|i| i as f32 * 0.001).collect(); + let mut bytes = Vec::with_capacity(2048); + for f in &v { + bytes.extend_from_slice(&f.to_le_bytes()); + } + let decoded = decode_embedding_bytes(&bytes).expect("decode"); + assert_eq!(decoded.len(), 512); + for (a, b) in v.iter().zip(decoded.iter()) { + assert!((a - b).abs() < 1e-9); + } + assert_eq!(decode_embedding_bytes(&[0u8; 100]), None); + assert_eq!(decode_embedding_bytes(&[0u8; 4096]), None); + } + + #[test] + fn find_persons_by_names_ci_groups_case() { + let mut dao = fresh_dao(); + let _ = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: None, + entity_id: None, + }, + false, + ) + .unwrap(); + let _ = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Bob".into(), + notes: None, + entity_id: None, + }, + false, + ) + .unwrap(); + + // Mix of cases + a name that has no person row. + let m = dao + .find_persons_by_names_ci(&ctx(), &["alice".into(), "BOB".into(), "charlie".into()]) + .expect("lookup"); + assert!(m.contains_key("alice")); + assert!(m.contains_key("bob")); + assert!(!m.contains_key("charlie")); + // Empty input is a no-op (don't fire a SQL with zero binds). 
+ assert!( + dao.find_persons_by_names_ci(&ctx(), &[]) + .unwrap() + .is_empty() + ); + } + + #[test] + fn person_reference_embedding_filters_by_model_version() { + // A person with embeddings from buffalo_l shouldn't have its + // reference contaminated by a future buffalo_xl row. The auto- + // bind path passes the candidate's model_version so old rows + // never reach the average. + let mut dao = fresh_dao(); + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Subject".into(), + notes: None, + entity_id: None, + }, + false, + ) + .unwrap(); + + // 512-d unit vector along axis 0, written for buffalo_l. + let mut emb_l: Vec = vec![0.0; 512]; + emb_l[0] = 1.0; + let mut emb_l_bytes = Vec::with_capacity(2048); + for f in &emb_l { + emb_l_bytes.extend_from_slice(&f.to_le_bytes()); + } + // 512-d unit vector along axis 1, written for some-other model. + let mut emb_xl: Vec = vec![0.0; 512]; + emb_xl[1] = 1.0; + let mut emb_xl_bytes = Vec::with_capacity(2048); + for f in &emb_xl { + emb_xl_bytes.extend_from_slice(&f.to_le_bytes()); + } + + for (bytes, mv) in [(emb_l_bytes, "buffalo_l"), (emb_xl_bytes, "buffalo_xl")] { + let _ = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: format!("h-{mv}"), + rel_path: format!("p-{mv}.jpg"), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(bytes), + confidence: Some(0.9), + source: "auto".into(), + person_id: Some(p.id), + status: "detected".into(), + model_version: mv.into(), + }, + ) + .unwrap(); + } + + let ref_l = dao + .person_reference_embedding(&ctx(), p.id, "buffalo_l") + .unwrap() + .expect("buffalo_l ref"); + // Reference for buffalo_l should match emb_l (axis-0 unit). 
+ assert!((ref_l[0] - 1.0).abs() < 1e-5, "axis 0 should be ~1.0"); + assert!(ref_l[1].abs() < 1e-5, "axis 1 should be ~0.0"); + + // Unknown model_version → None, not a cross-version average. + assert!( + dao.person_reference_embedding(&ctx(), p.id, "buffalo_xxxl") + .unwrap() + .is_none() + ); + } + + #[test] + fn assign_face_to_person_sets_cover_when_unset() { + let mut dao = fresh_dao(); + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Cover".into(), + notes: None, + entity_id: None, + }, + false, + ) + .unwrap(); + assert!(p.cover_face_id.is_none()); + + // Insert two faces unbound. + let face1 = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h1".into(), + rel_path: "p1.jpg".into(), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "auto".into(), + person_id: None, + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap(); + let face2 = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h2".into(), + rel_path: "p2.jpg".into(), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "auto".into(), + person_id: None, + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap(); + + // First assignment claims the cover. + dao.assign_face_to_person(&ctx(), face1.id, p.id).unwrap(); + let p_after_first = dao.get_person(&ctx(), p.id).unwrap().unwrap(); + assert_eq!(p_after_first.cover_face_id, Some(face1.id)); + + // Second assignment must NOT overwrite — operator may have + // hand-picked the cover after the first auto-bind. 
+ dao.assign_face_to_person(&ctx(), face2.id, p.id).unwrap(); + let p_after_second = dao.get_person(&ctx(), p.id).unwrap().unwrap(); + assert_eq!( + p_after_second.cover_face_id, + Some(face1.id), + "cover must remain face1 after second auto-bind" + ); + } + #[test] fn person_crud_roundtrip() { let mut dao = fresh_dao(); diff --git a/src/main.rs b/src/main.rs index 4e628d0..708ccf7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1827,6 +1827,12 @@ fn watch_files( let face_dao = Arc::new(Mutex::new( Box::new(faces::SqliteFaceDao::new()) as Box )); + // tag_dao for the watcher's auto-bind path. Independent of the + // request-handler tag_dao instance — both end up pointing at the + // same SQLite file via SqliteTagDao::default(). + let watcher_tag_dao = Arc::new(Mutex::new( + Box::new(SqliteTagDao::default()) as Box + )); let mut last_quick_scan = SystemTime::now(); let mut last_full_scan = SystemTime::now(); @@ -1853,6 +1859,7 @@ fn watch_files( Arc::clone(&exif_dao), Arc::clone(&preview_dao), Arc::clone(&face_dao), + Arc::clone(&watcher_tag_dao), face_client.clone(), &excluded_dirs, None, @@ -1873,6 +1880,7 @@ fn watch_files( Arc::clone(&exif_dao), Arc::clone(&preview_dao), Arc::clone(&face_dao), + Arc::clone(&watcher_tag_dao), face_client.clone(), &excluded_dirs, Some(check_since), @@ -1922,6 +1930,7 @@ fn process_new_files( exif_dao: Arc>>, preview_dao: Arc>>, face_dao: Arc>>, + tag_dao: Arc>>, face_client: crate::ai::face_client::FaceClient, excluded_dirs: &[String], modified_since: Option, @@ -2112,6 +2121,7 @@ fn process_new_files( excluded_dirs, &face_client, Arc::clone(&face_dao), + Arc::clone(&tag_dao), candidates, ); }