faces: add person_id filter to /faces/embeddings; remove tag-bootstrap #89

Merged
cameron merged 1 commit from feature/faces-tab into master 2026-05-10 15:49:19 +00:00

View File

@@ -47,7 +47,7 @@ use std::sync::{Arc, Mutex};
/// Visual identity. The optional `entity_id` bridges this person to an
/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT
/// auto-bridged at creation — only when the user explicitly links them in
/// the management UI, or when bootstrap finds an exact-name match.
/// the management UI.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Person {
pub id: i32,
@@ -366,6 +366,10 @@ pub struct EmbeddingsQuery {
pub limit: i64,
#[serde(default)]
pub offset: i64,
/// Restrict to one person's faces. Used by the similar-unassigned
/// suggester to fetch a centroid pool. When set, takes precedence
/// over `unassigned` (the more specific filter wins).
pub person_id: Option<i32>,
}
fn default_unassigned() -> bool {
@@ -429,6 +433,7 @@ pub trait FaceDao: Send + Sync {
ctx: &opentelemetry::Context,
library_id: Option<i32>,
unassigned: bool,
person_id: Option<i32>,
limit: i64,
offset: i64,
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>>;
@@ -863,6 +868,7 @@ impl FaceDao for SqliteFaceDao {
ctx: &opentelemetry::Context,
library_id: Option<i32>,
unassigned: bool,
person_id: Option<i32>,
limit: i64,
offset: i64,
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>> {
@@ -876,7 +882,13 @@ impl FaceDao for SqliteFaceDao {
if let Some(lib) = library_id {
query = query.filter(face_detections::library_id.eq(lib));
}
if unassigned {
// person_id is the more specific filter — when both it and
// `unassigned` are supplied, prefer the explicit person id and
// ignore the IS NULL constraint (which would always return
// empty for an assigned person).
if let Some(pid) = person_id {
query = query.filter(face_detections::person_id.eq(pid));
} else if unassigned {
query = query.filter(face_detections::person_id.is_null());
}
let rows = query
@@ -1676,18 +1688,10 @@ where
.route(web::get().to(list_persons_handler::<D>))
.route(web::post().to(create_person_handler::<D>)),
)
.service(
web::resource("/persons/bootstrap")
.route(web::post().to(bootstrap_persons_handler::<D>)),
)
.service(
web::resource("/persons/ignore-bucket")
.route(web::post().to(ignore_bucket_handler::<D>)),
)
.service(
web::resource("/tags/people-bootstrap-candidates")
.route(web::get().to(bootstrap_candidates_handler::<D>)),
)
.service(
web::resource("/persons/{id}")
.route(web::get().to(get_person_handler::<D>))
@@ -1702,340 +1706,6 @@ where
)
}
// ── Bootstrap (Phase 4) ─────────────────────────────────────────────────────
/// One entry of the bootstrap candidate list: a case-insensitive group of
/// tag names offered to the operator as a possible person. Built by
/// `bootstrap_candidates_handler` and serialized as part of its response.
#[derive(Serialize, Debug, Clone)]
pub struct BootstrapCandidate {
/// Display name — most-frequent capitalization across the case-insensitive
/// group, or simply the first one seen if it's a tie.
pub name: String,
/// Lowercased name; the stable key for grouping and the auto-bind path.
pub normalized_name: String,
/// Sum of `tagged_photo` counts across all capitalizations of this name.
pub usage_count: i64,
/// Heuristic suggestion; the UI defaults this to checked but the user
/// confirms before [`bootstrap_persons_handler`] actually creates rows.
pub looks_like_person: bool,
/// True when a `persons` row already exists for this name (any case).
/// The UI hides these — re-running bootstrap is idempotent so it's fine
/// either way, but the noise isn't worth showing.
pub already_exists: bool,
}
/// JSON body returned by `bootstrap_candidates_handler`.
#[derive(Serialize, Debug)]
pub struct BootstrapCandidatesResponse {
/// Candidates ordered person-likely first, then by descending usage
/// count, then alphabetically by normalized name.
pub candidates: Vec<BootstrapCandidate>,
}
/// JSON request body accepted by `bootstrap_persons_handler`.
#[derive(Deserialize, Debug)]
pub struct BootstrapPersonsReq {
/// Names to create persons for. The handler trims each entry and reports
/// entries that are empty after trimming as skipped.
pub names: Vec<String>,
}
/// JSON body returned by `bootstrap_persons_handler`. The handler always
/// answers 200 once the existing-name lookup succeeds; per-name failures
/// are reported in `skipped` rather than as an HTTP error.
#[derive(Serialize, Debug)]
pub struct BootstrapPersonsResponse {
/// Persons that were newly created by this request.
pub created: Vec<Person>,
/// Requested names that did not result in a new person, with the reason.
pub skipped: Vec<BootstrapSkipped>,
}
/// A requested name that `bootstrap_persons_handler` did not turn into a
/// new person.
#[derive(Serialize, Debug)]
pub struct BootstrapSkipped {
/// The name as reported back to the caller: the trimmed name, or the
/// raw input when it was empty after trimming.
pub name: String,
/// Why it was skipped: "empty name", "person already exists", or the
/// formatted error returned by the create call.
pub reason: String,
}
/// Gatekeeper for the bootstrap candidate list: decides whether a tag is
/// eligible to be shown as a possible person name at all. A `false` here
/// removes the tag from the list outright, which is stronger than merely
/// leaving `looks_like_person` unset.
///
/// A tag passes only when, after trimming surrounding whitespace:
/// - it has at least 3 characters (two-letter tags such as "AB" or "OK"
///   are almost always abbreviations or markers), and
/// - every character is alphabetic, whitespace, an ASCII digit, or one of
///   the name punctuation marks `'`, `-`, `.`, `_`, `’`.
///
/// Anything else — emoji, symbols, arrows, control characters, null bytes —
/// disqualifies the whole tag.
///
/// Digits are deliberately tolerated at this stage: a tag like "Sarah2"
/// stays in the list (flagged not-a-person by `looks_like_person`) so the
/// operator can still confirm it manually if it is an alias.
pub(crate) fn is_plausible_name_token(raw: &str) -> bool {
    // Punctuation that legitimately occurs inside names.
    const NAME_PUNCTUATION: [char; 5] = ['\'', '-', '.', '_', '\u{2019}'];
    // Minimum length, counted in characters rather than bytes.
    const MIN_CHARS: usize = 3;

    let candidate = raw.trim();
    let long_enough = candidate.chars().take(MIN_CHARS).count() == MIN_CHARS;

    long_enough
        && candidate.chars().all(|ch| {
            ch.is_alphabetic()
                || ch.is_whitespace()
                || ch.is_ascii_digit()
                || NAME_PUNCTUATION.contains(&ch)
        })
}
/// Cautious guess at whether a tag reads like a person's name. The result
/// only decides which candidates are pre-checked in the UI; the operator
/// confirms before any row is created, so both false negatives and false
/// positives are acceptable.
///
/// A tag qualifies when all of the following hold:
/// - it consists of 1–2 whitespace-separated words;
/// - every word begins with an uppercase character;
/// - no word contains an ASCII digit (rejects "Trip 2018", "2024", ...);
/// - if it is a single word, that word (lowercased) is not on a short
///   denylist of common non-person tags (cat, christmas, beach, ...).
///
/// Two-word tags bypass the denylist on purpose: a real two-word name
/// ("Sarah Smith") is the dominant case, and wrongly blocking it is worse
/// than wrongly accepting "Sunset Walk".
pub(crate) fn looks_like_person(raw: &str) -> bool {
    const NON_PERSON_SINGLE_WORDS: &[&str] = &[
        // Pets / animals
        "cat",
        "dog",
        "kitten",
        "puppy",
        "bird",
        "fish",
        "pet",
        "pets",
        // Events / occasions
        "birthday",
        "christmas",
        "halloween",
        "easter",
        "thanksgiving",
        "wedding",
        "anniversary",
        "vacation",
        "holiday",
        "party",
        "trip",
        "graduation",
        "concert",
        // Places (generic)
        "home",
        "work",
        "beach",
        "park",
        "hotel",
        "restaurant",
        "office",
        "house",
        "garden",
        // Subjects / styles
        "food",
        "sunset",
        "sunrise",
        "landscape",
        "portrait",
        "selfie",
        "nature",
        "flowers",
        "flower",
        "snow",
        "rain",
        "sky",
        // Buckets
        "untagged",
        "favorites",
        "favourites",
        "misc",
        "other",
        "random",
    ];

    let name = raw.trim();

    // Single pass over the words: count them and validate each one, bailing
    // out as soon as the tag can no longer qualify.
    let mut word_count = 0usize;
    for word in name.split_whitespace() {
        word_count += 1;
        if word_count > 2 {
            return false;
        }
        let capitalised = word.chars().next().map_or(false, char::is_uppercase);
        let has_digit = word.contains(|c: char| c.is_ascii_digit());
        if !capitalised || has_digit {
            return false;
        }
    }

    match word_count {
        // Empty or whitespace-only input.
        0 => false,
        // Only single words are checked against the denylist.
        1 => {
            let folded = name.to_lowercase();
            !NON_PERSON_SINGLE_WORDS.contains(&folded.as_str())
        }
        _ => true,
    }
}
/// Lists tags that could be bootstrapped into persons.
///
/// Loads every tag with its usage count, drops tags rejected by
/// `is_plausible_name_token`, merges the rest case-insensitively, marks
/// which groups already have a matching person, and returns the result as
/// a `BootstrapCandidatesResponse`. Responds with 500 and the formatted
/// error if either DAO call fails.
async fn bootstrap_candidates_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    face_dao: web::Data<Mutex<D>>,
    tag_dao: web::Data<Mutex<crate::tags::SqliteTagDao>>,
) -> impl Responder {
    use std::collections::HashMap;

    // Accumulator for one case-insensitive name; the map key is the
    // lowercased spelling.
    struct NameBucket {
        spelling: String,
        spelling_count: i64,
        usage_total: i64,
    }

    let parent_cx = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.bootstrap_candidates", &parent_cx);
    let cx = opentelemetry::Context::current_with_span(span);

    // Every tag with its count. No path filter: bootstrap is library-wide.
    let tag_counts = {
        let mut tags = tag_dao.lock().expect("tag dao lock");
        match crate::tags::TagDao::get_all_tags(&mut *tags, &cx, None) {
            Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
            Ok(all) => all,
        }
    };

    // Merge capitalizations under their lowercase key. Short tags and tags
    // with non-name characters are filtered first so they never reach the
    // candidate list. The displayed spelling is the one with the highest
    // individual count; the strict `>` keeps the first-seen spelling on ties.
    let mut buckets: HashMap<String, NameBucket> = HashMap::new();
    let plausible = tag_counts
        .into_iter()
        .filter(|(_, tag)| is_plausible_name_token(&tag.name));
    for (uses, tag) in plausible {
        let bucket = buckets
            .entry(tag.name.to_lowercase())
            .or_insert_with(|| NameBucket {
                spelling: tag.name.clone(),
                spelling_count: 0,
                usage_total: 0,
            });
        bucket.usage_total += uses;
        if uses > bucket.spelling_count {
            bucket.spelling_count = uses;
            bucket.spelling = tag.name.clone();
        }
    }

    // One bulk lookup to learn which names already have a person row.
    let keys: Vec<String> = buckets.keys().cloned().collect();
    let known_persons = {
        let mut faces = face_dao.lock().expect("face dao lock");
        match faces.find_persons_by_names_ci(&cx, &keys) {
            Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
            Ok(found) => found,
        }
    };

    let mut candidates = Vec::new();
    for (key, bucket) in buckets {
        let person_like = looks_like_person(&bucket.spelling);
        let exists = known_persons.contains_key(&key);
        candidates.push(BootstrapCandidate {
            name: bucket.spelling,
            normalized_name: key,
            usage_count: bucket.usage_total,
            looks_like_person: person_like,
            already_exists: exists,
        });
    }

    // Person-likely entries first, then descending usage, then alphabetical,
    // so the operator does not scroll past "vacation"-style tags to find names.
    candidates.sort_by(|a, b| {
        (b.looks_like_person, b.usage_count)
            .cmp(&(a.looks_like_person, a.usage_count))
            .then_with(|| a.normalized_name.cmp(&b.normalized_name))
    });

    HttpResponse::Ok().json(BootstrapCandidatesResponse { candidates })
}
/// Creates one person per requested name and reports the outcome per name.
///
/// Each name is trimmed first. Names that are empty after trimming, names
/// that already have a person (case-insensitive), and names whose insert
/// fails are returned in `skipped` with a reason; successful inserts are
/// returned in `created`. The response is 200 even when individual names
/// are skipped; only a failure of the initial existing-name lookup yields
/// a 500 with the formatted error.
async fn bootstrap_persons_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<BootstrapPersonsReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.bootstrap_persons", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

    let mut created: Vec<Person> = Vec::new();
    let mut skipped: Vec<BootstrapSkipped> = Vec::new();
    let mut dao = face_dao.lock().expect("face dao lock");

    // Pre-fetch the existing-name set so a duplicate request reports
    // "already exists" (skipped) rather than firing N inserts that all
    // 409 against the UNIQUE COLLATE NOCASE constraint.
    //
    // The lookup keys are normalized exactly like the per-name check in
    // the loop below (trim, then lowercase). Without the trim, a
    // whitespace-padded duplicate would miss this pre-check and only be
    // caught by the unique-violation fallback after a wasted insert.
    let lower_names: Vec<String> = body
        .names
        .iter()
        .map(|n| n.trim())
        .filter(|n| !n.is_empty())
        .map(str::to_lowercase)
        .collect();
    let existing = match dao.find_persons_by_names_ci(&span_context, &lower_names) {
        Ok(m) => m,
        Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
    };

    for name in &body.names {
        let trimmed = name.trim();
        if trimmed.is_empty() {
            // Echo the raw input back so the caller can match it to the request.
            skipped.push(BootstrapSkipped {
                name: name.clone(),
                reason: "empty name".into(),
            });
            continue;
        }

        if existing.contains_key(&trimmed.to_lowercase()) {
            skipped.push(BootstrapSkipped {
                name: trimmed.to_string(),
                reason: "person already exists".into(),
            });
            continue;
        }

        let req = CreatePersonReq {
            name: trimmed.to_string(),
            notes: None,
            entity_id: None,
            is_ignored: false,
        };
        match dao.create_person(&span_context, &req, /*from_tag*/ true) {
            Ok(p) => created.push(p),
            Err(e) => {
                // A unique violation here means a concurrent create (or an
                // earlier entry of this same request) won the race; report
                // it like any other pre-existing person.
                let reason = if is_unique_violation(&e) {
                    "person already exists".to_string()
                } else {
                    format!("{:#}", e)
                };
                skipped.push(BootstrapSkipped {
                    name: trimmed.to_string(),
                    reason,
                });
            }
        }
    }

    HttpResponse::Ok().json(BootstrapPersonsResponse { created, skipped })
}
// ── Stats / list ────────────────────────────────────────────────────────────
#[derive(Deserialize)]
@@ -2132,6 +1802,7 @@ async fn embeddings_handler<D: FaceDao>(
&span_context,
query.library,
query.unassigned,
query.person_id,
limit,
offset,
)
@@ -2796,77 +2467,7 @@ mod tests {
);
}
// ── Phase 4: bootstrap heuristic + cosine + DAO support ─────────────
#[test]
fn is_plausible_name_token_filters_short_and_emoji() {
    // The hard filter runs before grouping: emoji-bearing tags and tags
    // shorter than 3 characters never reach the candidate list, whatever
    // looks_like_person would have said about them.
    let accepted = [
        "Cameron",
        "Sarah Smith",
        "O'Brien",
        "Jean-Luc",
        "St. James",
        "Renée",
        "José",
        // Non-Latin scripts pass too: the letter check is script-agnostic.
        "田中太郎",
        // Digits are tolerated here; looks_like_person deals with them.
        "Trip 2018",
        "2024",
    ];
    let rejected = [
        // Under the 3-character floor (counted after trimming).
        "",
        " ",
        "Bo",
        "AB",
        " AB ",
        // Emoji / symbol characters disqualify the whole tag.
        "🐱cat",
        "Heart ❤",
        "📸Photo",
        "→ Trip",
        "★Vacation",
    ];

    for tag in accepted {
        assert!(is_plausible_name_token(tag), "expected {tag:?} to be kept");
    }
    for tag in rejected {
        assert!(!is_plausible_name_token(tag), "expected {tag:?} to be dropped");
    }
}
#[test]
fn looks_like_person_accepts_typical_names() {
    // The last entry checks that a title-cased non-ASCII single word counts.
    for name in ["Cameron", "Sarah Smith", "Mary Jane", "Renée"] {
        assert!(looks_like_person(name), "expected {name:?} to look like a person");
    }
}
#[test]
fn looks_like_person_rejects_obvious_non_people() {
    // Covers digits, lowercase starts, three-or-more words, denylist hits
    // (including a title-cased one), and blank input.
    let non_people = [
        "2018",
        "Trip 2018",
        "trip",
        "Birthday Party Cake",
        "cat",
        "Cat", // denied even when title-cased
        "Christmas",
        "home",
        "",
        " ",
    ];
    for tag in non_people {
        assert!(!looks_like_person(tag), "expected {tag:?} to be rejected");
    }
}
#[test]
fn looks_like_person_two_words_skips_denylist() {
    // The single-word denylist is not applied to two-word tags. "Sunset
    // Walk" is far more likely an album than a person, but accepting it is
    // harmless because the operator confirms every candidate; the important
    // property is that "Sarah Smith" is never rejected.
    for tag in ["Sunset Walk", "Sarah Smith"] {
        assert!(looks_like_person(tag), "expected {tag:?} to be accepted");
    }
}
// ── Phase 4: cosine + DAO support ───────────────────────────────────
#[test]
fn cosine_similarity_known_vectors() {
@@ -3339,6 +2940,82 @@ mod tests {
assert_eq!(faces[0].person_id, Some(alice.id));
}
#[test]
fn list_embeddings_filters_by_person_id() {
// Apollo's similar-unassigned suggester relies on this filter to
// pull a single person's embeddings without paging the whole
// detected set client-side. When person_id is set it must win
// over `unassigned=true` (otherwise the IS NULL constraint would
// always return an empty set for an assigned person).
let mut dao = fresh_dao();
// Seed the library row (id 1) that every detection below references
// through `library_id`. INSERT OR IGNORE keeps this harmless if the row
// already exists.
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
// Two distinct persons, so the filter is shown to exclude another
// person's faces and not just the unassigned ones.
let alice = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
let bob = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Bob".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
// Row factory: only the content hash (also used to derive `rel_path`)
// and the person assignment vary between fixtures.
let mk_row = |hash: &str, person: Option<i32>| InsertFaceDetectionInput {
library_id: 1,
content_hash: hash.into(),
rel_path: format!("{hash}.jpg"),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "auto".into(),
person_id: person,
status: "detected".into(),
model_version: "buffalo_l".into(),
};
// Fixture: two faces for Alice, one for Bob, one unassigned.
dao.store_detection(&ctx(), mk_row("a1", Some(alice.id))).unwrap();
dao.store_detection(&ctx(), mk_row("a2", Some(alice.id))).unwrap();
dao.store_detection(&ctx(), mk_row("b1", Some(bob.id))).unwrap();
dao.store_detection(&ctx(), mk_row("u1", None)).unwrap();
// person_id=alice returns only alice's two faces — ignoring the
// (default-true) `unassigned` filter, which would have selected
// u1 only.
let alice_rows = dao
.list_embeddings(&ctx(), None, true, Some(alice.id), 100, 0)
.unwrap();
assert_eq!(alice_rows.len(), 2);
assert!(alice_rows
.iter()
.all(|(r, _)| r.person_id == Some(alice.id)));
// unassigned=true with no person_id behaves as before.
let unassigned_rows = dao
.list_embeddings(&ctx(), None, true, None, 100, 0)
.unwrap();
assert_eq!(unassigned_rows.len(), 1);
assert_eq!(unassigned_rows[0].0.content_hash, "u1");
}
// ── crop_image_to_bbox ──────────────────────────────────────────────
// Pure helper used by the manual face-create handler. Generate a tiny
// image in memory, write it to a temp file, then exercise the bbox