faces: add person_id filter to /faces/embeddings; remove tag-bootstrap
Pairs with the Apollo FACES-tab change. The new
POST /api/persons/{id}/similar-unassigned route on Apollo needs to
fetch one person's embeddings cheaply to compute the centroid;
adding a person_id query param to /faces/embeddings keeps that to a
single round-trip instead of paging the whole detected set
client-side. When both person_id and unassigned=true are supplied,
person_id wins (the explicit filter is the more specific intent).
Tag-bootstrap removal: this deletes bootstrap_candidates_handler and
bootstrap_persons_handler, the /persons/bootstrap and
/tags/people-bootstrap-candidates route registrations, and the
heuristic helpers (is_plausible_name_token, looks_like_person) plus
their tests. Only Apollo called these; the migration is complete.
The persons.created_from_tag column stays — it's informational on
existing rows, and removing it would be a destructive migration for
no benefit.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
507
src/faces.rs
507
src/faces.rs
@@ -47,7 +47,7 @@ use std::sync::{Arc, Mutex};
|
|||||||
/// Visual identity. The optional `entity_id` bridges this person to an
|
/// Visual identity. The optional `entity_id` bridges this person to an
|
||||||
/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT
|
/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT
|
||||||
/// auto-bridged at creation — only when the user explicitly links them in
|
/// auto-bridged at creation — only when the user explicitly links them in
|
||||||
/// the management UI, or when bootstrap finds an exact-name match.
|
/// the management UI.
|
||||||
#[derive(Serialize, Queryable, Clone, Debug)]
|
#[derive(Serialize, Queryable, Clone, Debug)]
|
||||||
pub struct Person {
|
pub struct Person {
|
||||||
pub id: i32,
|
pub id: i32,
|
||||||
@@ -366,6 +366,10 @@ pub struct EmbeddingsQuery {
|
|||||||
pub limit: i64,
|
pub limit: i64,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub offset: i64,
|
pub offset: i64,
|
||||||
|
/// Restrict to one person's faces. Used by the similar-unassigned
|
||||||
|
/// suggester to fetch a centroid pool. When set, takes precedence
|
||||||
|
/// over `unassigned` (the more specific filter wins).
|
||||||
|
pub person_id: Option<i32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_unassigned() -> bool {
|
fn default_unassigned() -> bool {
|
||||||
@@ -429,6 +433,7 @@ pub trait FaceDao: Send + Sync {
|
|||||||
ctx: &opentelemetry::Context,
|
ctx: &opentelemetry::Context,
|
||||||
library_id: Option<i32>,
|
library_id: Option<i32>,
|
||||||
unassigned: bool,
|
unassigned: bool,
|
||||||
|
person_id: Option<i32>,
|
||||||
limit: i64,
|
limit: i64,
|
||||||
offset: i64,
|
offset: i64,
|
||||||
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>>;
|
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>>;
|
||||||
@@ -863,6 +868,7 @@ impl FaceDao for SqliteFaceDao {
|
|||||||
ctx: &opentelemetry::Context,
|
ctx: &opentelemetry::Context,
|
||||||
library_id: Option<i32>,
|
library_id: Option<i32>,
|
||||||
unassigned: bool,
|
unassigned: bool,
|
||||||
|
person_id: Option<i32>,
|
||||||
limit: i64,
|
limit: i64,
|
||||||
offset: i64,
|
offset: i64,
|
||||||
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>> {
|
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>> {
|
||||||
@@ -876,7 +882,13 @@ impl FaceDao for SqliteFaceDao {
|
|||||||
if let Some(lib) = library_id {
|
if let Some(lib) = library_id {
|
||||||
query = query.filter(face_detections::library_id.eq(lib));
|
query = query.filter(face_detections::library_id.eq(lib));
|
||||||
}
|
}
|
||||||
if unassigned {
|
// person_id is the more specific filter — when both it and
|
||||||
|
// `unassigned` are supplied, prefer the explicit person id and
|
||||||
|
// ignore the IS NULL constraint (which would always return
|
||||||
|
// empty for an assigned person).
|
||||||
|
if let Some(pid) = person_id {
|
||||||
|
query = query.filter(face_detections::person_id.eq(pid));
|
||||||
|
} else if unassigned {
|
||||||
query = query.filter(face_detections::person_id.is_null());
|
query = query.filter(face_detections::person_id.is_null());
|
||||||
}
|
}
|
||||||
let rows = query
|
let rows = query
|
||||||
@@ -1676,18 +1688,10 @@ where
|
|||||||
.route(web::get().to(list_persons_handler::<D>))
|
.route(web::get().to(list_persons_handler::<D>))
|
||||||
.route(web::post().to(create_person_handler::<D>)),
|
.route(web::post().to(create_person_handler::<D>)),
|
||||||
)
|
)
|
||||||
.service(
|
|
||||||
web::resource("/persons/bootstrap")
|
|
||||||
.route(web::post().to(bootstrap_persons_handler::<D>)),
|
|
||||||
)
|
|
||||||
.service(
|
.service(
|
||||||
web::resource("/persons/ignore-bucket")
|
web::resource("/persons/ignore-bucket")
|
||||||
.route(web::post().to(ignore_bucket_handler::<D>)),
|
.route(web::post().to(ignore_bucket_handler::<D>)),
|
||||||
)
|
)
|
||||||
.service(
|
|
||||||
web::resource("/tags/people-bootstrap-candidates")
|
|
||||||
.route(web::get().to(bootstrap_candidates_handler::<D>)),
|
|
||||||
)
|
|
||||||
.service(
|
.service(
|
||||||
web::resource("/persons/{id}")
|
web::resource("/persons/{id}")
|
||||||
.route(web::get().to(get_person_handler::<D>))
|
.route(web::get().to(get_person_handler::<D>))
|
||||||
@@ -1702,340 +1706,6 @@ where
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Bootstrap (Phase 4) ─────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone)]
|
|
||||||
pub struct BootstrapCandidate {
|
|
||||||
/// Display name — most-frequent capitalization across the case-insensitive
|
|
||||||
/// group, or simply the first one seen if it's a tie.
|
|
||||||
pub name: String,
|
|
||||||
/// Lowercased name; the stable key for grouping and the auto-bind path.
|
|
||||||
pub normalized_name: String,
|
|
||||||
/// Sum of `tagged_photo` counts across all capitalizations of this name.
|
|
||||||
pub usage_count: i64,
|
|
||||||
/// Heuristic suggestion; the UI defaults this to checked but the user
|
|
||||||
/// confirms before [`bootstrap_persons_handler`] actually creates rows.
|
|
||||||
pub looks_like_person: bool,
|
|
||||||
/// True when a `persons` row already exists for this name (any case).
|
|
||||||
/// The UI hides these — re-running bootstrap is idempotent so it's fine
|
|
||||||
/// either way, but the noise isn't worth showing.
|
|
||||||
pub already_exists: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Debug)]
|
|
||||||
pub struct BootstrapCandidatesResponse {
|
|
||||||
pub candidates: Vec<BootstrapCandidate>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
|
||||||
pub struct BootstrapPersonsReq {
|
|
||||||
pub names: Vec<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Debug)]
|
|
||||||
pub struct BootstrapPersonsResponse {
|
|
||||||
pub created: Vec<Person>,
|
|
||||||
pub skipped: Vec<BootstrapSkipped>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Debug)]
|
|
||||||
pub struct BootstrapSkipped {
|
|
||||||
pub name: String,
|
|
||||||
pub reason: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Hard filter for the bootstrap candidate list. Returns true if the tag
|
|
||||||
/// could plausibly be a person name; returns false to drop it from the
|
|
||||||
/// candidates entirely (not just leave looks_like_person=false).
|
|
||||||
///
|
|
||||||
/// Rules — all required:
|
|
||||||
/// - At least 3 characters after trimming. Two-letter tags ("AB", "OK")
|
|
||||||
/// are almost always abbreviations or markers, not names.
|
|
||||||
/// - No emoji or symbol-class characters. SQL-side string sort already
|
|
||||||
/// surfaces those at the top of the tag list; filtering them keeps
|
|
||||||
/// the candidate UI focused on names rather than chart-junk.
|
|
||||||
/// - No control characters or null bytes.
|
|
||||||
pub(crate) fn is_plausible_name_token(raw: &str) -> bool {
|
|
||||||
let trimmed = raw.trim();
|
|
||||||
if trimmed.chars().count() < 3 {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for c in trimmed.chars() {
|
|
||||||
// Letter / mark / decimal-digit / connector-punctuation /
|
|
||||||
// dash / apostrophe / period / whitespace are all plausible in a
|
|
||||||
// name. Anything else (emoji, symbols, math operators, arrows,
|
|
||||||
// box drawing, control codes) disqualifies the whole tag.
|
|
||||||
if c.is_alphabetic()
|
|
||||||
|| c.is_whitespace()
|
|
||||||
|| matches!(c, '\'' | '-' | '.' | '_' | '\u{2019}')
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if c.is_ascii_digit() {
|
|
||||||
// Digits don't disqualify here — `looks_like_person` rejects
|
|
||||||
// them later, but `is_plausible_name_token` is just about
|
|
||||||
// "could this be in the candidate list at all?". A tag like
|
|
||||||
// "Sarah2" stays as a candidate (display-flagged not-a-person
|
|
||||||
// by looks_like_person) so the operator can still spot and
|
|
||||||
// confirm it manually if it's an alias.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Conservative "this tag *might* be a person name" heuristic. False
|
|
||||||
/// negatives are fine — the operator confirms in the UI before any row
|
|
||||||
/// is created. False positives are also fine for the same reason; the
|
|
||||||
/// goal is just to default sensible candidates to checked.
|
|
||||||
///
|
|
||||||
/// Rules:
|
|
||||||
/// - 1–2 whitespace-separated words
|
|
||||||
/// - Each word starts with an uppercase character
|
|
||||||
/// - No digits anywhere (rejects "Trip 2018", "2024", etc.)
|
|
||||||
/// - Single-word names not on a small denylist of common non-person
|
|
||||||
/// tags (cat, christmas, beach, ...). Two-word names skip the
|
|
||||||
/// denylist because a real two-word person name is the dominant
|
|
||||||
/// case ("Sarah Smith") and false-blocking it is worse than false-
|
|
||||||
/// accepting "Sunset Walk".
|
|
||||||
pub(crate) fn looks_like_person(raw: &str) -> bool {
|
|
||||||
let trimmed = raw.trim();
|
|
||||||
if trimmed.is_empty() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
let words: Vec<&str> = trimmed.split_whitespace().collect();
|
|
||||||
if !(1..=2).contains(&words.len()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for w in &words {
|
|
||||||
let Some(first) = w.chars().next() else {
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
if !first.is_uppercase() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if w.chars().any(|c| c.is_ascii_digit()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if words.len() == 1 {
|
|
||||||
const DENY: &[&str] = &[
|
|
||||||
// Pets / animals
|
|
||||||
"cat",
|
|
||||||
"dog",
|
|
||||||
"kitten",
|
|
||||||
"puppy",
|
|
||||||
"bird",
|
|
||||||
"fish",
|
|
||||||
"pet",
|
|
||||||
"pets",
|
|
||||||
// Events / occasions
|
|
||||||
"birthday",
|
|
||||||
"christmas",
|
|
||||||
"halloween",
|
|
||||||
"easter",
|
|
||||||
"thanksgiving",
|
|
||||||
"wedding",
|
|
||||||
"anniversary",
|
|
||||||
"vacation",
|
|
||||||
"holiday",
|
|
||||||
"party",
|
|
||||||
"trip",
|
|
||||||
"graduation",
|
|
||||||
"concert",
|
|
||||||
// Places (generic)
|
|
||||||
"home",
|
|
||||||
"work",
|
|
||||||
"beach",
|
|
||||||
"park",
|
|
||||||
"hotel",
|
|
||||||
"restaurant",
|
|
||||||
"office",
|
|
||||||
"house",
|
|
||||||
"garden",
|
|
||||||
// Subjects / styles
|
|
||||||
"food",
|
|
||||||
"sunset",
|
|
||||||
"sunrise",
|
|
||||||
"landscape",
|
|
||||||
"portrait",
|
|
||||||
"selfie",
|
|
||||||
"nature",
|
|
||||||
"flowers",
|
|
||||||
"flower",
|
|
||||||
"snow",
|
|
||||||
"rain",
|
|
||||||
"sky",
|
|
||||||
// Buckets
|
|
||||||
"untagged",
|
|
||||||
"favorites",
|
|
||||||
"favourites",
|
|
||||||
"misc",
|
|
||||||
"other",
|
|
||||||
"random",
|
|
||||||
];
|
|
||||||
let lower = trimmed.to_lowercase();
|
|
||||||
if DENY.iter().any(|w| *w == lower) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn bootstrap_candidates_handler<D: FaceDao>(
|
|
||||||
_: Claims,
|
|
||||||
request: HttpRequest,
|
|
||||||
face_dao: web::Data<Mutex<D>>,
|
|
||||||
tag_dao: web::Data<Mutex<crate::tags::SqliteTagDao>>,
|
|
||||||
) -> impl Responder {
|
|
||||||
use std::collections::HashMap;
|
|
||||||
let context = extract_context_from_request(&request);
|
|
||||||
let span = global_tracer().start_with_context("faces.bootstrap_candidates", &context);
|
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
|
||||||
|
|
||||||
// All tags + their counts. Path filter unused — bootstrap is library-wide.
|
|
||||||
let tags_with_counts = {
|
|
||||||
let mut td = tag_dao.lock().expect("tag dao lock");
|
|
||||||
match crate::tags::TagDao::get_all_tags(&mut *td, &span_context, None) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Group by lowercase name. Pick the most-frequent capitalization
|
|
||||||
// for the display name (ties broken by first-seen). Filter out
|
|
||||||
// short tags and tags carrying non-name characters (emojis, symbols)
|
|
||||||
// before grouping — they're noise no operator would tick, so showing
|
|
||||||
// them just makes the candidate list harder to scan.
|
|
||||||
struct Group {
|
|
||||||
display: String,
|
|
||||||
display_freq: i64,
|
|
||||||
total_count: i64,
|
|
||||||
}
|
|
||||||
let mut groups: HashMap<String, Group> = HashMap::new();
|
|
||||||
for (count, tag) in tags_with_counts {
|
|
||||||
if !is_plausible_name_token(&tag.name) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let lower = tag.name.to_lowercase();
|
|
||||||
let g = groups.entry(lower).or_insert_with(|| Group {
|
|
||||||
display: tag.name.clone(),
|
|
||||||
display_freq: 0,
|
|
||||||
total_count: 0,
|
|
||||||
});
|
|
||||||
g.total_count += count;
|
|
||||||
if count > g.display_freq {
|
|
||||||
g.display = tag.name.clone();
|
|
||||||
g.display_freq = count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cross-reference against existing persons (bulk one-query lookup).
|
|
||||||
let lower_names: Vec<String> = groups.keys().cloned().collect();
|
|
||||||
let existing = {
|
|
||||||
let mut fd = face_dao.lock().expect("face dao lock");
|
|
||||||
match fd.find_persons_by_names_ci(&span_context, &lower_names) {
|
|
||||||
Ok(m) => m,
|
|
||||||
Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut candidates: Vec<BootstrapCandidate> = groups
|
|
||||||
.into_iter()
|
|
||||||
.map(|(lower, g)| BootstrapCandidate {
|
|
||||||
looks_like_person: looks_like_person(&g.display),
|
|
||||||
already_exists: existing.contains_key(&lower),
|
|
||||||
name: g.display,
|
|
||||||
normalized_name: lower,
|
|
||||||
usage_count: g.total_count,
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
// Sort: persons-first heuristic by descending count, then alphabetical.
|
|
||||||
// Persons-likely candidates surface near the top so the user doesn't
|
|
||||||
// scroll past dozens of "vacation"-style tags to find them.
|
|
||||||
candidates.sort_by(|a, b| {
|
|
||||||
b.looks_like_person
|
|
||||||
.cmp(&a.looks_like_person)
|
|
||||||
.then(b.usage_count.cmp(&a.usage_count))
|
|
||||||
.then(a.normalized_name.cmp(&b.normalized_name))
|
|
||||||
});
|
|
||||||
|
|
||||||
HttpResponse::Ok().json(BootstrapCandidatesResponse { candidates })
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn bootstrap_persons_handler<D: FaceDao>(
|
|
||||||
_: Claims,
|
|
||||||
request: HttpRequest,
|
|
||||||
body: web::Json<BootstrapPersonsReq>,
|
|
||||||
face_dao: web::Data<Mutex<D>>,
|
|
||||||
) -> impl Responder {
|
|
||||||
let context = extract_context_from_request(&request);
|
|
||||||
let span = global_tracer().start_with_context("faces.bootstrap_persons", &context);
|
|
||||||
let span_context = opentelemetry::Context::current_with_span(span);
|
|
||||||
|
|
||||||
let mut created: Vec<Person> = Vec::new();
|
|
||||||
let mut skipped: Vec<BootstrapSkipped> = Vec::new();
|
|
||||||
|
|
||||||
let mut dao = face_dao.lock().expect("face dao lock");
|
|
||||||
|
|
||||||
// Pre-fetch the existing-name set so a duplicate request reports
|
|
||||||
// "already exists" (skipped) rather than firing N inserts that all
|
|
||||||
// 409 against the UNIQUE COLLATE NOCASE constraint.
|
|
||||||
let lower_names: Vec<String> = body.names.iter().map(|n| n.to_lowercase()).collect();
|
|
||||||
let existing = match dao.find_persons_by_names_ci(&span_context, &lower_names) {
|
|
||||||
Ok(m) => m,
|
|
||||||
Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
|
|
||||||
};
|
|
||||||
|
|
||||||
for name in &body.names {
|
|
||||||
let trimmed = name.trim();
|
|
||||||
if trimmed.is_empty() {
|
|
||||||
skipped.push(BootstrapSkipped {
|
|
||||||
name: name.clone(),
|
|
||||||
reason: "empty name".into(),
|
|
||||||
});
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let lower = trimmed.to_lowercase();
|
|
||||||
if existing.contains_key(&lower) {
|
|
||||||
skipped.push(BootstrapSkipped {
|
|
||||||
name: trimmed.to_string(),
|
|
||||||
reason: "person already exists".into(),
|
|
||||||
});
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match dao.create_person(
|
|
||||||
&span_context,
|
|
||||||
&CreatePersonReq {
|
|
||||||
name: trimmed.to_string(),
|
|
||||||
notes: None,
|
|
||||||
entity_id: None,
|
|
||||||
is_ignored: false,
|
|
||||||
},
|
|
||||||
/*from_tag*/ true,
|
|
||||||
) {
|
|
||||||
Ok(p) => created.push(p),
|
|
||||||
Err(e) => {
|
|
||||||
if is_unique_violation(&e) {
|
|
||||||
// Race with a concurrent create; treat as skipped.
|
|
||||||
skipped.push(BootstrapSkipped {
|
|
||||||
name: trimmed.to_string(),
|
|
||||||
reason: "person already exists".into(),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
skipped.push(BootstrapSkipped {
|
|
||||||
name: trimmed.to_string(),
|
|
||||||
reason: format!("{:#}", e),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HttpResponse::Ok().json(BootstrapPersonsResponse { created, skipped })
|
|
||||||
}
|
|
||||||
|
|
||||||
// ── Stats / list ────────────────────────────────────────────────────────────
|
// ── Stats / list ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
@@ -2132,6 +1802,7 @@ async fn embeddings_handler<D: FaceDao>(
|
|||||||
&span_context,
|
&span_context,
|
||||||
query.library,
|
query.library,
|
||||||
query.unassigned,
|
query.unassigned,
|
||||||
|
query.person_id,
|
||||||
limit,
|
limit,
|
||||||
offset,
|
offset,
|
||||||
)
|
)
|
||||||
@@ -2796,77 +2467,7 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Phase 4: bootstrap heuristic + cosine + DAO support ─────────────
|
// ── Phase 4: cosine + DAO support ───────────────────────────────────
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn is_plausible_name_token_filters_short_and_emoji() {
|
|
||||||
// Hard filter applied before grouping — emojis and tags shorter
|
|
||||||
// than 3 chars never make it into the candidate list, regardless
|
|
||||||
// of looks_like_person's later assessment.
|
|
||||||
assert!(is_plausible_name_token("Cameron"));
|
|
||||||
assert!(is_plausible_name_token("Sarah Smith"));
|
|
||||||
assert!(is_plausible_name_token("O'Brien"));
|
|
||||||
assert!(is_plausible_name_token("Jean-Luc"));
|
|
||||||
assert!(is_plausible_name_token("St. James"));
|
|
||||||
assert!(is_plausible_name_token("Renée"));
|
|
||||||
assert!(is_plausible_name_token("José"));
|
|
||||||
// Asian script names — the alphabetic/letter check covers any
|
|
||||||
// script, not just Latin.
|
|
||||||
assert!(is_plausible_name_token("田中太郎"));
|
|
||||||
|
|
||||||
// Below the 3-character floor.
|
|
||||||
assert!(!is_plausible_name_token(""));
|
|
||||||
assert!(!is_plausible_name_token(" "));
|
|
||||||
assert!(!is_plausible_name_token("Bo"));
|
|
||||||
assert!(!is_plausible_name_token("AB"));
|
|
||||||
// Trim before counting — surrounding whitespace doesn't count.
|
|
||||||
assert!(!is_plausible_name_token(" AB "));
|
|
||||||
|
|
||||||
// Emoji / symbol classes get the whole tag dropped.
|
|
||||||
assert!(!is_plausible_name_token("🐱cat"));
|
|
||||||
assert!(!is_plausible_name_token("Heart ❤"));
|
|
||||||
assert!(!is_plausible_name_token("📸Photo"));
|
|
||||||
assert!(!is_plausible_name_token("→ Trip"));
|
|
||||||
assert!(!is_plausible_name_token("★Vacation"));
|
|
||||||
|
|
||||||
// Digits are kept (handled by looks_like_person, not here).
|
|
||||||
assert!(is_plausible_name_token("Trip 2018"));
|
|
||||||
assert!(is_plausible_name_token("2024"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn looks_like_person_accepts_typical_names() {
|
|
||||||
assert!(looks_like_person("Cameron"));
|
|
||||||
assert!(looks_like_person("Sarah Smith"));
|
|
||||||
assert!(looks_like_person("Mary Jane"));
|
|
||||||
// Non-ASCII title-cased single word still counts.
|
|
||||||
assert!(looks_like_person("Renée"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn looks_like_person_rejects_obvious_non_people() {
|
|
||||||
// Digits, lowercase, three-or-more words, denylist hits.
|
|
||||||
assert!(!looks_like_person("2018"));
|
|
||||||
assert!(!looks_like_person("Trip 2018"));
|
|
||||||
assert!(!looks_like_person("trip"));
|
|
||||||
assert!(!looks_like_person("Birthday Party Cake"));
|
|
||||||
assert!(!looks_like_person("cat"));
|
|
||||||
assert!(!looks_like_person("Cat")); // denied even when title-cased
|
|
||||||
assert!(!looks_like_person("Christmas"));
|
|
||||||
assert!(!looks_like_person("home"));
|
|
||||||
assert!(!looks_like_person(""));
|
|
||||||
assert!(!looks_like_person(" "));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn looks_like_person_two_words_skips_denylist() {
|
|
||||||
// Two-word names get a pass on the single-word denylist —
|
|
||||||
// "Sunset Walk" is much more likely a real album than a person,
|
|
||||||
// but false-accepting is fine because the operator confirms.
|
|
||||||
// What matters is we don't false-reject "Sarah Smith".
|
|
||||||
assert!(looks_like_person("Sunset Walk"));
|
|
||||||
assert!(looks_like_person("Sarah Smith"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn cosine_similarity_known_vectors() {
|
fn cosine_similarity_known_vectors() {
|
||||||
@@ -3339,6 +2940,82 @@ mod tests {
|
|||||||
assert_eq!(faces[0].person_id, Some(alice.id));
|
assert_eq!(faces[0].person_id, Some(alice.id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn list_embeddings_filters_by_person_id() {
|
||||||
|
// Apollo's similar-unassigned suggester relies on this filter to
|
||||||
|
// pull a single person's embeddings without paging the whole
|
||||||
|
// detected set client-side. When person_id is set it must win
|
||||||
|
// over `unassigned=true` (otherwise the IS NULL constraint would
|
||||||
|
// always return an empty set for an assigned person).
|
||||||
|
let mut dao = fresh_dao();
|
||||||
|
diesel::sql_query(
|
||||||
|
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
|
||||||
|
VALUES (1, 'main', '/tmp', 0)",
|
||||||
|
)
|
||||||
|
.execute(dao.connection.lock().unwrap().deref_mut())
|
||||||
|
.expect("seed libraries");
|
||||||
|
|
||||||
|
let alice = dao
|
||||||
|
.create_person(
|
||||||
|
&ctx(),
|
||||||
|
&CreatePersonReq {
|
||||||
|
name: "Alice".into(),
|
||||||
|
notes: None,
|
||||||
|
entity_id: None,
|
||||||
|
is_ignored: false,
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let bob = dao
|
||||||
|
.create_person(
|
||||||
|
&ctx(),
|
||||||
|
&CreatePersonReq {
|
||||||
|
name: "Bob".into(),
|
||||||
|
notes: None,
|
||||||
|
entity_id: None,
|
||||||
|
is_ignored: false,
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let mk_row = |hash: &str, person: Option<i32>| InsertFaceDetectionInput {
|
||||||
|
library_id: 1,
|
||||||
|
content_hash: hash.into(),
|
||||||
|
rel_path: format!("{hash}.jpg"),
|
||||||
|
bbox: Some((0.1, 0.1, 0.2, 0.2)),
|
||||||
|
embedding: Some(vec![0u8; 2048]),
|
||||||
|
confidence: Some(0.9),
|
||||||
|
source: "auto".into(),
|
||||||
|
person_id: person,
|
||||||
|
status: "detected".into(),
|
||||||
|
model_version: "buffalo_l".into(),
|
||||||
|
};
|
||||||
|
dao.store_detection(&ctx(), mk_row("a1", Some(alice.id))).unwrap();
|
||||||
|
dao.store_detection(&ctx(), mk_row("a2", Some(alice.id))).unwrap();
|
||||||
|
dao.store_detection(&ctx(), mk_row("b1", Some(bob.id))).unwrap();
|
||||||
|
dao.store_detection(&ctx(), mk_row("u1", None)).unwrap();
|
||||||
|
|
||||||
|
// person_id=alice returns only alice's two faces — ignoring the
|
||||||
|
// (default-true) `unassigned` filter, which would have selected
|
||||||
|
// u1 only.
|
||||||
|
let alice_rows = dao
|
||||||
|
.list_embeddings(&ctx(), None, true, Some(alice.id), 100, 0)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(alice_rows.len(), 2);
|
||||||
|
assert!(alice_rows
|
||||||
|
.iter()
|
||||||
|
.all(|(r, _)| r.person_id == Some(alice.id)));
|
||||||
|
|
||||||
|
// unassigned=true with no person_id behaves as before.
|
||||||
|
let unassigned_rows = dao
|
||||||
|
.list_embeddings(&ctx(), None, true, None, 100, 0)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(unassigned_rows.len(), 1);
|
||||||
|
assert_eq!(unassigned_rows[0].0.content_hash, "u1");
|
||||||
|
}
|
||||||
|
|
||||||
// ── crop_image_to_bbox ──────────────────────────────────────────────
|
// ── crop_image_to_bbox ──────────────────────────────────────────────
|
||||||
// Pure helper used by the manual face-create handler. Generate a tiny
|
// Pure helper used by the manual face-create handler. Generate a tiny
|
||||||
// image in memory, write it to a temp file, then exercise the bbox
|
// image in memory, write it to a temp file, then exercise the bbox
|
||||||
|
|||||||
Reference in New Issue
Block a user