faces: ignore/junk bucket — DB schema + lazy-create endpoint

A single global "Ignored" person row, marked is_ignored=true, that the
frontend lazily creates on first use to hold strangers, false
detections, and faces the user doesn't want bound to a real person.

Schema (new migration 2026-04-29-000200_add_is_ignored):
  - persons.is_ignored BOOLEAN NOT NULL DEFAULT 0
  - Partial index on (is_ignored) WHERE is_ignored = 1; small WHERE
    set means a tiny index that only ever services the bucket lookup.

Why a real persons row instead of a separate table or status enum:
  - face_detections.person_id stays a clean foreign key — no special
    code paths for "ignored faces" anywhere else in the schema.
  - The cluster-suggester already filters by `person_id IS NULL`, so
    bound-to-ignored faces are naturally excluded from re-clustering
    without any change.
  - merge / rename / delete all work on it with the existing routes
    (the management UI just hides it from default views).

DAO additions / changes:
  - get_or_create_ignored_person (idempotent; race-safe via the
    UNIQUE COLLATE NOCASE on persons.name + retry-on-409 fallback).
  - list_persons gains an include_ignored parameter; default false
    so the management screen hides the bucket unless asked.
  - find_persons_by_names_ci filters is_ignored=0 in SQL so the
    auto-bind path can NEVER target the bucket — even if the user
    happens to tag photos as "Ignored", the heuristic look-up skips
    it. Bucket assignment is always an explicit operator action.
  - update_person accepts is_ignored: Option<bool> so a person can
    be moved into / out of the bucket without a delete + recreate.

Routes:
  - POST /persons/ignore-bucket — returns the bucket, creating it on
    first call. Frontend uses this lazily right before binding.
  - GET /persons gains ?include_ignored=true; default behavior
    unchanged.
  - PATCH /persons/{id} now accepts is_ignored.

Tests: ignore_bucket_idempotent_and_filters_auto_bind covers the
contract: bucket is idempotent across calls, find_persons_by_names_ci
skips it (even on exact name match), default list_persons hides it,
include_ignored=true surfaces it. All other tests updated to pass
the new is_ignored: false / Option<bool> fields explicitly.

cargo test --lib: 181/0; fmt + clippy clean for new code.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cameron Cordes
2026-04-29 22:48:16 +00:00
parent 0e160f5d22
commit 7303fb8aa3
4 changed files with 206 additions and 6 deletions

View File

@@ -0,0 +1,2 @@
DROP INDEX IF EXISTS idx_persons_is_ignored;
ALTER TABLE persons DROP COLUMN is_ignored;

View File

@@ -0,0 +1,20 @@
-- IGNORE / junk bucket for the face recognition feature.
--
-- An "Ignored" person is the destination for strangers, faces the user
-- doesn't want tagged, and false detections. It looks like any other
-- person row (so face_detections.person_id stays a clean foreign key)
-- but `is_ignored=1` flags it for special UI treatment:
-- - hidden from the persons list by default
-- - excluded from `find_persons_by_names_ci` so a tag-name match
-- can never auto-bind a real face to the ignore bucket
-- - cluster-suggest already filters by `person_id IS NULL`, so faces
-- bound to an ignored person are naturally excluded from future
-- re-clustering
--
-- Partial index because the WHERE-clause is small (typically 1 row),
-- and we only ever query for `is_ignored = 1` to find the bucket.
ALTER TABLE persons ADD COLUMN is_ignored BOOLEAN NOT NULL DEFAULT 0;
CREATE INDEX idx_persons_is_ignored
ON persons(is_ignored) WHERE is_ignored = 1;

View File

@@ -160,6 +160,7 @@ diesel::table! {
notes -> Nullable<Text>, notes -> Nullable<Text>,
created_at -> BigInt, created_at -> BigInt,
updated_at -> BigInt, updated_at -> BigInt,
is_ignored -> Bool,
} }
} }

View File

@@ -56,6 +56,13 @@ pub struct Person {
pub notes: Option<String>, pub notes: Option<String>,
pub created_at: i64, pub created_at: i64,
pub updated_at: i64, pub updated_at: i64,
/// True for the IGNORE / junk bucket. Hidden from the default
/// persons list, skipped by `find_persons_by_names_ci` (so a tag
/// match can never auto-bind a real face into the ignore bucket),
/// and excluded from cluster suggestions because cluster-suggest
/// already filters by `person_id IS NULL` and ignored faces have
/// a non-null person_id.
pub is_ignored: bool,
} }
#[derive(Insertable, Debug)] #[derive(Insertable, Debug)]
@@ -64,6 +71,7 @@ struct InsertPerson {
name: String, name: String,
notes: Option<String>, notes: Option<String>,
created_from_tag: bool, created_from_tag: bool,
is_ignored: bool,
created_at: i64, created_at: i64,
updated_at: i64, updated_at: i64,
} }
@@ -184,6 +192,7 @@ pub struct PersonSummary {
pub entity_id: Option<i32>, pub entity_id: Option<i32>,
pub created_from_tag: bool, pub created_from_tag: bool,
pub notes: Option<String>, pub notes: Option<String>,
pub is_ignored: bool,
pub face_count: i64, pub face_count: i64,
} }
@@ -198,6 +207,11 @@ pub struct CreatePersonReq {
/// unbridged; set explicitly to wire the person to LLM-extracted facts. /// unbridged; set explicitly to wire the person to LLM-extracted facts.
#[serde(default)] #[serde(default)]
pub entity_id: Option<i32>, pub entity_id: Option<i32>,
/// True for the IGNORE / junk bucket. The frontend sets this when
/// lazily creating the Ignored person via the dedicated endpoint;
/// hand-rolled callers leave it false.
#[serde(default)]
pub is_ignored: bool,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
@@ -210,6 +224,10 @@ pub struct UpdatePersonReq {
pub cover_face_id: Option<i32>, pub cover_face_id: Option<i32>,
#[serde(default)] #[serde(default)]
pub entity_id: Option<i32>, pub entity_id: Option<i32>,
/// Toggle the ignore flag. Mostly used by the UI to "un-ignore" a
/// person that was previously bound to the bucket.
#[serde(default)]
pub is_ignored: Option<bool>,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
@@ -370,7 +388,15 @@ pub trait FaceDao: Send + Sync {
&mut self, &mut self,
ctx: &opentelemetry::Context, ctx: &opentelemetry::Context,
library_id: Option<i32>, library_id: Option<i32>,
include_ignored: bool,
) -> anyhow::Result<Vec<PersonSummary>>; ) -> anyhow::Result<Vec<PersonSummary>>;
/// Get the IGNORE/junk bucket, creating it lazily on first call.
/// Idempotent — returns the same row across calls. Single global
/// bucket per database; the frontend never sees the literal name.
fn get_or_create_ignored_person(
&mut self,
ctx: &opentelemetry::Context,
) -> anyhow::Result<Person>;
fn update_person( fn update_person(
&mut self, &mut self,
ctx: &opentelemetry::Context, ctx: &opentelemetry::Context,
@@ -940,6 +966,7 @@ impl FaceDao for SqliteFaceDao {
name: req.name.clone(), name: req.name.clone(),
notes: req.notes.clone(), notes: req.notes.clone(),
created_from_tag: from_tag, created_from_tag: from_tag,
is_ignored: req.is_ignored,
created_at: now, created_at: now,
updated_at: now, updated_at: now,
}; };
@@ -967,6 +994,56 @@ impl FaceDao for SqliteFaceDao {
}) })
} }
fn get_or_create_ignored_person(
&mut self,
ctx: &opentelemetry::Context,
) -> anyhow::Result<Person> {
// Fast path: there's already an is_ignored row → return it.
// Slow path on first use: create one with a stable display name
// ("Ignored"). Race-safe because the UNIQUE(name COLLATE NOCASE)
// index forces only one ever to exist (we trip and look up).
{
let mut conn = self.connection.lock().expect("face dao lock");
if let Some(p) = persons::table
.filter(persons::is_ignored.eq(true))
.order(persons::id.asc())
.first::<Person>(conn.deref_mut())
.optional()
.with_context(|| "lookup ignored person")?
{
return Ok(p);
}
}
// Drop the lock before delegating to create_person — that
// method takes its own lock.
match self.create_person(
ctx,
&CreatePersonReq {
name: "Ignored".to_string(),
notes: Some(
"Bucket for strangers, false detections, and faces \
you don't want bound to a real person."
.to_string(),
),
entity_id: None,
is_ignored: true,
},
/*from_tag*/ false,
) {
Ok(p) => Ok(p),
Err(e) if is_unique_violation(&e) => {
// Race: someone else created the row. Re-read.
let mut conn = self.connection.lock().expect("face dao lock");
persons::table
.filter(persons::is_ignored.eq(true))
.order(persons::id.asc())
.first::<Person>(conn.deref_mut())
.with_context(|| "load ignored person after race")
}
Err(e) => Err(e),
}
}
fn get_person( fn get_person(
&mut self, &mut self,
ctx: &opentelemetry::Context, ctx: &opentelemetry::Context,
@@ -987,6 +1064,7 @@ impl FaceDao for SqliteFaceDao {
&mut self, &mut self,
ctx: &opentelemetry::Context, ctx: &opentelemetry::Context,
library_id: Option<i32>, library_id: Option<i32>,
include_ignored: bool,
) -> anyhow::Result<Vec<PersonSummary>> { ) -> anyhow::Result<Vec<PersonSummary>> {
let mut conn = self.connection.lock().expect("face dao lock"); let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "list_persons", |_| { trace_db_call(ctx, "query", "list_persons", |_| {
@@ -994,7 +1072,15 @@ impl FaceDao for SqliteFaceDao {
// query for face counts. Using a LEFT JOIN + GROUP BY in // query for face counts. Using a LEFT JOIN + GROUP BY in
// Diesel here gets noisy with the optional library filter; a // Diesel here gets noisy with the optional library filter; a
// second roundtrip is cheap and clearer. // second roundtrip is cheap and clearer.
let person_rows: Vec<Person> = persons::table let mut person_query = persons::table.into_boxed();
if !include_ignored {
// Default — hide the IGNORE/junk bucket from the list.
// The frontend asks include_ignored=true explicitly when
// it needs to surface ignored persons (e.g. a "show
// ignored" toggle in the management UI).
person_query = person_query.filter(persons::is_ignored.eq(false));
}
let person_rows: Vec<Person> = person_query
.order(persons::name.asc()) .order(persons::name.asc())
.load::<Person>(conn.deref_mut()) .load::<Person>(conn.deref_mut())
.with_context(|| "load persons")?; .with_context(|| "load persons")?;
@@ -1045,6 +1131,7 @@ impl FaceDao for SqliteFaceDao {
entity_id: p.entity_id, entity_id: p.entity_id,
created_from_tag: p.created_from_tag, created_from_tag: p.created_from_tag,
notes: p.notes, notes: p.notes,
is_ignored: p.is_ignored,
face_count, face_count,
} }
}) })
@@ -1092,6 +1179,12 @@ impl FaceDao for SqliteFaceDao {
.execute(conn.deref_mut()) .execute(conn.deref_mut())
.with_context(|| "update person entity_id")?; .with_context(|| "update person entity_id")?;
} }
if let Some(flag) = patch.is_ignored {
diesel::update(persons::table.find(id))
.set((persons::is_ignored.eq(flag), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person is_ignored")?;
}
persons::table persons::table
.find(id) .find(id)
.first::<Person>(conn.deref_mut()) .first::<Person>(conn.deref_mut())
@@ -1216,9 +1309,15 @@ impl FaceDao for SqliteFaceDao {
let placeholders = std::iter::repeat_n("?", names.len()) let placeholders = std::iter::repeat_n("?", names.len())
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(","); .join(",");
// Filter out is_ignored persons so the auto-bind path can
// never target the IGNORE/junk bucket — even if a tag name
// happens to match it (e.g. someone tags photos as "Ignored"
// by hand). Ignore-bucket assignment is an explicit operator
// action through the dedicated endpoint, never a heuristic.
let sql = format!( let sql = format!(
"SELECT id, LOWER(name) AS lower_name FROM persons \ "SELECT id, LOWER(name) AS lower_name FROM persons \
WHERE LOWER(name) IN ({}) ORDER BY id ASC", WHERE is_ignored = 0 AND LOWER(name) IN ({}) \
ORDER BY id ASC",
placeholders placeholders
); );
#[derive(QueryableByName)] #[derive(QueryableByName)]
@@ -1404,6 +1503,10 @@ where
web::resource("/persons/bootstrap") web::resource("/persons/bootstrap")
.route(web::post().to(bootstrap_persons_handler::<D>)), .route(web::post().to(bootstrap_persons_handler::<D>)),
) )
.service(
web::resource("/persons/ignore-bucket")
.route(web::post().to(ignore_bucket_handler::<D>)),
)
.service( .service(
web::resource("/tags/people-bootstrap-candidates") web::resource("/tags/people-bootstrap-candidates")
.route(web::get().to(bootstrap_candidates_handler::<D>)), .route(web::get().to(bootstrap_candidates_handler::<D>)),
@@ -1731,6 +1834,7 @@ async fn bootstrap_persons_handler<D: FaceDao>(
name: trimmed.to_string(), name: trimmed.to_string(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
/*from_tag*/ true, /*from_tag*/ true,
) { ) {
@@ -1762,6 +1866,17 @@ pub struct LibraryQuery {
pub library: Option<String>, pub library: Option<String>,
} }
/// `GET /persons` query: optional library scope, optional include of
/// the IGNORE/junk bucket. The bucket is hidden by default so the
/// management UI shows only "real" persons; the persons-management
/// screen requests it explicitly when it needs to surface ignored.
#[derive(Deserialize)]
pub struct ListPersonsQuery {
pub library: Option<String>,
#[serde(default)]
pub include_ignored: bool,
}
async fn stats_handler<D: FaceDao>( async fn stats_handler<D: FaceDao>(
_: Claims, _: Claims,
request: HttpRequest, request: HttpRequest,
@@ -2047,7 +2162,7 @@ async fn list_persons_handler<D: FaceDao>(
_: Claims, _: Claims,
request: HttpRequest, request: HttpRequest,
app_state: web::Data<AppState>, app_state: web::Data<AppState>,
query: web::Query<LibraryQuery>, query: web::Query<ListPersonsQuery>,
face_dao: web::Data<Mutex<D>>, face_dao: web::Data<Mutex<D>>,
) -> impl Responder { ) -> impl Responder {
let context = extract_context_from_request(&request); let context = extract_context_from_request(&request);
@@ -2059,7 +2174,21 @@ async fn list_persons_handler<D: FaceDao>(
.flatten() .flatten()
.map(|l| l.id); .map(|l| l.id);
let mut dao = face_dao.lock().expect("face dao lock"); let mut dao = face_dao.lock().expect("face dao lock");
dao.list_persons(&span_context, library_id) dao.list_persons(&span_context, library_id, query.include_ignored)
.map(|p| HttpResponse::Ok().json(p))
.into_http_internal_err()
}
async fn ignore_bucket_handler<D: FaceDao>(
_: Claims,
request: HttpRequest,
face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
let context = extract_context_from_request(&request);
let span = global_tracer().start_with_context("persons.ignore_bucket", &context);
let span_context = opentelemetry::Context::current_with_span(span);
let mut dao = face_dao.lock().expect("face dao lock");
dao.get_or_create_ignored_person(&span_context)
.map(|p| HttpResponse::Ok().json(p)) .map(|p| HttpResponse::Ok().json(p))
.into_http_internal_err() .into_http_internal_err()
} }
@@ -2299,6 +2428,7 @@ mod tests {
name: "Cameron".into(), name: "Cameron".into(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )
@@ -2310,6 +2440,7 @@ mod tests {
name: "Cameron".into(), name: "Cameron".into(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )
@@ -2457,6 +2588,7 @@ mod tests {
name: "Alice".into(), name: "Alice".into(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )
@@ -2468,6 +2600,7 @@ mod tests {
name: "Bob".into(), name: "Bob".into(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )
@@ -2508,6 +2641,7 @@ mod tests {
name: "Subject".into(), name: "Subject".into(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )
@@ -2580,6 +2714,7 @@ mod tests {
name: "Cover".into(), name: "Cover".into(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )
@@ -2648,6 +2783,7 @@ mod tests {
name: "Alice".into(), name: "Alice".into(),
notes: Some("the boss".into()), notes: Some("the boss".into()),
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )
@@ -2665,6 +2801,7 @@ mod tests {
name: "alice".into(), name: "alice".into(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
); );
@@ -2682,6 +2819,7 @@ mod tests {
notes: Some("a new note".into()), notes: Some("a new note".into()),
cover_face_id: None, cover_face_id: None,
entity_id: None, entity_id: None,
is_ignored: None,
}, },
) )
.expect("update"); .expect("update");
@@ -2689,11 +2827,48 @@ mod tests {
assert!(updated.updated_at >= prev_updated); assert!(updated.updated_at >= prev_updated);
// List + delete. // List + delete.
let listed = dao.list_persons(&ctx(), None).expect("list"); let listed = dao.list_persons(&ctx(), None, false).expect("list");
assert_eq!(listed.len(), 1); assert_eq!(listed.len(), 1);
assert_eq!(listed[0].face_count, 0); assert_eq!(listed[0].face_count, 0);
assert!(dao.delete_person(&ctx(), p.id, false).expect("delete")); assert!(dao.delete_person(&ctx(), p.id, false).expect("delete"));
assert!(dao.list_persons(&ctx(), None).expect("list").is_empty()); assert!(
dao.list_persons(&ctx(), None, false)
.expect("list")
.is_empty()
);
}
#[test]
fn ignore_bucket_idempotent_and_filters_auto_bind() {
// First call creates the bucket; second returns the same row.
// Once it exists, find_persons_by_names_ci must skip it even if
// the search term matches its name — the auto-bind path must
// NEVER target the IGNORE/junk bucket.
let mut dao = fresh_dao();
let first = dao
.get_or_create_ignored_person(&ctx())
.expect("create bucket");
assert!(first.is_ignored);
let second = dao
.get_or_create_ignored_person(&ctx())
.expect("re-fetch bucket");
assert_eq!(first.id, second.id, "bucket must be idempotent");
// Searching by the bucket's name must return nothing — the
// auto-bind look-up filters is_ignored=true.
let m = dao
.find_persons_by_names_ci(&ctx(), &["ignored".into()])
.expect("name lookup");
assert!(
!m.contains_key("ignored"),
"find_persons_by_names_ci must skip the ignore bucket: {m:?}"
);
// Default list_persons hides it; include_ignored=true surfaces it.
let visible = dao.list_persons(&ctx(), None, false).expect("list");
assert!(visible.iter().all(|p| !p.is_ignored));
let all = dao.list_persons(&ctx(), None, true).expect("list all");
assert!(all.iter().any(|p| p.is_ignored && p.id == first.id));
} }
#[test] #[test]
@@ -2754,6 +2929,7 @@ mod tests {
name: "Alice".into(), name: "Alice".into(),
notes: None, notes: None,
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )
@@ -2765,6 +2941,7 @@ mod tests {
name: "Alyse".into(), name: "Alyse".into(),
notes: Some("dup of alice".into()), notes: Some("dup of alice".into()),
entity_id: None, entity_id: None,
is_ignored: false,
}, },
false, false,
) )