Files
ImageApi/src/faces.rs
Cameron Cordes 1d9b9a0bc4 faces: avoid 40 MB row clone in /faces/embeddings
list_embeddings cloned the full FaceDetectionRow inside the filter_map
just to pair it with the base64-encoded embedding. The 2 KB BLOB was
already on the row — at 20k unassigned faces that's 40 MB of pointless
heap traffic per Apollo cluster-suggest run. Move the bytes out via
Option::take() so the row drops the BLOB instead of duplicating it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 19:00:55 -04:00

3527 lines
138 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Local face recognition: data layer + HTTP surface.
//!
//! Phase 2 ships the persistence model and the manual CRUD endpoints; the
//! file-watch hook that drives automatic detection lives in `process_new_files`
//! (Phase 3) and is not registered yet. Inference is delegated to Apollo over
//! HTTP via [`crate::ai::face_client`]; this module never imports onnxruntime.
//!
//! Data model:
//! - `persons` are visual identities (the "who" of a face).
//! - `face_detections` rows are either real detections (`status='detected'`)
//! or markers (`status='no_faces' | 'failed'`). Both are keyed on
//! `content_hash` so the same JPEG in two libraries is scanned once.
//! - The `(library_id, rel_path)` pair is the *display* lookup; we resolve
//! it through `image_exif.content_hash` on every read so renames don't
//! strand face rows.
//!
//! The `FaceDao` trait abstracts persistence; `SqliteFaceDao` is the
//! production impl. The Phase 2 endpoints use it directly. A test impl
//! (in-memory) lives at the bottom of the module behind `#[cfg(test)]`.
use crate::Claims;
use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError};
use crate::database::schema::{face_detections, image_exif, persons};
use crate::error::IntoHttpError;
use crate::exif;
use crate::file_types;
use crate::libraries::{self, Library};
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
use crate::state::AppState;
use crate::utils::normalize_path;
use crate::{ThumbnailRequest, connect};
use actix_web::dev::{ServiceFactory, ServiceRequest};
use actix_web::{App, HttpRequest, HttpResponse, Responder, web};
use anyhow::{Context, anyhow};
use chrono::Utc;
use diesel::prelude::*;
use image::GenericImageView;
use log::{info, warn};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
use serde::{Deserialize, Serialize};
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
// ── Wire types ──────────────────────────────────────────────────────────────
/// Visual identity. The optional `entity_id` bridges this person to an
/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT
/// auto-bridged at creation — only when the user explicitly links them in
/// the management UI, or when bootstrap finds an exact-name match.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Person {
    pub id: i32,
    /// Display name. Unique COLLATE NOCASE at the DB level (see the
    /// `find_persons_by_names_ci` docs on the DAO trait).
    pub name: String,
    /// Face used as this person's avatar; NULL until first assignment
    /// (auto-set by `assign_face_to_person` when empty).
    pub cover_face_id: Option<i32>,
    /// Optional bridge to an LLM-extracted knowledge-graph entity.
    pub entity_id: Option<i32>,
    /// True when the person was auto-created from a photo tag rather
    /// than by an explicit user action.
    pub created_from_tag: bool,
    pub notes: Option<String>,
    /// Unix timestamps (seconds).
    pub created_at: i64,
    pub updated_at: i64,
    /// True for the IGNORE / junk bucket. Hidden from the default
    /// persons list, skipped by `find_persons_by_names_ci` (so a tag
    /// match can never auto-bind a real face into the ignore bucket),
    /// and excluded from cluster suggestions because cluster-suggest
    /// already filters by `person_id IS NULL` and ignored faces have
    /// a non-null person_id.
    pub is_ignored: bool,
}
/// Insert shape for `persons`. Omits `id` (rowid), `cover_face_id` and
/// `entity_id`, which start at their DB defaults (NULL) and are patched
/// in later via `update_person` / `assign_face_to_person`.
#[derive(Insertable, Debug)]
#[diesel(table_name = persons)]
struct InsertPerson {
    name: String,
    notes: Option<String>,
    created_from_tag: bool,
    is_ignored: bool,
    created_at: i64,
    updated_at: i64,
}
/// Full `face_detections` row. Field order must match the table's column
/// order — `Queryable` maps by position, not by name.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct FaceDetectionRow {
    pub id: i32,
    pub library_id: i32,
    /// Content-addressed key; the same JPEG in two libraries shares rows.
    pub content_hash: String,
    /// Display path at detection time; renames are resolved through
    /// `image_exif.content_hash` on read (see module docs).
    pub rel_path: String,
    /// Normalized bbox; NULL on marker rows (`status != 'detected'`).
    pub bbox_x: Option<f32>,
    pub bbox_y: Option<f32>,
    pub bbox_w: Option<f32>,
    pub bbox_h: Option<f32>,
    /// Skip on the wire — clients call /faces/embeddings explicitly when
    /// they need it. Saves ~2 KB per face on every list response.
    #[serde(skip_serializing)]
    pub embedding: Option<Vec<u8>>,
    pub confidence: Option<f32>,
    /// `'auto'` (detector) or manual origin; `delete_auto_for_hash`
    /// filters on this.
    pub source: String,
    pub person_id: Option<i32>,
    /// `'detected'`, `'no_faces'`, or `'failed'` (see module docs).
    pub status: String,
    pub model_version: String,
    pub created_at: i64,
}
/// SQL fragment restricting an `image_exif.rel_path` (or `face_detections.rel_path`)
/// column to image extensions. Videos register in `image_exif` with a
/// populated `content_hash` but can never produce a `face_detections` row
/// — applying this filter at query time keeps videos out of the per-tick
/// backlog drain (which would otherwise loop forever — `filter_excluded`
/// drops them client-side without writing a marker) and out of the SCANNED
/// stat denominator (so 100% is reachable).
fn image_path_predicate(col: &str) -> String {
    // Build "(lower(col) LIKE '%.jpg' OR lower(col) LIKE '%.png' OR …)"
    // one clause per known image extension.
    let mut predicate = String::from("(");
    for (i, ext) in file_types::IMAGE_EXTENSIONS.iter().enumerate() {
        if i > 0 {
            predicate.push_str(" OR ");
        }
        predicate.push_str(&format!("lower({col}) LIKE '%.{ext}'"));
    }
    predicate.push(')');
    predicate
}
/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's
/// `sql_query` requires a `QueryableByName` row type with explicit
/// column SQL types; using a tuple isn't supported.
#[derive(diesel::QueryableByName, Debug)]
struct CountRow {
    // The raw SQL must alias its aggregate as `count` for this to bind.
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    count: i64,
}
/// `(rel_path, content_hash)` row for `list_unscanned_candidates`'s raw
/// SQL; fields bind by column name, so they must match the SELECT list.
#[derive(diesel::QueryableByName, Debug)]
struct UnscannedRow {
    #[diesel(sql_type = diesel::sql_types::Text)]
    rel_path: String,
    #[diesel(sql_type = diesel::sql_types::Text)]
    content_hash: String,
}
/// Insert shape for `face_detections`; mirrors [`FaceDetectionRow`]
/// minus the rowid. Built inside `store_detection` from the public
/// [`InsertFaceDetectionInput`] so callers never import diesel derives.
#[derive(Insertable, Debug)]
#[diesel(table_name = face_detections)]
struct InsertFaceDetection {
    library_id: i32,
    content_hash: String,
    rel_path: String,
    bbox_x: Option<f32>,
    bbox_y: Option<f32>,
    bbox_w: Option<f32>,
    bbox_h: Option<f32>,
    embedding: Option<Vec<u8>>,
    confidence: Option<f32>,
    source: String,
    person_id: Option<i32>,
    status: String,
    model_version: String,
    created_at: i64,
}
/// Build a [`FaceWithPerson`] from a freshly-mutated row by resolving the
/// person name via [`FaceDao::get_person`]. Used by `create_face_handler`
/// and `update_face_handler` so PATCH/POST responses match the join shape
/// `/image/faces` returns — without this the carousel overlay's
/// optimistic-replace would clobber the rendered name (the bare
/// [`FaceDetectionRow`] doesn't carry it).
fn hydrate_face_with_person<D: FaceDao>(
    dao: &mut D,
    ctx: &opentelemetry::Context,
    row: FaceDetectionRow,
) -> anyhow::Result<FaceWithPerson> {
    // Resolve the assigned person's display name, if any; a dangling
    // person_id (person row missing) degrades to None rather than erroring.
    let person_name = if let Some(pid) = row.person_id {
        dao.get_person(ctx, pid)?.map(|p| p.name)
    } else {
        None
    };
    // NULL bbox/confidence only occur on marker rows; default to 0.0 so
    // the wire shape stays non-optional.
    Ok(FaceWithPerson {
        id: row.id,
        bbox_x: row.bbox_x.unwrap_or(0.0),
        bbox_y: row.bbox_y.unwrap_or(0.0),
        bbox_w: row.bbox_w.unwrap_or(0.0),
        bbox_h: row.bbox_h.unwrap_or(0.0),
        confidence: row.confidence.unwrap_or(0.0),
        source: row.source,
        person_id: row.person_id,
        person_name,
        model_version: row.model_version,
    })
}
/// Face row decorated with its assigned person's name. Returned by
/// `/image/faces` for the rendering side (carousel overlay, person chips).
#[derive(Serialize, Debug, Clone)]
pub struct FaceWithPerson {
    pub id: i32,
    /// Normalized bbox; marker-row NULLs are flattened to 0.0 upstream.
    pub bbox_x: f32,
    pub bbox_y: f32,
    pub bbox_w: f32,
    pub bbox_h: f32,
    pub confidence: f32,
    pub source: String,
    pub person_id: Option<i32>,
    /// Joined from `persons.name`; None when unassigned.
    pub person_name: Option<String>,
    pub model_version: String,
}
/// Face row plus the photo it lives on. Powers the per-person photo grid
/// (`GET /persons/{id}/faces`) and unassigned-cluster surfacing in Apollo.
#[derive(Serialize, Debug, Clone)]
pub struct FaceWithPath {
    pub id: i32,
    /// Library + rel_path locate the photo the face was detected on.
    pub library_id: i32,
    pub rel_path: String,
    pub bbox_x: f32,
    pub bbox_y: f32,
    pub bbox_w: f32,
    pub bbox_h: f32,
    pub confidence: f32,
    pub person_id: Option<i32>,
    pub model_version: String,
}
/// Embedding-bearing face row. Returned by `/faces/embeddings` for Apollo's
/// clustering layer; embedding is base64-encoded so the JSON payload is
/// self-contained (Apollo's DBSCAN runs over numpy arrays decoded from this).
#[derive(Serialize, Debug, Clone)]
pub struct FaceEmbeddingRow {
    pub id: i32,
    pub library_id: i32,
    pub rel_path: String,
    pub content_hash: String,
    /// None on unassigned faces — the clustering input.
    pub person_id: Option<i32>,
    /// Embeddings are only comparable within one model version.
    pub model_version: String,
    /// base64 of 2048 bytes (512×f32 LE).
    pub embedding: String,
    /// Normalized bbox 0..1, included so the cluster suggester UI can
    /// crop a face thumbnail without an extra round-trip per cluster.
    /// Shouldn't be NULL for `status='detected'` rows (CHECK constraint
    /// in the migration), but the DB type is nullable so we mirror it.
    pub bbox_x: Option<f32>,
    pub bbox_y: Option<f32>,
    pub bbox_w: Option<f32>,
    pub bbox_h: Option<f32>,
}
/// Aggregate counters for the face-scan dashboard. All photo counts are
/// per distinct `content_hash`, not per row (see `stats` impl for why).
#[derive(Serialize, Debug, Default)]
pub struct FaceStats {
    /// None when computed across all libraries.
    pub library_id: Option<i32>,
    /// Denominator: hash-distinct, image-extension-filtered image_exif rows.
    pub total_photos: i64,
    /// Hashes with any face_detections row (detected or marker).
    pub scanned: i64,
    pub with_faces: i64,
    pub no_faces: i64,
    pub failed: i64,
    pub persons_count: i64,
    pub unassigned_faces: i64,
}
/// [`Person`] decorated with its face count for list views.
#[derive(Serialize, Debug, Clone)]
pub struct PersonSummary {
    pub id: i32,
    pub name: String,
    pub cover_face_id: Option<i32>,
    pub entity_id: Option<i32>,
    pub created_from_tag: bool,
    pub notes: Option<String>,
    pub is_ignored: bool,
    /// Number of assigned face rows (library-scoped when requested).
    pub face_count: i64,
}
// ── Request bodies ──────────────────────────────────────────────────────────
/// POST body for creating a person.
#[derive(Deserialize, Debug)]
pub struct CreatePersonReq {
    pub name: String,
    #[serde(default)]
    pub notes: Option<String>,
    /// Optional bridge to an existing entity. NULL/missing leaves it
    /// unbridged; set explicitly to wire the person to LLM-extracted facts.
    #[serde(default)]
    pub entity_id: Option<i32>,
    /// True for the IGNORE / junk bucket. The frontend sets this when
    /// lazily creating the Ignored person via the dedicated endpoint;
    /// hand-rolled callers leave it false.
    #[serde(default)]
    pub is_ignored: bool,
}
/// PATCH body for a person; every field is optional and missing fields
/// are left unchanged.
#[derive(Deserialize, Debug)]
pub struct UpdatePersonReq {
    #[serde(default)]
    pub name: Option<String>,
    #[serde(default)]
    pub notes: Option<String>,
    #[serde(default)]
    pub cover_face_id: Option<i32>,
    #[serde(default)]
    pub entity_id: Option<i32>,
    /// Toggle the ignore flag. Mostly used by the UI to "un-ignore" a
    /// person that was previously bound to the bucket.
    #[serde(default)]
    pub is_ignored: Option<bool>,
}
#[derive(Deserialize, Debug)]
pub struct MergePersonsReq {
    /// Person id to merge *into*. The source (`{id}` in the path) is
    /// re-pointed to this id, then deleted.
    pub into: i32,
}
#[derive(Deserialize, Debug)]
pub struct DeletePersonQuery {
    /// `set_null` (default) leaves face rows orphaned (person_id NULL);
    /// `delete` cascades through and removes the face rows entirely.
    /// Default is set_null because deleting the person almost never
    /// means "delete every photo of them that ever existed."
    #[serde(default)]
    pub cascade: Option<String>,
}
/// POST body for manually drawing a face box on a photo.
#[derive(Deserialize, Debug)]
pub struct CreateFaceReq {
    /// Photo path (library-relative). Resolved to content_hash via
    /// image_exif before any face row is inserted.
    pub path: String,
    pub library: Option<i32>,
    pub bbox: BboxReq,
    /// Optional initial person assignment. Use this when the user draws a
    /// box and immediately picks a name from the autocomplete.
    #[serde(default)]
    pub person_id: Option<i32>,
    /// Skip the embedding step. Set when the user wants to tag a region
    /// the detector can't find a face in (back of head, profile partly
    /// occluded, etc.). The row is stored with a zero-vector embedding,
    /// which the cluster suggester filters on `norm <= 0` and auto-bind
    /// cosine resolves to 0 against — so the row participates only as a
    /// browse-by-person tag, not in similarity matching. The frontend
    /// only sets this after a 422 from a strict create plus an explicit
    /// operator confirmation.
    #[serde(default)]
    pub force: bool,
}
/// Bounding box in a request body (normalized, mirroring the DB columns).
#[derive(Deserialize, Debug)]
pub struct BboxReq {
    pub x: f32,
    pub y: f32,
    pub w: f32,
    pub h: f32,
}
/// PATCH body for a face row.
#[derive(Deserialize, Debug)]
pub struct UpdateFaceReq {
    /// `null` literally clears the assignment; missing leaves it alone.
    /// Distinguish via `Option<Option<…>>` is tricky in serde without
    /// custom deserialization; encode "clear" as `clear_person: true`
    /// instead.
    #[serde(default)]
    pub person_id: Option<i32>,
    #[serde(default)]
    pub clear_person: bool,
    #[serde(default)]
    pub bbox: Option<BboxReq>,
}
/// Query string for `GET /faces/embeddings` (paged).
#[derive(Deserialize, Debug)]
pub struct EmbeddingsQuery {
    pub library: Option<i32>,
    /// Default true — clustering only cares about unassigned faces. Set
    /// false to dump all embeddings (e.g. for re-clustering everything).
    #[serde(default = "default_unassigned")]
    pub unassigned: bool,
    #[serde(default = "default_embeddings_limit")]
    pub limit: i64,
    #[serde(default)]
    pub offset: i64,
}
/// serde default for [`EmbeddingsQuery::unassigned`].
fn default_unassigned() -> bool {
    true
}
/// serde default for [`EmbeddingsQuery::limit`] (page size).
fn default_embeddings_limit() -> i64 {
    500
}
// ── DAO trait ───────────────────────────────────────────────────────────────
// File-watch hook (Phase 3) and the rerun handler (Phase 6) consume the
// methods the Phase 2 routes don't. Allow dead_code on the trait so we
// don't have to sprinkle attributes on every method that's wired up later.
#[allow(dead_code)]
pub trait FaceDao: Send + Sync {
    /// True when any `face_detections` row — detection or marker —
    /// exists for `content_hash`.
    fn already_scanned(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<bool>;
    /// Find image_exif rows in `library_id` that have a populated
    /// content_hash but no matching face_detections row yet. Used by
    /// the watcher's quick-scan path to drain the backlog without
    /// re-walking the filesystem. Returns `(rel_path, content_hash)`
    /// pairs, capped at `limit`. Distinct on content_hash so the same
    /// hash that lives at multiple rel_paths only fires one detection.
    fn list_unscanned_candidates(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        limit: i64,
    ) -> anyhow::Result<Vec<(String, String)>>;
    /// Insert one detection (or marker) row and return it as stored,
    /// rowid included.
    fn store_detection(
        &mut self,
        ctx: &opentelemetry::Context,
        row: InsertFaceDetectionInput,
    ) -> anyhow::Result<FaceDetectionRow>;
    /// Write a `no_faces`/`failed` marker row for a scanned photo.
    /// Idempotent: a no-op when any row already exists for the hash.
    fn mark_status(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        content_hash: &str,
        rel_path: &str,
        status: &str,
        model_version: &str,
    ) -> anyhow::Result<()>;
    /// All detected faces on one photo (by hash), with person names
    /// joined in. Marker rows are excluded.
    fn list_for_content_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<Vec<FaceWithPerson>>;
    /// All detected faces assigned to `person_id`, optionally scoped to
    /// one library. Powers the per-person photo grid.
    fn list_for_person(
        &mut self,
        ctx: &opentelemetry::Context,
        person_id: i32,
        library_id: Option<i32>,
    ) -> anyhow::Result<Vec<FaceWithPath>>;
    /// Page of detected rows paired with their base64-encoded embedding
    /// (rows with NULL embedding are skipped). `unassigned=true` keeps
    /// only `person_id IS NULL` rows — the clustering input.
    fn list_embeddings(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
        unassigned: bool,
        limit: i64,
        offset: i64,
    ) -> anyhow::Result<Vec<(FaceDetectionRow, String)>>;
    /// Fetch one face row by id; Ok(None) when it doesn't exist.
    fn get_face(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
    ) -> anyhow::Result<Option<FaceDetectionRow>>;
    /// Patch a face row and return the updated row. Each argument is an
    /// independent, optional patch.
    fn update_face(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        person_id: Option<Option<i32>>, // None=leave; Some(None)=clear; Some(Some(id))=set
        bbox: Option<(f32, f32, f32, f32)>,
        embedding: Option<Vec<u8>>,
    ) -> anyhow::Result<FaceDetectionRow>;
    /// Delete one face row; returns whether a row was actually removed.
    fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result<bool>;
    /// Remove all `source='auto'` rows for a hash (rerun prep); manual
    /// rows survive. Returns the number of rows deleted.
    fn delete_auto_for_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<usize>;
    /// Aggregate scan-progress counters, optionally per library.
    fn stats(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
    ) -> anyhow::Result<FaceStats>;
    // ── Persons ─────────────────────────────────────────────────────────
    /// Create a person; `from_tag` records auto-creation provenance.
    fn create_person(
        &mut self,
        ctx: &opentelemetry::Context,
        req: &CreatePersonReq,
        from_tag: bool,
    ) -> anyhow::Result<Person>;
    /// Fetch one person by id; Ok(None) when it doesn't exist.
    fn get_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
    ) -> anyhow::Result<Option<Person>>;
    /// List persons with face counts; `include_ignored` controls whether
    /// the junk bucket shows up.
    fn list_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
        include_ignored: bool,
    ) -> anyhow::Result<Vec<PersonSummary>>;
    /// Get the IGNORE/junk bucket, creating it lazily on first call.
    /// Idempotent — returns the same row across calls. Single global
    /// bucket per database; the frontend never sees the literal name.
    fn get_or_create_ignored_person(
        &mut self,
        ctx: &opentelemetry::Context,
    ) -> anyhow::Result<Person>;
    /// Apply an [`UpdatePersonReq`] patch and return the updated person.
    fn update_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        patch: &UpdatePersonReq,
    ) -> anyhow::Result<Person>;
    /// Delete a person. `cascade=true` removes face rows; otherwise the
    /// rows have their `person_id` set NULL by the FK constraint.
    fn delete_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        cascade_delete_faces: bool,
    ) -> anyhow::Result<bool>;
    /// Re-point `src`'s faces at `into`, delete `src`, return `into`.
    fn merge_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        src: i32,
        into: i32,
    ) -> anyhow::Result<Person>;
    /// Resolve `(library_id, rel_path)` → `content_hash` via image_exif.
    /// Returns None when the photo hasn't been EXIF-indexed yet (no row
    /// in image_exif) or when the row exists but content_hash is NULL.
    fn resolve_content_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        rel_path: &str,
    ) -> anyhow::Result<Option<String>>;
    // ── Auto-bind support (Phase 4) ─────────────────────────────────────
    /// Map case-insensitive person names → person id. Used by the
    /// auto-bind path to look up "is this tag a known person?". Names
    /// passed in are matched LOWER(persons.name); collisions resolve to
    /// the person with the lowest id (stable, but the UNIQUE constraint
    /// on persons.name COLLATE NOCASE prevents collisions in practice).
    fn find_persons_by_names_ci(
        &mut self,
        ctx: &opentelemetry::Context,
        names: &[String],
    ) -> anyhow::Result<std::collections::HashMap<String, i32>>;
    /// Mean of a person's existing face embeddings. Returns the L2-
    /// normalized 512-d reference vector, or None when the person has
    /// no detected faces yet (auto-bind treats that as "first face wins
    /// unconditionally"). Filters by the same model_version that produced
    /// the candidate embedding so cross-model averaging never happens.
    fn person_reference_embedding(
        &mut self,
        ctx: &opentelemetry::Context,
        person_id: i32,
        model_version: &str,
    ) -> anyhow::Result<Option<Vec<f32>>>;
    /// Set face_detections.person_id and, when the target person has no
    /// cover_face_id yet, set it to this face. One transaction so a
    /// half-bound state can't survive a SQLite write error.
    fn assign_face_to_person(
        &mut self,
        ctx: &opentelemetry::Context,
        face_id: i32,
        person_id: i32,
    ) -> anyhow::Result<()>;
}
/// Free-standing input struct; the DAO copies it into [`InsertFaceDetection`]
/// so callers don't need to import the diesel-derived insertable.
#[derive(Debug, Clone)]
pub struct InsertFaceDetectionInput {
    pub library_id: i32,
    pub content_hash: String,
    pub rel_path: String,
    /// `(x, y, w, h)` normalized; None on marker rows.
    pub bbox: Option<(f32, f32, f32, f32)>,
    /// Raw 512×f32 LE bytes; None on marker rows.
    pub embedding: Option<Vec<u8>>,
    pub confidence: Option<f32>,
    pub source: String,
    pub person_id: Option<i32>,
    pub status: String,
    pub model_version: String,
}
// ── SqliteFaceDao impl ──────────────────────────────────────────────────────
/// Production [`FaceDao`] backed by a mutex-guarded SQLite connection.
/// Every DAO method holds the lock for its full duration, so calls are
/// serialized per DAO instance.
pub struct SqliteFaceDao {
    connection: Arc<Mutex<SqliteConnection>>,
}
impl SqliteFaceDao {
    /// Open a fresh connection via the crate-level `connect()`.
    pub fn new() -> Self {
        Self {
            connection: Arc::new(Mutex::new(connect())),
        }
    }
    /// Test helper — bind to a pre-built (typically in-memory) connection.
    #[cfg(test)]
    pub fn from_connection(connection: Arc<Mutex<SqliteConnection>>) -> Self {
        Self { connection }
    }
}
impl Default for SqliteFaceDao {
    /// Same as [`SqliteFaceDao::new`]: opens its own connection.
    fn default() -> Self {
        Self::new()
    }
}
impl FaceDao for SqliteFaceDao {
fn already_scanned(
    &mut self,
    ctx: &opentelemetry::Context,
    content_hash: &str,
) -> anyhow::Result<bool> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "face_already_scanned", |span| {
        span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
        // Any row — detection or marker — counts as "scanned"; we only
        // need existence, so fetch a single id.
        let hit = face_detections::table
            .filter(face_detections::content_hash.eq(content_hash))
            .select(face_detections::id)
            .first::<i32>(conn.deref_mut())
            .optional()
            .with_context(|| "already_scanned query")?;
        Ok(hit.is_some())
    })
}
fn list_unscanned_candidates(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: i32,
    limit: i64,
) -> anyhow::Result<Vec<(String, String)>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "list_unscanned_candidates", |span| {
        span.set_attribute(KeyValue::new("library_id", library_id as i64));
        // Pick the smallest-id rel_path per content_hash so we don't
        // fire multiple detect calls for the same hash if it lives
        // under several rel_paths in the same library. The
        // anti-join (NOT EXISTS) drains hashes that have no row in
        // face_detections at all. The image-extension predicate
        // keeps videos out of the candidate set; without it they'd
        // be filtered client-side and re-pulled every tick forever
        // because no marker row is written for excluded paths.
        //
        // NOTE(review): with a bare (non-aggregated) rel_path under
        // GROUP BY, SQLite picks the value from an *arbitrary* row of
        // the group, not necessarily the smallest-id one — confirm the
        // "smallest-id" claim or use min(rowid)-correlated selection.
        let ext_predicate = image_path_predicate("rel_path");
        // Bind order must match the two `?` placeholders: library_id
        // (Integer) first, then limit (BigInt).
        let sql = format!(
            "SELECT rel_path, content_hash \
             FROM image_exif e \
             WHERE library_id = ? \
             AND content_hash IS NOT NULL \
             AND {ext_predicate} \
             AND NOT EXISTS ( \
                 SELECT 1 FROM face_detections f \
                 WHERE f.content_hash = e.content_hash \
             ) \
             GROUP BY content_hash \
             LIMIT ?"
        );
        let rows: Vec<(String, String)> = diesel::sql_query(sql)
            .bind::<diesel::sql_types::Integer, _>(library_id)
            .bind::<diesel::sql_types::BigInt, _>(limit)
            .load::<UnscannedRow>(conn.deref_mut())
            .with_context(|| "list_unscanned_candidates")?
            .into_iter()
            .map(|r| (r.rel_path, r.content_hash))
            .collect();
        Ok(rows)
    })
}
fn store_detection(
    &mut self,
    ctx: &opentelemetry::Context,
    row: InsertFaceDetectionInput,
) -> anyhow::Result<FaceDetectionRow> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "insert", "store_detection", |span| {
        span.set_attribute(KeyValue::new("status", row.status.clone()));
        span.set_attribute(KeyValue::new("source", row.source.clone()));
        let now = Utc::now().timestamp();
        // Explode the (x, y, w, h) tuple into four nullable columns.
        let (bx, by, bw, bh) = match row.bbox {
            Some((x, y, w, h)) => (Some(x), Some(y), Some(w), Some(h)),
            None => (None, None, None, None),
        };
        let insert = InsertFaceDetection {
            library_id: row.library_id,
            content_hash: row.content_hash,
            rel_path: row.rel_path,
            bbox_x: bx,
            bbox_y: by,
            bbox_w: bw,
            bbox_h: bh,
            embedding: row.embedding,
            confidence: row.confidence,
            source: row.source,
            person_id: row.person_id,
            status: row.status,
            model_version: row.model_version,
            created_at: now,
        };
        diesel::insert_into(face_detections::table)
            .values(&insert)
            .execute(conn.deref_mut())
            .with_context(|| "insert face_detection")?;
        // last_insert_rowid() is per-connection state; it's safe here
        // because the mutex holds this connection for the whole call,
        // so no interleaved insert can change it between the two
        // statements.
        define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; }
        let id = diesel::select(last_insert_rowid())
            .get_result::<i32>(conn.deref_mut())
            .with_context(|| "last_insert_rowid")?;
        // Re-read the row so defaults applied by the DB come back too.
        face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "fetch inserted face")
    })
}
fn mark_status(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: i32,
    content_hash: &str,
    rel_path: &str,
    status: &str,
    model_version: &str,
) -> anyhow::Result<()> {
    // Marker rows have NULL bbox + NULL embedding (CHECK enforces
    // this). We let the UNIQUE partial index on (content_hash) WHERE
    // status='no_faces' guard against double-marking; for 'failed' we
    // do a manual exists-check. Any pre-existing row for this hash —
    // including detected rows from a prior successful run — makes a
    // second marker wrong, so stay idempotent and bail out early.
    if self.already_scanned(ctx, content_hash)? {
        return Ok(());
    }
    let marker = InsertFaceDetectionInput {
        library_id,
        content_hash: content_hash.to_string(),
        rel_path: rel_path.to_string(),
        bbox: None,
        embedding: None,
        confidence: None,
        source: "auto".to_string(),
        person_id: None,
        status: status.to_string(),
        model_version: model_version.to_string(),
    };
    // Discard the returned row — callers only care that the marker landed.
    self.store_detection(ctx, marker).map(|_| ())
}
fn list_for_content_hash(
    &mut self,
    ctx: &opentelemetry::Context,
    content_hash: &str,
) -> anyhow::Result<Vec<FaceWithPerson>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "faces_for_hash", |span| {
        span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
        // Left join keeps unassigned faces (person_name = None); marker
        // rows are excluded by the status filter.
        let rows = face_detections::table
            .left_join(persons::table.on(persons::id.nullable().eq(face_detections::person_id)))
            .filter(face_detections::content_hash.eq(content_hash))
            .filter(face_detections::status.eq("detected"))
            .select((
                face_detections::id,
                face_detections::bbox_x,
                face_detections::bbox_y,
                face_detections::bbox_w,
                face_detections::bbox_h,
                face_detections::confidence,
                face_detections::source,
                face_detections::person_id,
                persons::name.nullable(),
                face_detections::model_version,
            ))
            .load::<(
                i32,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                String,
                Option<i32>,
                Option<String>,
                String,
            )>(conn.deref_mut())
            .with_context(|| "list faces for hash")?;
        let faces: Vec<FaceWithPerson> = rows
            .into_iter()
            .map(
                |(id, bx, by, bw, bh, conf, source, person_id, person_name, model_version)| {
                    FaceWithPerson {
                        id,
                        bbox_x: bx.unwrap_or(0.0),
                        bbox_y: by.unwrap_or(0.0),
                        bbox_w: bw.unwrap_or(0.0),
                        bbox_h: bh.unwrap_or(0.0),
                        confidence: conf.unwrap_or(0.0),
                        source,
                        person_id,
                        person_name,
                        model_version,
                    }
                },
            )
            .collect();
        Ok(faces)
    })
}
fn list_for_person(
    &mut self,
    ctx: &opentelemetry::Context,
    person_id: i32,
    library_id: Option<i32>,
) -> anyhow::Result<Vec<FaceWithPath>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "faces_for_person", |span| {
        span.set_attribute(KeyValue::new("person_id", person_id as i64));
        // Boxed query so the optional library filter can be bolted on.
        let mut query = face_detections::table
            .filter(face_detections::person_id.eq(person_id))
            .filter(face_detections::status.eq("detected"))
            .into_boxed();
        if let Some(lib) = library_id {
            query = query.filter(face_detections::library_id.eq(lib));
        }
        let rows = query
            .select((
                face_detections::id,
                face_detections::library_id,
                face_detections::rel_path,
                face_detections::bbox_x,
                face_detections::bbox_y,
                face_detections::bbox_w,
                face_detections::bbox_h,
                face_detections::confidence,
                face_detections::person_id,
                face_detections::model_version,
            ))
            .load::<(
                i32,
                i32,
                String,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<i32>,
                String,
            )>(conn.deref_mut())
            .with_context(|| "list faces for person")?;
        let faces: Vec<FaceWithPath> = rows
            .into_iter()
            .map(
                |(id, library_id, rel_path, bx, by, bw, bh, conf, person_id, model_version)| {
                    FaceWithPath {
                        id,
                        library_id,
                        rel_path,
                        bbox_x: bx.unwrap_or(0.0),
                        bbox_y: by.unwrap_or(0.0),
                        bbox_w: bw.unwrap_or(0.0),
                        bbox_h: bh.unwrap_or(0.0),
                        confidence: conf.unwrap_or(0.0),
                        person_id,
                        model_version,
                    }
                },
            )
            .collect();
        Ok(faces)
    })
}
fn list_embeddings(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: Option<i32>,
    unassigned: bool,
    limit: i64,
    offset: i64,
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "list_embeddings", |span| {
        span.set_attribute(KeyValue::new("limit", limit));
        span.set_attribute(KeyValue::new("offset", offset));
        let mut query = face_detections::table
            .filter(face_detections::status.eq("detected"))
            .into_boxed();
        if let Some(lib) = library_id {
            query = query.filter(face_detections::library_id.eq(lib));
        }
        if unassigned {
            query = query.filter(face_detections::person_id.is_null());
        }
        // Stable order by id so limit/offset pages don't overlap or
        // skip rows between requests.
        let rows = query
            .order(face_detections::id.asc())
            .limit(limit)
            .offset(offset)
            .load::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "list embeddings")?;
        // Pair with the base64-encoded embedding string so the handler
        // doesn't need to know the wire format. Skip rows with NULL
        // embedding (shouldn't happen on detected rows, but defensive).
        // `embedding.take()` moves the bytes out of the row so we can
        // hand the (now-empty-embedding) row plus the encoded string
        // back to the caller without cloning the whole row — at 20k
        // rows × 2 KB that clone was 40 MB of pointless heap traffic
        // per cluster-suggest run.
        use base64::Engine;
        Ok(rows
            .into_iter()
            .filter_map(|mut r| {
                let bytes = r.embedding.take()?;
                let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
                Some((r, b64))
            })
            .collect())
    })
}
fn get_face(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
) -> anyhow::Result<Option<FaceDetectionRow>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "get_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        // .optional() turns NotFound into Ok(None) for the caller.
        let row = face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .optional()
            .with_context(|| "get_face")?;
        Ok(row)
    })
}
fn update_face(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
    person_id: Option<Option<i32>>,
    bbox: Option<(f32, f32, f32, f32)>,
    embedding: Option<Vec<u8>>,
) -> anyhow::Result<FaceDetectionRow> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "update", "update_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        // Apply patches one at a time so each set() has the right type.
        // Diesel's update DSL is type-driven and combining heterogeneous
        // optional sets in one statement is awkward.
        //
        // NOTE(review): the three UPDATEs are separate statements, not
        // one transaction — a failure partway leaves earlier patches
        // applied. The mutex serializes concurrent callers, so this
        // only matters on a mid-call SQLite error; confirm that's
        // acceptable or wrap in conn.transaction().
        if let Some(pid) = person_id {
            // pid is Option<i32>: Some(id) sets, None clears to NULL.
            diesel::update(face_detections::table.find(id))
                .set(face_detections::person_id.eq(pid))
                .execute(conn.deref_mut())
                .with_context(|| "update person_id")?;
        }
        if let Some((x, y, w, h)) = bbox {
            diesel::update(face_detections::table.find(id))
                .set((
                    face_detections::bbox_x.eq(x),
                    face_detections::bbox_y.eq(y),
                    face_detections::bbox_w.eq(w),
                    face_detections::bbox_h.eq(h),
                ))
                .execute(conn.deref_mut())
                .with_context(|| "update bbox")?;
        }
        if let Some(emb) = embedding {
            diesel::update(face_detections::table.find(id))
                .set(face_detections::embedding.eq(emb))
                .execute(conn.deref_mut())
                .with_context(|| "update embedding")?;
        }
        // Re-read so the caller gets the fully patched row.
        face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "fetch updated face")
    })
}
fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result<bool> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "delete", "delete_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        // Affected-row count > 0 tells the caller whether anything
        // actually existed at this id.
        diesel::delete(face_detections::table.find(id))
            .execute(conn.deref_mut())
            .with_context(|| "delete face")
            .map(|n| n > 0)
    })
}
fn delete_auto_for_hash(
    &mut self,
    ctx: &opentelemetry::Context,
    content_hash: &str,
) -> anyhow::Result<usize> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "delete", "delete_auto_for_hash", |span| {
        span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
        // Only detector-written rows go; manually drawn faces
        // (source != 'auto') survive a re-scan.
        let target = face_detections::table
            .filter(face_detections::content_hash.eq(content_hash))
            .filter(face_detections::source.eq("auto"));
        let removed = diesel::delete(target)
            .execute(conn.deref_mut())
            .with_context(|| "delete auto rows")?;
        Ok(removed)
    })
}
    /// Aggregate face-scan progress and person counters, optionally scoped
    /// to a single library (`library_id = None` → instance-wide).
    ///
    /// Every scan counter (`scanned`, `with_faces`, `no_faces`, `failed`,
    /// `total_photos`) counts DISTINCT content_hash values, not rows, so
    /// the progress numerator and denominator live in the same domain;
    /// the inline comments explain the individual pitfalls.
    fn stats(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
    ) -> anyhow::Result<FaceStats> {
        let mut conn = self.connection.lock().expect("face dao lock");
        trace_db_call(ctx, "query", "face_stats", |span| {
            if let Some(lib) = library_id {
                span.set_attribute(KeyValue::new("library_id", lib as i64));
            }
            // Count distinct content_hashes per status by status — one
            // hash can have many rows (multiple detected faces) but we
            // want it counted once.
            let scanned: i64 = {
                let mut q = face_detections::table.into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                    .first(conn.deref_mut())
                    .with_context(|| "stats: scanned")?
            };
            let with_faces: i64 = {
                let mut q = face_detections::table
                    .filter(face_detections::status.eq("detected"))
                    .into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                    .first(conn.deref_mut())
                    .with_context(|| "stats: with_faces")?
            };
            let no_faces: i64 = {
                let mut q = face_detections::table
                    .filter(face_detections::status.eq("no_faces"))
                    .into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                    .first(conn.deref_mut())
                    .with_context(|| "stats: no_faces")?
            };
            let failed: i64 = {
                let mut q = face_detections::table
                    .filter(face_detections::status.eq("failed"))
                    .into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                    .first(conn.deref_mut())
                    .with_context(|| "stats: failed")?
            };
            // Image-extension filter mirrors `list_unscanned_candidates` so
            // SCANNED can actually reach 100%: videos sit in `image_exif` but
            // never get a `face_detections` row, so counting them here
            // permanently caps the percentage below 100%.
            //
            // Count DISTINCT content_hash (not rows) so the numerator
            // (`scanned`, also distinct-content_hash) and denominator live
            // in the same domain. Without this, a file present at multiple
            // rel_paths or across libraries inflates total_photos by one
            // per duplicate row while face_detections — keyed on
            // content_hash — counts the bytes once, leaving a permanent
            // gap (e.g. 1101/1103 with nothing actually pending). Rows
            // with NULL content_hash are excluded; they're held in the
            // hash-backfill backlog and counting them would pin the bar
            // below 100% for the duration of that backfill.
            let total_photos: i64 = {
                let ext_predicate = image_path_predicate("rel_path");
                let row: CountRow = if let Some(lib) = library_id {
                    let sql = format!(
                        "SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
                         WHERE library_id = ? AND content_hash IS NOT NULL AND {ext_predicate}"
                    );
                    diesel::sql_query(sql)
                        .bind::<diesel::sql_types::Integer, _>(lib)
                        .get_result(conn.deref_mut())
                        .with_context(|| "stats: total_photos")?
                } else {
                    let sql = format!(
                        "SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
                         WHERE content_hash IS NOT NULL AND {ext_predicate}"
                    );
                    diesel::sql_query(sql)
                        .get_result(conn.deref_mut())
                        .with_context(|| "stats: total_photos")?
                };
                row.count
            };
            // Persons are global rows — deliberately not library-filtered.
            let persons_count: i64 = persons::table
                .select(diesel::dsl::count_star())
                .first(conn.deref_mut())
                .with_context(|| "stats: persons")?;
            // Unassigned is a plain row count (each face box needs triage),
            // unlike the distinct-hash counters above.
            let unassigned_faces: i64 = {
                let mut q = face_detections::table
                    .filter(face_detections::status.eq("detected"))
                    .filter(face_detections::person_id.is_null())
                    .into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_star())
                    .first(conn.deref_mut())
                    .with_context(|| "stats: unassigned")?
            };
            Ok(FaceStats {
                library_id,
                total_photos,
                scanned,
                with_faces,
                no_faces,
                failed,
                persons_count,
                unassigned_faces,
            })
        })
    }
fn create_person(
&mut self,
ctx: &opentelemetry::Context,
req: &CreatePersonReq,
from_tag: bool,
) -> anyhow::Result<Person> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "insert", "create_person", |span| {
span.set_attribute(KeyValue::new("name", req.name.clone()));
let now = Utc::now().timestamp();
let insert = InsertPerson {
name: req.name.clone(),
notes: req.notes.clone(),
created_from_tag: from_tag,
is_ignored: req.is_ignored,
created_at: now,
updated_at: now,
};
diesel::insert_into(persons::table)
.values(&insert)
.execute(conn.deref_mut())
.with_context(|| format!("insert person {}", req.name))?;
define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; }
let id = diesel::select(last_insert_rowid())
.get_result::<i32>(conn.deref_mut())
.with_context(|| "last_insert_rowid persons")?;
// Optional entity bridge — do this as a follow-up update so
// schema's UNIQUE(name COLLATE NOCASE) can fire on insert
// before we touch entity_id.
if let Some(entity_id) = req.entity_id {
diesel::update(persons::table.find(id))
.set(persons::entity_id.eq(entity_id))
.execute(conn.deref_mut())
.with_context(|| "set entity_id on new person")?;
}
persons::table
.find(id)
.first::<Person>(conn.deref_mut())
.with_context(|| "fetch new person")
})
}
fn get_or_create_ignored_person(
&mut self,
ctx: &opentelemetry::Context,
) -> anyhow::Result<Person> {
// Fast path: there's already an is_ignored row → return it.
// Slow path on first use: create one with a stable display name
// ("Ignored"). Race-safe because the UNIQUE(name COLLATE NOCASE)
// index forces only one ever to exist (we trip and look up).
{
let mut conn = self.connection.lock().expect("face dao lock");
if let Some(p) = persons::table
.filter(persons::is_ignored.eq(true))
.order(persons::id.asc())
.first::<Person>(conn.deref_mut())
.optional()
.with_context(|| "lookup ignored person")?
{
return Ok(p);
}
}
// Drop the lock before delegating to create_person — that
// method takes its own lock.
match self.create_person(
ctx,
&CreatePersonReq {
name: "Ignored".to_string(),
notes: Some(
"Bucket for strangers, false detections, and faces \
you don't want bound to a real person."
.to_string(),
),
entity_id: None,
is_ignored: true,
},
/*from_tag*/ false,
) {
Ok(p) => Ok(p),
Err(e) if is_unique_violation(&e) => {
// Race: someone else created the row. Re-read.
let mut conn = self.connection.lock().expect("face dao lock");
persons::table
.filter(persons::is_ignored.eq(true))
.order(persons::id.asc())
.first::<Person>(conn.deref_mut())
.with_context(|| "load ignored person after race")
}
Err(e) => Err(e),
}
}
fn get_person(
&mut self,
ctx: &opentelemetry::Context,
id: i32,
) -> anyhow::Result<Option<Person>> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "get_person", |span| {
span.set_attribute(KeyValue::new("id", id as i64));
persons::table
.find(id)
.first::<Person>(conn.deref_mut())
.optional()
.with_context(|| "get_person")
})
}
    /// List persons alphabetically with their detected-face counts; the
    /// optional `library_id` scopes only the counts (persons themselves
    /// are global). The ignore bucket is excluded unless `include_ignored`
    /// is set.
    fn list_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
        include_ignored: bool,
    ) -> anyhow::Result<Vec<PersonSummary>> {
        let mut conn = self.connection.lock().expect("face dao lock");
        trace_db_call(ctx, "query", "list_persons", |_| {
            // Two-step: load all persons, then a single grouped count
            // query for face counts. Using a LEFT JOIN + GROUP BY in
            // Diesel here gets noisy with the optional library filter; a
            // second roundtrip is cheap and clearer.
            let mut person_query = persons::table.into_boxed();
            if !include_ignored {
                // Default — hide the IGNORE/junk bucket from the list.
                // The frontend asks include_ignored=true explicitly when
                // it needs to surface ignored persons (e.g. a "show
                // ignored" toggle in the management UI).
                person_query = person_query.filter(persons::is_ignored.eq(false));
            }
            let person_rows: Vec<Person> = person_query
                .order(persons::name.asc())
                .load::<Person>(conn.deref_mut())
                .with_context(|| "load persons")?;
            // Diesel's BoxedSelectStatement + group_by trips the trait
            // resolver into recursion, so this aggregation goes through
            // sql_query. The shape is small and the bind list is at most
            // one parameter — readability isn't really worse than the DSL.
            let counts: Vec<(i32, i64)> = {
                use diesel::sql_types::*;
                #[derive(QueryableByName)]
                struct PersonCountRow {
                    #[diesel(sql_type = Integer)]
                    person_id: i32,
                    #[diesel(sql_type = BigInt)]
                    count: i64,
                }
                let sql = if library_id.is_some() {
                    "SELECT person_id, COUNT(*) AS count FROM face_detections \
                     WHERE status='detected' AND person_id IS NOT NULL AND library_id = ? \
                     GROUP BY person_id"
                } else {
                    "SELECT person_id, COUNT(*) AS count FROM face_detections \
                     WHERE status='detected' AND person_id IS NOT NULL \
                     GROUP BY person_id"
                };
                let mut q = diesel::sql_query(sql).into_boxed();
                if let Some(lib) = library_id {
                    q = q.bind::<Integer, _>(lib);
                }
                q.load::<PersonCountRow>(conn.deref_mut())
                    .with_context(|| "person face counts")?
                    .into_iter()
                    .map(|r| (r.person_id, r.count))
                    .collect()
            };
            use std::collections::HashMap;
            let count_map: HashMap<i32, i64> = counts.into_iter().collect();
            // Persons missing from count_map simply have zero faces.
            Ok(person_rows
                .into_iter()
                .map(|p| {
                    let face_count = count_map.get(&p.id).copied().unwrap_or(0);
                    PersonSummary {
                        id: p.id,
                        name: p.name,
                        cover_face_id: p.cover_face_id,
                        entity_id: p.entity_id,
                        created_from_tag: p.created_from_tag,
                        notes: p.notes,
                        is_ignored: p.is_ignored,
                        face_count,
                    }
                })
                .collect())
        })
    }
fn update_person(
&mut self,
ctx: &opentelemetry::Context,
id: i32,
patch: &UpdatePersonReq,
) -> anyhow::Result<Person> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "update", "update_person", |span| {
span.set_attribute(KeyValue::new("id", id as i64));
let now = Utc::now().timestamp();
// Apply each patched column individually for the same
// reason as update_face — heterogeneous optional sets are
// painful in Diesel's type-driven update DSL.
if let Some(name) = &patch.name {
diesel::update(persons::table.find(id))
.set((persons::name.eq(name), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person name")?;
}
if let Some(notes) = &patch.notes {
diesel::update(persons::table.find(id))
.set((persons::notes.eq(notes), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person notes")?;
}
if let Some(cover) = patch.cover_face_id {
diesel::update(persons::table.find(id))
.set((
persons::cover_face_id.eq(cover),
persons::updated_at.eq(now),
))
.execute(conn.deref_mut())
.with_context(|| "update person cover")?;
}
if let Some(eid) = patch.entity_id {
diesel::update(persons::table.find(id))
.set((persons::entity_id.eq(eid), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person entity_id")?;
}
if let Some(flag) = patch.is_ignored {
diesel::update(persons::table.find(id))
.set((persons::is_ignored.eq(flag), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person is_ignored")?;
}
persons::table
.find(id)
.first::<Person>(conn.deref_mut())
.with_context(|| "fetch updated person")
})
}
fn delete_person(
&mut self,
ctx: &opentelemetry::Context,
id: i32,
cascade_delete_faces: bool,
) -> anyhow::Result<bool> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "delete", "delete_person", |span| {
span.set_attribute(KeyValue::new("id", id as i64));
span.set_attribute(KeyValue::new("cascade", cascade_delete_faces));
if cascade_delete_faces {
diesel::delete(face_detections::table.filter(face_detections::person_id.eq(id)))
.execute(conn.deref_mut())
.with_context(|| "cascade delete faces for person")?;
}
// Always clear cover_face_id pointers that referenced this
// person's faces (otherwise the FK from persons.cover_face_id
// could hang). cover_face_id has no FK constraint in SQLite
// so this is documentation-only — the explicit nuke is on
// the face rows above.
let n = diesel::delete(persons::table.find(id))
.execute(conn.deref_mut())
.with_context(|| "delete person")?;
Ok(n > 0)
})
}
    /// Merge person `src` into person `into`: repoint all face rows, copy
    /// `src`'s notes into the target only when the target has none, then
    /// delete `src` and return the surviving row.
    ///
    /// The whole operation runs inside one transaction so a mid-merge
    /// SQLite write error cannot leave a half-merged state. Bails before
    /// touching the database when `src == into`.
    fn merge_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        src: i32,
        into: i32,
    ) -> anyhow::Result<Person> {
        if src == into {
            anyhow::bail!("cannot merge a person into itself");
        }
        let mut conn = self.connection.lock().expect("face dao lock");
        trace_db_call(ctx, "update", "merge_persons", |span| {
            span.set_attribute(KeyValue::new("src", src as i64));
            span.set_attribute(KeyValue::new("into", into as i64));
            // Wrap in a transaction so a half-merged state can't survive
            // a SQLite write error mid-operation.
            conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| {
                // Re-point face_detections.
                diesel::update(face_detections::table.filter(face_detections::person_id.eq(src)))
                    .set(face_detections::person_id.eq(into))
                    .execute(tx)
                    .with_context(|| "repoint faces on merge")?;
                // Copy notes from src into target if the target is empty.
                let src_person: Person = persons::table
                    .find(src)
                    .first(tx)
                    .with_context(|| "load src person for merge")?;
                let into_person: Person = persons::table
                    .find(into)
                    .first(tx)
                    .with_context(|| "load target person for merge")?;
                if into_person.notes.as_deref().unwrap_or("").is_empty()
                    && src_person
                        .notes
                        .as_deref()
                        .map(|s| !s.is_empty())
                        .unwrap_or(false)
                {
                    diesel::update(persons::table.find(into))
                        .set(persons::notes.eq(src_person.notes))
                        .execute(tx)
                        .with_context(|| "copy notes on merge")?;
                }
                diesel::delete(persons::table.find(src))
                    .execute(tx)
                    .with_context(|| "delete src person on merge")?;
                persons::table
                    .find(into)
                    .first::<Person>(tx)
                    .with_context(|| "fetch merged person")
            })
        })
    }
fn resolve_content_hash(
&mut self,
ctx: &opentelemetry::Context,
library_id: i32,
rel_path: &str,
) -> anyhow::Result<Option<String>> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "resolve_content_hash", |_| {
image_exif::table
.filter(image_exif::library_id.eq(library_id))
.filter(image_exif::rel_path.eq(rel_path))
.select(image_exif::content_hash)
.first::<Option<String>>(conn.deref_mut())
.optional()
.map(|outer| outer.and_then(|inner| inner))
.with_context(|| "resolve content_hash")
})
}
fn find_persons_by_names_ci(
&mut self,
ctx: &opentelemetry::Context,
names: &[String],
) -> anyhow::Result<std::collections::HashMap<String, i32>> {
if names.is_empty() {
return Ok(std::collections::HashMap::new());
}
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "find_persons_by_names_ci", |span| {
span.set_attribute(KeyValue::new("count", names.len() as i64));
// Lowercase comparison both sides. Use sql_query to keep the
// bind list dynamic without fighting Diesel's type system on
// the LOWER() function.
use diesel::sql_types::*;
let placeholders = std::iter::repeat_n("?", names.len())
.collect::<Vec<_>>()
.join(",");
// Filter out is_ignored persons so the auto-bind path can
// never target the IGNORE/junk bucket — even if a tag name
// happens to match it (e.g. someone tags photos as "Ignored"
// by hand). Ignore-bucket assignment is an explicit operator
// action through the dedicated endpoint, never a heuristic.
let sql = format!(
"SELECT id, LOWER(name) AS lower_name FROM persons \
WHERE is_ignored = 0 AND LOWER(name) IN ({}) \
ORDER BY id ASC",
placeholders
);
#[derive(QueryableByName)]
struct Row {
#[diesel(sql_type = Integer)]
id: i32,
#[diesel(sql_type = Text)]
lower_name: String,
}
let mut q = diesel::sql_query(sql).into_boxed();
for n in names {
q = q.bind::<Text, _>(n.to_lowercase());
}
let rows = q
.load::<Row>(conn.deref_mut())
.with_context(|| "find_persons_by_names_ci")?;
// Lowest id wins on collision (UNIQUE COLLATE NOCASE on the
// table prevents that today, but the deduplication is a
// defensive belt-and-braces).
let mut out = std::collections::HashMap::with_capacity(rows.len());
for r in rows {
out.entry(r.lower_name).or_insert(r.id);
}
Ok(out)
})
}
fn person_reference_embedding(
&mut self,
ctx: &opentelemetry::Context,
person_id: i32,
model_version: &str,
) -> anyhow::Result<Option<Vec<f32>>> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "person_reference_embedding", |span| {
span.set_attribute(KeyValue::new("person_id", person_id as i64));
span.set_attribute(KeyValue::new("model_version", model_version.to_string()));
// Pull only the embedding bytes; we average them in Rust. A
// SQL aggregate over 512-d vectors isn't meaningfully faster
// and would tie us to a specific embedding length.
let blobs: Vec<Option<Vec<u8>>> = face_detections::table
.filter(face_detections::person_id.eq(person_id))
.filter(face_detections::status.eq("detected"))
.filter(face_detections::model_version.eq(model_version))
.select(face_detections::embedding)
.load(conn.deref_mut())
.with_context(|| "load person embeddings")?;
let vectors: Vec<Vec<f32>> = blobs
.into_iter()
.filter_map(|b| b.and_then(|bytes| decode_embedding_bytes(&bytes)))
.collect();
if vectors.is_empty() {
return Ok(None);
}
Ok(Some(mean_normalized(&vectors)))
})
}
fn assign_face_to_person(
&mut self,
ctx: &opentelemetry::Context,
face_id: i32,
person_id: i32,
) -> anyhow::Result<()> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "update", "assign_face_to_person", |span| {
span.set_attribute(KeyValue::new("face_id", face_id as i64));
span.set_attribute(KeyValue::new("person_id", person_id as i64));
conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| {
diesel::update(face_detections::table.find(face_id))
.set(face_detections::person_id.eq(person_id))
.execute(tx)
.with_context(|| "set face person_id")?;
// If this person has no cover yet, claim this face.
// Don't overwrite an existing cover — the user may have
// hand-picked one in the UI.
let cover: Option<i32> = persons::table
.find(person_id)
.select(persons::cover_face_id)
.first::<Option<i32>>(tx)
.with_context(|| "load person cover")?;
if cover.is_none() {
diesel::update(persons::table.find(person_id))
.set(persons::cover_face_id.eq(face_id))
.execute(tx)
.with_context(|| "set cover_face_id")?;
}
Ok(())
})
})
}
}
// ── Embedding helpers ───────────────────────────────────────────────────────
/// Decode a 2048-byte little-endian f32 BLOB into a 512-element Vec<f32>.
///
/// Malformed input yields `None` instead of an error: callers treat "no
/// usable embedding" exactly like "no embedding at all" and skip the row
/// when averaging.
pub(crate) fn decode_embedding_bytes(bytes: &[u8]) -> Option<Vec<f32>> {
    if bytes.len() != 2048 {
        return None;
    }
    let floats = bytes
        .chunks_exact(4)
        .map(|quad| f32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]))
        .collect();
    Some(floats)
}
/// Average a set of L2-normalized vectors, then re-normalize the result.
///
/// ArcFace embeddings from insightface arrive unit-length, so normalizing
/// the mean gives the "average direction" in a single step. A
/// zero-magnitude mean (vectors cancelling out) is returned as-is rather
/// than dividing by zero.
fn mean_normalized(vectors: &[Vec<f32>]) -> Vec<f32> {
    debug_assert!(
        !vectors.is_empty(),
        "mean_normalized requires non-empty input"
    );
    let dim = vectors[0].len();
    let count = vectors.len() as f32;
    let mut mean = vec![0.0f32; dim];
    for v in vectors {
        debug_assert_eq!(v.len(), dim, "mismatched embedding dim");
        for (slot, value) in mean.iter_mut().zip(v.iter()) {
            *slot += *value;
        }
    }
    for slot in mean.iter_mut() {
        *slot /= count;
    }
    let magnitude = mean.iter().map(|x| x * x).sum::<f32>().sqrt();
    if magnitude > 0.0 {
        for slot in mean.iter_mut() {
            *slot /= magnitude;
        }
    }
    mean
}
/// Cosine similarity of two embeddings of equal length.
///
/// Neither input needs to be pre-normalized. Length mismatch, empty
/// input, and zero-magnitude vectors all yield 0.0 instead of NaN — the
/// auto-bind path reads 0.0 as "no useful similarity, leave unassigned".
pub(crate) fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let mag_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let mag_b = b.iter().map(|y| y * y).sum::<f32>().sqrt();
    let denom = mag_a * mag_b;
    if denom <= 0.0 { 0.0 } else { dot / denom }
}
// ── Handlers ────────────────────────────────────────────────────────────────
/// Register every face/person HTTP route on the given actix `App`.
///
/// NOTE(review): the literal routes `/persons/bootstrap` and
/// `/persons/ignore-bucket` are registered before the `/persons/{id}`
/// pattern. actix-web matches resources in registration order, so keep
/// the literal paths first or they would be captured as `{id}` values.
pub fn add_face_services<T, D: FaceDao + 'static>(app: App<T>) -> App<T>
where
    T: ServiceFactory<ServiceRequest, Config = (), Error = actix_web::Error, InitError = ()>,
{
    app.service(web::resource("/faces/stats").route(web::get().to(stats_handler::<D>)))
        .service(web::resource("/faces/embeddings").route(web::get().to(embeddings_handler::<D>)))
        .service(
            web::resource("/image/faces")
                .route(web::get().to(list_faces_handler::<D>))
                .route(web::post().to(create_face_handler::<D>)),
        )
        .service(
            web::resource("/image/faces/{id}")
                .route(web::patch().to(update_face_handler::<D>))
                .route(web::delete().to(delete_face_handler::<D>)),
        )
        .service(
            web::resource("/persons")
                .route(web::get().to(list_persons_handler::<D>))
                .route(web::post().to(create_person_handler::<D>)),
        )
        .service(
            web::resource("/persons/bootstrap")
                .route(web::post().to(bootstrap_persons_handler::<D>)),
        )
        .service(
            web::resource("/persons/ignore-bucket")
                .route(web::post().to(ignore_bucket_handler::<D>)),
        )
        .service(
            web::resource("/tags/people-bootstrap-candidates")
                .route(web::get().to(bootstrap_candidates_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}")
                .route(web::get().to(get_person_handler::<D>))
                .route(web::patch().to(update_person_handler::<D>))
                .route(web::delete().to(delete_person_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}/merge").route(web::post().to(merge_persons_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}/faces").route(web::get().to(person_faces_handler::<D>)),
        )
}
// ── Bootstrap (Phase 4) ─────────────────────────────────────────────────────
/// One row in the tag→person bootstrap picker: a case-insensitive tag
/// group plus the flags the UI uses to pre-check and de-duplicate it.
#[derive(Serialize, Debug, Clone)]
pub struct BootstrapCandidate {
    /// Display name — most-frequent capitalization across the case-insensitive
    /// group, or simply the first one seen if it's a tie.
    pub name: String,
    /// Lowercased name; the stable key for grouping and the auto-bind path.
    pub normalized_name: String,
    /// Sum of `tagged_photo` counts across all capitalizations of this name.
    pub usage_count: i64,
    /// Heuristic suggestion; the UI defaults this to checked but the user
    /// confirms before [`bootstrap_persons_handler`] actually creates rows.
    pub looks_like_person: bool,
    /// True when a `persons` row already exists for this name (any case).
    /// The UI hides these — re-running bootstrap is idempotent so it's fine
    /// either way, but the noise isn't worth showing.
    pub already_exists: bool,
}
/// Response body for `GET /tags/people-bootstrap-candidates`.
#[derive(Serialize, Debug)]
pub struct BootstrapCandidatesResponse {
    pub candidates: Vec<BootstrapCandidate>,
}
/// Request body for `POST /persons/bootstrap`: the display names the
/// operator ticked in the candidate list.
#[derive(Deserialize, Debug)]
pub struct BootstrapPersonsReq {
    pub names: Vec<String>,
}
/// Outcome of a bootstrap run: person rows actually created, plus a
/// per-name skip record for everything that wasn't.
#[derive(Serialize, Debug)]
pub struct BootstrapPersonsResponse {
    pub created: Vec<Person>,
    pub skipped: Vec<BootstrapSkipped>,
}
/// A name the bootstrap run did not create a person for, with a
/// human-readable reason (empty name, already exists, or an error).
#[derive(Serialize, Debug)]
pub struct BootstrapSkipped {
    pub name: String,
    pub reason: String,
}
/// Hard filter for the bootstrap candidate list. Returns true when the
/// tag could plausibly be a person name; false drops it from the
/// candidates entirely (not merely leaves `looks_like_person` unset).
///
/// Requirements — all must hold after trimming:
/// - At least 3 characters. Two-letter tags ("AB", "OK") are almost
///   always abbreviations or markers, not names.
/// - Only name-plausible characters: letters, whitespace, ASCII digits,
///   apostrophes (ASCII `'` or U+2019), dashes, periods, underscores.
///   Emoji, symbols, math operators, and control characters disqualify
///   the whole tag — they're chart-junk no operator would tick.
///
/// Digits deliberately survive this filter: "Sarah2" stays a candidate
/// (display-flagged not-a-person by `looks_like_person`) so the operator
/// can still spot and confirm it manually if it's an alias.
pub(crate) fn is_plausible_name_token(raw: &str) -> bool {
    let token = raw.trim();
    if token.chars().count() < 3 {
        return false;
    }
    token.chars().all(|c| {
        c.is_alphabetic()
            || c.is_ascii_digit()
            || c.is_whitespace()
            || matches!(c, '\'' | '-' | '.' | '_' | '\u{2019}')
    })
}
/// Conservative "this tag *might* be a person name" heuristic. It only
/// controls the default checked state in the bootstrap UI, so both false
/// negatives and false positives are acceptable — the operator confirms
/// every row before anything is created.
///
/// Accepts when:
/// - the trimmed tag is 1–2 whitespace-separated words,
/// - every word starts with an uppercase character and contains no ASCII
///   digits (rejects "Trip 2018", "2024", ...), and
/// - a single-word tag is not on a small denylist of common non-person
///   tags (cat, christmas, beach, ...). Two-word tags skip the denylist
///   because a real two-word person name is the dominant case
///   ("Sarah Smith") and false-blocking it is worse than false-accepting
///   "Sunset Walk".
pub(crate) fn looks_like_person(raw: &str) -> bool {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return false;
    }
    let words: Vec<&str> = trimmed.split_whitespace().collect();
    if words.is_empty() || words.len() > 2 {
        return false;
    }
    let name_shaped = words.iter().all(|w| {
        w.chars().next().is_some_and(|c| c.is_uppercase())
            && !w.chars().any(|c| c.is_ascii_digit())
    });
    if !name_shaped {
        return false;
    }
    if words.len() == 1 {
        const DENY: &[&str] = &[
            // Pets / animals
            "cat", "dog", "kitten", "puppy", "bird", "fish", "pet", "pets",
            // Events / occasions
            "birthday", "christmas", "halloween", "easter", "thanksgiving",
            "wedding", "anniversary", "vacation", "holiday", "party", "trip",
            "graduation", "concert",
            // Places (generic)
            "home", "work", "beach", "park", "hotel", "restaurant", "office",
            "house", "garden",
            // Subjects / styles
            "food", "sunset", "sunrise", "landscape", "portrait", "selfie",
            "nature", "flowers", "flower", "snow", "rain", "sky",
            // Buckets
            "untagged", "favorites", "favourites", "misc", "other", "random",
        ];
        let lower = trimmed.to_lowercase();
        if DENY.contains(&lower.as_str()) {
            return false;
        }
    }
    true
}
/// `GET /tags/people-bootstrap-candidates` — build the tag→person
/// bootstrap picker: group tags case-insensitively, pick the
/// most-frequent capitalization as the display name, and flag both
/// likely person names and names that already have a `persons` row.
async fn bootstrap_candidates_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    face_dao: web::Data<Mutex<D>>,
    tag_dao: web::Data<Mutex<crate::tags::SqliteTagDao>>,
) -> impl Responder {
    use std::collections::HashMap;
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.bootstrap_candidates", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    // All tags + their counts. Path filter unused — bootstrap is library-wide.
    // The tag-dao lock is scoped so it's released before the face-dao lock
    // is taken below.
    let tags_with_counts = {
        let mut td = tag_dao.lock().expect("tag dao lock");
        match crate::tags::TagDao::get_all_tags(&mut *td, &span_context, None) {
            Ok(t) => t,
            Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
        }
    };
    // Group by lowercase name. Pick the most-frequent capitalization
    // for the display name (ties broken by first-seen). Filter out
    // short tags and tags carrying non-name characters (emojis, symbols)
    // before grouping — they're noise no operator would tick, so showing
    // them just makes the candidate list harder to scan.
    struct Group {
        /// Current best display capitalization (most frequent so far).
        display: String,
        /// Count backing `display`; only a strictly larger count replaces it.
        display_freq: i64,
        /// Sum of counts across every capitalization in the group.
        total_count: i64,
    }
    let mut groups: HashMap<String, Group> = HashMap::new();
    for (count, tag) in tags_with_counts {
        if !is_plausible_name_token(&tag.name) {
            continue;
        }
        let lower = tag.name.to_lowercase();
        let g = groups.entry(lower).or_insert_with(|| Group {
            display: tag.name.clone(),
            display_freq: 0,
            total_count: 0,
        });
        g.total_count += count;
        if count > g.display_freq {
            g.display = tag.name.clone();
            g.display_freq = count;
        }
    }
    // Cross-reference against existing persons (bulk one-query lookup).
    let lower_names: Vec<String> = groups.keys().cloned().collect();
    let existing = {
        let mut fd = face_dao.lock().expect("face dao lock");
        match fd.find_persons_by_names_ci(&span_context, &lower_names) {
            Ok(m) => m,
            Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
        }
    };
    let mut candidates: Vec<BootstrapCandidate> = groups
        .into_iter()
        .map(|(lower, g)| BootstrapCandidate {
            looks_like_person: looks_like_person(&g.display),
            already_exists: existing.contains_key(&lower),
            name: g.display,
            normalized_name: lower,
            usage_count: g.total_count,
        })
        .collect();
    // Sort: persons-first heuristic by descending count, then alphabetical.
    // Persons-likely candidates surface near the top so the user doesn't
    // scroll past dozens of "vacation"-style tags to find them.
    candidates.sort_by(|a, b| {
        b.looks_like_person
            .cmp(&a.looks_like_person)
            .then(b.usage_count.cmp(&a.usage_count))
            .then(a.normalized_name.cmp(&b.normalized_name))
    });
    HttpResponse::Ok().json(BootstrapCandidatesResponse { candidates })
}
/// `POST /persons/bootstrap` — create person rows for the ticked names.
/// Idempotent: names that already exist (any case) come back as skipped,
/// not as errors, so re-running the bootstrap is safe.
async fn bootstrap_persons_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<BootstrapPersonsReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.bootstrap_persons", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    let mut dao = face_dao.lock().expect("face dao lock");
    // One bulk existence lookup up front: a duplicate request then reports
    // "already exists" (skipped) instead of firing N inserts that all trip
    // the UNIQUE COLLATE NOCASE constraint.
    let lower_names: Vec<String> = body.names.iter().map(|n| n.to_lowercase()).collect();
    let existing = match dao.find_persons_by_names_ci(&span_context, &lower_names) {
        Ok(m) => m,
        Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
    };
    let mut created: Vec<Person> = Vec::new();
    let mut skipped: Vec<BootstrapSkipped> = Vec::new();
    for raw in &body.names {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            skipped.push(BootstrapSkipped {
                name: raw.clone(),
                reason: "empty name".into(),
            });
            continue;
        }
        if existing.contains_key(&trimmed.to_lowercase()) {
            skipped.push(BootstrapSkipped {
                name: trimmed.to_string(),
                reason: "person already exists".into(),
            });
            continue;
        }
        let req = CreatePersonReq {
            name: trimmed.to_string(),
            notes: None,
            entity_id: None,
            is_ignored: false,
        };
        match dao.create_person(&span_context, &req, /*from_tag*/ true) {
            Ok(p) => created.push(p),
            Err(e) if is_unique_violation(&e) => {
                // Lost a race with a concurrent create — same as skipped.
                skipped.push(BootstrapSkipped {
                    name: trimmed.to_string(),
                    reason: "person already exists".into(),
                });
            }
            Err(e) => skipped.push(BootstrapSkipped {
                name: trimmed.to_string(),
                reason: format!("{:#}", e),
            }),
        }
    }
    HttpResponse::Ok().json(BootstrapPersonsResponse { created, skipped })
}
// ── Stats / list ────────────────────────────────────────────────────────────
/// Query parameters for library-scoped endpoints: an optional library
/// selector string, resolved via `libraries::resolve_library_param`.
#[derive(Deserialize)]
pub struct LibraryQuery {
    pub library: Option<String>,
}
/// `GET /persons` query: optional library scope, optional include of
/// the IGNORE/junk bucket. The bucket is hidden by default so the
/// management UI shows only "real" persons; the persons-management
/// screen requests it explicitly when it needs to surface ignored.
#[derive(Deserialize)]
pub struct ListPersonsQuery {
    // Optional library selector, resolved via resolve_library_param.
    pub library: Option<String>,
    // Defaults to false — ignored persons are omitted unless asked for.
    #[serde(default)]
    pub include_ignored: bool,
}
/// `GET /faces/stats` — scan-progress and person counters, optionally
/// scoped to one library. An unresolvable library parameter falls back
/// to the unscoped (instance-wide) view rather than erroring.
async fn stats_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    app_state: web::Data<AppState>,
    query: web::Query<LibraryQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.stats", &parent);
    let trace_ctx = opentelemetry::Context::current_with_span(span);
    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|l| l.id);
    let mut dao = face_dao.lock().expect("face dao lock");
    let result = dao.stats(&trace_ctx, library_id);
    result
        .map(|s| {
            trace_ctx.span().set_status(Status::Ok);
            HttpResponse::Ok().json(s)
        })
        .into_http_internal_err()
}
/// `GET /image/faces` — all face rows for one photo, resolved through
/// `image_exif.content_hash` so renames don't strand the overlay.
///
/// A photo with no content hash yet (watcher hasn't caught up) returns
/// an empty list rather than an error; the carousel simply renders no
/// overlay until the hash lands.
async fn list_faces_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    query: web::Query<ThumbnailRequest>,
    app_state: web::Data<AppState>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.list", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    let normalized_path = normalize_path(&query.path);
    // resolve_library_param returns Option<&Library>; clone so the result
    // is owned (matching the primary_library fallback's type).
    let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .cloned()
        .unwrap_or_else(|| app_state.primary_library().clone());
    let mut dao = face_dao.lock().expect("face dao lock");
    let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
        Ok(Some(h)) => h,
        Ok(None) => {
            // Photo not yet hashed — empty face list is a graceful answer.
            // The carousel falls back to "no overlay" which is fine until
            // the watcher catches up.
            return HttpResponse::Ok().json(Vec::<FaceWithPerson>::new());
        }
        // `{:#}` surfaces the full anyhow context chain, matching every
        // other handler in this module; `e.to_string()` only showed the
        // outermost context and made these two paths needlessly terse.
        Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
    };
    match dao.list_for_content_hash(&span_context, &hash) {
        Ok(faces) => HttpResponse::Ok().json(faces),
        Err(e) => HttpResponse::InternalServerError().body(format!("{:#}", e)),
    }
}
/// Bulk embedding export (Apollo's cluster-suggest input): paginated
/// rows paired with their base64-encoded embeddings.
async fn embeddings_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    query: web::Query<EmbeddingsQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.embeddings", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    // Clamp pagination to sane bounds before it reaches SQL.
    let limit = query.limit.clamp(1, 5_000);
    let offset = query.offset.max(0);
    let mut dao = face_dao.lock().expect("face dao lock");
    dao.list_embeddings(
        &span_ctx,
        query.library,
        query.unassigned,
        limit,
        offset,
    )
    .map(|rows| {
        // The DAO yields (row, base64 embedding) pairs; flatten each
        // pair into the wire shape.
        let mut out = Vec::with_capacity(rows.len());
        for (row, encoded) in rows {
            out.push(FaceEmbeddingRow {
                id: row.id,
                library_id: row.library_id,
                rel_path: row.rel_path,
                content_hash: row.content_hash,
                person_id: row.person_id,
                model_version: row.model_version,
                embedding: encoded,
                bbox_x: row.bbox_x,
                bbox_y: row.bbox_y,
                bbox_w: row.bbox_w,
                bbox_h: row.bbox_h,
            });
        }
        HttpResponse::Ok().json(out)
    })
    .into_http_internal_err()
}
// ── Manual face create / update / delete ────────────────────────────────────
/// Create a face row by hand for a photo.
///
/// Two paths:
/// * strict (`force == false`) — crop the user-drawn bbox, send the
///   crop to Apollo via `face_client.embed`, store the real vector;
/// * force (`force == true`) — skip Apollo and store a zero-vector
///   sentinel embedding under model_version `manual_no_embed` (see
///   the step 2+3 comment below for why that is safe).
///
/// Responses emitted here: 201 Created with the person-hydrated row;
/// 409 when the photo has no content hash yet; 400 on crop failure;
/// 422 on permanent detect errors or no face in the crop; 502 on an
/// undecodable embedding; 503 when the client is disabled or Apollo
/// is transiently down; 500 on DAO errors.
async fn create_face_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<CreateFaceReq>,
    app_state: web::Data<AppState>,
    face_client: web::Data<FaceClient>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.create_manual", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    // The force path doesn't need Apollo at all (no embed call); the
    // strict path does. Surface the disabled state only when we'd
    // actually use the client.
    if !body.force && !face_client.is_enabled() {
        return HttpResponse::ServiceUnavailable().body("face client disabled");
    }
    let normalized_path = normalize_path(&body.path);
    // Absent or unresolvable library param falls back to the primary
    // library (the `_` arm covers both Err and Ok(None)).
    let library: Library = match libraries::resolve_library_param(
        &app_state,
        body.library.as_ref().map(|i| i.to_string()).as_deref(),
    ) {
        Ok(Some(lib)) => lib.clone(),
        _ => app_state.primary_library().clone(),
    };
    // 1. Resolve content_hash for the photo.
    let hash = {
        let mut dao = face_dao.lock().expect("face dao lock");
        match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
            Ok(Some(h)) => h,
            Ok(None) => {
                return HttpResponse::Conflict()
                    .body("photo not yet hashed; wait for next watcher pass");
            }
            Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
        }
    };
    // 2 + 3. Crop + embed via Apollo (strict path), or skip both (force).
    //
    // Force is the "tag a face the detector can't see" path — back of
    // head, heavily-occluded profile, etc. We store a zero-vector
    // embedding under a sentinel model_version so the row participates
    // only as a browse-by-person tag: clustering filters norm<=0 (see
    // face_clustering._decode_b64_embedding) and auto-bind cosine
    // resolves to 0 / NaN, never crossing the threshold. Cluster
    // suggester also groups by model_version so this sentinel never
    // mixes with real buffalo_l rows.
    let (embedding_bytes, model_version, confidence) = if body.force {
        info!(
            "manual face (force): skipping detection for {:?} bbox=({},{},{},{})",
            normalized_path, body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h
        );
        // 2048 zero bytes = 512 × f32 — same width as a real embedding,
        // so downstream length checks still pass.
        (vec![0u8; 2048], "manual_no_embed".to_string(), 0.0_f32)
    } else {
        let abs_path = library.resolve(&normalized_path);
        let crop_bytes = match crop_image_to_bbox(
            &abs_path,
            body.bbox.x,
            body.bbox.y,
            body.bbox.w,
            body.bbox.h,
        ) {
            Ok(b) => b,
            Err(e) => {
                warn!("crop_image_to_bbox failed for {:?}: {:?}", abs_path, e);
                return HttpResponse::BadRequest().body(format!("cannot crop photo: {}", e));
            }
        };
        let meta = DetectMeta {
            content_hash: hash.clone(),
            library_id: library.id,
            rel_path: normalized_path.clone(),
            orientation: None,
            model_version: None,
        };
        let detect = match face_client.embed(crop_bytes, meta).await {
            Ok(r) => r,
            Err(FaceDetectError::Permanent(e)) => {
                return HttpResponse::UnprocessableEntity().body(format!("{}", e));
            }
            Err(FaceDetectError::Transient(e)) => {
                return HttpResponse::ServiceUnavailable().body(format!("{}", e));
            }
            Err(FaceDetectError::Disabled) => {
                return HttpResponse::ServiceUnavailable().body("face client disabled");
            }
        };
        let detected = match detect.faces.first() {
            Some(f) => f.clone(),
            None => {
                // Apollo would have returned 422 on no_face_in_crop; defensive.
                return HttpResponse::UnprocessableEntity().body("no face in crop");
            }
        };
        let bytes = match detected.decode_embedding() {
            Ok(b) => b,
            Err(e) => {
                warn!("manual face: decode embedding failed: {:?}", e);
                return HttpResponse::BadGateway().body("invalid embedding from face service");
            }
        };
        (bytes, detect.model_version, detected.confidence)
    };
    // 4. Insert the manual row using the bbox the user drew (NOT the
    // detector's tighter box around their drawing — they get what they
    // asked for; cluster matching uses the embedding which is from the
    // detector's true box anyway).
    let mut dao = face_dao.lock().expect("face dao lock");
    let row = match dao.store_detection(
        &span_context,
        InsertFaceDetectionInput {
            library_id: library.id,
            content_hash: hash,
            rel_path: normalized_path,
            bbox: Some((body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h)),
            embedding: Some(embedding_bytes),
            confidence: Some(confidence),
            source: "manual".to_string(),
            person_id: body.person_id,
            status: "detected".to_string(),
            model_version,
        },
    ) {
        Ok(r) => r,
        Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
    };
    info!(
        "Created manual face id={} library={} hash={} person_id={:?}",
        row.id, row.library_id, row.content_hash, row.person_id
    );
    // Join person_name so the 201 body matches the GET /image/faces shape.
    match hydrate_face_with_person(&mut *dao, &span_context, row) {
        Ok(joined) => HttpResponse::Created().json(joined),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
/// Patch a face row: person assignment and/or bounding box.
///
/// Person patch semantics: `clear_person` wins over `person_id` —
/// `Some(None)` clears the assignment, `Some(Some(id))` assigns,
/// `None` leaves it untouched.
///
/// A bbox patch triggers a re-embed through Apollo (rationale in the
/// inline comments); when detection fails softly the old embedding is
/// kept and the bbox is still applied.
async fn update_face_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    body: web::Json<UpdateFaceReq>,
    app_state: web::Data<AppState>,
    face_client: web::Data<FaceClient>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.update", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    let id = path.into_inner();
    // Encode the tri-state person patch: Some(None) = clear,
    // Some(Some(id)) = assign, None = leave untouched.
    let person_patch: Option<Option<i32>> = if body.clear_person {
        Some(None)
    } else {
        body.person_id.map(Some)
    };
    let bbox_patch = body.bbox.as_ref().map(|b| (b.x, b.y, b.w, b.h));
    // Bbox change → re-embed. The embedding is what auto-bind and the
    // cluster suggester key on, so leaving it stale would silently
    // corrupt every downstream similarity match. We crop the new bbox,
    // pass it through face_client.embed, and store the fresh vector.
    // Net cost: one Apollo round-trip per bbox edit (~100-500ms on
    // CPU); acceptable for a manual operator action.
    let mut new_embedding: Option<Vec<u8>> = None;
    if let Some((bx, by, bw, bh)) = bbox_patch {
        if !face_client.is_enabled() {
            warn!(
                "PATCH /image/faces/{}: 503 — face client not enabled \
                (APOLLO_FACE_API_BASE_URL / APOLLO_API_BASE_URL both unset). \
                Bbox edit requires Apollo to re-embed.",
                id
            );
            return HttpResponse::ServiceUnavailable()
                .body("face client disabled — bbox edit requires Apollo");
        }
        // Look up the current row so we know which photo to crop.
        let current = {
            let mut dao = face_dao.lock().expect("face dao lock");
            match dao.get_face(&span_context, id) {
                Ok(Some(r)) => r,
                Ok(None) => return HttpResponse::NotFound().finish(),
                Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
            }
        };
        let library = match app_state.library_by_id(current.library_id) {
            Some(l) => l.clone(),
            None => {
                return HttpResponse::InternalServerError().body(format!(
                    "face row references unknown library_id {}",
                    current.library_id
                ));
            }
        };
        let abs_path = library.resolve(&current.rel_path);
        let crop_bytes = match crop_image_to_bbox(&abs_path, bx, by, bw, bh) {
            Ok(b) => b,
            Err(e) => {
                warn!(
                    "PATCH /image/faces/{}: crop failed for {:?}: {:?}",
                    id, abs_path, e
                );
                return HttpResponse::BadRequest().body(format!("cannot crop new bbox: {}", e));
            }
        };
        // Re-embed under the row's existing model_version so the fresh
        // vector stays comparable to its neighbours.
        let meta = DetectMeta {
            content_hash: current.content_hash.clone(),
            library_id: current.library_id,
            rel_path: current.rel_path.clone(),
            orientation: None,
            model_version: Some(current.model_version.clone()),
        };
        // Soft contract on the re-embed: we'd LIKE a fresh ArcFace
        // vector for the new crop, but the operator's bbox edit is
        // sacred. If detection finds no face in the new region (they
        // dragged the box slightly off-center, or moved it to a back-
        // of-head shot they've already manually tagged), or returns a
        // bad embedding, we keep the old embedding and apply the bbox
        // anyway. Cost: stale embedding for that row, which slightly
        // pollutes clustering for files re-detected against this
        // person — accepted because dropping the user's drag is a
        // worse UX. Transient failures (cuda_oom, engine unavailable)
        // still 503 so the operator can retry once Apollo recovers.
        match face_client.embed(crop_bytes, meta).await {
            Ok(resp) => {
                if let Some(face) = resp.faces.first() {
                    match face.decode_embedding() {
                        Ok(b) => new_embedding = Some(b),
                        Err(e) => {
                            warn!(
                                "PATCH /image/faces/{}: bad embedding from face service ({:?}); keeping old embedding, bbox still applied",
                                id, e
                            );
                        }
                    }
                } else {
                    info!(
                        "PATCH /image/faces/{}: no face detected in new bbox — keeping old embedding, bbox still applied",
                        id
                    );
                }
            }
            Err(FaceDetectError::Permanent(e)) => {
                info!(
                    "PATCH /image/faces/{}: embed permanent error ({}); keeping old embedding, bbox still applied",
                    id, e
                );
            }
            Err(FaceDetectError::Transient(e)) => {
                warn!(
                    "PATCH /image/faces/{}: 503 — Apollo face client transient \
                    error during re-embed: {}",
                    id, e
                );
                return HttpResponse::ServiceUnavailable().body(format!("{}", e));
            }
            Err(FaceDetectError::Disabled) => {
                warn!(
                    "PATCH /image/faces/{}: 503 — face client became disabled \
                    mid-flight",
                    id
                );
                return HttpResponse::ServiceUnavailable().body("face client disabled mid-flight");
            }
        }
    }
    // Persist the patch (person and/or bbox, plus the fresh embedding
    // when the re-embed above succeeded).
    let mut dao = face_dao.lock().expect("face dao lock");
    let row = match dao.update_face(&span_context, id, person_patch, bbox_patch, new_embedding) {
        Ok(r) => r,
        Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
    };
    // Hydrate person_name so the response shape matches GET /image/faces
    // — the carousel overlay does an optimistic replace on this row, and
    // a bare FaceDetectionRow with no person_name would visibly drop the
    // VFD label off the bbox even though the assignment didn't change.
    match hydrate_face_with_person(&mut *dao, &span_context, row) {
        Ok(joined) => HttpResponse::Ok().json(joined),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
/// Delete a single face row. 204 when a row was removed, 404 when the
/// id didn't exist.
async fn delete_face_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.delete", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let face_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.delete_face(&span_ctx, face_id) {
        Ok(removed) => {
            if removed {
                HttpResponse::NoContent().finish()
            } else {
                HttpResponse::NotFound().finish()
            }
        }
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
// ── Persons ─────────────────────────────────────────────────────────────────
/// List persons, optionally scoped to a library and optionally
/// including ignored entries.
async fn list_persons_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    app_state: web::Data<AppState>,
    query: web::Query<ListPersonsQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.list", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    // An absent or unresolvable ?library= means "across all libraries".
    let library_filter = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|lib| lib.id);
    let mut dao = face_dao.lock().expect("face dao lock");
    dao.list_persons(&span_ctx, library_filter, query.include_ignored)
        .map(|persons| HttpResponse::Ok().json(persons))
        .into_http_internal_err()
}
/// Fetch (creating on first use) the singleton "ignored" person bucket.
async fn ignore_bucket_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.ignore_bucket", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let mut dao = face_dao.lock().expect("face dao lock");
    dao.get_or_create_ignored_person(&span_ctx)
        .map(|bucket| HttpResponse::Ok().json(bucket))
        .into_http_internal_err()
}
/// Create a person. 400 on a blank name, 409 on a case-insensitive
/// name collision, 201 with the new row otherwise.
async fn create_person_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<CreatePersonReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.create", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    // Reject whitespace-only names before touching the DB.
    if body.name.trim().is_empty() {
        return HttpResponse::BadRequest().body("name required");
    }
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.create_person(&span_ctx, &body, /*from_tag*/ false) {
        Ok(person) => HttpResponse::Created().json(person),
        // SQLite UNIQUE(name COLLATE NOCASE) → 409 Conflict so the UI
        // can show "name already exists" without parsing the body.
        Err(e) if is_unique_violation(&e) => {
            HttpResponse::Conflict().body("person name already exists")
        }
        // {:#} walks anyhow's source chain — plain Display only shows
        // the outermost context ("insert person ...") and would hide
        // the underlying diesel detail.
        Err(e) => HttpResponse::InternalServerError().body(format!("{:#}", e)),
    }
}
/// Fetch one person by id; 404 when absent.
async fn get_person_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.get", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let person_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.get_person(&span_ctx, person_id) {
        Ok(Some(person)) => HttpResponse::Ok().json(person),
        Ok(None) => HttpResponse::NotFound().finish(),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
/// Patch a person's fields. 409 when a rename collides with an
/// existing name (case-insensitive).
async fn update_person_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    body: web::Json<UpdatePersonReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.update", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let person_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.update_person(&span_ctx, person_id, &body) {
        Ok(person) => HttpResponse::Ok().json(person),
        // Case-insensitive UNIQUE on the name → 409 so the UI can show
        // "name already exists" without parsing the body.
        Err(e) if is_unique_violation(&e) => {
            HttpResponse::Conflict().body("person name already exists")
        }
        // {:#} keeps the diesel detail in the 500 body.
        Err(e) => HttpResponse::InternalServerError().body(format!("{:#}", e)),
    }
}
/// Delete a person. Only an explicit `?cascade=delete` removes the
/// face rows too; the default detaches them (set_null) so face
/// history survives the identity being renamed/removed.
async fn delete_person_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    query: web::Query<DeletePersonQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.delete", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let cascade = query.cascade.as_deref() == Some("delete");
    let person_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.delete_person(&span_ctx, person_id, cascade) {
        Ok(deleted) if deleted => HttpResponse::NoContent().finish(),
        Ok(_) => HttpResponse::NotFound().finish(),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
/// Merge the person in the path into `body.into`.
async fn merge_persons_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    body: web::Json<MergePersonsReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.merge", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let source_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.merge_persons(&span_ctx, source_id, body.into) {
        Ok(person) => HttpResponse::Ok().json(person),
        Err(e) => {
            // A self-merge error message mentions "itself" and is the
            // caller's fault (400); anything else is ours (500).
            let msg = format!("{:#}", e);
            if msg.contains("itself") {
                HttpResponse::BadRequest().body(msg)
            } else {
                HttpResponse::InternalServerError().body(msg)
            }
        }
    }
}
/// List every face assigned to a person, optionally scoped to one
/// library.
async fn person_faces_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    app_state: web::Data<AppState>,
    query: web::Query<LibraryQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.faces", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    // An absent or unresolvable ?library= means "across all libraries".
    let library_filter = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|lib| lib.id);
    let person_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    dao.list_for_person(&span_ctx, person_id, library_filter)
        .map(|faces| HttpResponse::Ok().json(faces))
        .into_http_internal_err()
}
// ── Helpers ─────────────────────────────────────────────────────────────────
/// Crop `abs_path` to the normalized bbox and re-encode as JPEG for the
/// face service. `image::open` decodes most photo formats Apollo will see;
/// HEIC/RAW are out of scope for the manual flow (the user can't draw a
/// face on a thumbnail of a non-decodable file anyway).
///
/// `nx`/`ny` are the bbox origin and `nw`/`nh` its size, normalized to
/// `[0, 1]` in display (post-EXIF-rotation) space.
///
/// # Errors
/// Fails when the bbox is out of range or degenerate, the file cannot
/// be decoded, the crop collapses to zero pixels, or JPEG encoding
/// fails.
fn crop_image_to_bbox(
    abs_path: &std::path::Path,
    nx: f32,
    ny: f32,
    nw: f32,
    nh: f32,
) -> anyhow::Result<Vec<u8>> {
    if !(0.0..=1.0).contains(&nx) || !(0.0..=1.0).contains(&ny) {
        return Err(anyhow!("bbox xy out of [0,1]"));
    }
    // The 1.001 tolerance absorbs client-side float rounding at the
    // right/bottom edges.
    if nw <= 0.0 || nh <= 0.0 || nx + nw > 1.001 || ny + nh > 1.001 {
        return Err(anyhow!("bbox wh out of bounds or zero"));
    }
    let raw = image::open(abs_path).with_context(|| format!("open {:?}", abs_path))?;
    // EXIF rotation: the bbox arrives in display space (the carousel /
    // overlay are rendered post-rotation by the browser), but the
    // `image` crate hands us raw pre-rotation pixels. For any phone
    // photo with Orientation 6/8/etc., applying the bbox without
    // rotating first lands the crop on a completely different region
    // of the image — which is why manually-drawn bboxes basically
    // never resolved a face on re-detection. Apply the orientation
    // first, then index into the canonical-oriented dims. Photos with
    // no EXIF rotation tag pay nothing (apply_orientation is a no-op).
    let orientation = exif::read_orientation(abs_path).unwrap_or(1);
    let img = exif::apply_orientation(raw, orientation);
    let (w, h) = img.dimensions();
    let px = (nx * w as f32).round().clamp(0.0, w as f32 - 1.0) as u32;
    let py = (ny * h as f32).round().clamp(0.0, h as f32 - 1.0) as u32;
    let pw = ((nw * w as f32).round() as u32).min(w.saturating_sub(px));
    let ph = ((nh * h as f32).round() as u32).min(h.saturating_sub(py));
    if pw == 0 || ph == 0 {
        return Err(anyhow!("crop produced zero-dim image"));
    }
    // Generous padding so RetinaFace has anchor-friendly context.
    // Insightface internally resizes to det_size=640 (square). A
    // tightly-drawn 200×250 face bbox + 10 % padding becomes ~240×300,
    // which after resize fills ~95 % of the input — near the upper
    // edge of RetinaFace's anchor scales, where it routinely returns
    // zero detections. Padding to 50 % on each side makes the crop
    // 2× the bbox dims (face occupies ~50 % of the input), where
    // anchors hit cleanly. Bbox is clamped to image bounds, so
    // edge-of-image bboxes just get less padding on the clipped side.
    let pad_x = (pw / 2).max(1);
    let pad_y = (ph / 2).max(1);
    let cx = px.saturating_sub(pad_x);
    let cy = py.saturating_sub(pad_y);
    let cw = (pw + 2 * pad_x).min(w - cx);
    let ch = (ph + 2 * pad_y).min(h - cy);
    let cropped = img.crop_imm(cx, cy, cw, ch);
    // JPEG has no alpha channel, and the `image` crate's JPEG encoder
    // rejects buffers it can't represent (e.g. RGBA8 from a PNG
    // screenshot, or 16-bit samples) with an "unsupported color type"
    // error instead of converting — which would fail the whole
    // manual-tag flow at the very last step. Flatten anything that
    // isn't already RGB8/L8 down to RGB8; plain photos pass through
    // without an extra copy.
    let jpeg_ready = if matches!(
        &cropped,
        image::DynamicImage::ImageRgb8(_) | image::DynamicImage::ImageLuma8(_)
    ) {
        cropped
    } else {
        image::DynamicImage::ImageRgb8(cropped.to_rgb8())
    };
    let mut out = std::io::Cursor::new(Vec::new());
    jpeg_ready
        .write_to(&mut out, image::ImageFormat::Jpeg)
        .with_context(|| "encode crop as JPEG")?;
    Ok(out.into_inner())
}
/// True when `err` — or any error beneath it in the source chain — is
/// a SQLite `UNIQUE constraint failed`.
///
/// The whole chain is walked because anyhow `with_context` plus
/// diesel's own error layering buries the database error a couple of
/// levels deep, so callers don't need to know the wrapping order.
///
/// We downcast to the concrete diesel kind rather than string-match
/// `format!("{:#}", e)`: message matching would be fragile
/// (locale-dependent SQLite text, false positives like "uniquely
/// identifies"); the downcast is the contract-stable check.
fn is_unique_violation(err: &anyhow::Error) -> bool {
    use diesel::result::{DatabaseErrorKind, Error as DieselError};
    for cause in err.chain() {
        if let Some(DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, _)) =
            cause.downcast_ref::<DieselError>()
        {
            return true;
        }
    }
    false
}
// ── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use crate::database::test::in_memory_db_connection;
    /// Build a `SqliteFaceDao` over a brand-new in-memory SQLite
    /// connection so every test runs against isolated state.
    fn fresh_dao() -> SqliteFaceDao {
        SqliteFaceDao::from_connection(Arc::new(Mutex::new(in_memory_db_connection())))
    }
    /// Shorthand for the ambient OpenTelemetry context — the DAO API
    /// takes a context parameter but these tests never inspect spans.
    fn ctx() -> opentelemetry::Context {
        opentelemetry::Context::current()
    }
#[test]
fn is_unique_violation_walks_chain() {
// The bug we hit in manual testing: anyhow's plain Display only
// shows the outermost context ("insert person Cameron"), so a
// naive `format!("{}", e).contains("unique")` check misses the
// diesel UNIQUE error nested below. Downcasting the source chain
// is the stable contract.
let mut dao = fresh_dao();
let _ = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Cameron".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.expect("first insert");
let dup_err = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Cameron".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.expect_err("second insert must fail");
// Plain Display hides the UNIQUE — that's the bug we're guarding
// against. We don't assert a specific outer message; we just
// confirm string-matching at the top level is unreliable.
let plain = format!("{}", dup_err);
assert!(
!plain.to_lowercase().contains("unique"),
"if Display starts surfacing UNIQUE we can drop the helper, but \
today it doesn't and the handler must downcast"
);
// Alt-Display walks the chain — useful for debug body content too.
let chained = format!("{:#}", dup_err);
assert!(
chained.to_uppercase().contains("UNIQUE"),
"chained display must surface the diesel error: {chained}"
);
// The contract-stable check the handler actually uses.
assert!(
is_unique_violation(&dup_err),
"is_unique_violation must downcast into the diesel chain"
);
}
// ── Phase 4: bootstrap heuristic + cosine + DAO support ─────────────
#[test]
fn is_plausible_name_token_filters_short_and_emoji() {
// Hard filter applied before grouping — emojis and tags shorter
// than 3 chars never make it into the candidate list, regardless
// of looks_like_person's later assessment.
assert!(is_plausible_name_token("Cameron"));
assert!(is_plausible_name_token("Sarah Smith"));
assert!(is_plausible_name_token("O'Brien"));
assert!(is_plausible_name_token("Jean-Luc"));
assert!(is_plausible_name_token("St. James"));
assert!(is_plausible_name_token("Renée"));
assert!(is_plausible_name_token("José"));
// Asian script names — the alphabetic/letter check covers any
// script, not just Latin.
assert!(is_plausible_name_token("田中太郎"));
// Below the 3-character floor.
assert!(!is_plausible_name_token(""));
assert!(!is_plausible_name_token(" "));
assert!(!is_plausible_name_token("Bo"));
assert!(!is_plausible_name_token("AB"));
// Trim before counting — surrounding whitespace doesn't count.
assert!(!is_plausible_name_token(" AB "));
// Emoji / symbol classes get the whole tag dropped.
assert!(!is_plausible_name_token("🐱cat"));
assert!(!is_plausible_name_token("Heart ❤"));
assert!(!is_plausible_name_token("📸Photo"));
assert!(!is_plausible_name_token("→ Trip"));
assert!(!is_plausible_name_token("★Vacation"));
// Digits are kept (handled by looks_like_person, not here).
assert!(is_plausible_name_token("Trip 2018"));
assert!(is_plausible_name_token("2024"));
}
#[test]
fn looks_like_person_accepts_typical_names() {
assert!(looks_like_person("Cameron"));
assert!(looks_like_person("Sarah Smith"));
assert!(looks_like_person("Mary Jane"));
// Non-ASCII title-cased single word still counts.
assert!(looks_like_person("Renée"));
}
#[test]
fn looks_like_person_rejects_obvious_non_people() {
// Digits, lowercase, three-or-more words, denylist hits.
assert!(!looks_like_person("2018"));
assert!(!looks_like_person("Trip 2018"));
assert!(!looks_like_person("trip"));
assert!(!looks_like_person("Birthday Party Cake"));
assert!(!looks_like_person("cat"));
assert!(!looks_like_person("Cat")); // denied even when title-cased
assert!(!looks_like_person("Christmas"));
assert!(!looks_like_person("home"));
assert!(!looks_like_person(""));
assert!(!looks_like_person(" "));
}
#[test]
fn looks_like_person_two_words_skips_denylist() {
// Two-word names get a pass on the single-word denylist —
// "Sunset Walk" is much more likely a real album than a person,
// but false-accepting is fine because the operator confirms.
// What matters is we don't false-reject "Sarah Smith".
assert!(looks_like_person("Sunset Walk"));
assert!(looks_like_person("Sarah Smith"));
}
#[test]
fn cosine_similarity_known_vectors() {
// Identical vectors → 1.0; orthogonal → 0.0; opposite → -1.0.
let a = vec![1.0, 0.0, 0.0];
let b = vec![1.0, 0.0, 0.0];
let c = vec![0.0, 1.0, 0.0];
let d = vec![-1.0, 0.0, 0.0];
assert!((cosine_similarity(&a, &b) - 1.0).abs() < 1e-6);
assert!(cosine_similarity(&a, &c).abs() < 1e-6);
assert!((cosine_similarity(&a, &d) - (-1.0)).abs() < 1e-6);
// Mismatched length → 0.0 (defensive, not NaN).
assert_eq!(cosine_similarity(&a, &[1.0, 0.0]), 0.0);
// Empty input → 0.0.
assert_eq!(cosine_similarity(&[], &[]), 0.0);
// Zero vector → 0.0 (denominator guard, not NaN).
let zero = vec![0.0, 0.0, 0.0];
assert_eq!(cosine_similarity(&a, &zero), 0.0);
}
#[test]
fn decode_embedding_bytes_round_trip() {
// 512×f32 LE = 2048 bytes. Anything else returns None.
let v: Vec<f32> = (0..512).map(|i| i as f32 * 0.001).collect();
let mut bytes = Vec::with_capacity(2048);
for f in &v {
bytes.extend_from_slice(&f.to_le_bytes());
}
let decoded = decode_embedding_bytes(&bytes).expect("decode");
assert_eq!(decoded.len(), 512);
for (a, b) in v.iter().zip(decoded.iter()) {
assert!((a - b).abs() < 1e-9);
}
assert_eq!(decode_embedding_bytes(&[0u8; 100]), None);
assert_eq!(decode_embedding_bytes(&[0u8; 4096]), None);
}
#[test]
fn find_persons_by_names_ci_groups_case() {
let mut dao = fresh_dao();
let _ = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
let _ = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Bob".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
// Mix of cases + a name that has no person row.
let m = dao
.find_persons_by_names_ci(&ctx(), &["alice".into(), "BOB".into(), "charlie".into()])
.expect("lookup");
assert!(m.contains_key("alice"));
assert!(m.contains_key("bob"));
assert!(!m.contains_key("charlie"));
// Empty input is a no-op (don't fire a SQL with zero binds).
assert!(
dao.find_persons_by_names_ci(&ctx(), &[])
.unwrap()
.is_empty()
);
}
#[test]
fn person_reference_embedding_filters_by_model_version() {
// A person with embeddings from buffalo_l shouldn't have its
// reference contaminated by a future buffalo_xl row. The auto-
// bind path passes the candidate's model_version so old rows
// never reach the average.
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
let p = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Subject".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
// 512-d unit vector along axis 0, written for buffalo_l.
let mut emb_l: Vec<f32> = vec![0.0; 512];
emb_l[0] = 1.0;
let mut emb_l_bytes = Vec::with_capacity(2048);
for f in &emb_l {
emb_l_bytes.extend_from_slice(&f.to_le_bytes());
}
// 512-d unit vector along axis 1, written for some-other model.
let mut emb_xl: Vec<f32> = vec![0.0; 512];
emb_xl[1] = 1.0;
let mut emb_xl_bytes = Vec::with_capacity(2048);
for f in &emb_xl {
emb_xl_bytes.extend_from_slice(&f.to_le_bytes());
}
for (bytes, mv) in [(emb_l_bytes, "buffalo_l"), (emb_xl_bytes, "buffalo_xl")] {
let _ = dao
.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: format!("h-{mv}"),
rel_path: format!("p-{mv}.jpg"),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(bytes),
confidence: Some(0.9),
source: "auto".into(),
person_id: Some(p.id),
status: "detected".into(),
model_version: mv.into(),
},
)
.unwrap();
}
let ref_l = dao
.person_reference_embedding(&ctx(), p.id, "buffalo_l")
.unwrap()
.expect("buffalo_l ref");
// Reference for buffalo_l should match emb_l (axis-0 unit).
assert!((ref_l[0] - 1.0).abs() < 1e-5, "axis 0 should be ~1.0");
assert!(ref_l[1].abs() < 1e-5, "axis 1 should be ~0.0");
// Unknown model_version → None, not a cross-version average.
assert!(
dao.person_reference_embedding(&ctx(), p.id, "buffalo_xxxl")
.unwrap()
.is_none()
);
}
#[test]
fn assign_face_to_person_sets_cover_when_unset() {
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
let p = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Cover".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
assert!(p.cover_face_id.is_none());
// Insert two faces unbound.
let face1 = dao
.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: "h1".into(),
rel_path: "p1.jpg".into(),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "auto".into(),
person_id: None,
status: "detected".into(),
model_version: "buffalo_l".into(),
},
)
.unwrap();
let face2 = dao
.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: "h2".into(),
rel_path: "p2.jpg".into(),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "auto".into(),
person_id: None,
status: "detected".into(),
model_version: "buffalo_l".into(),
},
)
.unwrap();
// First assignment claims the cover.
dao.assign_face_to_person(&ctx(), face1.id, p.id).unwrap();
let p_after_first = dao.get_person(&ctx(), p.id).unwrap().unwrap();
assert_eq!(p_after_first.cover_face_id, Some(face1.id));
// Second assignment must NOT overwrite — operator may have
// hand-picked the cover after the first auto-bind.
dao.assign_face_to_person(&ctx(), face2.id, p.id).unwrap();
let p_after_second = dao.get_person(&ctx(), p.id).unwrap().unwrap();
assert_eq!(
p_after_second.cover_face_id,
Some(face1.id),
"cover must remain face1 after second auto-bind"
);
}
#[test]
fn person_crud_roundtrip() {
let mut dao = fresh_dao();
let p = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: Some("the boss".into()),
entity_id: None,
is_ignored: false,
},
false,
)
.expect("create person");
assert_eq!(p.name, "Alice");
assert_eq!(p.notes.as_deref(), Some("the boss"));
assert!(!p.created_from_tag);
// Case-insensitive uniqueness — second create with same name in
// different case must fail with a UNIQUE violation, surfacing
// as 409 Conflict at the handler layer.
let dup = dao.create_person(
&ctx(),
&CreatePersonReq {
name: "alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
);
assert!(dup.is_err(), "case-insensitive UNIQUE must reject 'alice'");
// Update notes; verify updated_at moves forward.
let prev_updated = p.updated_at;
std::thread::sleep(std::time::Duration::from_millis(1100)); // boundary cross
let updated = dao
.update_person(
&ctx(),
p.id,
&UpdatePersonReq {
name: None,
notes: Some("a new note".into()),
cover_face_id: None,
entity_id: None,
is_ignored: None,
},
)
.expect("update");
assert_eq!(updated.notes.as_deref(), Some("a new note"));
assert!(updated.updated_at >= prev_updated);
// List + delete.
let listed = dao.list_persons(&ctx(), None, false).expect("list");
assert_eq!(listed.len(), 1);
assert_eq!(listed[0].face_count, 0);
assert!(dao.delete_person(&ctx(), p.id, false).expect("delete"));
assert!(
dao.list_persons(&ctx(), None, false)
.expect("list")
.is_empty()
);
}
#[test]
fn ignore_bucket_idempotent_and_filters_auto_bind() {
// First call creates the bucket; second returns the same row.
// Once it exists, find_persons_by_names_ci must skip it even if
// the search term matches its name — the auto-bind path must
// NEVER target the IGNORE/junk bucket.
let mut dao = fresh_dao();
let first = dao
.get_or_create_ignored_person(&ctx())
.expect("create bucket");
assert!(first.is_ignored);
let second = dao
.get_or_create_ignored_person(&ctx())
.expect("re-fetch bucket");
assert_eq!(first.id, second.id, "bucket must be idempotent");
// Searching by the bucket's name must return nothing — the
// auto-bind look-up filters is_ignored=true.
let m = dao
.find_persons_by_names_ci(&ctx(), &["ignored".into()])
.expect("name lookup");
assert!(
!m.contains_key("ignored"),
"find_persons_by_names_ci must skip the ignore bucket: {m:?}"
);
// Default list_persons hides it; include_ignored=true surfaces it.
let visible = dao.list_persons(&ctx(), None, false).expect("list");
assert!(visible.iter().all(|p| !p.is_ignored));
let all = dao.list_persons(&ctx(), None, true).expect("list all");
assert!(all.iter().any(|p| p.is_ignored && p.id == first.id));
}
#[test]
fn marker_rows_idempotent() {
let mut dao = fresh_dao();
// Need a libraries row to satisfy face_detections.library_id FK
// without DEFERRED — SQLite enforces FK immediately by default.
// The :memory: DB already has the libraries seed via
// seed_or_patch_from_env? No — in_memory_db_connection just runs
// migrations; the libraries seed is a runtime path. Insert one
// manually for the test.
// Migrations may seed libraries(id=1); INSERT OR IGNORE keeps the
// test runnable either way.
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
// Marker insert.
dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l")
.expect("first mark");
assert!(
dao.already_scanned(&ctx(), "abc123").expect("scan"),
"already_scanned should report true after marker"
);
// Second mark for the same hash is a no-op (the partial UNIQUE
// index would otherwise reject; the DAO short-circuits before the
// insert).
dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l")
.expect("second mark idempotent");
// Stats reflect the no_faces marker.
let stats = dao.stats(&ctx(), Some(1)).expect("stats");
assert_eq!(stats.no_faces, 1);
assert_eq!(stats.scanned, 1);
assert_eq!(stats.with_faces, 0);
}
#[test]
fn stats_total_photos_excludes_videos() {
// SCANNED counts content_hashes in face_detections; total_photos
// must apply the same image-extension filter as the watcher
// backlog query so the percentage can reach 100%. Without this,
// videos sit in image_exif but never produce a face_detections
// row (Apollo decodes images only) and the bar caps below 100%.
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
diesel::sql_query(
"INSERT INTO image_exif \
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
(1, 'a.jpg', 'h-a', 0, 0), \
(1, 'b.JPEG', 'h-b', 0, 0), \
(1, 'movie.mp4', 'h-mp4', 0, 0), \
(1, 'clip.MOV', 'h-mov', 0, 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed image_exif");
let stats = dao.stats(&ctx(), Some(1)).expect("stats");
assert_eq!(
stats.total_photos, 2,
"videos should not count toward total"
);
}
#[test]
fn merge_persons_repoints_faces() {
let mut dao = fresh_dao();
// Migrations may seed libraries(id=1); INSERT OR IGNORE keeps the
// test runnable either way.
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
let alice = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
let alyse = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alyse".into(),
notes: Some("dup of alice".into()),
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
// Insert a detected face row owned by `alyse`.
let _ = dao
.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: "h1".into(),
rel_path: "p1.jpg".into(),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "auto".into(),
person_id: Some(alyse.id),
status: "detected".into(),
model_version: "buffalo_l".into(),
},
)
.unwrap();
// Merge alyse → alice. Notes from src copy when target empty.
let merged = dao.merge_persons(&ctx(), alyse.id, alice.id).unwrap();
assert_eq!(merged.id, alice.id);
assert_eq!(merged.notes.as_deref(), Some("dup of alice"));
// alyse is gone.
assert!(dao.get_person(&ctx(), alyse.id).unwrap().is_none());
// The face is now alice's.
let faces = dao.list_for_person(&ctx(), alice.id, Some(1)).unwrap();
assert_eq!(faces.len(), 1);
assert_eq!(faces[0].person_id, Some(alice.id));
}
// ── crop_image_to_bbox ──────────────────────────────────────────────
// Pure helper used by the manual face-create handler. Generate a tiny
// image in memory, write it to a temp file, then exercise the bbox
// validation + crop math.
fn write_solid_image(w: u32, h: u32) -> tempfile::NamedTempFile {
let mut img = image::RgbImage::new(w, h);
for p in img.pixels_mut() {
*p = image::Rgb([200, 200, 200]);
}
let f = tempfile::Builder::new()
.suffix(".jpg")
.tempfile()
.expect("tempfile");
image::DynamicImage::ImageRgb8(img)
.save(f.path())
.expect("save jpg");
f
}
#[test]
fn crop_rejects_invalid_bbox() {
let f = write_solid_image(64, 64);
// x out of [0,1]
assert!(crop_image_to_bbox(f.path(), -0.1, 0.0, 0.5, 0.5).is_err());
assert!(crop_image_to_bbox(f.path(), 1.5, 0.0, 0.5, 0.5).is_err());
// zero / negative dimensions
assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.0, 0.5).is_err());
assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.5, -0.1).is_err());
// overflows the image
assert!(crop_image_to_bbox(f.path(), 0.7, 0.0, 0.5, 0.5).is_err());
}
#[test]
fn crop_returns_decodable_jpeg() {
let f = write_solid_image(200, 200);
let bytes = crop_image_to_bbox(f.path(), 0.25, 0.25, 0.5, 0.5).expect("center crop");
// Re-decode to confirm the pipeline produced a valid JPEG. Exact
// dimensions depend on the 10% padding clamp, so just assert
// sanity bounds rather than pinning numbers (padding math can
// legitimately drift if we tweak the heuristic later).
let img = image::load_from_memory(&bytes).expect("decode crop");
let (w, h) = (img.width(), img.height());
assert!((80..=200).contains(&w), "unexpected crop width: {w}");
assert!((80..=200).contains(&h), "unexpected crop height: {h}");
}
#[test]
fn crop_padding_clamps_to_image_bounds() {
// A bbox right at the corner should pad inward as far as it can,
// never outside the image — otherwise we'd pass invalid coords
// to the embedding service.
let f = write_solid_image(100, 100);
let bytes = crop_image_to_bbox(f.path(), 0.9, 0.9, 0.1, 0.1).expect("corner crop");
let img = image::load_from_memory(&bytes).expect("decode corner crop");
// Padded crop must fit within the source's 100x100.
assert!(img.width() <= 100);
assert!(img.height() <= 100);
assert!(img.width() > 0 && img.height() > 0);
}
// ── hydrate_face_with_person — PATCH/POST /image/faces response shape ──
fn seed_library_and_face(dao: &mut SqliteFaceDao, person_id: Option<i32>) -> FaceDetectionRow {
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
dao.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: "h-hydrate".into(),
rel_path: "p.jpg".into(),
bbox: Some((0.1, 0.2, 0.3, 0.4)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "manual".into(),
person_id,
status: "detected".into(),
model_version: "buffalo_l".into(),
},
)
.unwrap()
}
#[test]
fn hydrate_face_carries_person_name_when_assigned() {
// Regression guard for the bug where PATCH /image/faces/{id}
// returned a bare FaceDetectionRow (no person_name), causing
// the carousel overlay's optimistic replace to drop the VFD
// label off the bbox after every save. The handler hydrates
// via this helper; if anyone refactors the helper to skip the
// persons join, this test fails.
let mut dao = fresh_dao();
let p = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
let row = seed_library_and_face(&mut dao, Some(p.id));
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
assert_eq!(joined.person_id, Some(p.id));
assert_eq!(joined.person_name.as_deref(), Some("Alice"));
// Bbox + confidence + source must round-trip — these are what
// the optimistic-replace also keys on.
assert!((joined.bbox_x - 0.1).abs() < 1e-6);
assert!((joined.bbox_y - 0.2).abs() < 1e-6);
assert!((joined.bbox_w - 0.3).abs() < 1e-6);
assert!((joined.bbox_h - 0.4).abs() < 1e-6);
assert_eq!(joined.source, "manual");
}
#[test]
fn hydrate_face_leaves_person_name_null_when_unassigned() {
// Mirror branch: an unassigned face must hydrate cleanly with
// person_name = None, not a stale value left over from a
// previously-assigned row's serialization.
let mut dao = fresh_dao();
let row = seed_library_and_face(&mut dao, None);
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned");
assert!(joined.person_id.is_none());
assert!(joined.person_name.is_none());
}
#[test]
fn list_unscanned_candidates_filters_to_hashed_unscanned_in_library() {
// The watcher's per-tick backlog drain depends on this query
// returning *only* image_exif rows with a populated
// content_hash and no matching face_detections row in the
// requested library. A regression here would either silently
// re-scan files (waste of inference) or skip files that need
// scanning (the symptom we just shipped a fix for).
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0), (2, 'other', '/tmp2', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
// Seed image_exif: mix of hashed/unhashed/scanned/cross-library,
// plus a video and a mixed-case image extension. Videos register
// in image_exif but can never produce a face_detections row, so
// the SQL must filter them out — otherwise the per-tick backlog
// drain re-pulls them every tick (no marker is ever written, so
// they loop forever) and the SCANNED stat is permanently capped.
diesel::sql_query(
"INSERT INTO image_exif \
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
(1, 'a.jpg', 'h-a', 0, 0), \
(1, 'b.jpg', 'h-b', 0, 0), \
(1, 'c.jpg', NULL, 0, 0), \
(1, 'd.jpg', 'h-d', 0, 0), \
(1, 'movie.mp4', 'h-mp4', 0, 0), \
(1, 'clip.MOV', 'h-mov', 0, 0), \
(1, 'photo.JPG', 'h-jpg-upper', 0, 0), \
(2, 'e.jpg', 'h-e', 0, 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed image_exif");
// 'b' has been scanned (no_faces marker) — expect it filtered out.
dao.mark_status(&ctx(), 1, "h-b", "b.jpg", "no_faces", "buffalo_l")
.expect("scanned marker");
let cands = dao
.list_unscanned_candidates(&ctx(), 1, 10)
.expect("list unscanned");
let hashes: std::collections::HashSet<_> = cands.iter().map(|(_, h)| h.clone()).collect();
// Should contain a, d, and the upper-case .JPG (image-extension
// match is case-insensitive).
assert!(hashes.contains("h-a"), "missing h-a: {:?}", hashes);
assert!(hashes.contains("h-d"), "missing h-d: {:?}", hashes);
assert!(
hashes.contains("h-jpg-upper"),
"missing h-jpg-upper: {:?}",
hashes
);
// Should NOT contain b (scanned), c (no hash), e (other library),
// or videos (mp4/mov are not image extensions).
assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)");
assert!(
!hashes.contains("h-e"),
"expected h-e filtered (other library)"
);
assert!(!hashes.contains("h-mp4"), "expected h-mp4 filtered (video)");
assert!(!hashes.contains("h-mov"), "expected h-mov filtered (video)");
assert_eq!(cands.len(), 3, "unexpected candidates: {:?}", cands);
}
}