list_embeddings cloned the full FaceDetectionRow inside the filter_map just to pair it with the base64-encoded embedding. The 2 KB BLOB was already on the row — at 20k unassigned faces that's 40 MB of pointless heap traffic per Apollo cluster-suggest run. Move the bytes out via Option::take() so the row drops the BLOB instead of duplicating it. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
3527 lines
138 KiB
Rust
3527 lines
138 KiB
Rust
//! Local face recognition: data layer + HTTP surface.
|
||
//!
|
||
//! Phase 2 ships the persistence model and the manual CRUD endpoints; the
|
||
//! file-watch hook that drives automatic detection lives in `process_new_files`
|
||
//! (Phase 3) and is not registered yet. Inference is delegated to Apollo over
|
||
//! HTTP via [`crate::ai::face_client`]; this module never imports onnxruntime.
|
||
//!
|
||
//! Data model:
|
||
//! - `persons` are visual identities (the "who" of a face).
|
||
//! - `face_detections` rows are either real detections (`status='detected'`)
|
||
//! or markers (`status='no_faces' | 'failed'`). Both are keyed on
|
||
//! `content_hash` so the same JPEG in two libraries is scanned once.
|
||
//! - The `(library_id, rel_path)` pair is the *display* lookup; we resolve
|
||
//! it through `image_exif.content_hash` on every read so renames don't
|
||
//! strand face rows.
|
||
//!
|
||
//! The `FaceDao` trait abstracts persistence; `SqliteFaceDao` is the
|
||
//! production impl. The Phase 2 endpoints use it directly. A test impl
|
||
//! (in-memory) lives at the bottom of the module behind `#[cfg(test)]`.
|
||
|
||
use crate::Claims;
|
||
use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError};
|
||
use crate::database::schema::{face_detections, image_exif, persons};
|
||
use crate::error::IntoHttpError;
|
||
use crate::exif;
|
||
use crate::file_types;
|
||
use crate::libraries::{self, Library};
|
||
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
|
||
use crate::state::AppState;
|
||
use crate::utils::normalize_path;
|
||
use crate::{ThumbnailRequest, connect};
|
||
use actix_web::dev::{ServiceFactory, ServiceRequest};
|
||
use actix_web::{App, HttpRequest, HttpResponse, Responder, web};
|
||
use anyhow::{Context, anyhow};
|
||
use chrono::Utc;
|
||
use diesel::prelude::*;
|
||
use image::GenericImageView;
|
||
use log::{info, warn};
|
||
use opentelemetry::KeyValue;
|
||
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
|
||
use serde::{Deserialize, Serialize};
|
||
use std::ops::DerefMut;
|
||
use std::sync::{Arc, Mutex};
|
||
|
||
// ── Wire types ──────────────────────────────────────────────────────────────
|
||
|
||
/// Visual identity. The optional `entity_id` bridges this person to an
|
||
/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT
|
||
/// auto-bridged at creation — only when the user explicitly links them in
|
||
/// the management UI, or when bootstrap finds an exact-name match.
|
||
/// Visual identity. The optional `entity_id` bridges this person to an
/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT
/// auto-bridged at creation — only when the user explicitly links them in
/// the management UI, or when bootstrap finds an exact-name match.
///
/// Field order must match the `persons` table column order — diesel's
/// `Queryable` derive maps columns positionally, not by name.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Person {
    pub id: i32,
    pub name: String,
    /// Face used as this person's avatar; NULL until a cover is chosen.
    pub cover_face_id: Option<i32>,
    pub entity_id: Option<i32>,
    /// Mirrors the `from_tag` argument of [`FaceDao::create_person`].
    pub created_from_tag: bool,
    pub notes: Option<String>,
    /// Unix timestamps (seconds) — written via `Utc::now().timestamp()`.
    pub created_at: i64,
    pub updated_at: i64,
    /// True for the IGNORE / junk bucket. Hidden from the default
    /// persons list, skipped by `find_persons_by_names_ci` (so a tag
    /// match can never auto-bind a real face into the ignore bucket),
    /// and excluded from cluster suggestions because cluster-suggest
    /// already filters by `person_id IS NULL` and ignored faces have
    /// a non-null person_id.
    pub is_ignored: bool,
}
|
||
|
||
/// Insertable counterpart of [`Person`]. `id` is assigned by SQLite;
/// `cover_face_id` and `entity_id` are absent so they start NULL.
#[derive(Insertable, Debug)]
#[diesel(table_name = persons)]
struct InsertPerson {
    name: String,
    notes: Option<String>,
    created_from_tag: bool,
    is_ignored: bool,
    created_at: i64,
    updated_at: i64,
}
|
||
|
||
/// One `face_detections` row: either a real detection (`status='detected'`)
/// or a marker (`status='no_faces' | 'failed'`) with NULL bbox/embedding.
/// Field order must match the table's column order (positional `Queryable`).
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct FaceDetectionRow {
    pub id: i32,
    pub library_id: i32,
    /// Content-addressed key — the same JPEG in two libraries shares rows.
    pub content_hash: String,
    pub rel_path: String,
    /// Bounding box; NULL on marker rows. Normalized 0..1 per the
    /// [`FaceEmbeddingRow`] docs — TODO confirm this row uses the same scale.
    pub bbox_x: Option<f32>,
    pub bbox_y: Option<f32>,
    pub bbox_w: Option<f32>,
    pub bbox_h: Option<f32>,
    /// Skip on the wire — clients call /faces/embeddings explicitly when
    /// they need it. Saves ~2 KB per face on every list response.
    #[serde(skip_serializing)]
    pub embedding: Option<Vec<u8>>,
    pub confidence: Option<f32>,
    /// Provenance; `mark_status` writes "auto". Other values not visible
    /// in this chunk — presumably a manual/user source; verify.
    pub source: String,
    pub person_id: Option<i32>,
    /// 'detected' | 'no_faces' | 'failed'.
    pub status: String,
    pub model_version: String,
    pub created_at: i64,
}
|
||
|
||
/// SQL fragment restricting an `image_exif.rel_path` (or `face_detections.rel_path`)
|
||
/// column to image extensions. Videos register in `image_exif` with a
|
||
/// populated `content_hash` but can never produce a `face_detections` row
|
||
/// — applying this filter at query time keeps videos out of the per-tick
|
||
/// backlog drain (which would otherwise loop forever — `filter_excluded`
|
||
/// drops them client-side without writing a marker) and out of the SCANNED
|
||
/// stat denominator (so 100% is reachable).
|
||
fn image_path_predicate(col: &str) -> String {
|
||
let clauses: Vec<String> = file_types::IMAGE_EXTENSIONS
|
||
.iter()
|
||
.map(|ext| format!("lower({col}) LIKE '%.{ext}'"))
|
||
.collect();
|
||
format!("({})", clauses.join(" OR "))
|
||
}
|
||
|
||
/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's
|
||
/// `sql_query` requires a `QueryableByName` row type with explicit
|
||
/// column SQL types; using a tuple isn't supported.
|
||
/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's
/// `sql_query` requires a `QueryableByName` row type with explicit
/// column SQL types; using a tuple isn't supported.
#[derive(diesel::QueryableByName, Debug)]
struct CountRow {
    // Column name must match the SELECT alias ("count") exactly.
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    count: i64,
}
|
||
|
||
/// Raw-SQL row for `list_unscanned_candidates` — `(rel_path, content_hash)`
/// of an image that has no `face_detections` row yet. Field names must
/// match the SELECT column names (QueryableByName matches by name).
#[derive(diesel::QueryableByName, Debug)]
struct UnscannedRow {
    #[diesel(sql_type = diesel::sql_types::Text)]
    rel_path: String,
    #[diesel(sql_type = diesel::sql_types::Text)]
    content_hash: String,
}
|
||
|
||
/// Diesel-insertable form of a face row. Built privately by the DAO from
/// [`InsertFaceDetectionInput`] (`id` is SQLite-assigned and absent here).
#[derive(Insertable, Debug)]
#[diesel(table_name = face_detections)]
struct InsertFaceDetection {
    library_id: i32,
    content_hash: String,
    rel_path: String,
    // All-Some for detections, all-None for marker rows.
    bbox_x: Option<f32>,
    bbox_y: Option<f32>,
    bbox_w: Option<f32>,
    bbox_h: Option<f32>,
    embedding: Option<Vec<u8>>,
    confidence: Option<f32>,
    source: String,
    person_id: Option<i32>,
    status: String,
    model_version: String,
    created_at: i64,
}
|
||
|
||
/// Build a [`FaceWithPerson`] from a freshly-mutated row by resolving the
|
||
/// person name via [`FaceDao::get_person`]. Used by `create_face_handler`
|
||
/// and `update_face_handler` so PATCH/POST responses match the join shape
|
||
/// `/image/faces` returns — without this the carousel overlay's
|
||
/// optimistic-replace would clobber the rendered name (the bare
|
||
/// [`FaceDetectionRow`] doesn't carry it).
|
||
fn hydrate_face_with_person<D: FaceDao>(
|
||
dao: &mut D,
|
||
ctx: &opentelemetry::Context,
|
||
row: FaceDetectionRow,
|
||
) -> anyhow::Result<FaceWithPerson> {
|
||
let person_name = match row.person_id {
|
||
Some(pid) => dao.get_person(ctx, pid)?.map(|p| p.name),
|
||
None => None,
|
||
};
|
||
Ok(FaceWithPerson {
|
||
id: row.id,
|
||
bbox_x: row.bbox_x.unwrap_or(0.0),
|
||
bbox_y: row.bbox_y.unwrap_or(0.0),
|
||
bbox_w: row.bbox_w.unwrap_or(0.0),
|
||
bbox_h: row.bbox_h.unwrap_or(0.0),
|
||
confidence: row.confidence.unwrap_or(0.0),
|
||
source: row.source,
|
||
person_id: row.person_id,
|
||
person_name,
|
||
model_version: row.model_version,
|
||
})
|
||
}
|
||
|
||
/// Face row decorated with its assigned person's name. Returned by
|
||
/// `/image/faces` for the rendering side (carousel overlay, person chips).
|
||
/// Face row decorated with its assigned person's name. Returned by
/// `/image/faces` for the rendering side (carousel overlay, person chips).
#[derive(Serialize, Debug, Clone)]
pub struct FaceWithPerson {
    pub id: i32,
    // Non-optional: callers substitute 0.0 for the (marker-only) NULL case.
    pub bbox_x: f32,
    pub bbox_y: f32,
    pub bbox_w: f32,
    pub bbox_h: f32,
    pub confidence: f32,
    pub source: String,
    pub person_id: Option<i32>,
    /// Resolved via join or [`FaceDao::get_person`]; None when unassigned.
    pub person_name: Option<String>,
    pub model_version: String,
}
|
||
|
||
/// Face row plus the photo it lives on. Powers the per-person photo grid
|
||
/// (`GET /persons/{id}/faces`) and unassigned-cluster surfacing in Apollo.
|
||
/// Face row plus the photo it lives on. Powers the per-person photo grid
/// (`GET /persons/{id}/faces`) and unassigned-cluster surfacing in Apollo.
#[derive(Serialize, Debug, Clone)]
pub struct FaceWithPath {
    pub id: i32,
    /// Photo location for thumbnail/display resolution.
    pub library_id: i32,
    pub rel_path: String,
    pub bbox_x: f32,
    pub bbox_y: f32,
    pub bbox_w: f32,
    pub bbox_h: f32,
    pub confidence: f32,
    pub person_id: Option<i32>,
    pub model_version: String,
}
|
||
|
||
/// Embedding-bearing face row. Returned by `/faces/embeddings` for Apollo's
|
||
/// clustering layer; embedding is base64-encoded so the JSON payload is
|
||
/// self-contained (Apollo's DBSCAN runs over numpy arrays decoded from this).
|
||
/// Embedding-bearing face row. Returned by `/faces/embeddings` for Apollo's
/// clustering layer; embedding is base64-encoded so the JSON payload is
/// self-contained (Apollo's DBSCAN runs over numpy arrays decoded from this).
#[derive(Serialize, Debug, Clone)]
pub struct FaceEmbeddingRow {
    pub id: i32,
    pub library_id: i32,
    pub rel_path: String,
    pub content_hash: String,
    pub person_id: Option<i32>,
    pub model_version: String,
    /// base64 of 2048 bytes (512×f32 LE).
    pub embedding: String,
    /// Normalized bbox 0..1, included so the cluster suggester UI can
    /// crop a face thumbnail without an extra round-trip per cluster.
    /// Shouldn't be NULL for `status='detected'` rows (CHECK constraint
    /// in the migration), but the DB type is nullable so we mirror it.
    pub bbox_x: Option<f32>,
    pub bbox_y: Option<f32>,
    pub bbox_w: Option<f32>,
    pub bbox_h: Option<f32>,
}
|
||
|
||
/// Aggregate counters for the face-scanning stats endpoint.
/// `Default` gives the all-zero shape (library_id = None means "all
/// libraries" — presumably; confirm against the stats handler).
#[derive(Serialize, Debug, Default)]
pub struct FaceStats {
    pub library_id: Option<i32>,
    pub total_photos: i64,
    /// Photos with any face_detections row (detection or marker).
    pub scanned: i64,
    pub with_faces: i64,
    pub no_faces: i64,
    pub failed: i64,
    pub persons_count: i64,
    pub unassigned_faces: i64,
}
|
||
|
||
/// [`Person`] projection for list views, extended with a face count.
/// Returned by [`FaceDao::list_persons`].
#[derive(Serialize, Debug, Clone)]
pub struct PersonSummary {
    pub id: i32,
    pub name: String,
    pub cover_face_id: Option<i32>,
    pub entity_id: Option<i32>,
    pub created_from_tag: bool,
    pub notes: Option<String>,
    pub is_ignored: bool,
    /// Number of faces assigned to this person.
    pub face_count: i64,
}
|
||
|
||
// ── Request bodies ──────────────────────────────────────────────────────────
|
||
|
||
/// Body for the create-person endpoint. Only `name` is required.
#[derive(Deserialize, Debug)]
pub struct CreatePersonReq {
    pub name: String,
    #[serde(default)]
    pub notes: Option<String>,
    /// Optional bridge to an existing entity. NULL/missing leaves it
    /// unbridged; set explicitly to wire the person to LLM-extracted facts.
    #[serde(default)]
    pub entity_id: Option<i32>,
    /// True for the IGNORE / junk bucket. The frontend sets this when
    /// lazily creating the Ignored person via the dedicated endpoint;
    /// hand-rolled callers leave it false.
    #[serde(default)]
    pub is_ignored: bool,
}
|
||
|
||
/// Patch body for the update-person endpoint. Every field is optional;
/// missing fields leave the stored value untouched.
#[derive(Deserialize, Debug)]
pub struct UpdatePersonReq {
    #[serde(default)]
    pub name: Option<String>,
    #[serde(default)]
    pub notes: Option<String>,
    #[serde(default)]
    pub cover_face_id: Option<i32>,
    #[serde(default)]
    pub entity_id: Option<i32>,
    /// Toggle the ignore flag. Mostly used by the UI to "un-ignore" a
    /// person that was previously bound to the bucket.
    #[serde(default)]
    pub is_ignored: Option<bool>,
}
|
||
|
||
/// Body for the merge-persons endpoint.
#[derive(Deserialize, Debug)]
pub struct MergePersonsReq {
    /// Person id to merge *into*. The source (`{id}` in the path) is
    /// re-pointed to this id, then deleted.
    pub into: i32,
}
|
||
|
||
/// Query string for the delete-person endpoint.
#[derive(Deserialize, Debug)]
pub struct DeletePersonQuery {
    /// `set_null` (default) leaves face rows orphaned (person_id NULL);
    /// `delete` cascades through and removes the face rows entirely.
    /// Default is set_null because deleting the person almost never means
    /// "delete every photo of them ever existed."
    #[serde(default)]
    pub cascade: Option<String>,
}
|
||
|
||
/// Body for the manual face-create endpoint (user draws a box on a photo).
#[derive(Deserialize, Debug)]
pub struct CreateFaceReq {
    /// Photo path (library-relative). Resolved to content_hash via
    /// image_exif before any face row is inserted.
    pub path: String,
    /// Library id; None presumably selects a default — confirm in handler.
    pub library: Option<i32>,
    pub bbox: BboxReq,
    /// Optional initial person assignment. Use this when the user draws a
    /// box and immediately picks a name from the autocomplete.
    #[serde(default)]
    pub person_id: Option<i32>,
    /// Skip the embedding step. Set when the user wants to tag a region
    /// the detector can't find a face in (back of head, profile partly
    /// occluded, etc.). The row is stored with a zero-vector embedding,
    /// which the cluster suggester filters on `norm <= 0` and auto-bind
    /// cosine resolves to 0 against — so the row participates only as a
    /// browse-by-person tag, not in similarity matching. The frontend
    /// only sets this after a 422 from a strict create plus an explicit
    /// operator confirmation.
    #[serde(default)]
    pub force: bool,
}
|
||
|
||
/// Bounding box in request bodies. Presumably normalized 0..1 like the
/// stored bbox columns — confirm against the frontend.
#[derive(Deserialize, Debug)]
pub struct BboxReq {
    pub x: f32,
    pub y: f32,
    pub w: f32,
    pub h: f32,
}
|
||
|
||
/// Patch body for the face-update endpoint.
#[derive(Deserialize, Debug)]
pub struct UpdateFaceReq {
    /// `null` literally clears the assignment; missing leaves it alone.
    /// Distinguish via `Option<Option<…>>` is tricky in serde without
    /// custom deserialization; encode "clear" as `clear_person: true`
    /// instead.
    #[serde(default)]
    pub person_id: Option<i32>,
    #[serde(default)]
    pub clear_person: bool,
    #[serde(default)]
    pub bbox: Option<BboxReq>,
}
|
||
|
||
/// Query string for `/faces/embeddings` (Apollo's clustering input).
/// Defaults: `unassigned=true`, `limit=500`, `offset=0`.
#[derive(Deserialize, Debug)]
pub struct EmbeddingsQuery {
    pub library: Option<i32>,
    /// Default true — clustering only cares about unassigned faces. Set
    /// false to dump all embeddings (e.g. for re-clustering everything).
    #[serde(default = "default_unassigned")]
    pub unassigned: bool,
    #[serde(default = "default_embeddings_limit")]
    pub limit: i64,
    #[serde(default)]
    pub offset: i64,
}
|
||
|
||
/// Serde default for [`EmbeddingsQuery::unassigned`]: clustering normally
/// only wants faces that have no person yet.
fn default_unassigned() -> bool {
    true
}
|
||
/// Serde default for [`EmbeddingsQuery::limit`]: one page of 500 faces.
fn default_embeddings_limit() -> i64 {
    500
}
|
||
|
||
// ── DAO trait ───────────────────────────────────────────────────────────────
|
||
|
||
// File-watch hook (Phase 3) and the rerun handler (Phase 6) consume the
|
||
// methods the Phase 2 routes don't. Allow dead_code on the trait so we
|
||
// don't have to sprinkle attributes on every method that's wired up later.
|
||
// File-watch hook (Phase 3) and the rerun handler (Phase 6) consume the
// methods the Phase 2 routes don't. Allow dead_code on the trait so we
// don't have to sprinkle attributes on every method that's wired up later.
#[allow(dead_code)]
pub trait FaceDao: Send + Sync {
    /// True when ANY face_detections row (detection or marker) exists
    /// for this content hash — i.e. the photo was already processed.
    fn already_scanned(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<bool>;
    /// Find image_exif rows in `library_id` that have a populated
    /// content_hash but no matching face_detections row yet. Used by
    /// the watcher's quick-scan path to drain the backlog without
    /// re-walking the filesystem. Returns `(rel_path, content_hash)`
    /// pairs, capped at `limit`. Distinct on content_hash so the same
    /// hash that lives at multiple rel_paths only fires one detection.
    fn list_unscanned_candidates(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        limit: i64,
    ) -> anyhow::Result<Vec<(String, String)>>;
    /// Insert one detection/marker row; returns the row as stored
    /// (with the SQLite-assigned id and created_at).
    fn store_detection(
        &mut self,
        ctx: &opentelemetry::Context,
        row: InsertFaceDetectionInput,
    ) -> anyhow::Result<FaceDetectionRow>;
    /// Write a `no_faces`/`failed` marker row for a photo. Idempotent:
    /// a no-op when any row already exists for the hash.
    fn mark_status(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        content_hash: &str,
        rel_path: &str,
        status: &str,
        model_version: &str,
    ) -> anyhow::Result<()>;
    /// All detected faces on a photo (`status='detected'`), each with
    /// its assigned person's name resolved via join.
    fn list_for_content_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<Vec<FaceWithPerson>>;
    /// Detected faces assigned to `person_id`, optionally restricted
    /// to one library.
    fn list_for_person(
        &mut self,
        ctx: &opentelemetry::Context,
        person_id: i32,
        library_id: Option<i32>,
    ) -> anyhow::Result<Vec<FaceWithPath>>;
    /// Detected rows paired with their base64-encoded embedding string.
    /// `unassigned=true` filters to `person_id IS NULL`; paged by
    /// id-ascending `limit`/`offset`.
    fn list_embeddings(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
        unassigned: bool,
        limit: i64,
        offset: i64,
    ) -> anyhow::Result<Vec<(FaceDetectionRow, String)>>;
    /// Fetch one face row by primary key; None when absent.
    fn get_face(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
    ) -> anyhow::Result<Option<FaceDetectionRow>>;
    /// Patch person assignment, bbox and/or embedding, then return the
    /// row as it stands after the update.
    fn update_face(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        person_id: Option<Option<i32>>, // None=leave; Some(None)=clear; Some(Some(id))=set
        bbox: Option<(f32, f32, f32, f32)>,
        embedding: Option<Vec<u8>>,
    ) -> anyhow::Result<FaceDetectionRow>;
    /// Delete one face row — presumably returns whether a row was
    /// removed; impl not visible in this chunk, confirm.
    fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result<bool>;
    /// Remove rows for a hash — presumably auto-sourced only, returning
    /// the count removed; impl not visible here, confirm semantics.
    fn delete_auto_for_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<usize>;
    /// Aggregate [`FaceStats`], optionally scoped to one library.
    fn stats(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
    ) -> anyhow::Result<FaceStats>;

    // ── Persons ─────────────────────────────────────────────────────────
    /// Create a person; `from_tag` records tag-driven provenance
    /// (stored as `created_from_tag`).
    fn create_person(
        &mut self,
        ctx: &opentelemetry::Context,
        req: &CreatePersonReq,
        from_tag: bool,
    ) -> anyhow::Result<Person>;
    /// Fetch one person by id; None when absent.
    fn get_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
    ) -> anyhow::Result<Option<Person>>;
    /// List persons with face counts; `include_ignored` controls whether
    /// the IGNORE bucket appears.
    fn list_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
        include_ignored: bool,
    ) -> anyhow::Result<Vec<PersonSummary>>;
    /// Get the IGNORE/junk bucket, creating it lazily on first call.
    /// Idempotent — returns the same row across calls. Single global
    /// bucket per database; the frontend never sees the literal name.
    fn get_or_create_ignored_person(
        &mut self,
        ctx: &opentelemetry::Context,
    ) -> anyhow::Result<Person>;
    /// Apply a [`UpdatePersonReq`] patch and return the updated person.
    fn update_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        patch: &UpdatePersonReq,
    ) -> anyhow::Result<Person>;
    /// Delete a person. `cascade=true` removes face rows; otherwise the
    /// rows have their `person_id` set NULL by the FK constraint.
    fn delete_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        cascade_delete_faces: bool,
    ) -> anyhow::Result<bool>;
    /// Re-point `src`'s faces at `into`, delete `src`, and return the
    /// surviving person.
    fn merge_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        src: i32,
        into: i32,
    ) -> anyhow::Result<Person>;

    /// Resolve `(library_id, rel_path)` → `content_hash` via image_exif.
    /// Returns None when the photo hasn't been EXIF-indexed yet (no row
    /// in image_exif) or when the row exists but content_hash is NULL.
    fn resolve_content_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        rel_path: &str,
    ) -> anyhow::Result<Option<String>>;

    // ── Auto-bind support (Phase 4) ─────────────────────────────────────

    /// Map case-insensitive person names → person id. Used by the
    /// auto-bind path to look up "is this tag a known person?". Names
    /// passed in are matched LOWER(persons.name); collisions resolve to
    /// the person with the lowest id (stable, but the UNIQUE constraint
    /// on persons.name COLLATE NOCASE prevents collisions in practice).
    fn find_persons_by_names_ci(
        &mut self,
        ctx: &opentelemetry::Context,
        names: &[String],
    ) -> anyhow::Result<std::collections::HashMap<String, i32>>;

    /// Mean of a person's existing face embeddings. Returns the L2-
    /// normalized 512-d reference vector, or None when the person has
    /// no detected faces yet (auto-bind treats that as "first face wins
    /// unconditionally"). Filters by the same model_version that produced
    /// the candidate embedding so cross-model averaging never happens.
    fn person_reference_embedding(
        &mut self,
        ctx: &opentelemetry::Context,
        person_id: i32,
        model_version: &str,
    ) -> anyhow::Result<Option<Vec<f32>>>;

    /// Set face_detections.person_id and, when the target person has no
    /// cover_face_id yet, set it to this face. One transaction so a
    /// half-bound state can't survive a SQLite write error.
    fn assign_face_to_person(
        &mut self,
        ctx: &opentelemetry::Context,
        face_id: i32,
        person_id: i32,
    ) -> anyhow::Result<()>;
}
|
||
|
||
/// Free-standing input struct; the DAO copies it into [`InsertFaceDetection`]
|
||
/// so callers don't need to import the diesel-derived insertable.
|
||
#[derive(Debug, Clone)]
|
||
pub struct InsertFaceDetectionInput {
|
||
pub library_id: i32,
|
||
pub content_hash: String,
|
||
pub rel_path: String,
|
||
pub bbox: Option<(f32, f32, f32, f32)>,
|
||
pub embedding: Option<Vec<u8>>,
|
||
pub confidence: Option<f32>,
|
||
pub source: String,
|
||
pub person_id: Option<i32>,
|
||
pub status: String,
|
||
pub model_version: String,
|
||
}
|
||
|
||
// ── SqliteFaceDao impl ──────────────────────────────────────────────────────
|
||
|
||
/// Production [`FaceDao`] backed by a SQLite connection obtained from the
/// crate's `connect()`. Arc<Mutex<…>> because every DAO method locks the
/// connection — NOTE(review): this serializes all face queries through
/// one connection.
pub struct SqliteFaceDao {
    connection: Arc<Mutex<SqliteConnection>>,
}
|
||
|
||
impl SqliteFaceDao {
    /// Open a fresh DAO on its own connection via the crate-wide `connect()`.
    pub fn new() -> Self {
        Self {
            connection: Arc::new(Mutex::new(connect())),
        }
    }

    /// Test helper — bind to a pre-built (typically in-memory) connection.
    #[cfg(test)]
    pub fn from_connection(connection: Arc<Mutex<SqliteConnection>>) -> Self {
        Self { connection }
    }
}
|
||
|
||
impl Default for SqliteFaceDao {
    /// Same as [`SqliteFaceDao::new`] — opens its own connection.
    fn default() -> Self {
        Self::new()
    }
}
|
||
|
||
impl FaceDao for SqliteFaceDao {
|
||
fn already_scanned(
|
||
&mut self,
|
||
ctx: &opentelemetry::Context,
|
||
content_hash: &str,
|
||
) -> anyhow::Result<bool> {
|
||
let mut conn = self.connection.lock().expect("face dao lock");
|
||
trace_db_call(ctx, "query", "face_already_scanned", |span| {
|
||
span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
|
||
face_detections::table
|
||
.filter(face_detections::content_hash.eq(content_hash))
|
||
.select(face_detections::id)
|
||
.first::<i32>(conn.deref_mut())
|
||
.optional()
|
||
.map(|x| x.is_some())
|
||
.with_context(|| "already_scanned query")
|
||
})
|
||
}
|
||
|
||
/// Backlog query for the watcher's quick-scan path: images with a
/// content_hash but no face_detections row yet.
fn list_unscanned_candidates(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: i32,
    limit: i64,
) -> anyhow::Result<Vec<(String, String)>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "list_unscanned_candidates", |span| {
        span.set_attribute(KeyValue::new("library_id", library_id as i64));
        // Pick the smallest-id rel_path per content_hash so we don't
        // fire multiple detect calls for the same hash if it lives
        // under several rel_paths in the same library. The
        // anti-join (NOT EXISTS) drains hashes that have no row in
        // face_detections at all. The image-extension predicate
        // keeps videos out of the candidate set; without it they'd
        // be filtered client-side and re-pulled every tick forever
        // because no marker row is written for excluded paths.
        //
        // NOTE(review): `GROUP BY content_hash` with a bare rel_path
        // column lets SQLite pick an *arbitrary* row per group, not
        // necessarily the smallest-id one as the comment above claims
        // — harmless here (any rel_path of the hash will do), but
        // worth confirming/correcting.
        let ext_predicate = image_path_predicate("rel_path");
        // Raw SQL because the NOT EXISTS + GROUP BY + dynamic extension
        // predicate doesn't fit diesel's DSL. `ext_predicate` is built
        // from the static extension list, never user input, so the
        // format! interpolation is not an injection surface.
        let sql = format!(
            "SELECT rel_path, content_hash \
             FROM image_exif e \
             WHERE library_id = ? \
               AND content_hash IS NOT NULL \
               AND {ext_predicate} \
               AND NOT EXISTS ( \
                   SELECT 1 FROM face_detections f \
                   WHERE f.content_hash = e.content_hash \
               ) \
             GROUP BY content_hash \
             LIMIT ?"
        );
        let rows: Vec<(String, String)> = diesel::sql_query(sql)
            .bind::<diesel::sql_types::Integer, _>(library_id)
            .bind::<diesel::sql_types::BigInt, _>(limit)
            .load::<UnscannedRow>(conn.deref_mut())
            .with_context(|| "list_unscanned_candidates")?
            .into_iter()
            .map(|r| (r.rel_path, r.content_hash))
            .collect();
        Ok(rows)
    })
}
|
||
|
||
/// Insert one detection/marker row and read it back with its assigned id.
/// Insert → last_insert_rowid() → fetch all run under the single
/// connection lock held for this call, so no other DAO call can
/// interleave between the insert and the rowid read.
fn store_detection(
    &mut self,
    ctx: &opentelemetry::Context,
    row: InsertFaceDetectionInput,
) -> anyhow::Result<FaceDetectionRow> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "insert", "store_detection", |span| {
        span.set_attribute(KeyValue::new("status", row.status.clone()));
        span.set_attribute(KeyValue::new("source", row.source.clone()));
        let now = Utc::now().timestamp();
        // Unzip the bbox tuple into the four nullable columns.
        let (bx, by, bw, bh) = match row.bbox {
            Some((x, y, w, h)) => (Some(x), Some(y), Some(w), Some(h)),
            None => (None, None, None, None),
        };
        let insert = InsertFaceDetection {
            library_id: row.library_id,
            content_hash: row.content_hash,
            rel_path: row.rel_path,
            bbox_x: bx,
            bbox_y: by,
            bbox_w: bw,
            bbox_h: bh,
            embedding: row.embedding,
            confidence: row.confidence,
            source: row.source,
            person_id: row.person_id,
            status: row.status,
            model_version: row.model_version,
            created_at: now,
        };
        diesel::insert_into(face_detections::table)
            .values(&insert)
            .execute(conn.deref_mut())
            .with_context(|| "insert face_detection")?;
        // SQLite's last_insert_rowid() isn't exposed by diesel's sqlite
        // backend directly; declare it as a local SQL function.
        define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; }
        let id = diesel::select(last_insert_rowid())
            .get_result::<i32>(conn.deref_mut())
            .with_context(|| "last_insert_rowid")?;
        face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "fetch inserted face")
    })
}
|
||
|
||
fn mark_status(
|
||
&mut self,
|
||
ctx: &opentelemetry::Context,
|
||
library_id: i32,
|
||
content_hash: &str,
|
||
rel_path: &str,
|
||
status: &str,
|
||
model_version: &str,
|
||
) -> anyhow::Result<()> {
|
||
// Marker rows have NULL bbox + NULL embedding (CHECK enforces
|
||
// this). We let the UNIQUE partial index on (content_hash) WHERE
|
||
// status='no_faces' guard against double-marking; for 'failed' we
|
||
// do a manual exists-check.
|
||
let exists = self.already_scanned(ctx, content_hash)?;
|
||
if exists {
|
||
// Don't write a second marker if any row already exists for
|
||
// this hash — that includes detected rows from a prior run
|
||
// that succeeded; the file watcher's already_scanned() check
|
||
// should have caught this, but stay idempotent.
|
||
return Ok(());
|
||
}
|
||
self.store_detection(
|
||
ctx,
|
||
InsertFaceDetectionInput {
|
||
library_id,
|
||
content_hash: content_hash.to_string(),
|
||
rel_path: rel_path.to_string(),
|
||
bbox: None,
|
||
embedding: None,
|
||
confidence: None,
|
||
source: "auto".to_string(),
|
||
person_id: None,
|
||
status: status.to_string(),
|
||
model_version: model_version.to_string(),
|
||
},
|
||
)?;
|
||
Ok(())
|
||
}
|
||
|
||
/// All detected faces on a photo, each joined with its assigned person's
/// name. Marker rows are excluded by the status filter.
fn list_for_content_hash(
    &mut self,
    ctx: &opentelemetry::Context,
    content_hash: &str,
) -> anyhow::Result<Vec<FaceWithPerson>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "faces_for_hash", |span| {
        span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
        // Left join so unassigned faces still come back (person_name =
        // None). The explicit tuple select deliberately omits the
        // embedding BLOB — it never leaves the database on this path.
        face_detections::table
            .left_join(persons::table.on(persons::id.nullable().eq(face_detections::person_id)))
            .filter(face_detections::content_hash.eq(content_hash))
            .filter(face_detections::status.eq("detected"))
            .select((
                face_detections::id,
                face_detections::bbox_x,
                face_detections::bbox_y,
                face_detections::bbox_w,
                face_detections::bbox_h,
                face_detections::confidence,
                face_detections::source,
                face_detections::person_id,
                persons::name.nullable(),
                face_detections::model_version,
            ))
            .load::<(
                i32,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                String,
                Option<i32>,
                Option<String>,
                String,
            )>(conn.deref_mut())
            .with_context(|| "list faces for hash")
            .map(|rows| {
                rows.into_iter()
                    .map(|r| FaceWithPerson {
                        id: r.0,
                        // NULL bbox/confidence only occur on marker rows,
                        // which the status filter excludes; 0.0 is a
                        // defensive fallback, not an expected path.
                        bbox_x: r.1.unwrap_or(0.0),
                        bbox_y: r.2.unwrap_or(0.0),
                        bbox_w: r.3.unwrap_or(0.0),
                        bbox_h: r.4.unwrap_or(0.0),
                        confidence: r.5.unwrap_or(0.0),
                        source: r.6,
                        person_id: r.7,
                        person_name: r.8,
                        model_version: r.9,
                    })
                    .collect()
            })
    })
}
|
||
|
||
/// Detected faces assigned to `person_id`, optionally restricted to one
/// library. Powers the per-person photo grid.
fn list_for_person(
    &mut self,
    ctx: &opentelemetry::Context,
    person_id: i32,
    library_id: Option<i32>,
) -> anyhow::Result<Vec<FaceWithPath>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "faces_for_person", |span| {
        span.set_attribute(KeyValue::new("person_id", person_id as i64));
        // into_boxed() so the optional library filter can be added
        // conditionally to a single query value.
        let mut query = face_detections::table
            .filter(face_detections::person_id.eq(person_id))
            .filter(face_detections::status.eq("detected"))
            .into_boxed();
        if let Some(lib) = library_id {
            query = query.filter(face_detections::library_id.eq(lib));
        }
        // Tuple select omits the embedding BLOB (see list_for_content_hash).
        query
            .select((
                face_detections::id,
                face_detections::library_id,
                face_detections::rel_path,
                face_detections::bbox_x,
                face_detections::bbox_y,
                face_detections::bbox_w,
                face_detections::bbox_h,
                face_detections::confidence,
                face_detections::person_id,
                face_detections::model_version,
            ))
            .load::<(
                i32,
                i32,
                String,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<i32>,
                String,
            )>(conn.deref_mut())
            .with_context(|| "list faces for person")
            .map(|rows| {
                rows.into_iter()
                    .map(|r| FaceWithPath {
                        id: r.0,
                        library_id: r.1,
                        rel_path: r.2,
                        // 0.0 fallbacks are defensive; detected rows
                        // should never have NULL bbox/confidence.
                        bbox_x: r.3.unwrap_or(0.0),
                        bbox_y: r.4.unwrap_or(0.0),
                        bbox_w: r.5.unwrap_or(0.0),
                        bbox_h: r.6.unwrap_or(0.0),
                        confidence: r.7.unwrap_or(0.0),
                        person_id: r.8,
                        model_version: r.9,
                    })
                    .collect()
            })
    })
}
|
||
|
||
/// Detected rows paired with their base64-encoded embedding, for Apollo's
/// clustering layer. Ordered by id ascending so limit/offset pagination
/// is stable across pages (new rows only ever append at higher ids).
fn list_embeddings(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: Option<i32>,
    unassigned: bool,
    limit: i64,
    offset: i64,
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "list_embeddings", |span| {
        span.set_attribute(KeyValue::new("limit", limit));
        span.set_attribute(KeyValue::new("offset", offset));
        let mut query = face_detections::table
            .filter(face_detections::status.eq("detected"))
            .into_boxed();
        if let Some(lib) = library_id {
            query = query.filter(face_detections::library_id.eq(lib));
        }
        if unassigned {
            query = query.filter(face_detections::person_id.is_null());
        }
        let rows = query
            .order(face_detections::id.asc())
            .limit(limit)
            .offset(offset)
            .load::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "list embeddings")?;
        // Pair with the base64-encoded embedding string so the handler
        // doesn't need to know the wire format. Skip rows with NULL
        // embedding (shouldn't happen on detected rows, but defensive).
        // `embedding.take()` moves the bytes out of the row so we can
        // hand the (now-empty-embedding) row plus the encoded string
        // back to the caller without cloning the whole row — at 20k
        // rows × 2 KB that clone was 40 MB of pointless heap traffic
        // per cluster-suggest run.
        use base64::Engine;
        Ok(rows
            .into_iter()
            .filter_map(|mut r| {
                let bytes = r.embedding.take()?;
                let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
                Some((r, b64))
            })
            .collect())
    })
}
|
||
|
||
/// Look up a single face row by primary key. Returns `Ok(None)` when the
/// id doesn't exist (via `.optional()`) rather than treating that as an
/// error — the HTTP layer maps it to 404.
fn get_face(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
) -> anyhow::Result<Option<FaceDetectionRow>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "get_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .optional()
            .with_context(|| "get_face")
    })
}
|
||
|
||
/// Patch a face row and return the updated row.
///
/// Patch semantics: each `Option` parameter means "leave unchanged" when
/// `None`. `person_id` is doubly optional — `Some(None)` explicitly clears
/// the assignment (sets the column NULL), `Some(Some(id))` assigns.
/// Returns the re-fetched row after all patches apply; errors if the id
/// doesn't exist (the final `first()` fails).
fn update_face(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
    person_id: Option<Option<i32>>,
    bbox: Option<(f32, f32, f32, f32)>,
    embedding: Option<Vec<u8>>,
) -> anyhow::Result<FaceDetectionRow> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "update", "update_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        // Apply patches one at a time so each set() has the right type.
        // Diesel's update DSL is type-driven and combining heterogeneous
        // optional sets in one statement is awkward.
        if let Some(pid) = person_id {
            diesel::update(face_detections::table.find(id))
                .set(face_detections::person_id.eq(pid))
                .execute(conn.deref_mut())
                .with_context(|| "update person_id")?;
        }
        if let Some((x, y, w, h)) = bbox {
            diesel::update(face_detections::table.find(id))
                .set((
                    face_detections::bbox_x.eq(x),
                    face_detections::bbox_y.eq(y),
                    face_detections::bbox_w.eq(w),
                    face_detections::bbox_h.eq(h),
                ))
                .execute(conn.deref_mut())
                .with_context(|| "update bbox")?;
        }
        if let Some(emb) = embedding {
            diesel::update(face_detections::table.find(id))
                .set(face_detections::embedding.eq(emb))
                .execute(conn.deref_mut())
                .with_context(|| "update embedding")?;
        }
        face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "fetch updated face")
    })
}
|
||
|
||
/// Delete a face row by id. Returns `Ok(true)` if a row was removed,
/// `Ok(false)` if the id didn't exist (delete affected zero rows).
fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result<bool> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "delete", "delete_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        let n = diesel::delete(face_detections::table.find(id))
            .execute(conn.deref_mut())
            .with_context(|| "delete face")?;
        Ok(n > 0)
    })
}
|
||
|
||
/// Delete every auto-detected (`source='auto'`) row for a content hash,
/// returning how many rows were removed. Manually-created rows for the
/// same hash are left untouched — only the filter on `source` spares them.
fn delete_auto_for_hash(
    &mut self,
    ctx: &opentelemetry::Context,
    content_hash: &str,
) -> anyhow::Result<usize> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "delete", "delete_auto_for_hash", |span| {
        span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
        diesel::delete(
            face_detections::table
                .filter(face_detections::content_hash.eq(content_hash))
                .filter(face_detections::source.eq("auto")),
        )
        .execute(conn.deref_mut())
        .with_context(|| "delete auto rows")
    })
}
|
||
|
||
/// Aggregate face-scanning progress counters, optionally scoped to one
/// library.
///
/// Counter semantics: `scanned` / `with_faces` / `no_faces` / `failed`
/// count DISTINCT content hashes (one per unique file's bytes, regardless
/// of how many face rows it produced). `total_photos` is the distinct-hash
/// count of image-typed rows in `image_exif` — the denominator for a
/// progress bar. `persons_count` and `unassigned_faces` are plain row
/// counts. Runs several independent queries under one lock acquisition;
/// they are not in a transaction, so counts can skew slightly against a
/// concurrent writer — acceptable for a stats display.
fn stats(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: Option<i32>,
) -> anyhow::Result<FaceStats> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "face_stats", |span| {
        if let Some(lib) = library_id {
            span.set_attribute(KeyValue::new("library_id", lib as i64));
        }
        // Count distinct content_hashes per status by status — one
        // hash can have many rows (multiple detected faces) but we
        // want it counted once.
        let scanned: i64 = {
            let mut q = face_detections::table.into_boxed();
            if let Some(lib) = library_id {
                q = q.filter(face_detections::library_id.eq(lib));
            }
            q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                .first(conn.deref_mut())
                .with_context(|| "stats: scanned")?
        };
        let with_faces: i64 = {
            let mut q = face_detections::table
                .filter(face_detections::status.eq("detected"))
                .into_boxed();
            if let Some(lib) = library_id {
                q = q.filter(face_detections::library_id.eq(lib));
            }
            q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                .first(conn.deref_mut())
                .with_context(|| "stats: with_faces")?
        };
        let no_faces: i64 = {
            let mut q = face_detections::table
                .filter(face_detections::status.eq("no_faces"))
                .into_boxed();
            if let Some(lib) = library_id {
                q = q.filter(face_detections::library_id.eq(lib));
            }
            q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                .first(conn.deref_mut())
                .with_context(|| "stats: no_faces")?
        };
        let failed: i64 = {
            let mut q = face_detections::table
                .filter(face_detections::status.eq("failed"))
                .into_boxed();
            if let Some(lib) = library_id {
                q = q.filter(face_detections::library_id.eq(lib));
            }
            q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                .first(conn.deref_mut())
                .with_context(|| "stats: failed")?
        };
        // Image-extension filter mirrors `list_unscanned_candidates` so
        // SCANNED can actually reach 100%: videos sit in `image_exif` but
        // never get a `face_detections` row, so counting them here
        // permanently caps the percentage below 100%.
        //
        // Count DISTINCT content_hash (not rows) so the numerator
        // (`scanned`, also distinct-content_hash) and denominator live
        // in the same domain. Without this, a file present at multiple
        // rel_paths or across libraries inflates total_photos by one
        // per duplicate row while face_detections — keyed on
        // content_hash — counts the bytes once, leaving a permanent
        // gap (e.g. 1101/1103 with nothing actually pending). Rows
        // with NULL content_hash are excluded; they're held in the
        // hash-backfill backlog and counting them would pin the bar
        // below 100% for the duration of that backfill.
        let total_photos: i64 = {
            let ext_predicate = image_path_predicate("rel_path");
            let row: CountRow = if let Some(lib) = library_id {
                let sql = format!(
                    "SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
                     WHERE library_id = ? AND content_hash IS NOT NULL AND {ext_predicate}"
                );
                diesel::sql_query(sql)
                    .bind::<diesel::sql_types::Integer, _>(lib)
                    .get_result(conn.deref_mut())
                    .with_context(|| "stats: total_photos")?
            } else {
                let sql = format!(
                    "SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
                     WHERE content_hash IS NOT NULL AND {ext_predicate}"
                );
                diesel::sql_query(sql)
                    .get_result(conn.deref_mut())
                    .with_context(|| "stats: total_photos")?
            };
            row.count
        };
        // persons_count is global — the persons table has no library
        // scoping, so the optional library filter doesn't apply here.
        let persons_count: i64 = persons::table
            .select(diesel::dsl::count_star())
            .first(conn.deref_mut())
            .with_context(|| "stats: persons")?;
        let unassigned_faces: i64 = {
            let mut q = face_detections::table
                .filter(face_detections::status.eq("detected"))
                .filter(face_detections::person_id.is_null())
                .into_boxed();
            if let Some(lib) = library_id {
                q = q.filter(face_detections::library_id.eq(lib));
            }
            q.select(diesel::dsl::count_star())
                .first(conn.deref_mut())
                .with_context(|| "stats: unassigned")?
        };

        Ok(FaceStats {
            library_id,
            total_photos,
            scanned,
            with_faces,
            no_faces,
            failed,
            persons_count,
            unassigned_faces,
        })
    })
}
|
||
|
||
/// Insert a new person and return the persisted row.
///
/// `from_tag` marks whether the row came from the tag-bootstrap flow
/// (stored as `created_from_tag`). The id is recovered through SQLite's
/// `last_insert_rowid()` — safe here because the connection is held under
/// the DAO mutex for the whole call, so no interleaved insert can clobber
/// it. The optional `entity_id` bridge is applied as a follow-up UPDATE
/// (see inline comment). A duplicate name surfaces as a UNIQUE-violation
/// error from the initial insert.
fn create_person(
    &mut self,
    ctx: &opentelemetry::Context,
    req: &CreatePersonReq,
    from_tag: bool,
) -> anyhow::Result<Person> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "insert", "create_person", |span| {
        span.set_attribute(KeyValue::new("name", req.name.clone()));
        let now = Utc::now().timestamp();
        let insert = InsertPerson {
            name: req.name.clone(),
            notes: req.notes.clone(),
            created_from_tag: from_tag,
            is_ignored: req.is_ignored,
            created_at: now,
            updated_at: now,
        };
        diesel::insert_into(persons::table)
            .values(&insert)
            .execute(conn.deref_mut())
            .with_context(|| format!("insert person {}", req.name))?;
        define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; }
        let id = diesel::select(last_insert_rowid())
            .get_result::<i32>(conn.deref_mut())
            .with_context(|| "last_insert_rowid persons")?;
        // Optional entity bridge — do this as a follow-up update so
        // schema's UNIQUE(name COLLATE NOCASE) can fire on insert
        // before we touch entity_id.
        if let Some(entity_id) = req.entity_id {
            diesel::update(persons::table.find(id))
                .set(persons::entity_id.eq(entity_id))
                .execute(conn.deref_mut())
                .with_context(|| "set entity_id on new person")?;
        }
        persons::table
            .find(id)
            .first::<Person>(conn.deref_mut())
            .with_context(|| "fetch new person")
    })
}
|
||
|
||
/// Return the singleton "Ignored" bucket person, creating it on first use.
///
/// The lookup block is scoped so the mutex guard drops before delegating
/// to `create_person` (which re-locks) — holding it across the call would
/// self-deadlock on a non-reentrant mutex. The create/UNIQUE-trip/re-read
/// dance makes concurrent first-use race-safe.
fn get_or_create_ignored_person(
    &mut self,
    ctx: &opentelemetry::Context,
) -> anyhow::Result<Person> {
    // Fast path: there's already an is_ignored row → return it.
    // Slow path on first use: create one with a stable display name
    // ("Ignored"). Race-safe because the UNIQUE(name COLLATE NOCASE)
    // index forces only one ever to exist (we trip and look up).
    {
        let mut conn = self.connection.lock().expect("face dao lock");
        if let Some(p) = persons::table
            .filter(persons::is_ignored.eq(true))
            .order(persons::id.asc())
            .first::<Person>(conn.deref_mut())
            .optional()
            .with_context(|| "lookup ignored person")?
        {
            return Ok(p);
        }
    }
    // Drop the lock before delegating to create_person — that
    // method takes its own lock.
    match self.create_person(
        ctx,
        &CreatePersonReq {
            name: "Ignored".to_string(),
            notes: Some(
                "Bucket for strangers, false detections, and faces \
                 you don't want bound to a real person."
                    .to_string(),
            ),
            entity_id: None,
            is_ignored: true,
        },
        /*from_tag*/ false,
    ) {
        Ok(p) => Ok(p),
        Err(e) if is_unique_violation(&e) => {
            // Race: someone else created the row. Re-read.
            let mut conn = self.connection.lock().expect("face dao lock");
            persons::table
                .filter(persons::is_ignored.eq(true))
                .order(persons::id.asc())
                .first::<Person>(conn.deref_mut())
                .with_context(|| "load ignored person after race")
        }
        Err(e) => Err(e),
    }
}
|
||
|
||
/// Look up a person by primary key; `Ok(None)` when the id doesn't exist.
fn get_person(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
) -> anyhow::Result<Option<Person>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "get_person", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        persons::table
            .find(id)
            .first::<Person>(conn.deref_mut())
            .optional()
            .with_context(|| "get_person")
    })
}
|
||
|
||
/// List persons (name-ascending) with per-person face counts.
///
/// `library_id` scopes only the face COUNTs, not which persons appear —
/// a person with zero faces in the requested library still shows with
/// `face_count == 0`. `include_ignored == false` (the default path)
/// filters out `is_ignored` rows.
fn list_persons(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: Option<i32>,
    include_ignored: bool,
) -> anyhow::Result<Vec<PersonSummary>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "list_persons", |_| {
        // Two-step: load all persons, then a single grouped count
        // query for face counts. Using a LEFT JOIN + GROUP BY in
        // Diesel here gets noisy with the optional library filter; a
        // second roundtrip is cheap and clearer.
        let mut person_query = persons::table.into_boxed();
        if !include_ignored {
            // Default — hide the IGNORE/junk bucket from the list.
            // The frontend asks include_ignored=true explicitly when
            // it needs to surface ignored persons (e.g. a "show
            // ignored" toggle in the management UI).
            person_query = person_query.filter(persons::is_ignored.eq(false));
        }
        let person_rows: Vec<Person> = person_query
            .order(persons::name.asc())
            .load::<Person>(conn.deref_mut())
            .with_context(|| "load persons")?;

        // Diesel's BoxedSelectStatement + group_by trips the trait
        // resolver into recursion, so this aggregation goes through
        // sql_query. The shape is small and the bind list is at most
        // one parameter — readability isn't really worse than the DSL.
        let counts: Vec<(i32, i64)> = {
            use diesel::sql_types::*;
            // Row shape for the raw GROUP BY query below.
            #[derive(QueryableByName)]
            struct PersonCountRow {
                #[diesel(sql_type = Integer)]
                person_id: i32,
                #[diesel(sql_type = BigInt)]
                count: i64,
            }
            let sql = if library_id.is_some() {
                "SELECT person_id, COUNT(*) AS count FROM face_detections \
                 WHERE status='detected' AND person_id IS NOT NULL AND library_id = ? \
                 GROUP BY person_id"
            } else {
                "SELECT person_id, COUNT(*) AS count FROM face_detections \
                 WHERE status='detected' AND person_id IS NOT NULL \
                 GROUP BY person_id"
            };
            let mut q = diesel::sql_query(sql).into_boxed();
            if let Some(lib) = library_id {
                q = q.bind::<Integer, _>(lib);
            }
            q.load::<PersonCountRow>(conn.deref_mut())
                .with_context(|| "person face counts")?
                .into_iter()
                .map(|r| (r.person_id, r.count))
                .collect()
        };
        use std::collections::HashMap;
        let count_map: HashMap<i32, i64> = counts.into_iter().collect();

        // Persons absent from count_map simply had no detected,
        // assigned faces in scope — they get face_count 0.
        Ok(person_rows
            .into_iter()
            .map(|p| {
                let face_count = count_map.get(&p.id).copied().unwrap_or(0);
                PersonSummary {
                    id: p.id,
                    name: p.name,
                    cover_face_id: p.cover_face_id,
                    entity_id: p.entity_id,
                    created_from_tag: p.created_from_tag,
                    notes: p.notes,
                    is_ignored: p.is_ignored,
                    face_count,
                }
            })
            .collect())
    })
}
|
||
|
||
/// Patch a person row and return the updated row. Each `Some` field in
/// the patch is applied as its own UPDATE (with `updated_at` refreshed);
/// `None` fields are left untouched. Errors if the id doesn't exist
/// (the final re-fetch fails).
fn update_person(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
    patch: &UpdatePersonReq,
) -> anyhow::Result<Person> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "update", "update_person", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        let now = Utc::now().timestamp();
        // Apply each patched column individually for the same
        // reason as update_face — heterogeneous optional sets are
        // painful in Diesel's type-driven update DSL.
        if let Some(name) = &patch.name {
            diesel::update(persons::table.find(id))
                .set((persons::name.eq(name), persons::updated_at.eq(now)))
                .execute(conn.deref_mut())
                .with_context(|| "update person name")?;
        }
        if let Some(notes) = &patch.notes {
            diesel::update(persons::table.find(id))
                .set((persons::notes.eq(notes), persons::updated_at.eq(now)))
                .execute(conn.deref_mut())
                .with_context(|| "update person notes")?;
        }
        if let Some(cover) = patch.cover_face_id {
            diesel::update(persons::table.find(id))
                .set((
                    persons::cover_face_id.eq(cover),
                    persons::updated_at.eq(now),
                ))
                .execute(conn.deref_mut())
                .with_context(|| "update person cover")?;
        }
        if let Some(eid) = patch.entity_id {
            diesel::update(persons::table.find(id))
                .set((persons::entity_id.eq(eid), persons::updated_at.eq(now)))
                .execute(conn.deref_mut())
                .with_context(|| "update person entity_id")?;
        }
        if let Some(flag) = patch.is_ignored {
            diesel::update(persons::table.find(id))
                .set((persons::is_ignored.eq(flag), persons::updated_at.eq(now)))
                .execute(conn.deref_mut())
                .with_context(|| "update person is_ignored")?;
        }
        persons::table
            .find(id)
            .first::<Person>(conn.deref_mut())
            .with_context(|| "fetch updated person")
    })
}
|
||
|
||
/// Delete a person; when `cascade_delete_faces` is set, also delete all
/// face rows assigned to them first. Returns `Ok(true)` if the person row
/// existed. Without cascade, this person's faces keep their `person_id`
/// pointing at the now-deleted row — NOTE(review): SQLite only enforces
/// that if FK enforcement is on; confirm the schema/pragma before relying
/// on it.
fn delete_person(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
    cascade_delete_faces: bool,
) -> anyhow::Result<bool> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "delete", "delete_person", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        span.set_attribute(KeyValue::new("cascade", cascade_delete_faces));
        if cascade_delete_faces {
            diesel::delete(face_detections::table.filter(face_detections::person_id.eq(id)))
                .execute(conn.deref_mut())
                .with_context(|| "cascade delete faces for person")?;
        }
        // Always clear cover_face_id pointers that referenced this
        // person's faces (otherwise the FK from persons.cover_face_id
        // could hang). cover_face_id has no FK constraint in SQLite
        // so this is documentation-only — the explicit nuke is on
        // the face rows above.
        let n = diesel::delete(persons::table.find(id))
            .execute(conn.deref_mut())
            .with_context(|| "delete person")?;
        Ok(n > 0)
    })
}
|
||
|
||
/// Merge person `src` into person `into`: re-point all of src's face rows,
/// copy src's notes into the target only when the target has none, delete
/// src, and return the surviving target row. The whole sequence runs in
/// one transaction so a mid-operation write error can't leave a
/// half-merged state. Bails up-front on `src == into`.
fn merge_persons(
    &mut self,
    ctx: &opentelemetry::Context,
    src: i32,
    into: i32,
) -> anyhow::Result<Person> {
    if src == into {
        anyhow::bail!("cannot merge a person into itself");
    }
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "update", "merge_persons", |span| {
        span.set_attribute(KeyValue::new("src", src as i64));
        span.set_attribute(KeyValue::new("into", into as i64));
        // Wrap in a transaction so a half-merged state can't survive
        // a SQLite write error mid-operation.
        conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| {
            // Re-point face_detections.
            diesel::update(face_detections::table.filter(face_detections::person_id.eq(src)))
                .set(face_detections::person_id.eq(into))
                .execute(tx)
                .with_context(|| "repoint faces on merge")?;
            // Copy notes from src into target if the target is empty.
            // Loading src also validates it exists — a bad src id
            // errors out here and rolls the transaction back.
            let src_person: Person = persons::table
                .find(src)
                .first(tx)
                .with_context(|| "load src person for merge")?;
            let into_person: Person = persons::table
                .find(into)
                .first(tx)
                .with_context(|| "load target person for merge")?;
            if into_person.notes.as_deref().unwrap_or("").is_empty()
                && src_person
                    .notes
                    .as_deref()
                    .map(|s| !s.is_empty())
                    .unwrap_or(false)
            {
                diesel::update(persons::table.find(into))
                    .set(persons::notes.eq(src_person.notes))
                    .execute(tx)
                    .with_context(|| "copy notes on merge")?;
            }
            diesel::delete(persons::table.find(src))
                .execute(tx)
                .with_context(|| "delete src person on merge")?;
            persons::table
                .find(into)
                .first::<Person>(tx)
                .with_context(|| "fetch merged person")
        })
    })
}
|
||
|
||
/// Resolve a `(library_id, rel_path)` display pair to its content hash via
/// `image_exif`. Returns `Ok(None)` both when no such row exists AND when
/// the row exists but its `content_hash` is still NULL (hash backfill
/// pending) — the nested-Option flatten below folds the two cases.
fn resolve_content_hash(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: i32,
    rel_path: &str,
) -> anyhow::Result<Option<String>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "resolve_content_hash", |_| {
        image_exif::table
            .filter(image_exif::library_id.eq(library_id))
            .filter(image_exif::rel_path.eq(rel_path))
            .select(image_exif::content_hash)
            .first::<Option<String>>(conn.deref_mut())
            .optional()
            // Option<Option<String>> → Option<String>: missing row and
            // NULL column collapse to the same None.
            .map(|outer| outer.and_then(|inner| inner))
            .with_context(|| "resolve content_hash")
    })
}
|
||
|
||
/// Case-insensitive bulk lookup: map lowercased person names to person
/// ids. Ignored-bucket persons are excluded by design (see inline
/// comment). The returned map is keyed by `LOWER(name)`; on the (today
/// impossible) event of duplicate lowercased names, the lowest id wins.
fn find_persons_by_names_ci(
    &mut self,
    ctx: &opentelemetry::Context,
    names: &[String],
) -> anyhow::Result<std::collections::HashMap<String, i32>> {
    if names.is_empty() {
        return Ok(std::collections::HashMap::new());
    }
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "find_persons_by_names_ci", |span| {
        span.set_attribute(KeyValue::new("count", names.len() as i64));
        // Lowercase comparison both sides. Use sql_query to keep the
        // bind list dynamic without fighting Diesel's type system on
        // the LOWER() function.
        use diesel::sql_types::*;
        let placeholders = std::iter::repeat_n("?", names.len())
            .collect::<Vec<_>>()
            .join(",");
        // Filter out is_ignored persons so the auto-bind path can
        // never target the IGNORE/junk bucket — even if a tag name
        // happens to match it (e.g. someone tags photos as "Ignored"
        // by hand). Ignore-bucket assignment is an explicit operator
        // action through the dedicated endpoint, never a heuristic.
        let sql = format!(
            "SELECT id, LOWER(name) AS lower_name FROM persons \
             WHERE is_ignored = 0 AND LOWER(name) IN ({}) \
             ORDER BY id ASC",
            placeholders
        );
        // Row shape for the raw IN-list query above.
        #[derive(QueryableByName)]
        struct Row {
            #[diesel(sql_type = Integer)]
            id: i32,
            #[diesel(sql_type = Text)]
            lower_name: String,
        }
        let mut q = diesel::sql_query(sql).into_boxed();
        for n in names {
            q = q.bind::<Text, _>(n.to_lowercase());
        }
        let rows = q
            .load::<Row>(conn.deref_mut())
            .with_context(|| "find_persons_by_names_ci")?;
        // Lowest id wins on collision (UNIQUE COLLATE NOCASE on the
        // table prevents that today, but the deduplication is a
        // defensive belt-and-braces).
        let mut out = std::collections::HashMap::with_capacity(rows.len());
        for r in rows {
            out.entry(r.lower_name).or_insert(r.id);
        }
        Ok(out)
    })
}
|
||
|
||
/// Build a person's reference embedding: the re-normalized mean of all
/// their detected-face embeddings for one model version. Returns
/// `Ok(None)` when the person has no decodable embeddings for that
/// version (malformed or NULL blobs are skipped, not errors — see
/// `decode_embedding_bytes`).
fn person_reference_embedding(
    &mut self,
    ctx: &opentelemetry::Context,
    person_id: i32,
    model_version: &str,
) -> anyhow::Result<Option<Vec<f32>>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "person_reference_embedding", |span| {
        span.set_attribute(KeyValue::new("person_id", person_id as i64));
        span.set_attribute(KeyValue::new("model_version", model_version.to_string()));
        // Pull only the embedding bytes; we average them in Rust. A
        // SQL aggregate over 512-d vectors isn't meaningfully faster
        // and would tie us to a specific embedding length.
        let blobs: Vec<Option<Vec<u8>>> = face_detections::table
            .filter(face_detections::person_id.eq(person_id))
            .filter(face_detections::status.eq("detected"))
            .filter(face_detections::model_version.eq(model_version))
            .select(face_detections::embedding)
            .load(conn.deref_mut())
            .with_context(|| "load person embeddings")?;
        let vectors: Vec<Vec<f32>> = blobs
            .into_iter()
            .filter_map(|b| b.and_then(|bytes| decode_embedding_bytes(&bytes)))
            .collect();
        if vectors.is_empty() {
            return Ok(None);
        }
        Ok(Some(mean_normalized(&vectors)))
    })
}
|
||
|
||
/// Assign a face to a person and, if the person has no cover face yet,
/// claim this face as their cover. Both writes happen in one transaction
/// so the cover can never point at a face whose assignment failed.
fn assign_face_to_person(
    &mut self,
    ctx: &opentelemetry::Context,
    face_id: i32,
    person_id: i32,
) -> anyhow::Result<()> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "update", "assign_face_to_person", |span| {
        span.set_attribute(KeyValue::new("face_id", face_id as i64));
        span.set_attribute(KeyValue::new("person_id", person_id as i64));
        conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| {
            diesel::update(face_detections::table.find(face_id))
                .set(face_detections::person_id.eq(person_id))
                .execute(tx)
                .with_context(|| "set face person_id")?;
            // If this person has no cover yet, claim this face.
            // Don't overwrite an existing cover — the user may have
            // hand-picked one in the UI.
            let cover: Option<i32> = persons::table
                .find(person_id)
                .select(persons::cover_face_id)
                .first::<Option<i32>>(tx)
                .with_context(|| "load person cover")?;
            if cover.is_none() {
                diesel::update(persons::table.find(person_id))
                    .set(persons::cover_face_id.eq(face_id))
                    .execute(tx)
                    .with_context(|| "set cover_face_id")?;
            }
            Ok(())
        })
    })
}
|
||
}
|
||
|
||
// ── Embedding helpers ───────────────────────────────────────────────────────
|
||
|
||
/// Decode a 2048-byte little-endian f32 BLOB into a Vec<f32> of length 512.
/// Returns None on malformed input rather than erroring — the caller treats
/// "no usable embedding" the same as "no embedding at all" (skip averaging).
pub(crate) fn decode_embedding_bytes(bytes: &[u8]) -> Option<Vec<f32>> {
    // 512 dimensions × 4 bytes each; any other length is a corrupt or
    // foreign blob and yields None.
    if bytes.len() != 512 * 4 {
        return None;
    }
    let floats: Vec<f32> = bytes
        .chunks_exact(4)
        .map(|quad| f32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]))
        .collect();
    Some(floats)
}
|
||
|
||
/// Mean of L2-normalized vectors, then re-normalize. ArcFace embeddings
/// from insightface are already L2-normalized, so re-normalizing the
/// average is a one-step "average direction" operation.
fn mean_normalized(vectors: &[Vec<f32>]) -> Vec<f32> {
    debug_assert!(
        !vectors.is_empty(),
        "mean_normalized requires non-empty input"
    );
    let dim = vectors[0].len();
    let count = vectors.len() as f32;
    // Component-wise sum, then divide once for the mean.
    let mut mean = vec![0.0f32; dim];
    for v in vectors {
        debug_assert_eq!(v.len(), dim, "mismatched embedding dim");
        for (slot, component) in mean.iter_mut().zip(v) {
            *slot += *component;
        }
    }
    for slot in &mut mean {
        *slot /= count;
    }
    // Re-normalize the average direction. A zero-magnitude mean is
    // returned as-is rather than dividing by zero.
    let magnitude = mean.iter().map(|x| x * x).sum::<f32>().sqrt();
    if magnitude > 0.0 {
        for slot in &mut mean {
            *slot /= magnitude;
        }
    }
    mean
}
|
||
|
||
/// Cosine similarity of two embeddings. Both must be the same length;
/// neither needs to be pre-normalized. Returns 0.0 on length mismatch
/// or zero-magnitude input rather than NaN — the auto-bind path
/// interprets 0.0 as "no useful similarity, leave unassigned".
pub(crate) fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let mag_a: f32 = a.iter().map(|x| x * x).sum::<f32>();
    let mag_b: f32 = b.iter().map(|y| y * y).sum::<f32>();
    let denom = mag_a.sqrt() * mag_b.sqrt();
    // Guard against zero-magnitude input producing NaN.
    if denom <= 0.0 { 0.0 } else { dot / denom }
}
|
||
|
||
// ── Handlers ────────────────────────────────────────────────────────────────
|
||
|
||
/// Register the face/person HTTP surface on an actix `App`.
///
/// Routes: `/faces/stats`, `/faces/embeddings`, `/image/faces` (+ `/{id}`
/// PATCH/DELETE), `/persons` CRUD (+ `/{id}/merge`, `/{id}/faces`),
/// `/persons/bootstrap`, `/persons/ignore-bucket`, and
/// `/tags/people-bootstrap-candidates`. `D` selects the `FaceDao`
/// implementation the handlers resolve from app data. Registration order:
/// the literal paths (`/persons/bootstrap`, `/persons/ignore-bucket`) are
/// registered before the `/persons/{id}` pattern.
pub fn add_face_services<T, D: FaceDao + 'static>(app: App<T>) -> App<T>
where
    T: ServiceFactory<ServiceRequest, Config = (), Error = actix_web::Error, InitError = ()>,
{
    app.service(web::resource("/faces/stats").route(web::get().to(stats_handler::<D>)))
        .service(web::resource("/faces/embeddings").route(web::get().to(embeddings_handler::<D>)))
        .service(
            web::resource("/image/faces")
                .route(web::get().to(list_faces_handler::<D>))
                .route(web::post().to(create_face_handler::<D>)),
        )
        .service(
            web::resource("/image/faces/{id}")
                .route(web::patch().to(update_face_handler::<D>))
                .route(web::delete().to(delete_face_handler::<D>)),
        )
        .service(
            web::resource("/persons")
                .route(web::get().to(list_persons_handler::<D>))
                .route(web::post().to(create_person_handler::<D>)),
        )
        .service(
            web::resource("/persons/bootstrap")
                .route(web::post().to(bootstrap_persons_handler::<D>)),
        )
        .service(
            web::resource("/persons/ignore-bucket")
                .route(web::post().to(ignore_bucket_handler::<D>)),
        )
        .service(
            web::resource("/tags/people-bootstrap-candidates")
                .route(web::get().to(bootstrap_candidates_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}")
                .route(web::get().to(get_person_handler::<D>))
                .route(web::patch().to(update_person_handler::<D>))
                .route(web::delete().to(delete_person_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}/merge").route(web::post().to(merge_persons_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}/faces").route(web::get().to(person_faces_handler::<D>)),
        )
}
|
||
|
||
// ── Bootstrap (Phase 4) ─────────────────────────────────────────────────────
|
||
|
||
/// One tag-name group offered to the operator by the person-bootstrap UI
/// (grouped case-insensitively across tag spellings).
#[derive(Serialize, Debug, Clone)]
pub struct BootstrapCandidate {
    /// Display name — most-frequent capitalization across the case-insensitive
    /// group, or simply the first one seen if it's a tie.
    pub name: String,
    /// Lowercased name; the stable key for grouping and the auto-bind path.
    pub normalized_name: String,
    /// Sum of `tagged_photo` counts across all capitalizations of this name.
    pub usage_count: i64,
    /// Heuristic suggestion; the UI defaults this to checked but the user
    /// confirms before [`bootstrap_persons_handler`] actually creates rows.
    pub looks_like_person: bool,
    /// True when a `persons` row already exists for this name (any case).
    /// The UI hides these — re-running bootstrap is idempotent so it's fine
    /// either way, but the noise isn't worth showing.
    pub already_exists: bool,
}
|
||
|
||
/// Response body for `GET /tags/people-bootstrap-candidates`.
#[derive(Serialize, Debug)]
pub struct BootstrapCandidatesResponse {
    /// Candidate tag groups for the operator to confirm or reject.
    pub candidates: Vec<BootstrapCandidate>,
}
|
||
|
||
/// Request body for `POST /persons/bootstrap`.
#[derive(Deserialize, Debug)]
pub struct BootstrapPersonsReq {
    /// Operator-confirmed names to create `persons` rows for.
    pub names: Vec<String>,
}
|
||
|
||
/// Response body for `POST /persons/bootstrap`.
#[derive(Serialize, Debug)]
pub struct BootstrapPersonsResponse {
    /// Person rows actually created by this call.
    pub created: Vec<Person>,
    /// Names that were not created, each with a reason.
    pub skipped: Vec<BootstrapSkipped>,
}
|
||
|
||
/// A bootstrap name that was skipped, with a human-readable reason.
#[derive(Serialize, Debug)]
pub struct BootstrapSkipped {
    /// The name as submitted.
    pub name: String,
    /// Why it was skipped (e.g. already exists, invalid).
    pub reason: String,
}
|
||
|
||
/// Hard filter for the bootstrap candidate list. Returns true if the tag
/// could plausibly be a person name; returns false to drop it from the
/// candidates entirely (not just leave looks_like_person=false).
///
/// Rules — all required:
/// - At least 3 characters after trimming. Two-letter tags ("AB", "OK")
///   are almost always abbreviations or markers, not names.
/// - No emoji or symbol-class characters. SQL-side string sort already
///   surfaces those at the top of the tag list; filtering them keeps
///   the candidate UI focused on names rather than chart-junk.
/// - No control characters or null bytes.
pub(crate) fn is_plausible_name_token(raw: &str) -> bool {
    let token = raw.trim();
    // Two-character (or shorter) tags are almost never names.
    if token.chars().count() < 3 {
        return false;
    }
    // Every character must be plausible in a name: letters, whitespace,
    // digits (kept here — `looks_like_person` flags them later, but a tag
    // like "Sarah2" stays listed so the operator can confirm an alias),
    // or name punctuation (apostrophes incl. U+2019, hyphen, period,
    // underscore). Emoji, symbols, and control codes disqualify the tag.
    token.chars().all(|c| {
        c.is_alphabetic()
            || c.is_whitespace()
            || c.is_ascii_digit()
            || matches!(c, '\'' | '-' | '.' | '_' | '\u{2019}')
    })
}
|
||
|
||
/// Conservative "this tag *might* be a person name" heuristic. False
/// negatives are fine — the operator confirms in the UI before any row
/// is created. False positives are also fine for the same reason; the
/// goal is just to default sensible candidates to checked.
///
/// Rules:
/// - 1–2 whitespace-separated words
/// - Each word starts with an uppercase character
/// - No digits anywhere (rejects "Trip 2018", "2024", etc.)
/// - Single-word names not on a small denylist of common non-person
///   tags (cat, christmas, beach, ...). Two-word names skip the
///   denylist because a real two-word person name is the dominant
///   case ("Sarah Smith") and false-blocking it is worse than false-
///   accepting "Sunset Walk".
pub(crate) fn looks_like_person(raw: &str) -> bool {
    let trimmed = raw.trim();
    let words: Vec<&str> = trimmed.split_whitespace().collect();
    // Zero words (empty/whitespace-only input) or more than two → not a
    // name candidate.
    if words.is_empty() || words.len() > 2 {
        return false;
    }
    // Each word: uppercase first character, no ASCII digits anywhere.
    // split_whitespace never yields empty words, so `next()` only fails
    // on input we can safely reject.
    let plausible_word = |w: &&str| {
        w.chars().next().is_some_and(|first| first.is_uppercase())
            && !w.chars().any(|c| c.is_ascii_digit())
    };
    if !words.iter().all(plausible_word) {
        return false;
    }
    // Single-word candidates go through the denylist of common
    // non-person tags; two-word candidates skip it (see doc comment).
    if words.len() == 1 {
        const DENY: &[&str] = &[
            // Pets / animals
            "cat",
            "dog",
            "kitten",
            "puppy",
            "bird",
            "fish",
            "pet",
            "pets",
            // Events / occasions
            "birthday",
            "christmas",
            "halloween",
            "easter",
            "thanksgiving",
            "wedding",
            "anniversary",
            "vacation",
            "holiday",
            "party",
            "trip",
            "graduation",
            "concert",
            // Places (generic)
            "home",
            "work",
            "beach",
            "park",
            "hotel",
            "restaurant",
            "office",
            "house",
            "garden",
            // Subjects / styles
            "food",
            "sunset",
            "sunrise",
            "landscape",
            "portrait",
            "selfie",
            "nature",
            "flowers",
            "flower",
            "snow",
            "rain",
            "sky",
            // Buckets
            "untagged",
            "favorites",
            "favourites",
            "misc",
            "other",
            "random",
        ];
        let lowered = trimmed.to_lowercase();
        if DENY.contains(&lowered.as_str()) {
            return false;
        }
    }
    true
}
|
||
|
||
/// Bootstrap step 1: mine the existing tag vocabulary for person-name
/// candidates the operator can bulk-confirm.
///
/// Flow: fetch every tag + usage count → drop implausible tokens
/// (`is_plausible_name_token`) → group case-insensitively, where the
/// most frequent capitalization wins the display name (ties broken by
/// first-seen) → bulk cross-reference against existing persons in one
/// query → sort likely-person candidates first (descending usage,
/// then alphabetical).
async fn bootstrap_candidates_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    face_dao: web::Data<Mutex<D>>,
    tag_dao: web::Data<Mutex<crate::tags::SqliteTagDao>>,
) -> impl Responder {
    use std::collections::HashMap;
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.bootstrap_candidates", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

    // All tags + their counts. Path filter unused — bootstrap is library-wide.
    // Tag-dao lock is scoped to this block so it is released before the
    // face-dao lock below is taken.
    let tags_with_counts = {
        let mut td = tag_dao.lock().expect("tag dao lock");
        match crate::tags::TagDao::get_all_tags(&mut *td, &span_context, None) {
            Ok(t) => t,
            Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
        }
    };

    // Group by lowercase name. Pick the most-frequent capitalization
    // for the display name (ties broken by first-seen). Filter out
    // short tags and tags carrying non-name characters (emojis, symbols)
    // before grouping — they're noise no operator would tick, so showing
    // them just makes the candidate list harder to scan.
    struct Group {
        display: String,   // capitalization shown in the UI
        display_freq: i64, // usage count of the winning capitalization
        total_count: i64,  // total usages across all capitalizations
    }
    let mut groups: HashMap<String, Group> = HashMap::new();
    for (count, tag) in tags_with_counts {
        if !is_plausible_name_token(&tag.name) {
            continue;
        }
        let lower = tag.name.to_lowercase();
        let g = groups.entry(lower).or_insert_with(|| Group {
            display: tag.name.clone(),
            display_freq: 0,
            total_count: 0,
        });
        g.total_count += count;
        // Strict '>' keeps the first-seen capitalization on ties.
        if count > g.display_freq {
            g.display = tag.name.clone();
            g.display_freq = count;
        }
    }

    // Cross-reference against existing persons (bulk one-query lookup).
    let lower_names: Vec<String> = groups.keys().cloned().collect();
    let existing = {
        let mut fd = face_dao.lock().expect("face dao lock");
        match fd.find_persons_by_names_ci(&span_context, &lower_names) {
            Ok(m) => m,
            Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
        }
    };

    let mut candidates: Vec<BootstrapCandidate> = groups
        .into_iter()
        .map(|(lower, g)| BootstrapCandidate {
            looks_like_person: looks_like_person(&g.display),
            already_exists: existing.contains_key(&lower),
            name: g.display,
            normalized_name: lower,
            usage_count: g.total_count,
        })
        .collect();
    // Sort: persons-first heuristic by descending count, then alphabetical.
    // Persons-likely candidates surface near the top so the user doesn't
    // scroll past dozens of "vacation"-style tags to find them.
    candidates.sort_by(|a, b| {
        b.looks_like_person
            .cmp(&a.looks_like_person)
            .then(b.usage_count.cmp(&a.usage_count))
            .then(a.normalized_name.cmp(&b.normalized_name))
    });

    HttpResponse::Ok().json(BootstrapCandidatesResponse { candidates })
}
|
||
|
||
/// Bootstrap step 2: create `persons` rows for the names the operator
/// confirmed.
///
/// Per-name failures land in `skipped` with a human-readable reason
/// rather than failing the whole batch; the response reports both the
/// `created` and `skipped` sets so the UI can show an exact summary.
async fn bootstrap_persons_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<BootstrapPersonsReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.bootstrap_persons", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

    let mut created: Vec<Person> = Vec::new();
    let mut skipped: Vec<BootstrapSkipped> = Vec::new();

    let mut dao = face_dao.lock().expect("face dao lock");

    // Pre-fetch the existing-name set so a duplicate request reports
    // "already exists" (skipped) rather than firing N inserts that all
    // 409 against the UNIQUE COLLATE NOCASE constraint.
    let lower_names: Vec<String> = body.names.iter().map(|n| n.to_lowercase()).collect();
    let existing = match dao.find_persons_by_names_ci(&span_context, &lower_names) {
        Ok(m) => m,
        Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
    };

    for name in &body.names {
        let trimmed = name.trim();
        if trimmed.is_empty() {
            skipped.push(BootstrapSkipped {
                name: name.clone(),
                reason: "empty name".into(),
            });
            continue;
        }
        // Case-insensitive duplicate check against the pre-fetched set.
        let lower = trimmed.to_lowercase();
        if existing.contains_key(&lower) {
            skipped.push(BootstrapSkipped {
                name: trimmed.to_string(),
                reason: "person already exists".into(),
            });
            continue;
        }
        match dao.create_person(
            &span_context,
            &CreatePersonReq {
                name: trimmed.to_string(),
                notes: None,
                entity_id: None,
                is_ignored: false,
            },
            /*from_tag*/ true,
        ) {
            Ok(p) => created.push(p),
            Err(e) => {
                if is_unique_violation(&e) {
                    // Race with a concurrent create; treat as skipped.
                    skipped.push(BootstrapSkipped {
                        name: trimmed.to_string(),
                        reason: "person already exists".into(),
                    });
                } else {
                    skipped.push(BootstrapSkipped {
                        name: trimmed.to_string(),
                        reason: format!("{:#}", e),
                    });
                }
            }
        }
    }

    HttpResponse::Ok().json(BootstrapPersonsResponse { created, skipped })
}
|
||
|
||
// ── Stats / list ────────────────────────────────────────────────────────────
|
||
|
||
/// Query string for handlers that take only an optional `?library=`
/// scope (stats, person-faces). Resolution to a concrete library —
/// including the fallback when the param is absent or unresolvable —
/// is handler-specific (unscoped for stats, primary library for the
/// per-photo endpoints).
#[derive(Deserialize)]
pub struct LibraryQuery {
    // Library id or name; `None` means "no explicit scope".
    pub library: Option<String>,
}
|
||
|
||
/// `GET /persons` query: optional library scope, optional include of
/// the IGNORE/junk bucket. The bucket is hidden by default so the
/// management UI shows only "real" persons; the persons-management
/// screen requests it explicitly when it needs to surface ignored.
#[derive(Deserialize)]
pub struct ListPersonsQuery {
    // Library id or name; `None` means list across all libraries.
    pub library: Option<String>,
    // serde `default` → false when the param is absent, so the ignore
    // bucket only appears when the client opts in explicitly.
    #[serde(default)]
    pub include_ignored: bool,
}
|
||
|
||
async fn stats_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
app_state: web::Data<AppState>,
|
||
query: web::Query<LibraryQuery>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("faces.stats", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
|
||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
||
.ok()
|
||
.flatten()
|
||
.map(|l| l.id);
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
dao.stats(&span_context, library_id)
|
||
.map(|s| {
|
||
span_context.span().set_status(Status::Ok);
|
||
HttpResponse::Ok().json(s)
|
||
})
|
||
.into_http_internal_err()
|
||
}
|
||
|
||
async fn list_faces_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
query: web::Query<ThumbnailRequest>,
|
||
app_state: web::Data<AppState>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("faces.list", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
|
||
let normalized_path = normalize_path(&query.path);
|
||
// resolve_library_param returns Option<&Library>; clone so the result
|
||
// is owned (matching the primary_library fallback's type).
|
||
let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
||
.ok()
|
||
.flatten()
|
||
.cloned()
|
||
.unwrap_or_else(|| app_state.primary_library().clone());
|
||
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
|
||
Ok(Some(h)) => h,
|
||
Ok(None) => {
|
||
// Photo not yet hashed — empty face list is a graceful answer.
|
||
// The carousel falls back to "no overlay" which is fine until
|
||
// the watcher catches up.
|
||
return HttpResponse::Ok().json(Vec::<FaceWithPerson>::new());
|
||
}
|
||
Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
|
||
};
|
||
match dao.list_for_content_hash(&span_context, &hash) {
|
||
Ok(faces) => HttpResponse::Ok().json(faces),
|
||
Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
|
||
}
|
||
}
|
||
|
||
async fn embeddings_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
query: web::Query<EmbeddingsQuery>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("faces.embeddings", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
|
||
let limit = query.limit.clamp(1, 5_000);
|
||
let offset = query.offset.max(0);
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
dao.list_embeddings(
|
||
&span_context,
|
||
query.library,
|
||
query.unassigned,
|
||
limit,
|
||
offset,
|
||
)
|
||
.map(|rows| {
|
||
let out: Vec<FaceEmbeddingRow> = rows
|
||
.into_iter()
|
||
.map(|(r, b64)| FaceEmbeddingRow {
|
||
id: r.id,
|
||
library_id: r.library_id,
|
||
rel_path: r.rel_path,
|
||
content_hash: r.content_hash,
|
||
person_id: r.person_id,
|
||
model_version: r.model_version,
|
||
embedding: b64,
|
||
bbox_x: r.bbox_x,
|
||
bbox_y: r.bbox_y,
|
||
bbox_w: r.bbox_w,
|
||
bbox_h: r.bbox_h,
|
||
})
|
||
.collect();
|
||
HttpResponse::Ok().json(out)
|
||
})
|
||
.into_http_internal_err()
|
||
}
|
||
|
||
// ── Manual face create / update / delete ────────────────────────────────────
|
||
|
||
/// `POST` a manually-drawn face box.
///
/// Two paths:
/// - strict (default): crop the drawn bbox out of the photo, send the
///   crop to Apollo for a real embedding, and store that.
/// - `force: true`: skip detection entirely and store a zero-vector
///   embedding under the `manual_no_embed` sentinel model_version —
///   for faces the detector can't see (back of head, occlusion).
///
/// Responses: 201 with the hydrated row; 409 when the photo hasn't
/// been content-hashed yet; 400 on an uncroppable bbox; 422/502/503
/// for Apollo-side failures.
async fn create_face_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<CreateFaceReq>,
    app_state: web::Data<AppState>,
    face_client: web::Data<FaceClient>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.create_manual", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

    // The force path doesn't need Apollo at all (no embed call); the
    // strict path does. Surface the disabled state only when we'd
    // actually use the client.
    if !body.force && !face_client.is_enabled() {
        return HttpResponse::ServiceUnavailable().body("face client disabled");
    }

    let normalized_path = normalize_path(&body.path);
    let library: Library = match libraries::resolve_library_param(
        &app_state,
        body.library.as_ref().map(|i| i.to_string()).as_deref(),
    ) {
        Ok(Some(lib)) => lib.clone(),
        // Unknown / absent library → primary library fallback.
        _ => app_state.primary_library().clone(),
    };

    // 1. Resolve content_hash for the photo.
    let hash = {
        let mut dao = face_dao.lock().expect("face dao lock");
        match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
            Ok(Some(h)) => h,
            Ok(None) => {
                return HttpResponse::Conflict()
                    .body("photo not yet hashed; wait for next watcher pass");
            }
            Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
        }
    };

    // 2 + 3. Crop + embed via Apollo (strict path), or skip both (force).
    //
    // Force is the "tag a face the detector can't see" path — back of
    // head, heavily-occluded profile, etc. We store a zero-vector
    // embedding under a sentinel model_version so the row participates
    // only as a browse-by-person tag: clustering filters norm<=0 (see
    // face_clustering._decode_b64_embedding) and auto-bind cosine
    // resolves to 0 / NaN, never crossing the threshold. Cluster
    // suggester also groups by model_version so this sentinel never
    // mixes with real buffalo_l rows.
    let (embedding_bytes, model_version, confidence) = if body.force {
        info!(
            "manual face (force): skipping detection for {:?} bbox=({},{},{},{})",
            normalized_path, body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h
        );
        // 2048 bytes = the zero-vector sentinel embedding payload.
        (vec![0u8; 2048], "manual_no_embed".to_string(), 0.0_f32)
    } else {
        let abs_path = library.resolve(&normalized_path);
        let crop_bytes = match crop_image_to_bbox(
            &abs_path,
            body.bbox.x,
            body.bbox.y,
            body.bbox.w,
            body.bbox.h,
        ) {
            Ok(b) => b,
            Err(e) => {
                warn!("crop_image_to_bbox failed for {:?}: {:?}", abs_path, e);
                return HttpResponse::BadRequest().body(format!("cannot crop photo: {}", e));
            }
        };

        let meta = DetectMeta {
            content_hash: hash.clone(),
            library_id: library.id,
            rel_path: normalized_path.clone(),
            orientation: None,
            model_version: None,
        };
        // Permanent Apollo failures → 422 (not retryable); transient /
        // disabled → 503 so the operator can retry later.
        let detect = match face_client.embed(crop_bytes, meta).await {
            Ok(r) => r,
            Err(FaceDetectError::Permanent(e)) => {
                return HttpResponse::UnprocessableEntity().body(format!("{}", e));
            }
            Err(FaceDetectError::Transient(e)) => {
                return HttpResponse::ServiceUnavailable().body(format!("{}", e));
            }
            Err(FaceDetectError::Disabled) => {
                return HttpResponse::ServiceUnavailable().body("face client disabled");
            }
        };

        let detected = match detect.faces.first() {
            Some(f) => f.clone(),
            None => {
                // Apollo would have returned 422 on no_face_in_crop; defensive.
                return HttpResponse::UnprocessableEntity().body("no face in crop");
            }
        };
        let bytes = match detected.decode_embedding() {
            Ok(b) => b,
            Err(e) => {
                warn!("manual face: decode embedding failed: {:?}", e);
                return HttpResponse::BadGateway().body("invalid embedding from face service");
            }
        };
        (bytes, detect.model_version, detected.confidence)
    };

    // 4. Insert the manual row using the bbox the user drew (NOT the
    // detector's tighter box around their drawing — they get what they
    // asked for; cluster matching uses the embedding which is from the
    // detector's true box anyway).
    let mut dao = face_dao.lock().expect("face dao lock");
    let row = match dao.store_detection(
        &span_context,
        InsertFaceDetectionInput {
            library_id: library.id,
            content_hash: hash,
            rel_path: normalized_path,
            bbox: Some((body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h)),
            embedding: Some(embedding_bytes),
            confidence: Some(confidence),
            source: "manual".to_string(),
            person_id: body.person_id,
            status: "detected".to_string(),
            model_version,
        },
    ) {
        Ok(r) => r,
        Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
    };
    info!(
        "Created manual face id={} library={} hash={} person_id={:?}",
        row.id, row.library_id, row.content_hash, row.person_id
    );
    // Hydrate person_name so the 201 body matches the GET shape.
    match hydrate_face_with_person(&mut *dao, &span_context, row) {
        Ok(joined) => HttpResponse::Created().json(joined),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
|
||
|
||
/// `PATCH` a face row: person assignment (`person_id` / `clear_person`)
/// and/or bbox.
///
/// A bbox change triggers a best-effort re-embed through Apollo (the
/// embedding is what auto-bind and the cluster suggester key on). If
/// the new crop yields no face or a bad embedding, the old embedding
/// is kept and the bbox is applied anyway; transient Apollo failures
/// return 503 so the operator can retry.
async fn update_face_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    body: web::Json<UpdateFaceReq>,
    app_state: web::Data<AppState>,
    face_client: web::Data<FaceClient>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.update", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    let id = path.into_inner();

    // Tri-state patch: Some(None) clears the person, Some(Some(id))
    // assigns, None leaves the assignment untouched.
    let person_patch: Option<Option<i32>> = if body.clear_person {
        Some(None)
    } else {
        body.person_id.map(Some)
    };
    let bbox_patch = body.bbox.as_ref().map(|b| (b.x, b.y, b.w, b.h));

    // Bbox change → re-embed. The embedding is what auto-bind and the
    // cluster suggester key on, so leaving it stale would silently
    // corrupt every downstream similarity match. We crop the new bbox,
    // pass it through face_client.embed, and store the fresh vector.
    // Net cost: one Apollo round-trip per bbox edit (~100-500ms on
    // CPU); acceptable for a manual operator action.
    let mut new_embedding: Option<Vec<u8>> = None;
    if let Some((bx, by, bw, bh)) = bbox_patch {
        if !face_client.is_enabled() {
            warn!(
                "PATCH /image/faces/{}: 503 — face client not enabled \
                 (APOLLO_FACE_API_BASE_URL / APOLLO_API_BASE_URL both unset). \
                 Bbox edit requires Apollo to re-embed.",
                id
            );
            return HttpResponse::ServiceUnavailable()
                .body("face client disabled — bbox edit requires Apollo");
        }
        // Look up the current row so we know which photo to crop.
        let current = {
            let mut dao = face_dao.lock().expect("face dao lock");
            match dao.get_face(&span_context, id) {
                Ok(Some(r)) => r,
                Ok(None) => return HttpResponse::NotFound().finish(),
                Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
            }
        };
        let library = match app_state.library_by_id(current.library_id) {
            Some(l) => l.clone(),
            None => {
                return HttpResponse::InternalServerError().body(format!(
                    "face row references unknown library_id {}",
                    current.library_id
                ));
            }
        };
        let abs_path = library.resolve(&current.rel_path);
        let crop_bytes = match crop_image_to_bbox(&abs_path, bx, by, bw, bh) {
            Ok(b) => b,
            Err(e) => {
                warn!(
                    "PATCH /image/faces/{}: crop failed for {:?}: {:?}",
                    id, abs_path, e
                );
                return HttpResponse::BadRequest().body(format!("cannot crop new bbox: {}", e));
            }
        };
        let meta = DetectMeta {
            content_hash: current.content_hash.clone(),
            library_id: current.library_id,
            rel_path: current.rel_path.clone(),
            orientation: None,
            model_version: Some(current.model_version.clone()),
        };
        // Soft contract on the re-embed: we'd LIKE a fresh ArcFace
        // vector for the new crop, but the operator's bbox edit is
        // sacred. If detection finds no face in the new region (they
        // dragged the box slightly off-center, or moved it to a back-
        // of-head shot they've already manually tagged), or returns a
        // bad embedding, we keep the old embedding and apply the bbox
        // anyway. Cost: stale embedding for that row, which slightly
        // pollutes clustering for files re-detected against this
        // person — accepted because dropping the user's drag is a
        // worse UX. Transient failures (cuda_oom, engine unavailable)
        // still 503 so the operator can retry once Apollo recovers.
        match face_client.embed(crop_bytes, meta).await {
            Ok(resp) => {
                if let Some(face) = resp.faces.first() {
                    match face.decode_embedding() {
                        Ok(b) => new_embedding = Some(b),
                        Err(e) => {
                            warn!(
                                "PATCH /image/faces/{}: bad embedding from face service ({:?}); keeping old embedding, bbox still applied",
                                id, e
                            );
                        }
                    }
                } else {
                    info!(
                        "PATCH /image/faces/{}: no face detected in new bbox — keeping old embedding, bbox still applied",
                        id
                    );
                }
            }
            Err(FaceDetectError::Permanent(e)) => {
                info!(
                    "PATCH /image/faces/{}: embed permanent error ({}); keeping old embedding, bbox still applied",
                    id, e
                );
            }
            Err(FaceDetectError::Transient(e)) => {
                warn!(
                    "PATCH /image/faces/{}: 503 — Apollo face client transient \
                     error during re-embed: {}",
                    id, e
                );
                return HttpResponse::ServiceUnavailable().body(format!("{}", e));
            }
            Err(FaceDetectError::Disabled) => {
                warn!(
                    "PATCH /image/faces/{}: 503 — face client became disabled \
                     mid-flight",
                    id
                );
                return HttpResponse::ServiceUnavailable().body("face client disabled mid-flight");
            }
        }
    }

    let mut dao = face_dao.lock().expect("face dao lock");
    let row = match dao.update_face(&span_context, id, person_patch, bbox_patch, new_embedding) {
        Ok(r) => r,
        Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
    };
    // Hydrate person_name so the response shape matches GET /image/faces
    // — the carousel overlay does an optimistic replace on this row, and
    // a bare FaceDetectionRow with no person_name would visibly drop the
    // VFD label off the bbox even though the assignment didn't change.
    match hydrate_face_with_person(&mut *dao, &span_context, row) {
        Ok(joined) => HttpResponse::Ok().json(joined),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
|
||
|
||
async fn delete_face_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
path: web::Path<i32>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("faces.delete", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
match dao.delete_face(&span_context, path.into_inner()) {
|
||
Ok(true) => HttpResponse::NoContent().finish(),
|
||
Ok(false) => HttpResponse::NotFound().finish(),
|
||
Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
|
||
}
|
||
}
|
||
|
||
// ── Persons ─────────────────────────────────────────────────────────────────
|
||
|
||
async fn list_persons_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
app_state: web::Data<AppState>,
|
||
query: web::Query<ListPersonsQuery>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("persons.list", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
|
||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
||
.ok()
|
||
.flatten()
|
||
.map(|l| l.id);
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
dao.list_persons(&span_context, library_id, query.include_ignored)
|
||
.map(|p| HttpResponse::Ok().json(p))
|
||
.into_http_internal_err()
|
||
}
|
||
|
||
async fn ignore_bucket_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("persons.ignore_bucket", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
dao.get_or_create_ignored_person(&span_context)
|
||
.map(|p| HttpResponse::Ok().json(p))
|
||
.into_http_internal_err()
|
||
}
|
||
|
||
async fn create_person_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
body: web::Json<CreatePersonReq>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("persons.create", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
if body.name.trim().is_empty() {
|
||
return HttpResponse::BadRequest().body("name required");
|
||
}
|
||
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
match dao.create_person(&span_context, &body, /*from_tag*/ false) {
|
||
Ok(p) => HttpResponse::Created().json(p),
|
||
Err(e) => {
|
||
// SQLite UNIQUE(name COLLATE NOCASE) → 409 Conflict so the UI
|
||
// can show "name already exists" without parsing. Use {:#} to
|
||
// include the source chain — anyhow's plain Display only shows
|
||
// the outermost context ("insert person ...") which hides the
|
||
// diesel "UNIQUE constraint failed" we're keying on.
|
||
if is_unique_violation(&e) {
|
||
HttpResponse::Conflict().body("person name already exists")
|
||
} else {
|
||
HttpResponse::InternalServerError().body(format!("{:#}", e))
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
async fn get_person_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
path: web::Path<i32>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("persons.get", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
match dao.get_person(&span_context, path.into_inner()) {
|
||
Ok(Some(p)) => HttpResponse::Ok().json(p),
|
||
Ok(None) => HttpResponse::NotFound().finish(),
|
||
Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
|
||
}
|
||
}
|
||
|
||
async fn update_person_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
path: web::Path<i32>,
|
||
body: web::Json<UpdatePersonReq>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("persons.update", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
match dao.update_person(&span_context, path.into_inner(), &body) {
|
||
Ok(p) => HttpResponse::Ok().json(p),
|
||
Err(e) => {
|
||
if is_unique_violation(&e) {
|
||
HttpResponse::Conflict().body("person name already exists")
|
||
} else {
|
||
HttpResponse::InternalServerError().body(format!("{:#}", e))
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
async fn delete_person_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
path: web::Path<i32>,
|
||
query: web::Query<DeletePersonQuery>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("persons.delete", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
// Default cascade=set_null — don't destroy face history just because
|
||
// the user renamed/removed the identity.
|
||
let cascade = matches!(query.cascade.as_deref(), Some("delete"));
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
match dao.delete_person(&span_context, path.into_inner(), cascade) {
|
||
Ok(true) => HttpResponse::NoContent().finish(),
|
||
Ok(false) => HttpResponse::NotFound().finish(),
|
||
Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
|
||
}
|
||
}
|
||
|
||
async fn merge_persons_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
path: web::Path<i32>,
|
||
body: web::Json<MergePersonsReq>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("persons.merge", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
let src = path.into_inner();
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
match dao.merge_persons(&span_context, src, body.into) {
|
||
Ok(p) => HttpResponse::Ok().json(p),
|
||
Err(e) => {
|
||
let msg = format!("{:#}", e);
|
||
if msg.contains("itself") {
|
||
HttpResponse::BadRequest().body(msg)
|
||
} else {
|
||
HttpResponse::InternalServerError().body(msg)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
async fn person_faces_handler<D: FaceDao>(
|
||
_: Claims,
|
||
request: HttpRequest,
|
||
path: web::Path<i32>,
|
||
app_state: web::Data<AppState>,
|
||
query: web::Query<LibraryQuery>,
|
||
face_dao: web::Data<Mutex<D>>,
|
||
) -> impl Responder {
|
||
let context = extract_context_from_request(&request);
|
||
let span = global_tracer().start_with_context("persons.faces", &context);
|
||
let span_context = opentelemetry::Context::current_with_span(span);
|
||
let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
|
||
.ok()
|
||
.flatten()
|
||
.map(|l| l.id);
|
||
let mut dao = face_dao.lock().expect("face dao lock");
|
||
dao.list_for_person(&span_context, path.into_inner(), library_id)
|
||
.map(|faces| HttpResponse::Ok().json(faces))
|
||
.into_http_internal_err()
|
||
}
|
||
|
||
// ── Helpers ─────────────────────────────────────────────────────────────────
|
||
|
||
/// Crop `abs_path` to the normalized bbox and re-encode as JPEG for the
/// face service. `image::open` decodes most photo formats Apollo will see;
/// HEIC/RAW are out of scope for the manual flow (the user can't draw a
/// face on a thumbnail of a non-decodable file anyway).
///
/// `nx`/`ny`/`nw`/`nh` are the bbox in normalized [0,1] display-space
/// coordinates (i.e. post-EXIF-rotation, as the browser renders them).
///
/// # Errors
/// - bbox origin outside [0,1], or width/height non-positive /
///   extending past the image (with a 0.001 float-slack allowance)
/// - the file cannot be opened/decoded by `image::open`
/// - the crop resolves to zero pixels after rounding/clamping
/// - JPEG re-encoding fails
fn crop_image_to_bbox(
    abs_path: &std::path::Path,
    nx: f32,
    ny: f32,
    nw: f32,
    nh: f32,
) -> anyhow::Result<Vec<u8>> {
    if !(0.0..=1.0).contains(&nx) || !(0.0..=1.0).contains(&ny) {
        return Err(anyhow!("bbox xy out of [0,1]"));
    }
    // 1.001 rather than 1.0: tolerate float rounding from the client.
    if nw <= 0.0 || nh <= 0.0 || nx + nw > 1.001 || ny + nh > 1.001 {
        return Err(anyhow!("bbox wh out of bounds or zero"));
    }
    let raw = image::open(abs_path).with_context(|| format!("open {:?}", abs_path))?;
    // EXIF rotation: the bbox arrives in display space (the carousel /
    // overlay are rendered post-rotation by the browser), but the
    // `image` crate hands us raw pre-rotation pixels. For any phone
    // photo with Orientation 6/8/etc., applying the bbox without
    // rotating first lands the crop on a completely different region
    // of the image — which is why manually-drawn bboxes basically
    // never resolved a face on re-detection. Apply the orientation
    // first, then index into the canonical-oriented dims. Photos with
    // no EXIF rotation tag pay nothing (apply_orientation is a no-op).
    let orientation = exif::read_orientation(abs_path).unwrap_or(1);
    let img = exif::apply_orientation(raw, orientation);
    let (w, h) = img.dimensions();
    // Round to pixel coords, clamping the origin inside the image and
    // the extent to whatever remains to the right/bottom of it.
    let px = (nx * w as f32).round().clamp(0.0, w as f32 - 1.0) as u32;
    let py = (ny * h as f32).round().clamp(0.0, h as f32 - 1.0) as u32;
    let pw = ((nw * w as f32).round() as u32).min(w.saturating_sub(px));
    let ph = ((nh * h as f32).round() as u32).min(h.saturating_sub(py));
    if pw == 0 || ph == 0 {
        return Err(anyhow!("crop produced zero-dim image"));
    }
    // Generous padding so RetinaFace has anchor-friendly context.
    // Insightface internally resizes to det_size=640 (square). A
    // tightly-drawn 200×250 face bbox + 10 % padding becomes ~240×300,
    // which after resize fills ~95 % of the input — near the upper
    // edge of RetinaFace's anchor scales, where it routinely returns
    // zero detections. Padding to 50 % on each side makes the crop
    // 2× the bbox dims (face occupies ~50 % of the input), where
    // anchors hit cleanly. Bbox is clamped to image bounds, so
    // edge-of-image bboxes just get less padding on the clipped side.
    let pad_x = (pw / 2).max(1);
    let pad_y = (ph / 2).max(1);
    let cx = px.saturating_sub(pad_x);
    let cy = py.saturating_sub(pad_y);
    let cw = (pw + 2 * pad_x).min(w - cx);
    let ch = (ph + 2 * pad_y).min(h - cy);
    let cropped = img.crop_imm(cx, cy, cw, ch);
    let mut out = std::io::Cursor::new(Vec::new());
    cropped
        .write_to(&mut out, image::ImageFormat::Jpeg)
        .with_context(|| "encode crop as JPEG")?;
    Ok(out.into_inner())
}
|
||
|
||
/// Returns true if `err` (or anything in its source chain) is a SQLite
|
||
/// `UNIQUE constraint failed`. Walks the chain so callers don't have to
|
||
/// know the wrapping order — anyhow `with_context` plus diesel's own
|
||
/// error layering buries the database error two levels deep.
|
||
///
|
||
/// String matching on `format!("{:#}", e)` would also work but is
|
||
/// fragile (locale-dependent SQLite messages, false positives like
|
||
/// "uniquely identifies"). Downcasting to the actual diesel kind is
|
||
/// the contract-stable check.
|
||
fn is_unique_violation(err: &anyhow::Error) -> bool {
|
||
use diesel::result::{DatabaseErrorKind, Error as DieselError};
|
||
err.chain().any(|cause| {
|
||
cause.downcast_ref::<DieselError>().is_some_and(|de| {
|
||
matches!(
|
||
de,
|
||
DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, _)
|
||
)
|
||
})
|
||
})
|
||
}
|
||
|
||
// ── Tests ───────────────────────────────────────────────────────────────────
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
use crate::database::test::in_memory_db_connection;
|
||
|
||
fn fresh_dao() -> SqliteFaceDao {
|
||
SqliteFaceDao::from_connection(Arc::new(Mutex::new(in_memory_db_connection())))
|
||
}
|
||
|
||
/// Current-thread OpenTelemetry context, passed into every DAO call;
/// the tests never assert on tracing output, it just satisfies the
/// DAO's signature.
fn ctx() -> opentelemetry::Context {
    opentelemetry::Context::current()
}
|
||
|
||
#[test]
fn is_unique_violation_walks_chain() {
    // The bug we hit in manual testing: anyhow's plain Display shows
    // only the outermost context ("insert person Cameron"), so a
    // naive `format!("{}", e).contains("unique")` never sees the
    // diesel UNIQUE error nested below. Downcasting the source chain
    // is the stable contract.
    fn cameron() -> CreatePersonReq {
        CreatePersonReq {
            name: "Cameron".into(),
            notes: None,
            entity_id: None,
            is_ignored: false,
        }
    }

    let mut dao = fresh_dao();
    dao.create_person(&ctx(), &cameron(), false)
        .expect("first insert");
    let dup_err = dao
        .create_person(&ctx(), &cameron(), false)
        .expect_err("second insert must fail");

    // Plain Display hides the UNIQUE — exactly the trap we guard
    // against. No specific outer message is asserted; we only confirm
    // that top-level string-matching is unreliable.
    let plain = format!("{}", dup_err);
    assert!(
        !plain.to_lowercase().contains("unique"),
        "if Display starts surfacing UNIQUE we can drop the helper, but \
         today it doesn't and the handler must downcast"
    );

    // Alternate Display walks the chain — also handy for debug bodies.
    let chained = format!("{:#}", dup_err);
    assert!(
        chained.to_uppercase().contains("UNIQUE"),
        "chained display must surface the diesel error: {chained}"
    );

    // The contract-stable check the handler actually uses.
    assert!(
        is_unique_violation(&dup_err),
        "is_unique_violation must downcast into the diesel chain"
    );
}
|
||
|
||
// ── Phase 4: bootstrap heuristic + cosine + DAO support ─────────────
|
||
|
||
#[test]
fn is_plausible_name_token_filters_short_and_emoji() {
    // Hard pre-filter applied before grouping — emoji-bearing tags and
    // tags shorter than 3 chars never reach the candidate list, no
    // matter what looks_like_person would later decide.
    let accepted = [
        "Cameron",
        "Sarah Smith",
        "O'Brien",
        "Jean-Luc",
        "St. James",
        "Renée",
        "José",
        // Any script counts — the letter check is not Latin-only.
        "田中太郎",
        // Digits survive this filter; looks_like_person handles them.
        "Trip 2018",
        "2024",
    ];
    for tag in accepted {
        assert!(is_plausible_name_token(tag), "should accept {tag:?}");
    }

    let rejected = [
        // Below the 3-character floor (input is trimmed before counting,
        // so surrounding whitespace doesn't help).
        "",
        " ",
        "Bo",
        "AB",
        " AB ",
        // Emoji / symbol character classes drop the whole tag.
        "🐱cat",
        "Heart ❤",
        "📸Photo",
        "→ Trip",
        "★Vacation",
    ];
    for tag in rejected {
        assert!(!is_plausible_name_token(tag), "should reject {tag:?}");
    }
}
|
||
|
||
#[test]
fn looks_like_person_accepts_typical_names() {
    // Title-cased one- and two-word names pass, including a non-ASCII
    // single word.
    for name in ["Cameron", "Sarah Smith", "Mary Jane", "Renée"] {
        assert!(looks_like_person(name), "should accept {name:?}");
    }
}
|
||
|
||
#[test]
fn looks_like_person_rejects_obvious_non_people() {
    // Digits, lowercase, three-or-more words, denylist hits, blanks.
    let non_people = [
        "2018",
        "Trip 2018",
        "trip",
        "Birthday Party Cake",
        "cat",
        "Cat", // denied even when title-cased
        "Christmas",
        "home",
        "",
        " ",
    ];
    for name in non_people {
        assert!(!looks_like_person(name), "should reject {name:?}");
    }
}
|
||
|
||
#[test]
fn looks_like_person_two_words_skips_denylist() {
    // The single-word denylist is skipped for two-word names.
    // "Sunset Walk" is far more likely a real album than a person,
    // but false-accepting is fine because the operator confirms every
    // candidate. What must never happen is a false-reject of a real
    // name like "Sarah Smith".
    assert!(looks_like_person("Sunset Walk"));
    assert!(looks_like_person("Sarah Smith"));
}
|
||
|
||
#[test]
fn cosine_similarity_known_vectors() {
    let unit_x = vec![1.0, 0.0, 0.0];
    let unit_y = vec![0.0, 1.0, 0.0];
    let neg_x = vec![-1.0, 0.0, 0.0];
    // Identical vectors → 1.0; orthogonal → 0.0; opposite → -1.0.
    assert!((cosine_similarity(&unit_x, &unit_x) - 1.0).abs() < 1e-6);
    assert!(cosine_similarity(&unit_x, &unit_y).abs() < 1e-6);
    assert!((cosine_similarity(&unit_x, &neg_x) + 1.0).abs() < 1e-6);
    // Defensive paths must return 0.0, never NaN:
    // mismatched lengths…
    assert_eq!(cosine_similarity(&unit_x, &[1.0, 0.0]), 0.0);
    // …empty input…
    assert_eq!(cosine_similarity(&[], &[]), 0.0);
    // …and a zero vector (denominator guard).
    assert_eq!(cosine_similarity(&unit_x, &[0.0, 0.0, 0.0]), 0.0);
}
|
||
|
||
#[test]
fn decode_embedding_bytes_round_trip() {
    // The decoder accepts exactly 512×f32 little-endian = 2048 bytes;
    // any other length is None, never a truncated decode.
    let original: Vec<f32> = (0..512).map(|i| i as f32 * 0.001).collect();
    let bytes: Vec<u8> = original.iter().flat_map(|f| f.to_le_bytes()).collect();
    let decoded = decode_embedding_bytes(&bytes).expect("decode");
    assert_eq!(decoded.len(), 512);
    for (before, after) in original.iter().zip(decoded.iter()) {
        assert!((before - after).abs() < 1e-9);
    }
    // Too short and too long are both rejected.
    assert_eq!(decode_embedding_bytes(&[0u8; 100]), None);
    assert_eq!(decode_embedding_bytes(&[0u8; 4096]), None);
}
|
||
|
||
#[test]
fn find_persons_by_names_ci_groups_case() {
    let mut dao = fresh_dao();
    for name in ["Alice", "Bob"] {
        dao.create_person(
            &ctx(),
            &CreatePersonReq {
                name: name.into(),
                notes: None,
                entity_id: None,
                is_ignored: false,
            },
            false,
        )
        .unwrap();
    }

    // Mixed-case queries, plus a name with no person row at all.
    let m = dao
        .find_persons_by_names_ci(&ctx(), &["alice".into(), "BOB".into(), "charlie".into()])
        .expect("lookup");
    assert!(m.contains_key("alice"));
    assert!(m.contains_key("bob"));
    assert!(!m.contains_key("charlie"));

    // Empty input short-circuits — never fire SQL with zero binds.
    assert!(
        dao.find_persons_by_names_ci(&ctx(), &[])
            .unwrap()
            .is_empty()
    );
}
|
||
|
||
#[test]
fn person_reference_embedding_filters_by_model_version() {
    // A person with buffalo_l embeddings must not have its reference
    // contaminated by a future buffalo_xl row. The auto-bind path
    // passes the candidate's model_version, so foreign-model rows
    // never reach the average.

    // Encode a 512-d unit vector along `axis` as little-endian f32s.
    fn unit_axis_bytes(axis: usize) -> Vec<u8> {
        let mut v = vec![0.0f32; 512];
        v[axis] = 1.0;
        v.iter().flat_map(|f| f.to_le_bytes()).collect()
    }

    let mut dao = fresh_dao();
    diesel::sql_query(
        "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
         VALUES (1, 'main', '/tmp', 0)",
    )
    .execute(dao.connection.lock().unwrap().deref_mut())
    .expect("seed libraries");
    let person = dao
        .create_person(
            &ctx(),
            &CreatePersonReq {
                name: "Subject".into(),
                notes: None,
                entity_id: None,
                is_ignored: false,
            },
            false,
        )
        .unwrap();

    // Axis-0 unit vector under buffalo_l, axis-1 under buffalo_xl.
    for (axis, mv) in [(0, "buffalo_l"), (1, "buffalo_xl")] {
        dao.store_detection(
            &ctx(),
            InsertFaceDetectionInput {
                library_id: 1,
                content_hash: format!("h-{mv}"),
                rel_path: format!("p-{mv}.jpg"),
                bbox: Some((0.1, 0.1, 0.2, 0.2)),
                embedding: Some(unit_axis_bytes(axis)),
                confidence: Some(0.9),
                source: "auto".into(),
                person_id: Some(person.id),
                status: "detected".into(),
                model_version: mv.into(),
            },
        )
        .unwrap();
    }

    // Only the buffalo_l row (axis-0 unit) may contribute.
    let ref_l = dao
        .person_reference_embedding(&ctx(), person.id, "buffalo_l")
        .unwrap()
        .expect("buffalo_l ref");
    assert!((ref_l[0] - 1.0).abs() < 1e-5, "axis 0 should be ~1.0");
    assert!(ref_l[1].abs() < 1e-5, "axis 1 should be ~0.0");

    // Unknown model_version → None, never a cross-version average.
    assert!(
        dao.person_reference_embedding(&ctx(), person.id, "buffalo_xxxl")
            .unwrap()
            .is_none()
    );
}
|
||
|
||
#[test]
fn assign_face_to_person_sets_cover_when_unset() {
    let mut dao = fresh_dao();
    diesel::sql_query(
        "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
         VALUES (1, 'main', '/tmp', 0)",
    )
    .execute(dao.connection.lock().unwrap().deref_mut())
    .expect("seed libraries");
    let person = dao
        .create_person(
            &ctx(),
            &CreatePersonReq {
                name: "Cover".into(),
                notes: None,
                entity_id: None,
                is_ignored: false,
            },
            false,
        )
        .unwrap();
    assert!(person.cover_face_id.is_none());

    // Two detections inserted unbound, identical except for hash/path.
    let mut face_ids = Vec::new();
    for n in 1..=2 {
        let row = dao
            .store_detection(
                &ctx(),
                InsertFaceDetectionInput {
                    library_id: 1,
                    content_hash: format!("h{n}"),
                    rel_path: format!("p{n}.jpg"),
                    bbox: Some((0.1, 0.1, 0.2, 0.2)),
                    embedding: Some(vec![0u8; 2048]),
                    confidence: Some(0.9),
                    source: "auto".into(),
                    person_id: None,
                    status: "detected".into(),
                    model_version: "buffalo_l".into(),
                },
            )
            .unwrap();
        face_ids.push(row.id);
    }

    // The first assignment claims the cover slot.
    dao.assign_face_to_person(&ctx(), face_ids[0], person.id)
        .unwrap();
    let after_first = dao.get_person(&ctx(), person.id).unwrap().unwrap();
    assert_eq!(after_first.cover_face_id, Some(face_ids[0]));

    // The second assignment must NOT overwrite it — the operator may
    // have hand-picked the cover after the first auto-bind.
    dao.assign_face_to_person(&ctx(), face_ids[1], person.id)
        .unwrap();
    let after_second = dao.get_person(&ctx(), person.id).unwrap().unwrap();
    assert_eq!(
        after_second.cover_face_id,
        Some(face_ids[0]),
        "cover must remain face1 after second auto-bind"
    );
}
|
||
|
||
#[test]
fn person_crud_roundtrip() {
    let mut dao = fresh_dao();
    let created = dao
        .create_person(
            &ctx(),
            &CreatePersonReq {
                name: "Alice".into(),
                notes: Some("the boss".into()),
                entity_id: None,
                is_ignored: false,
            },
            false,
        )
        .expect("create person");
    assert_eq!(created.name, "Alice");
    assert_eq!(created.notes.as_deref(), Some("the boss"));
    assert!(!created.created_from_tag);

    // Uniqueness is case-insensitive — "alice" must hit the UNIQUE
    // index, which the handler layer surfaces as 409 Conflict.
    let dup = dao.create_person(
        &ctx(),
        &CreatePersonReq {
            name: "alice".into(),
            notes: None,
            entity_id: None,
            is_ignored: false,
        },
        false,
    );
    assert!(dup.is_err(), "case-insensitive UNIQUE must reject 'alice'");

    // Update the notes; updated_at must not move backwards. Sleep
    // across a one-second boundary so second-granularity timestamps
    // have a chance to differ.
    let before_update = created.updated_at;
    std::thread::sleep(std::time::Duration::from_millis(1100));
    let updated = dao
        .update_person(
            &ctx(),
            created.id,
            &UpdatePersonReq {
                name: None,
                notes: Some("a new note".into()),
                cover_face_id: None,
                entity_id: None,
                is_ignored: None,
            },
        )
        .expect("update");
    assert_eq!(updated.notes.as_deref(), Some("a new note"));
    assert!(updated.updated_at >= before_update);

    // List shows exactly one person with no faces; delete empties it.
    let listed = dao.list_persons(&ctx(), None, false).expect("list");
    assert_eq!(listed.len(), 1);
    assert_eq!(listed[0].face_count, 0);
    assert!(dao.delete_person(&ctx(), created.id, false).expect("delete"));
    assert!(
        dao.list_persons(&ctx(), None, false)
            .expect("list")
            .is_empty()
    );
}
|
||
|
||
#[test]
fn ignore_bucket_idempotent_and_filters_auto_bind() {
    // The first call creates the bucket; subsequent calls return the
    // same row. Once it exists, find_persons_by_names_ci must skip it
    // even when the search term matches its name — the auto-bind path
    // must NEVER target the IGNORE/junk bucket.
    let mut dao = fresh_dao();
    let bucket = dao
        .get_or_create_ignored_person(&ctx())
        .expect("create bucket");
    assert!(bucket.is_ignored);
    let again = dao
        .get_or_create_ignored_person(&ctx())
        .expect("re-fetch bucket");
    assert_eq!(bucket.id, again.id, "bucket must be idempotent");

    // The name lookup filters is_ignored=true, so the bucket stays
    // invisible to auto-bind even by its own name.
    let m = dao
        .find_persons_by_names_ci(&ctx(), &["ignored".into()])
        .expect("name lookup");
    assert!(
        !m.contains_key("ignored"),
        "find_persons_by_names_ci must skip the ignore bucket: {m:?}"
    );

    // Hidden in the default listing; include_ignored=true surfaces it.
    let visible = dao.list_persons(&ctx(), None, false).expect("list");
    assert!(visible.iter().all(|p| !p.is_ignored));
    let all = dao.list_persons(&ctx(), None, true).expect("list all");
    assert!(all.iter().any(|p| p.is_ignored && p.id == bucket.id));
}
|
||
|
||
#[test]
fn marker_rows_idempotent() {
    let mut dao = fresh_dao();
    // face_detections.library_id is a FK that SQLite enforces
    // immediately (no DEFERRED). in_memory_db_connection only runs
    // migrations — the libraries seed is a runtime path — so insert
    // library 1 here. INSERT OR IGNORE keeps the test runnable if a
    // migration ever starts seeding libraries(id=1) itself.
    diesel::sql_query(
        "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
         VALUES (1, 'main', '/tmp', 0)",
    )
    .execute(dao.connection.lock().unwrap().deref_mut())
    .expect("seed libraries");

    // First marker write flips already_scanned for the hash.
    dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l")
        .expect("first mark");
    assert!(
        dao.already_scanned(&ctx(), "abc123").expect("scan"),
        "already_scanned should report true after marker"
    );

    // Re-marking the same hash is a no-op — the DAO short-circuits
    // before the insert that the partial UNIQUE index would reject.
    dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l")
        .expect("second mark idempotent");

    // Stats count the marker as scanned/no_faces, not as a face.
    let stats = dao.stats(&ctx(), Some(1)).expect("stats");
    assert_eq!(stats.no_faces, 1);
    assert_eq!(stats.scanned, 1);
    assert_eq!(stats.with_faces, 0);
}
|
||
|
||
#[test]
fn stats_total_photos_excludes_videos() {
    // SCANNED counts content_hashes in face_detections; total_photos
    // must apply the same image-extension filter as the watcher
    // backlog query so the percentage can reach 100%. Videos sit in
    // image_exif but never produce a face_detections row (Apollo
    // decodes images only), so counting them caps the bar below 100%.
    let mut dao = fresh_dao();
    diesel::sql_query(
        "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
         VALUES (1, 'main', '/tmp', 0)",
    )
    .execute(dao.connection.lock().unwrap().deref_mut())
    .expect("seed libraries");

    // Two images (one with an odd-case extension) and two videos.
    diesel::sql_query(
        "INSERT INTO image_exif \
         (library_id, rel_path, content_hash, created_time, last_modified) VALUES \
         (1, 'a.jpg', 'h-a', 0, 0), \
         (1, 'b.JPEG', 'h-b', 0, 0), \
         (1, 'movie.mp4', 'h-mp4', 0, 0), \
         (1, 'clip.MOV', 'h-mov', 0, 0)",
    )
    .execute(dao.connection.lock().unwrap().deref_mut())
    .expect("seed image_exif");

    let stats = dao.stats(&ctx(), Some(1)).expect("stats");
    assert_eq!(
        stats.total_photos, 2,
        "videos should not count toward total"
    );
}
|
||
|
||
#[test]
fn merge_persons_repoints_faces() {
    let mut dao = fresh_dao();
    // INSERT OR IGNORE keeps the test runnable whether or not a
    // migration already seeded libraries(id=1).
    diesel::sql_query(
        "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
         VALUES (1, 'main', '/tmp', 0)",
    )
    .execute(dao.connection.lock().unwrap().deref_mut())
    .expect("seed libraries");

    let alice = dao
        .create_person(
            &ctx(),
            &CreatePersonReq {
                name: "Alice".into(),
                notes: None,
                entity_id: None,
                is_ignored: false,
            },
            false,
        )
        .unwrap();
    let alyse = dao
        .create_person(
            &ctx(),
            &CreatePersonReq {
                name: "Alyse".into(),
                notes: Some("dup of alice".into()),
                entity_id: None,
                is_ignored: false,
            },
            false,
        )
        .unwrap();

    // One detected face owned by the duplicate identity.
    dao.store_detection(
        &ctx(),
        InsertFaceDetectionInput {
            library_id: 1,
            content_hash: "h1".into(),
            rel_path: "p1.jpg".into(),
            bbox: Some((0.1, 0.1, 0.2, 0.2)),
            embedding: Some(vec![0u8; 2048]),
            confidence: Some(0.9),
            source: "auto".into(),
            person_id: Some(alyse.id),
            status: "detected".into(),
            model_version: "buffalo_l".into(),
        },
    )
    .unwrap();

    // Merge alyse → alice; source notes copy when the target has none.
    let merged = dao.merge_persons(&ctx(), alyse.id, alice.id).unwrap();
    assert_eq!(merged.id, alice.id);
    assert_eq!(merged.notes.as_deref(), Some("dup of alice"));

    // The source row is gone…
    assert!(dao.get_person(&ctx(), alyse.id).unwrap().is_none());

    // …and its face now belongs to alice.
    let faces = dao.list_for_person(&ctx(), alice.id, Some(1)).unwrap();
    assert_eq!(faces.len(), 1);
    assert_eq!(faces[0].person_id, Some(alice.id));
}
|
||
|
||
// ── crop_image_to_bbox ──────────────────────────────────────────────
|
||
// Pure helper used by the manual face-create handler. Generate a tiny
|
||
// image in memory, write it to a temp file, then exercise the bbox
|
||
// validation + crop math.
|
||
|
||
fn write_solid_image(w: u32, h: u32) -> tempfile::NamedTempFile {
|
||
let mut img = image::RgbImage::new(w, h);
|
||
for p in img.pixels_mut() {
|
||
*p = image::Rgb([200, 200, 200]);
|
||
}
|
||
let f = tempfile::Builder::new()
|
||
.suffix(".jpg")
|
||
.tempfile()
|
||
.expect("tempfile");
|
||
image::DynamicImage::ImageRgb8(img)
|
||
.save(f.path())
|
||
.expect("save jpg");
|
||
f
|
||
}
|
||
|
||
#[test]
fn crop_rejects_invalid_bbox() {
    let f = write_solid_image(64, 64);
    // (nx, ny, nw, nh) tuples that must all fail validation before any
    // pixel work happens.
    let bad = [
        (-0.1, 0.0, 0.5, 0.5), // x below 0
        (1.5, 0.0, 0.5, 0.5),  // x above 1
        (0.0, 0.0, 0.0, 0.5),  // zero width
        (0.0, 0.0, 0.5, -0.1), // negative height
        (0.7, 0.0, 0.5, 0.5),  // overflows the image
    ];
    for (nx, ny, nw, nh) in bad {
        assert!(
            crop_image_to_bbox(f.path(), nx, ny, nw, nh).is_err(),
            "bbox ({nx}, {ny}, {nw}, {nh}) should be rejected"
        );
    }
}
|
||
|
||
#[test]
fn crop_returns_decodable_jpeg() {
    let f = write_solid_image(200, 200);
    let bytes = crop_image_to_bbox(f.path(), 0.25, 0.25, 0.5, 0.5).expect("center crop");
    // Re-decode to confirm the pipeline produced a valid JPEG. Exact
    // dimensions depend on the padding clamp (crop_image_to_bbox pads
    // 50% of the bbox per side, clipped at the image edge), so just
    // assert sanity bounds rather than pinning numbers (padding math
    // can legitimately drift if we tweak the heuristic later).
    let img = image::load_from_memory(&bytes).expect("decode crop");
    let (w, h) = (img.width(), img.height());
    assert!((80..=200).contains(&w), "unexpected crop width: {w}");
    assert!((80..=200).contains(&h), "unexpected crop height: {h}");
}
|
||
|
||
#[test]
fn crop_padding_clamps_to_image_bounds() {
    // A bbox right at the corner can only pad inward; the crop must
    // never reach outside the image, or we'd hand invalid coords to
    // the embedding service.
    let f = write_solid_image(100, 100);
    let bytes = crop_image_to_bbox(f.path(), 0.9, 0.9, 0.1, 0.1).expect("corner crop");
    let img = image::load_from_memory(&bytes).expect("decode corner crop");
    let (w, h) = (img.width(), img.height());
    // Padded crop fits inside the 100×100 source and is non-empty.
    assert!(w <= 100 && h <= 100);
    assert!(w > 0 && h > 0);
}
|
||
|
||
// ── hydrate_face_with_person — PATCH/POST /image/faces response shape ──
|
||
|
||
fn seed_library_and_face(dao: &mut SqliteFaceDao, person_id: Option<i32>) -> FaceDetectionRow {
|
||
diesel::sql_query(
|
||
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
|
||
VALUES (1, 'main', '/tmp', 0)",
|
||
)
|
||
.execute(dao.connection.lock().unwrap().deref_mut())
|
||
.expect("seed libraries");
|
||
dao.store_detection(
|
||
&ctx(),
|
||
InsertFaceDetectionInput {
|
||
library_id: 1,
|
||
content_hash: "h-hydrate".into(),
|
||
rel_path: "p.jpg".into(),
|
||
bbox: Some((0.1, 0.2, 0.3, 0.4)),
|
||
embedding: Some(vec![0u8; 2048]),
|
||
confidence: Some(0.9),
|
||
source: "manual".into(),
|
||
person_id,
|
||
status: "detected".into(),
|
||
model_version: "buffalo_l".into(),
|
||
},
|
||
)
|
||
.unwrap()
|
||
}
|
||
|
||
#[test]
fn hydrate_face_carries_person_name_when_assigned() {
    // Regression guard: PATCH /image/faces/{id} once returned a bare
    // FaceDetectionRow with no person_name, so the carousel overlay's
    // optimistic replace dropped the VFD label off the bbox after
    // every save. The handler hydrates through this helper; if anyone
    // refactors it to skip the persons join, this test fails.
    let mut dao = fresh_dao();
    let alice = dao
        .create_person(
            &ctx(),
            &CreatePersonReq {
                name: "Alice".into(),
                notes: None,
                entity_id: None,
                is_ignored: false,
            },
            false,
        )
        .unwrap();
    let row = seed_library_and_face(&mut dao, Some(alice.id));
    let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
    assert_eq!(joined.person_id, Some(alice.id));
    assert_eq!(joined.person_name.as_deref(), Some("Alice"));
    // Bbox + confidence + source must round-trip — the optimistic
    // replace also keys on these.
    for (actual, expected) in [
        (joined.bbox_x, 0.1),
        (joined.bbox_y, 0.2),
        (joined.bbox_w, 0.3),
        (joined.bbox_h, 0.4),
    ] {
        assert!((actual - expected).abs() < 1e-6);
    }
    assert_eq!(joined.source, "manual");
}
|
||
|
||
#[test]
fn hydrate_face_leaves_person_name_null_when_unassigned() {
    // Mirror branch: hydrating an unassigned face must yield
    // person_name = None — never a stale value left over from a
    // previously-assigned row's serialization.
    let mut dao = fresh_dao();
    let unassigned = seed_library_and_face(&mut dao, None);
    let joined =
        hydrate_face_with_person(&mut dao, &ctx(), unassigned).expect("hydrate unassigned");
    assert!(joined.person_id.is_none());
    assert!(joined.person_name.is_none());
}
|
||
|
||
#[test]
fn list_unscanned_candidates_filters_to_hashed_unscanned_in_library() {
    // The watcher's per-tick backlog drain relies on this query
    // returning *only* image_exif rows that (a) have a populated
    // content_hash and (b) have no face_detections row in the
    // requested library. A regression either silently re-scans files
    // (wasted inference) or skips files that need scanning (the
    // symptom we just shipped a fix for).
    let mut dao = fresh_dao();
    diesel::sql_query(
        "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
         VALUES (1, 'main', '/tmp', 0), (2, 'other', '/tmp2', 0)",
    )
    .execute(dao.connection.lock().unwrap().deref_mut())
    .expect("seed libraries");

    // Seed a mix: hashed/unhashed/scanned/cross-library rows, plus
    // videos and a mixed-case image extension. Videos register in
    // image_exif but can never produce a face_detections row, so the
    // SQL must drop them — otherwise the backlog drain re-pulls them
    // every tick (no marker is ever written, so they loop forever)
    // and the SCANNED stat is permanently capped.
    diesel::sql_query(
        "INSERT INTO image_exif \
         (library_id, rel_path, content_hash, created_time, last_modified) VALUES \
         (1, 'a.jpg', 'h-a', 0, 0), \
         (1, 'b.jpg', 'h-b', 0, 0), \
         (1, 'c.jpg', NULL, 0, 0), \
         (1, 'd.jpg', 'h-d', 0, 0), \
         (1, 'movie.mp4', 'h-mp4', 0, 0), \
         (1, 'clip.MOV', 'h-mov', 0, 0), \
         (1, 'photo.JPG', 'h-jpg-upper', 0, 0), \
         (2, 'e.jpg', 'h-e', 0, 0)",
    )
    .execute(dao.connection.lock().unwrap().deref_mut())
    .expect("seed image_exif");

    // 'b' already carries a no_faces marker — expect it filtered out.
    dao.mark_status(&ctx(), 1, "h-b", "b.jpg", "no_faces", "buffalo_l")
        .expect("scanned marker");

    let cands = dao
        .list_unscanned_candidates(&ctx(), 1, 10)
        .expect("list unscanned");
    let hashes: std::collections::HashSet<_> = cands.iter().map(|(_, h)| h.clone()).collect();

    // Present: a, d, and the upper-case .JPG (the image-extension
    // match is case-insensitive).
    for expected in ["h-a", "h-d", "h-jpg-upper"] {
        assert!(hashes.contains(expected), "missing {expected}: {hashes:?}");
    }
    // Absent: b (scanned), c (no hash — not even a hash to look for),
    // e (other library), and both videos (mp4/mov are not images).
    assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)");
    assert!(
        !hashes.contains("h-e"),
        "expected h-e filtered (other library)"
    );
    assert!(!hashes.contains("h-mp4"), "expected h-mp4 filtered (video)");
    assert!(!hashes.contains("h-mov"), "expected h-mov filtered (video)");
    assert_eq!(cands.len(), 3, "unexpected candidates: {:?}", cands);
}
|
||
}
|