Files
ImageApi/src/faces.rs
Cameron Cordes 1d9b9a0bc4 faces: avoid 40 MB row clone in /faces/embeddings
list_embeddings cloned the full FaceDetectionRow inside the filter_map
just to pair it with the base64-encoded embedding. The 2 KB BLOB was
already on the row — at 20k unassigned faces that's 40 MB of pointless
heap traffic per Apollo cluster-suggest run. Move the bytes out via
Option::take() so the row drops the BLOB instead of duplicating it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 19:00:55 -04:00

3527 lines
138 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Local face recognition: data layer + HTTP surface.
//!
//! Phase 2 ships the persistence model and the manual CRUD endpoints; the
//! file-watch hook that drives automatic detection lives in `process_new_files`
//! (Phase 3) and is not registered yet. Inference is delegated to Apollo over
//! HTTP via [`crate::ai::face_client`]; this module never imports onnxruntime.
//!
//! Data model:
//! - `persons` are visual identities (the "who" of a face).
//! - `face_detections` rows are either real detections (`status='detected'`)
//! or markers (`status='no_faces' | 'failed'`). Both are keyed on
//! `content_hash` so the same JPEG in two libraries is scanned once.
//! - The `(library_id, rel_path)` pair is the *display* lookup; we resolve
//! it through `image_exif.content_hash` on every read so renames don't
//! strand face rows.
//!
//! The `FaceDao` trait abstracts persistence; `SqliteFaceDao` is the
//! production impl. The Phase 2 endpoints use it directly. A test impl
//! (in-memory) lives at the bottom of the module behind `#[cfg(test)]`.
use crate::Claims;
use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError};
use crate::database::schema::{face_detections, image_exif, persons};
use crate::error::IntoHttpError;
use crate::exif;
use crate::file_types;
use crate::libraries::{self, Library};
use crate::otel::{extract_context_from_request, global_tracer, trace_db_call};
use crate::state::AppState;
use crate::utils::normalize_path;
use crate::{ThumbnailRequest, connect};
use actix_web::dev::{ServiceFactory, ServiceRequest};
use actix_web::{App, HttpRequest, HttpResponse, Responder, web};
use anyhow::{Context, anyhow};
use chrono::Utc;
use diesel::prelude::*;
use image::GenericImageView;
use log::{info, warn};
use opentelemetry::KeyValue;
use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer};
use serde::{Deserialize, Serialize};
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
// ── Wire types ──────────────────────────────────────────────────────────────
/// Visual identity. The optional `entity_id` bridges this person to an
/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT
/// auto-bridged at creation — only when the user explicitly links them in
/// the management UI, or when bootstrap finds an exact-name match.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Person {
    pub id: i32,
    /// Display name. Unique COLLATE NOCASE at the DB level (see the
    /// `find_persons_by_names_ci` docs on the DAO trait).
    pub name: String,
    /// Face used as this person's avatar; NULL until first assignment
    /// (auto-set by `assign_face_to_person` when empty).
    pub cover_face_id: Option<i32>,
    /// Optional bridge to an LLM-extracted knowledge-graph entity.
    pub entity_id: Option<i32>,
    /// True when the person was auto-created from a photo tag rather
    /// than by an explicit user action.
    pub created_from_tag: bool,
    pub notes: Option<String>,
    /// Unix timestamps (seconds).
    pub created_at: i64,
    pub updated_at: i64,
    /// True for the IGNORE / junk bucket. Hidden from the default
    /// persons list, skipped by `find_persons_by_names_ci` (so a tag
    /// match can never auto-bind a real face into the ignore bucket),
    /// and excluded from cluster suggestions because cluster-suggest
    /// already filters by `person_id IS NULL` and ignored faces have
    /// a non-null person_id.
    pub is_ignored: bool,
}
/// Insert shape for `persons`. Omits `id` (rowid), `cover_face_id` and
/// `entity_id`, which start at their DB defaults (NULL) and are patched
/// in later via `update_person` / `assign_face_to_person`.
#[derive(Insertable, Debug)]
#[diesel(table_name = persons)]
struct InsertPerson {
    name: String,
    notes: Option<String>,
    created_from_tag: bool,
    is_ignored: bool,
    created_at: i64,
    updated_at: i64,
}
/// Full `face_detections` row. Field order must match the table's column
/// order — `Queryable` maps by position, not by name.
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct FaceDetectionRow {
    pub id: i32,
    pub library_id: i32,
    /// Content-addressed key; the same JPEG in two libraries shares rows.
    pub content_hash: String,
    /// Display path at detection time; renames are resolved through
    /// `image_exif.content_hash` on read (see module docs).
    pub rel_path: String,
    /// Normalized bbox; NULL on marker rows (`status != 'detected'`).
    pub bbox_x: Option<f32>,
    pub bbox_y: Option<f32>,
    pub bbox_w: Option<f32>,
    pub bbox_h: Option<f32>,
    /// Skip on the wire — clients call /faces/embeddings explicitly when
    /// they need it. Saves ~2 KB per face on every list response.
    #[serde(skip_serializing)]
    pub embedding: Option<Vec<u8>>,
    pub confidence: Option<f32>,
    /// `'auto'` (detector) or manual origin; `delete_auto_for_hash`
    /// filters on this.
    pub source: String,
    pub person_id: Option<i32>,
    /// `'detected'`, `'no_faces'`, or `'failed'` (see module docs).
    pub status: String,
    pub model_version: String,
    pub created_at: i64,
}
/// SQL fragment restricting an `image_exif.rel_path` (or `face_detections.rel_path`)
/// column to image extensions. Videos register in `image_exif` with a
/// populated `content_hash` but can never produce a `face_detections` row
/// — applying this filter at query time keeps videos out of the per-tick
/// backlog drain (which would otherwise loop forever — `filter_excluded`
/// drops them client-side without writing a marker) and out of the SCANNED
/// stat denominator (so 100% is reachable).
fn image_path_predicate(col: &str) -> String {
    // Build "(lower(col) LIKE '%.jpg' OR lower(col) LIKE '%.png' OR …)"
    // one clause per known image extension.
    let mut predicate = String::from("(");
    for (i, ext) in file_types::IMAGE_EXTENSIONS.iter().enumerate() {
        if i > 0 {
            predicate.push_str(" OR ");
        }
        predicate.push_str(&format!("lower({col}) LIKE '%.{ext}'"));
    }
    predicate.push(')');
    predicate
}
/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's
/// `sql_query` requires a `QueryableByName` row type with explicit
/// column SQL types; using a tuple isn't supported.
#[derive(diesel::QueryableByName, Debug)]
struct CountRow {
    // The raw SQL must alias its aggregate as `count` for this to bind.
    #[diesel(sql_type = diesel::sql_types::BigInt)]
    count: i64,
}
/// `(rel_path, content_hash)` row for `list_unscanned_candidates`'s raw
/// SQL; fields bind by column name, so they must match the SELECT list.
#[derive(diesel::QueryableByName, Debug)]
struct UnscannedRow {
    #[diesel(sql_type = diesel::sql_types::Text)]
    rel_path: String,
    #[diesel(sql_type = diesel::sql_types::Text)]
    content_hash: String,
}
/// Insert shape for `face_detections`; mirrors [`FaceDetectionRow`]
/// minus the rowid. Built inside `store_detection` from the public
/// [`InsertFaceDetectionInput`] so callers never import diesel derives.
#[derive(Insertable, Debug)]
#[diesel(table_name = face_detections)]
struct InsertFaceDetection {
    library_id: i32,
    content_hash: String,
    rel_path: String,
    bbox_x: Option<f32>,
    bbox_y: Option<f32>,
    bbox_w: Option<f32>,
    bbox_h: Option<f32>,
    embedding: Option<Vec<u8>>,
    confidence: Option<f32>,
    source: String,
    person_id: Option<i32>,
    status: String,
    model_version: String,
    created_at: i64,
}
/// Build a [`FaceWithPerson`] from a freshly-mutated row by resolving the
/// person name via [`FaceDao::get_person`]. Used by `create_face_handler`
/// and `update_face_handler` so PATCH/POST responses match the join shape
/// `/image/faces` returns — without this the carousel overlay's
/// optimistic-replace would clobber the rendered name (the bare
/// [`FaceDetectionRow`] doesn't carry it).
fn hydrate_face_with_person<D: FaceDao>(
    dao: &mut D,
    ctx: &opentelemetry::Context,
    row: FaceDetectionRow,
) -> anyhow::Result<FaceWithPerson> {
    // Resolve the assigned person's display name, if any; a dangling
    // person_id (person row missing) degrades to None rather than erroring.
    let person_name = if let Some(pid) = row.person_id {
        dao.get_person(ctx, pid)?.map(|p| p.name)
    } else {
        None
    };
    // NULL bbox/confidence only occur on marker rows; default to 0.0 so
    // the wire shape stays non-optional.
    Ok(FaceWithPerson {
        id: row.id,
        bbox_x: row.bbox_x.unwrap_or(0.0),
        bbox_y: row.bbox_y.unwrap_or(0.0),
        bbox_w: row.bbox_w.unwrap_or(0.0),
        bbox_h: row.bbox_h.unwrap_or(0.0),
        confidence: row.confidence.unwrap_or(0.0),
        source: row.source,
        person_id: row.person_id,
        person_name,
        model_version: row.model_version,
    })
}
/// Face row decorated with its assigned person's name. Returned by
/// `/image/faces` for the rendering side (carousel overlay, person chips).
#[derive(Serialize, Debug, Clone)]
pub struct FaceWithPerson {
    pub id: i32,
    /// Normalized bbox; marker-row NULLs are flattened to 0.0 upstream.
    pub bbox_x: f32,
    pub bbox_y: f32,
    pub bbox_w: f32,
    pub bbox_h: f32,
    pub confidence: f32,
    pub source: String,
    pub person_id: Option<i32>,
    /// Joined from `persons.name`; None when unassigned.
    pub person_name: Option<String>,
    pub model_version: String,
}
/// Face row plus the photo it lives on. Powers the per-person photo grid
/// (`GET /persons/{id}/faces`) and unassigned-cluster surfacing in Apollo.
#[derive(Serialize, Debug, Clone)]
pub struct FaceWithPath {
    pub id: i32,
    /// Library + rel_path locate the photo the face was detected on.
    pub library_id: i32,
    pub rel_path: String,
    pub bbox_x: f32,
    pub bbox_y: f32,
    pub bbox_w: f32,
    pub bbox_h: f32,
    pub confidence: f32,
    pub person_id: Option<i32>,
    pub model_version: String,
}
/// Embedding-bearing face row. Returned by `/faces/embeddings` for Apollo's
/// clustering layer; embedding is base64-encoded so the JSON payload is
/// self-contained (Apollo's DBSCAN runs over numpy arrays decoded from this).
#[derive(Serialize, Debug, Clone)]
pub struct FaceEmbeddingRow {
    pub id: i32,
    pub library_id: i32,
    pub rel_path: String,
    pub content_hash: String,
    /// None on unassigned faces — the clustering input.
    pub person_id: Option<i32>,
    /// Embeddings are only comparable within one model version.
    pub model_version: String,
    /// base64 of 2048 bytes (512×f32 LE).
    pub embedding: String,
    /// Normalized bbox 0..1, included so the cluster suggester UI can
    /// crop a face thumbnail without an extra round-trip per cluster.
    /// Shouldn't be NULL for `status='detected'` rows (CHECK constraint
    /// in the migration), but the DB type is nullable so we mirror it.
    pub bbox_x: Option<f32>,
    pub bbox_y: Option<f32>,
    pub bbox_w: Option<f32>,
    pub bbox_h: Option<f32>,
}
/// Aggregate counters for the face-scan dashboard. All photo counts are
/// per distinct `content_hash`, not per row (see `stats` impl for why).
#[derive(Serialize, Debug, Default)]
pub struct FaceStats {
    /// None when computed across all libraries.
    pub library_id: Option<i32>,
    /// Denominator: hash-distinct, image-extension-filtered image_exif rows.
    pub total_photos: i64,
    /// Hashes with any face_detections row (detected or marker).
    pub scanned: i64,
    pub with_faces: i64,
    pub no_faces: i64,
    pub failed: i64,
    pub persons_count: i64,
    pub unassigned_faces: i64,
}
/// [`Person`] decorated with its face count for list views.
#[derive(Serialize, Debug, Clone)]
pub struct PersonSummary {
    pub id: i32,
    pub name: String,
    pub cover_face_id: Option<i32>,
    pub entity_id: Option<i32>,
    pub created_from_tag: bool,
    pub notes: Option<String>,
    pub is_ignored: bool,
    /// Number of assigned face rows (library-scoped when requested).
    pub face_count: i64,
}
// ── Request bodies ──────────────────────────────────────────────────────────
/// POST body for creating a person.
#[derive(Deserialize, Debug)]
pub struct CreatePersonReq {
    pub name: String,
    #[serde(default)]
    pub notes: Option<String>,
    /// Optional bridge to an existing entity. NULL/missing leaves it
    /// unbridged; set explicitly to wire the person to LLM-extracted facts.
    #[serde(default)]
    pub entity_id: Option<i32>,
    /// True for the IGNORE / junk bucket. The frontend sets this when
    /// lazily creating the Ignored person via the dedicated endpoint;
    /// hand-rolled callers leave it false.
    #[serde(default)]
    pub is_ignored: bool,
}
/// PATCH body for a person; every field is optional and missing fields
/// are left unchanged.
#[derive(Deserialize, Debug)]
pub struct UpdatePersonReq {
    #[serde(default)]
    pub name: Option<String>,
    #[serde(default)]
    pub notes: Option<String>,
    #[serde(default)]
    pub cover_face_id: Option<i32>,
    #[serde(default)]
    pub entity_id: Option<i32>,
    /// Toggle the ignore flag. Mostly used by the UI to "un-ignore" a
    /// person that was previously bound to the bucket.
    #[serde(default)]
    pub is_ignored: Option<bool>,
}
#[derive(Deserialize, Debug)]
pub struct MergePersonsReq {
    /// Person id to merge *into*. The source (`{id}` in the path) is
    /// re-pointed to this id, then deleted.
    pub into: i32,
}
#[derive(Deserialize, Debug)]
pub struct DeletePersonQuery {
    /// `set_null` (default) leaves face rows orphaned (person_id NULL);
    /// `delete` cascades through and removes the face rows entirely.
    /// Default is set_null because deleting the person almost never
    /// means "delete every photo of them that ever existed."
    #[serde(default)]
    pub cascade: Option<String>,
}
/// POST body for manually drawing a face box on a photo.
#[derive(Deserialize, Debug)]
pub struct CreateFaceReq {
    /// Photo path (library-relative). Resolved to content_hash via
    /// image_exif before any face row is inserted.
    pub path: String,
    pub library: Option<i32>,
    pub bbox: BboxReq,
    /// Optional initial person assignment. Use this when the user draws a
    /// box and immediately picks a name from the autocomplete.
    #[serde(default)]
    pub person_id: Option<i32>,
    /// Skip the embedding step. Set when the user wants to tag a region
    /// the detector can't find a face in (back of head, profile partly
    /// occluded, etc.). The row is stored with a zero-vector embedding,
    /// which the cluster suggester filters on `norm <= 0` and auto-bind
    /// cosine resolves to 0 against — so the row participates only as a
    /// browse-by-person tag, not in similarity matching. The frontend
    /// only sets this after a 422 from a strict create plus an explicit
    /// operator confirmation.
    #[serde(default)]
    pub force: bool,
}
/// Bounding box in a request body (normalized, mirroring the DB columns).
#[derive(Deserialize, Debug)]
pub struct BboxReq {
    pub x: f32,
    pub y: f32,
    pub w: f32,
    pub h: f32,
}
/// PATCH body for a face row.
#[derive(Deserialize, Debug)]
pub struct UpdateFaceReq {
    /// `null` literally clears the assignment; missing leaves it alone.
    /// Distinguish via `Option<Option<…>>` is tricky in serde without
    /// custom deserialization; encode "clear" as `clear_person: true`
    /// instead.
    #[serde(default)]
    pub person_id: Option<i32>,
    #[serde(default)]
    pub clear_person: bool,
    #[serde(default)]
    pub bbox: Option<BboxReq>,
}
/// Query string for `GET /faces/embeddings` (paged).
#[derive(Deserialize, Debug)]
pub struct EmbeddingsQuery {
    pub library: Option<i32>,
    /// Default true — clustering only cares about unassigned faces. Set
    /// false to dump all embeddings (e.g. for re-clustering everything).
    #[serde(default = "default_unassigned")]
    pub unassigned: bool,
    #[serde(default = "default_embeddings_limit")]
    pub limit: i64,
    #[serde(default)]
    pub offset: i64,
}
/// serde default for [`EmbeddingsQuery::unassigned`].
fn default_unassigned() -> bool {
    true
}
/// serde default for [`EmbeddingsQuery::limit`] (page size).
fn default_embeddings_limit() -> i64 {
    500
}
// ── DAO trait ───────────────────────────────────────────────────────────────
// File-watch hook (Phase 3) and the rerun handler (Phase 6) consume the
// methods the Phase 2 routes don't. Allow dead_code on the trait so we
// don't have to sprinkle attributes on every method that's wired up later.
#[allow(dead_code)]
pub trait FaceDao: Send + Sync {
    /// True when any `face_detections` row — detection or marker —
    /// exists for `content_hash`.
    fn already_scanned(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<bool>;
    /// Find image_exif rows in `library_id` that have a populated
    /// content_hash but no matching face_detections row yet. Used by
    /// the watcher's quick-scan path to drain the backlog without
    /// re-walking the filesystem. Returns `(rel_path, content_hash)`
    /// pairs, capped at `limit`. Distinct on content_hash so the same
    /// hash that lives at multiple rel_paths only fires one detection.
    fn list_unscanned_candidates(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        limit: i64,
    ) -> anyhow::Result<Vec<(String, String)>>;
    /// Insert one detection (or marker) row and return it as stored,
    /// rowid included.
    fn store_detection(
        &mut self,
        ctx: &opentelemetry::Context,
        row: InsertFaceDetectionInput,
    ) -> anyhow::Result<FaceDetectionRow>;
    /// Write a `no_faces`/`failed` marker row for a scanned photo.
    /// Idempotent: a no-op when any row already exists for the hash.
    fn mark_status(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        content_hash: &str,
        rel_path: &str,
        status: &str,
        model_version: &str,
    ) -> anyhow::Result<()>;
    /// All detected faces on one photo (by hash), with person names
    /// joined in. Marker rows are excluded.
    fn list_for_content_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<Vec<FaceWithPerson>>;
    /// All detected faces assigned to `person_id`, optionally scoped to
    /// one library. Powers the per-person photo grid.
    fn list_for_person(
        &mut self,
        ctx: &opentelemetry::Context,
        person_id: i32,
        library_id: Option<i32>,
    ) -> anyhow::Result<Vec<FaceWithPath>>;
    /// Page of detected rows paired with their base64-encoded embedding
    /// (rows with NULL embedding are skipped). `unassigned=true` keeps
    /// only `person_id IS NULL` rows — the clustering input.
    fn list_embeddings(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
        unassigned: bool,
        limit: i64,
        offset: i64,
    ) -> anyhow::Result<Vec<(FaceDetectionRow, String)>>;
    /// Fetch one face row by id; Ok(None) when it doesn't exist.
    fn get_face(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
    ) -> anyhow::Result<Option<FaceDetectionRow>>;
    /// Patch a face row and return the updated row. Each argument is an
    /// independent, optional patch.
    fn update_face(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        person_id: Option<Option<i32>>, // None=leave; Some(None)=clear; Some(Some(id))=set
        bbox: Option<(f32, f32, f32, f32)>,
        embedding: Option<Vec<u8>>,
    ) -> anyhow::Result<FaceDetectionRow>;
    /// Delete one face row; returns whether a row was actually removed.
    fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result<bool>;
    /// Remove all `source='auto'` rows for a hash (rerun prep); manual
    /// rows survive. Returns the number of rows deleted.
    fn delete_auto_for_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        content_hash: &str,
    ) -> anyhow::Result<usize>;
    /// Aggregate scan-progress counters, optionally per library.
    fn stats(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
    ) -> anyhow::Result<FaceStats>;
    // ── Persons ─────────────────────────────────────────────────────────
    /// Create a person; `from_tag` records auto-creation provenance.
    fn create_person(
        &mut self,
        ctx: &opentelemetry::Context,
        req: &CreatePersonReq,
        from_tag: bool,
    ) -> anyhow::Result<Person>;
    /// Fetch one person by id; Ok(None) when it doesn't exist.
    fn get_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
    ) -> anyhow::Result<Option<Person>>;
    /// List persons with face counts; `include_ignored` controls whether
    /// the junk bucket shows up.
    fn list_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
        include_ignored: bool,
    ) -> anyhow::Result<Vec<PersonSummary>>;
    /// Get the IGNORE/junk bucket, creating it lazily on first call.
    /// Idempotent — returns the same row across calls. Single global
    /// bucket per database; the frontend never sees the literal name.
    fn get_or_create_ignored_person(
        &mut self,
        ctx: &opentelemetry::Context,
    ) -> anyhow::Result<Person>;
    /// Apply an [`UpdatePersonReq`] patch and return the updated person.
    fn update_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        patch: &UpdatePersonReq,
    ) -> anyhow::Result<Person>;
    /// Delete a person. `cascade=true` removes face rows; otherwise the
    /// rows have their `person_id` set NULL by the FK constraint.
    fn delete_person(
        &mut self,
        ctx: &opentelemetry::Context,
        id: i32,
        cascade_delete_faces: bool,
    ) -> anyhow::Result<bool>;
    /// Re-point `src`'s faces at `into`, delete `src`, return `into`.
    fn merge_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        src: i32,
        into: i32,
    ) -> anyhow::Result<Person>;
    /// Resolve `(library_id, rel_path)` → `content_hash` via image_exif.
    /// Returns None when the photo hasn't been EXIF-indexed yet (no row
    /// in image_exif) or when the row exists but content_hash is NULL.
    fn resolve_content_hash(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: i32,
        rel_path: &str,
    ) -> anyhow::Result<Option<String>>;
    // ── Auto-bind support (Phase 4) ─────────────────────────────────────
    /// Map case-insensitive person names → person id. Used by the
    /// auto-bind path to look up "is this tag a known person?". Names
    /// passed in are matched LOWER(persons.name); collisions resolve to
    /// the person with the lowest id (stable, but the UNIQUE constraint
    /// on persons.name COLLATE NOCASE prevents collisions in practice).
    fn find_persons_by_names_ci(
        &mut self,
        ctx: &opentelemetry::Context,
        names: &[String],
    ) -> anyhow::Result<std::collections::HashMap<String, i32>>;
    /// Mean of a person's existing face embeddings. Returns the L2-
    /// normalized 512-d reference vector, or None when the person has
    /// no detected faces yet (auto-bind treats that as "first face wins
    /// unconditionally"). Filters by the same model_version that produced
    /// the candidate embedding so cross-model averaging never happens.
    fn person_reference_embedding(
        &mut self,
        ctx: &opentelemetry::Context,
        person_id: i32,
        model_version: &str,
    ) -> anyhow::Result<Option<Vec<f32>>>;
    /// Set face_detections.person_id and, when the target person has no
    /// cover_face_id yet, set it to this face. One transaction so a
    /// half-bound state can't survive a SQLite write error.
    fn assign_face_to_person(
        &mut self,
        ctx: &opentelemetry::Context,
        face_id: i32,
        person_id: i32,
    ) -> anyhow::Result<()>;
}
/// Free-standing input struct; the DAO copies it into [`InsertFaceDetection`]
/// so callers don't need to import the diesel-derived insertable.
#[derive(Debug, Clone)]
pub struct InsertFaceDetectionInput {
    pub library_id: i32,
    pub content_hash: String,
    pub rel_path: String,
    /// `(x, y, w, h)` normalized; None on marker rows.
    pub bbox: Option<(f32, f32, f32, f32)>,
    /// Raw 512×f32 LE bytes; None on marker rows.
    pub embedding: Option<Vec<u8>>,
    pub confidence: Option<f32>,
    pub source: String,
    pub person_id: Option<i32>,
    pub status: String,
    pub model_version: String,
}
// ── SqliteFaceDao impl ──────────────────────────────────────────────────────
/// Production [`FaceDao`] backed by a mutex-guarded SQLite connection.
/// Every DAO method holds the lock for its full duration, so calls are
/// serialized per DAO instance.
pub struct SqliteFaceDao {
    connection: Arc<Mutex<SqliteConnection>>,
}
impl SqliteFaceDao {
    /// Open a fresh connection via the crate-level `connect()`.
    pub fn new() -> Self {
        Self {
            connection: Arc::new(Mutex::new(connect())),
        }
    }
    /// Test helper — bind to a pre-built (typically in-memory) connection.
    #[cfg(test)]
    pub fn from_connection(connection: Arc<Mutex<SqliteConnection>>) -> Self {
        Self { connection }
    }
}
impl Default for SqliteFaceDao {
    /// Same as [`SqliteFaceDao::new`]: opens its own connection.
    fn default() -> Self {
        Self::new()
    }
}
impl FaceDao for SqliteFaceDao {
fn already_scanned(
    &mut self,
    ctx: &opentelemetry::Context,
    content_hash: &str,
) -> anyhow::Result<bool> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "face_already_scanned", |span| {
        span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
        // Any row — detection or marker — counts as "scanned"; we only
        // need existence, so fetch a single id.
        let hit = face_detections::table
            .filter(face_detections::content_hash.eq(content_hash))
            .select(face_detections::id)
            .first::<i32>(conn.deref_mut())
            .optional()
            .with_context(|| "already_scanned query")?;
        Ok(hit.is_some())
    })
}
fn list_unscanned_candidates(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: i32,
    limit: i64,
) -> anyhow::Result<Vec<(String, String)>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "list_unscanned_candidates", |span| {
        span.set_attribute(KeyValue::new("library_id", library_id as i64));
        // Pick the smallest-id rel_path per content_hash so we don't
        // fire multiple detect calls for the same hash if it lives
        // under several rel_paths in the same library. The
        // anti-join (NOT EXISTS) drains hashes that have no row in
        // face_detections at all. The image-extension predicate
        // keeps videos out of the candidate set; without it they'd
        // be filtered client-side and re-pulled every tick forever
        // because no marker row is written for excluded paths.
        //
        // NOTE(review): with a bare (non-aggregated) rel_path under
        // GROUP BY, SQLite picks the value from an *arbitrary* row of
        // the group, not necessarily the smallest-id one — confirm the
        // "smallest-id" claim or use min(rowid)-correlated selection.
        let ext_predicate = image_path_predicate("rel_path");
        // Bind order must match the two `?` placeholders: library_id
        // (Integer) first, then limit (BigInt).
        let sql = format!(
            "SELECT rel_path, content_hash \
             FROM image_exif e \
             WHERE library_id = ? \
             AND content_hash IS NOT NULL \
             AND {ext_predicate} \
             AND NOT EXISTS ( \
                 SELECT 1 FROM face_detections f \
                 WHERE f.content_hash = e.content_hash \
             ) \
             GROUP BY content_hash \
             LIMIT ?"
        );
        let rows: Vec<(String, String)> = diesel::sql_query(sql)
            .bind::<diesel::sql_types::Integer, _>(library_id)
            .bind::<diesel::sql_types::BigInt, _>(limit)
            .load::<UnscannedRow>(conn.deref_mut())
            .with_context(|| "list_unscanned_candidates")?
            .into_iter()
            .map(|r| (r.rel_path, r.content_hash))
            .collect();
        Ok(rows)
    })
}
fn store_detection(
    &mut self,
    ctx: &opentelemetry::Context,
    row: InsertFaceDetectionInput,
) -> anyhow::Result<FaceDetectionRow> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "insert", "store_detection", |span| {
        span.set_attribute(KeyValue::new("status", row.status.clone()));
        span.set_attribute(KeyValue::new("source", row.source.clone()));
        let now = Utc::now().timestamp();
        // Explode the (x, y, w, h) tuple into four nullable columns.
        let (bx, by, bw, bh) = match row.bbox {
            Some((x, y, w, h)) => (Some(x), Some(y), Some(w), Some(h)),
            None => (None, None, None, None),
        };
        let insert = InsertFaceDetection {
            library_id: row.library_id,
            content_hash: row.content_hash,
            rel_path: row.rel_path,
            bbox_x: bx,
            bbox_y: by,
            bbox_w: bw,
            bbox_h: bh,
            embedding: row.embedding,
            confidence: row.confidence,
            source: row.source,
            person_id: row.person_id,
            status: row.status,
            model_version: row.model_version,
            created_at: now,
        };
        diesel::insert_into(face_detections::table)
            .values(&insert)
            .execute(conn.deref_mut())
            .with_context(|| "insert face_detection")?;
        // last_insert_rowid() is per-connection state; it's safe here
        // because the mutex holds this connection for the whole call,
        // so no interleaved insert can change it between the two
        // statements.
        define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; }
        let id = diesel::select(last_insert_rowid())
            .get_result::<i32>(conn.deref_mut())
            .with_context(|| "last_insert_rowid")?;
        // Re-read the row so defaults applied by the DB come back too.
        face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "fetch inserted face")
    })
}
fn mark_status(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: i32,
    content_hash: &str,
    rel_path: &str,
    status: &str,
    model_version: &str,
) -> anyhow::Result<()> {
    // Marker rows have NULL bbox + NULL embedding (CHECK enforces
    // this). We let the UNIQUE partial index on (content_hash) WHERE
    // status='no_faces' guard against double-marking; for 'failed' we
    // do a manual exists-check. Any pre-existing row for this hash —
    // including detected rows from a prior successful run — makes a
    // second marker wrong, so stay idempotent and bail out early.
    if self.already_scanned(ctx, content_hash)? {
        return Ok(());
    }
    let marker = InsertFaceDetectionInput {
        library_id,
        content_hash: content_hash.to_string(),
        rel_path: rel_path.to_string(),
        bbox: None,
        embedding: None,
        confidence: None,
        source: "auto".to_string(),
        person_id: None,
        status: status.to_string(),
        model_version: model_version.to_string(),
    };
    // Discard the returned row — callers only care that the marker landed.
    self.store_detection(ctx, marker).map(|_| ())
}
fn list_for_content_hash(
    &mut self,
    ctx: &opentelemetry::Context,
    content_hash: &str,
) -> anyhow::Result<Vec<FaceWithPerson>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "faces_for_hash", |span| {
        span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
        // Left join keeps unassigned faces (person_name = None); marker
        // rows are excluded by the status filter.
        let rows = face_detections::table
            .left_join(persons::table.on(persons::id.nullable().eq(face_detections::person_id)))
            .filter(face_detections::content_hash.eq(content_hash))
            .filter(face_detections::status.eq("detected"))
            .select((
                face_detections::id,
                face_detections::bbox_x,
                face_detections::bbox_y,
                face_detections::bbox_w,
                face_detections::bbox_h,
                face_detections::confidence,
                face_detections::source,
                face_detections::person_id,
                persons::name.nullable(),
                face_detections::model_version,
            ))
            .load::<(
                i32,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                String,
                Option<i32>,
                Option<String>,
                String,
            )>(conn.deref_mut())
            .with_context(|| "list faces for hash")?;
        let faces: Vec<FaceWithPerson> = rows
            .into_iter()
            .map(
                |(id, bx, by, bw, bh, conf, source, person_id, person_name, model_version)| {
                    FaceWithPerson {
                        id,
                        bbox_x: bx.unwrap_or(0.0),
                        bbox_y: by.unwrap_or(0.0),
                        bbox_w: bw.unwrap_or(0.0),
                        bbox_h: bh.unwrap_or(0.0),
                        confidence: conf.unwrap_or(0.0),
                        source,
                        person_id,
                        person_name,
                        model_version,
                    }
                },
            )
            .collect();
        Ok(faces)
    })
}
fn list_for_person(
    &mut self,
    ctx: &opentelemetry::Context,
    person_id: i32,
    library_id: Option<i32>,
) -> anyhow::Result<Vec<FaceWithPath>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "faces_for_person", |span| {
        span.set_attribute(KeyValue::new("person_id", person_id as i64));
        // Boxed query so the optional library filter can be bolted on.
        let mut query = face_detections::table
            .filter(face_detections::person_id.eq(person_id))
            .filter(face_detections::status.eq("detected"))
            .into_boxed();
        if let Some(lib) = library_id {
            query = query.filter(face_detections::library_id.eq(lib));
        }
        let rows = query
            .select((
                face_detections::id,
                face_detections::library_id,
                face_detections::rel_path,
                face_detections::bbox_x,
                face_detections::bbox_y,
                face_detections::bbox_w,
                face_detections::bbox_h,
                face_detections::confidence,
                face_detections::person_id,
                face_detections::model_version,
            ))
            .load::<(
                i32,
                i32,
                String,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<f32>,
                Option<i32>,
                String,
            )>(conn.deref_mut())
            .with_context(|| "list faces for person")?;
        let faces: Vec<FaceWithPath> = rows
            .into_iter()
            .map(
                |(id, library_id, rel_path, bx, by, bw, bh, conf, person_id, model_version)| {
                    FaceWithPath {
                        id,
                        library_id,
                        rel_path,
                        bbox_x: bx.unwrap_or(0.0),
                        bbox_y: by.unwrap_or(0.0),
                        bbox_w: bw.unwrap_or(0.0),
                        bbox_h: bh.unwrap_or(0.0),
                        confidence: conf.unwrap_or(0.0),
                        person_id,
                        model_version,
                    }
                },
            )
            .collect();
        Ok(faces)
    })
}
fn list_embeddings(
    &mut self,
    ctx: &opentelemetry::Context,
    library_id: Option<i32>,
    unassigned: bool,
    limit: i64,
    offset: i64,
) -> anyhow::Result<Vec<(FaceDetectionRow, String)>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "list_embeddings", |span| {
        span.set_attribute(KeyValue::new("limit", limit));
        span.set_attribute(KeyValue::new("offset", offset));
        let mut query = face_detections::table
            .filter(face_detections::status.eq("detected"))
            .into_boxed();
        if let Some(lib) = library_id {
            query = query.filter(face_detections::library_id.eq(lib));
        }
        if unassigned {
            query = query.filter(face_detections::person_id.is_null());
        }
        // Stable order by id so limit/offset pages don't overlap or
        // skip rows between requests.
        let rows = query
            .order(face_detections::id.asc())
            .limit(limit)
            .offset(offset)
            .load::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "list embeddings")?;
        // Pair with the base64-encoded embedding string so the handler
        // doesn't need to know the wire format. Skip rows with NULL
        // embedding (shouldn't happen on detected rows, but defensive).
        // `embedding.take()` moves the bytes out of the row so we can
        // hand the (now-empty-embedding) row plus the encoded string
        // back to the caller without cloning the whole row — at 20k
        // rows × 2 KB that clone was 40 MB of pointless heap traffic
        // per cluster-suggest run.
        use base64::Engine;
        Ok(rows
            .into_iter()
            .filter_map(|mut r| {
                let bytes = r.embedding.take()?;
                let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
                Some((r, b64))
            })
            .collect())
    })
}
fn get_face(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
) -> anyhow::Result<Option<FaceDetectionRow>> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "query", "get_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        // .optional() turns NotFound into Ok(None) for the caller.
        let row = face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .optional()
            .with_context(|| "get_face")?;
        Ok(row)
    })
}
fn update_face(
    &mut self,
    ctx: &opentelemetry::Context,
    id: i32,
    person_id: Option<Option<i32>>,
    bbox: Option<(f32, f32, f32, f32)>,
    embedding: Option<Vec<u8>>,
) -> anyhow::Result<FaceDetectionRow> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "update", "update_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        // Apply patches one at a time so each set() has the right type.
        // Diesel's update DSL is type-driven and combining heterogeneous
        // optional sets in one statement is awkward.
        //
        // NOTE(review): the three UPDATEs are separate statements, not
        // one transaction — a failure partway leaves earlier patches
        // applied. The mutex serializes concurrent callers, so this
        // only matters on a mid-call SQLite error; confirm that's
        // acceptable or wrap in conn.transaction().
        if let Some(pid) = person_id {
            // pid is Option<i32>: Some(id) sets, None clears to NULL.
            diesel::update(face_detections::table.find(id))
                .set(face_detections::person_id.eq(pid))
                .execute(conn.deref_mut())
                .with_context(|| "update person_id")?;
        }
        if let Some((x, y, w, h)) = bbox {
            diesel::update(face_detections::table.find(id))
                .set((
                    face_detections::bbox_x.eq(x),
                    face_detections::bbox_y.eq(y),
                    face_detections::bbox_w.eq(w),
                    face_detections::bbox_h.eq(h),
                ))
                .execute(conn.deref_mut())
                .with_context(|| "update bbox")?;
        }
        if let Some(emb) = embedding {
            diesel::update(face_detections::table.find(id))
                .set(face_detections::embedding.eq(emb))
                .execute(conn.deref_mut())
                .with_context(|| "update embedding")?;
        }
        // Re-read so the caller gets the fully patched row.
        face_detections::table
            .find(id)
            .first::<FaceDetectionRow>(conn.deref_mut())
            .with_context(|| "fetch updated face")
    })
}
fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result<bool> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "delete", "delete_face", |span| {
        span.set_attribute(KeyValue::new("id", id as i64));
        // Affected-row count > 0 tells the caller whether anything
        // actually existed at this id.
        diesel::delete(face_detections::table.find(id))
            .execute(conn.deref_mut())
            .with_context(|| "delete face")
            .map(|n| n > 0)
    })
}
fn delete_auto_for_hash(
    &mut self,
    ctx: &opentelemetry::Context,
    content_hash: &str,
) -> anyhow::Result<usize> {
    let mut conn = self.connection.lock().expect("face dao lock");
    trace_db_call(ctx, "delete", "delete_auto_for_hash", |span| {
        span.set_attribute(KeyValue::new("content_hash", content_hash.to_string()));
        // Only detector-written rows go; manually drawn faces
        // (source != 'auto') survive a re-scan.
        let target = face_detections::table
            .filter(face_detections::content_hash.eq(content_hash))
            .filter(face_detections::source.eq("auto"));
        let removed = diesel::delete(target)
            .execute(conn.deref_mut())
            .with_context(|| "delete auto rows")?;
        Ok(removed)
    })
}
    /// Aggregate face-scan progress and person counters, optionally scoped
    /// to a single library (`library_id = None` → instance-wide).
    ///
    /// Every scan counter (`scanned`, `with_faces`, `no_faces`, `failed`,
    /// `total_photos`) counts DISTINCT content_hash values, not rows, so
    /// the progress numerator and denominator live in the same domain;
    /// the inline comments explain the individual pitfalls.
    fn stats(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
    ) -> anyhow::Result<FaceStats> {
        let mut conn = self.connection.lock().expect("face dao lock");
        trace_db_call(ctx, "query", "face_stats", |span| {
            if let Some(lib) = library_id {
                span.set_attribute(KeyValue::new("library_id", lib as i64));
            }
            // Count distinct content_hashes per status by status — one
            // hash can have many rows (multiple detected faces) but we
            // want it counted once.
            let scanned: i64 = {
                let mut q = face_detections::table.into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                    .first(conn.deref_mut())
                    .with_context(|| "stats: scanned")?
            };
            let with_faces: i64 = {
                let mut q = face_detections::table
                    .filter(face_detections::status.eq("detected"))
                    .into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                    .first(conn.deref_mut())
                    .with_context(|| "stats: with_faces")?
            };
            let no_faces: i64 = {
                let mut q = face_detections::table
                    .filter(face_detections::status.eq("no_faces"))
                    .into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                    .first(conn.deref_mut())
                    .with_context(|| "stats: no_faces")?
            };
            let failed: i64 = {
                let mut q = face_detections::table
                    .filter(face_detections::status.eq("failed"))
                    .into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
                    .first(conn.deref_mut())
                    .with_context(|| "stats: failed")?
            };
            // Image-extension filter mirrors `list_unscanned_candidates` so
            // SCANNED can actually reach 100%: videos sit in `image_exif` but
            // never get a `face_detections` row, so counting them here
            // permanently caps the percentage below 100%.
            //
            // Count DISTINCT content_hash (not rows) so the numerator
            // (`scanned`, also distinct-content_hash) and denominator live
            // in the same domain. Without this, a file present at multiple
            // rel_paths or across libraries inflates total_photos by one
            // per duplicate row while face_detections — keyed on
            // content_hash — counts the bytes once, leaving a permanent
            // gap (e.g. 1101/1103 with nothing actually pending). Rows
            // with NULL content_hash are excluded; they're held in the
            // hash-backfill backlog and counting them would pin the bar
            // below 100% for the duration of that backfill.
            let total_photos: i64 = {
                let ext_predicate = image_path_predicate("rel_path");
                let row: CountRow = if let Some(lib) = library_id {
                    let sql = format!(
                        "SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
                         WHERE library_id = ? AND content_hash IS NOT NULL AND {ext_predicate}"
                    );
                    diesel::sql_query(sql)
                        .bind::<diesel::sql_types::Integer, _>(lib)
                        .get_result(conn.deref_mut())
                        .with_context(|| "stats: total_photos")?
                } else {
                    let sql = format!(
                        "SELECT COUNT(DISTINCT content_hash) AS count FROM image_exif \
                         WHERE content_hash IS NOT NULL AND {ext_predicate}"
                    );
                    diesel::sql_query(sql)
                        .get_result(conn.deref_mut())
                        .with_context(|| "stats: total_photos")?
                };
                row.count
            };
            // Persons are global rows — deliberately not library-filtered.
            let persons_count: i64 = persons::table
                .select(diesel::dsl::count_star())
                .first(conn.deref_mut())
                .with_context(|| "stats: persons")?;
            // Unassigned is a plain row count (each face box needs triage),
            // unlike the distinct-hash counters above.
            let unassigned_faces: i64 = {
                let mut q = face_detections::table
                    .filter(face_detections::status.eq("detected"))
                    .filter(face_detections::person_id.is_null())
                    .into_boxed();
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
                q.select(diesel::dsl::count_star())
                    .first(conn.deref_mut())
                    .with_context(|| "stats: unassigned")?
            };
            Ok(FaceStats {
                library_id,
                total_photos,
                scanned,
                with_faces,
                no_faces,
                failed,
                persons_count,
                unassigned_faces,
            })
        })
    }
fn create_person(
&mut self,
ctx: &opentelemetry::Context,
req: &CreatePersonReq,
from_tag: bool,
) -> anyhow::Result<Person> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "insert", "create_person", |span| {
span.set_attribute(KeyValue::new("name", req.name.clone()));
let now = Utc::now().timestamp();
let insert = InsertPerson {
name: req.name.clone(),
notes: req.notes.clone(),
created_from_tag: from_tag,
is_ignored: req.is_ignored,
created_at: now,
updated_at: now,
};
diesel::insert_into(persons::table)
.values(&insert)
.execute(conn.deref_mut())
.with_context(|| format!("insert person {}", req.name))?;
define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; }
let id = diesel::select(last_insert_rowid())
.get_result::<i32>(conn.deref_mut())
.with_context(|| "last_insert_rowid persons")?;
// Optional entity bridge — do this as a follow-up update so
// schema's UNIQUE(name COLLATE NOCASE) can fire on insert
// before we touch entity_id.
if let Some(entity_id) = req.entity_id {
diesel::update(persons::table.find(id))
.set(persons::entity_id.eq(entity_id))
.execute(conn.deref_mut())
.with_context(|| "set entity_id on new person")?;
}
persons::table
.find(id)
.first::<Person>(conn.deref_mut())
.with_context(|| "fetch new person")
})
}
fn get_or_create_ignored_person(
&mut self,
ctx: &opentelemetry::Context,
) -> anyhow::Result<Person> {
// Fast path: there's already an is_ignored row → return it.
// Slow path on first use: create one with a stable display name
// ("Ignored"). Race-safe because the UNIQUE(name COLLATE NOCASE)
// index forces only one ever to exist (we trip and look up).
{
let mut conn = self.connection.lock().expect("face dao lock");
if let Some(p) = persons::table
.filter(persons::is_ignored.eq(true))
.order(persons::id.asc())
.first::<Person>(conn.deref_mut())
.optional()
.with_context(|| "lookup ignored person")?
{
return Ok(p);
}
}
// Drop the lock before delegating to create_person — that
// method takes its own lock.
match self.create_person(
ctx,
&CreatePersonReq {
name: "Ignored".to_string(),
notes: Some(
"Bucket for strangers, false detections, and faces \
you don't want bound to a real person."
.to_string(),
),
entity_id: None,
is_ignored: true,
},
/*from_tag*/ false,
) {
Ok(p) => Ok(p),
Err(e) if is_unique_violation(&e) => {
// Race: someone else created the row. Re-read.
let mut conn = self.connection.lock().expect("face dao lock");
persons::table
.filter(persons::is_ignored.eq(true))
.order(persons::id.asc())
.first::<Person>(conn.deref_mut())
.with_context(|| "load ignored person after race")
}
Err(e) => Err(e),
}
}
fn get_person(
&mut self,
ctx: &opentelemetry::Context,
id: i32,
) -> anyhow::Result<Option<Person>> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "get_person", |span| {
span.set_attribute(KeyValue::new("id", id as i64));
persons::table
.find(id)
.first::<Person>(conn.deref_mut())
.optional()
.with_context(|| "get_person")
})
}
    /// List persons alphabetically with their detected-face counts; the
    /// optional `library_id` scopes only the counts (persons themselves
    /// are global). The ignore bucket is excluded unless `include_ignored`
    /// is set.
    fn list_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        library_id: Option<i32>,
        include_ignored: bool,
    ) -> anyhow::Result<Vec<PersonSummary>> {
        let mut conn = self.connection.lock().expect("face dao lock");
        trace_db_call(ctx, "query", "list_persons", |_| {
            // Two-step: load all persons, then a single grouped count
            // query for face counts. Using a LEFT JOIN + GROUP BY in
            // Diesel here gets noisy with the optional library filter; a
            // second roundtrip is cheap and clearer.
            let mut person_query = persons::table.into_boxed();
            if !include_ignored {
                // Default — hide the IGNORE/junk bucket from the list.
                // The frontend asks include_ignored=true explicitly when
                // it needs to surface ignored persons (e.g. a "show
                // ignored" toggle in the management UI).
                person_query = person_query.filter(persons::is_ignored.eq(false));
            }
            let person_rows: Vec<Person> = person_query
                .order(persons::name.asc())
                .load::<Person>(conn.deref_mut())
                .with_context(|| "load persons")?;
            // Diesel's BoxedSelectStatement + group_by trips the trait
            // resolver into recursion, so this aggregation goes through
            // sql_query. The shape is small and the bind list is at most
            // one parameter — readability isn't really worse than the DSL.
            let counts: Vec<(i32, i64)> = {
                use diesel::sql_types::*;
                #[derive(QueryableByName)]
                struct PersonCountRow {
                    #[diesel(sql_type = Integer)]
                    person_id: i32,
                    #[diesel(sql_type = BigInt)]
                    count: i64,
                }
                let sql = if library_id.is_some() {
                    "SELECT person_id, COUNT(*) AS count FROM face_detections \
                     WHERE status='detected' AND person_id IS NOT NULL AND library_id = ? \
                     GROUP BY person_id"
                } else {
                    "SELECT person_id, COUNT(*) AS count FROM face_detections \
                     WHERE status='detected' AND person_id IS NOT NULL \
                     GROUP BY person_id"
                };
                let mut q = diesel::sql_query(sql).into_boxed();
                if let Some(lib) = library_id {
                    q = q.bind::<Integer, _>(lib);
                }
                q.load::<PersonCountRow>(conn.deref_mut())
                    .with_context(|| "person face counts")?
                    .into_iter()
                    .map(|r| (r.person_id, r.count))
                    .collect()
            };
            use std::collections::HashMap;
            let count_map: HashMap<i32, i64> = counts.into_iter().collect();
            // Persons missing from count_map simply have zero faces.
            Ok(person_rows
                .into_iter()
                .map(|p| {
                    let face_count = count_map.get(&p.id).copied().unwrap_or(0);
                    PersonSummary {
                        id: p.id,
                        name: p.name,
                        cover_face_id: p.cover_face_id,
                        entity_id: p.entity_id,
                        created_from_tag: p.created_from_tag,
                        notes: p.notes,
                        is_ignored: p.is_ignored,
                        face_count,
                    }
                })
                .collect())
        })
    }
fn update_person(
&mut self,
ctx: &opentelemetry::Context,
id: i32,
patch: &UpdatePersonReq,
) -> anyhow::Result<Person> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "update", "update_person", |span| {
span.set_attribute(KeyValue::new("id", id as i64));
let now = Utc::now().timestamp();
// Apply each patched column individually for the same
// reason as update_face — heterogeneous optional sets are
// painful in Diesel's type-driven update DSL.
if let Some(name) = &patch.name {
diesel::update(persons::table.find(id))
.set((persons::name.eq(name), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person name")?;
}
if let Some(notes) = &patch.notes {
diesel::update(persons::table.find(id))
.set((persons::notes.eq(notes), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person notes")?;
}
if let Some(cover) = patch.cover_face_id {
diesel::update(persons::table.find(id))
.set((
persons::cover_face_id.eq(cover),
persons::updated_at.eq(now),
))
.execute(conn.deref_mut())
.with_context(|| "update person cover")?;
}
if let Some(eid) = patch.entity_id {
diesel::update(persons::table.find(id))
.set((persons::entity_id.eq(eid), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person entity_id")?;
}
if let Some(flag) = patch.is_ignored {
diesel::update(persons::table.find(id))
.set((persons::is_ignored.eq(flag), persons::updated_at.eq(now)))
.execute(conn.deref_mut())
.with_context(|| "update person is_ignored")?;
}
persons::table
.find(id)
.first::<Person>(conn.deref_mut())
.with_context(|| "fetch updated person")
})
}
fn delete_person(
&mut self,
ctx: &opentelemetry::Context,
id: i32,
cascade_delete_faces: bool,
) -> anyhow::Result<bool> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "delete", "delete_person", |span| {
span.set_attribute(KeyValue::new("id", id as i64));
span.set_attribute(KeyValue::new("cascade", cascade_delete_faces));
if cascade_delete_faces {
diesel::delete(face_detections::table.filter(face_detections::person_id.eq(id)))
.execute(conn.deref_mut())
.with_context(|| "cascade delete faces for person")?;
}
// Always clear cover_face_id pointers that referenced this
// person's faces (otherwise the FK from persons.cover_face_id
// could hang). cover_face_id has no FK constraint in SQLite
// so this is documentation-only — the explicit nuke is on
// the face rows above.
let n = diesel::delete(persons::table.find(id))
.execute(conn.deref_mut())
.with_context(|| "delete person")?;
Ok(n > 0)
})
}
    /// Merge person `src` into person `into`: repoint all face rows, copy
    /// `src`'s notes into the target only when the target has none, then
    /// delete `src` and return the surviving row.
    ///
    /// The whole operation runs inside one transaction so a mid-merge
    /// SQLite write error cannot leave a half-merged state. Bails before
    /// touching the database when `src == into`.
    fn merge_persons(
        &mut self,
        ctx: &opentelemetry::Context,
        src: i32,
        into: i32,
    ) -> anyhow::Result<Person> {
        if src == into {
            anyhow::bail!("cannot merge a person into itself");
        }
        let mut conn = self.connection.lock().expect("face dao lock");
        trace_db_call(ctx, "update", "merge_persons", |span| {
            span.set_attribute(KeyValue::new("src", src as i64));
            span.set_attribute(KeyValue::new("into", into as i64));
            // Wrap in a transaction so a half-merged state can't survive
            // a SQLite write error mid-operation.
            conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| {
                // Re-point face_detections.
                diesel::update(face_detections::table.filter(face_detections::person_id.eq(src)))
                    .set(face_detections::person_id.eq(into))
                    .execute(tx)
                    .with_context(|| "repoint faces on merge")?;
                // Copy notes from src into target if the target is empty.
                let src_person: Person = persons::table
                    .find(src)
                    .first(tx)
                    .with_context(|| "load src person for merge")?;
                let into_person: Person = persons::table
                    .find(into)
                    .first(tx)
                    .with_context(|| "load target person for merge")?;
                if into_person.notes.as_deref().unwrap_or("").is_empty()
                    && src_person
                        .notes
                        .as_deref()
                        .map(|s| !s.is_empty())
                        .unwrap_or(false)
                {
                    diesel::update(persons::table.find(into))
                        .set(persons::notes.eq(src_person.notes))
                        .execute(tx)
                        .with_context(|| "copy notes on merge")?;
                }
                diesel::delete(persons::table.find(src))
                    .execute(tx)
                    .with_context(|| "delete src person on merge")?;
                persons::table
                    .find(into)
                    .first::<Person>(tx)
                    .with_context(|| "fetch merged person")
            })
        })
    }
fn resolve_content_hash(
&mut self,
ctx: &opentelemetry::Context,
library_id: i32,
rel_path: &str,
) -> anyhow::Result<Option<String>> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "resolve_content_hash", |_| {
image_exif::table
.filter(image_exif::library_id.eq(library_id))
.filter(image_exif::rel_path.eq(rel_path))
.select(image_exif::content_hash)
.first::<Option<String>>(conn.deref_mut())
.optional()
.map(|outer| outer.and_then(|inner| inner))
.with_context(|| "resolve content_hash")
})
}
fn find_persons_by_names_ci(
&mut self,
ctx: &opentelemetry::Context,
names: &[String],
) -> anyhow::Result<std::collections::HashMap<String, i32>> {
if names.is_empty() {
return Ok(std::collections::HashMap::new());
}
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "find_persons_by_names_ci", |span| {
span.set_attribute(KeyValue::new("count", names.len() as i64));
// Lowercase comparison both sides. Use sql_query to keep the
// bind list dynamic without fighting Diesel's type system on
// the LOWER() function.
use diesel::sql_types::*;
let placeholders = std::iter::repeat_n("?", names.len())
.collect::<Vec<_>>()
.join(",");
// Filter out is_ignored persons so the auto-bind path can
// never target the IGNORE/junk bucket — even if a tag name
// happens to match it (e.g. someone tags photos as "Ignored"
// by hand). Ignore-bucket assignment is an explicit operator
// action through the dedicated endpoint, never a heuristic.
let sql = format!(
"SELECT id, LOWER(name) AS lower_name FROM persons \
WHERE is_ignored = 0 AND LOWER(name) IN ({}) \
ORDER BY id ASC",
placeholders
);
#[derive(QueryableByName)]
struct Row {
#[diesel(sql_type = Integer)]
id: i32,
#[diesel(sql_type = Text)]
lower_name: String,
}
let mut q = diesel::sql_query(sql).into_boxed();
for n in names {
q = q.bind::<Text, _>(n.to_lowercase());
}
let rows = q
.load::<Row>(conn.deref_mut())
.with_context(|| "find_persons_by_names_ci")?;
// Lowest id wins on collision (UNIQUE COLLATE NOCASE on the
// table prevents that today, but the deduplication is a
// defensive belt-and-braces).
let mut out = std::collections::HashMap::with_capacity(rows.len());
for r in rows {
out.entry(r.lower_name).or_insert(r.id);
}
Ok(out)
})
}
fn person_reference_embedding(
&mut self,
ctx: &opentelemetry::Context,
person_id: i32,
model_version: &str,
) -> anyhow::Result<Option<Vec<f32>>> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "query", "person_reference_embedding", |span| {
span.set_attribute(KeyValue::new("person_id", person_id as i64));
span.set_attribute(KeyValue::new("model_version", model_version.to_string()));
// Pull only the embedding bytes; we average them in Rust. A
// SQL aggregate over 512-d vectors isn't meaningfully faster
// and would tie us to a specific embedding length.
let blobs: Vec<Option<Vec<u8>>> = face_detections::table
.filter(face_detections::person_id.eq(person_id))
.filter(face_detections::status.eq("detected"))
.filter(face_detections::model_version.eq(model_version))
.select(face_detections::embedding)
.load(conn.deref_mut())
.with_context(|| "load person embeddings")?;
let vectors: Vec<Vec<f32>> = blobs
.into_iter()
.filter_map(|b| b.and_then(|bytes| decode_embedding_bytes(&bytes)))
.collect();
if vectors.is_empty() {
return Ok(None);
}
Ok(Some(mean_normalized(&vectors)))
})
}
fn assign_face_to_person(
&mut self,
ctx: &opentelemetry::Context,
face_id: i32,
person_id: i32,
) -> anyhow::Result<()> {
let mut conn = self.connection.lock().expect("face dao lock");
trace_db_call(ctx, "update", "assign_face_to_person", |span| {
span.set_attribute(KeyValue::new("face_id", face_id as i64));
span.set_attribute(KeyValue::new("person_id", person_id as i64));
conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| {
diesel::update(face_detections::table.find(face_id))
.set(face_detections::person_id.eq(person_id))
.execute(tx)
.with_context(|| "set face person_id")?;
// If this person has no cover yet, claim this face.
// Don't overwrite an existing cover — the user may have
// hand-picked one in the UI.
let cover: Option<i32> = persons::table
.find(person_id)
.select(persons::cover_face_id)
.first::<Option<i32>>(tx)
.with_context(|| "load person cover")?;
if cover.is_none() {
diesel::update(persons::table.find(person_id))
.set(persons::cover_face_id.eq(face_id))
.execute(tx)
.with_context(|| "set cover_face_id")?;
}
Ok(())
})
})
}
}
// ── Embedding helpers ───────────────────────────────────────────────────────
/// Decode a 2048-byte little-endian f32 BLOB into a 512-element Vec<f32>.
///
/// Malformed input yields `None` instead of an error: callers treat "no
/// usable embedding" exactly like "no embedding at all" and skip the row
/// when averaging.
pub(crate) fn decode_embedding_bytes(bytes: &[u8]) -> Option<Vec<f32>> {
    if bytes.len() != 2048 {
        return None;
    }
    let floats = bytes
        .chunks_exact(4)
        .map(|quad| f32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]))
        .collect();
    Some(floats)
}
/// Average a set of L2-normalized vectors, then re-normalize the result.
///
/// ArcFace embeddings from insightface arrive unit-length, so normalizing
/// the mean gives the "average direction" in a single step. A
/// zero-magnitude mean (vectors cancelling out) is returned as-is rather
/// than dividing by zero.
fn mean_normalized(vectors: &[Vec<f32>]) -> Vec<f32> {
    debug_assert!(
        !vectors.is_empty(),
        "mean_normalized requires non-empty input"
    );
    let dim = vectors[0].len();
    let count = vectors.len() as f32;
    let mut mean = vec![0.0f32; dim];
    for v in vectors {
        debug_assert_eq!(v.len(), dim, "mismatched embedding dim");
        for (slot, value) in mean.iter_mut().zip(v.iter()) {
            *slot += *value;
        }
    }
    for slot in mean.iter_mut() {
        *slot /= count;
    }
    let magnitude = mean.iter().map(|x| x * x).sum::<f32>().sqrt();
    if magnitude > 0.0 {
        for slot in mean.iter_mut() {
            *slot /= magnitude;
        }
    }
    mean
}
/// Cosine similarity of two embeddings of equal length.
///
/// Neither input needs to be pre-normalized. Length mismatch, empty
/// input, and zero-magnitude vectors all yield 0.0 instead of NaN — the
/// auto-bind path reads 0.0 as "no useful similarity, leave unassigned".
pub(crate) fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let mag_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let mag_b = b.iter().map(|y| y * y).sum::<f32>().sqrt();
    let denom = mag_a * mag_b;
    if denom <= 0.0 { 0.0 } else { dot / denom }
}
// ── Handlers ────────────────────────────────────────────────────────────────
/// Register every face/person HTTP route on the given actix `App`.
///
/// NOTE(review): the literal routes `/persons/bootstrap` and
/// `/persons/ignore-bucket` are registered before the `/persons/{id}`
/// pattern. actix-web matches resources in registration order, so keep
/// the literal paths first or they would be captured as `{id}` values.
pub fn add_face_services<T, D: FaceDao + 'static>(app: App<T>) -> App<T>
where
    T: ServiceFactory<ServiceRequest, Config = (), Error = actix_web::Error, InitError = ()>,
{
    app.service(web::resource("/faces/stats").route(web::get().to(stats_handler::<D>)))
        .service(web::resource("/faces/embeddings").route(web::get().to(embeddings_handler::<D>)))
        .service(
            web::resource("/image/faces")
                .route(web::get().to(list_faces_handler::<D>))
                .route(web::post().to(create_face_handler::<D>)),
        )
        .service(
            web::resource("/image/faces/{id}")
                .route(web::patch().to(update_face_handler::<D>))
                .route(web::delete().to(delete_face_handler::<D>)),
        )
        .service(
            web::resource("/persons")
                .route(web::get().to(list_persons_handler::<D>))
                .route(web::post().to(create_person_handler::<D>)),
        )
        .service(
            web::resource("/persons/bootstrap")
                .route(web::post().to(bootstrap_persons_handler::<D>)),
        )
        .service(
            web::resource("/persons/ignore-bucket")
                .route(web::post().to(ignore_bucket_handler::<D>)),
        )
        .service(
            web::resource("/tags/people-bootstrap-candidates")
                .route(web::get().to(bootstrap_candidates_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}")
                .route(web::get().to(get_person_handler::<D>))
                .route(web::patch().to(update_person_handler::<D>))
                .route(web::delete().to(delete_person_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}/merge").route(web::post().to(merge_persons_handler::<D>)),
        )
        .service(
            web::resource("/persons/{id}/faces").route(web::get().to(person_faces_handler::<D>)),
        )
}
// ── Bootstrap (Phase 4) ─────────────────────────────────────────────────────
/// One row in the tag→person bootstrap picker: a case-insensitive tag
/// group plus the flags the UI uses to pre-check and de-duplicate it.
#[derive(Serialize, Debug, Clone)]
pub struct BootstrapCandidate {
    /// Display name — most-frequent capitalization across the case-insensitive
    /// group, or simply the first one seen if it's a tie.
    pub name: String,
    /// Lowercased name; the stable key for grouping and the auto-bind path.
    pub normalized_name: String,
    /// Sum of `tagged_photo` counts across all capitalizations of this name.
    pub usage_count: i64,
    /// Heuristic suggestion; the UI defaults this to checked but the user
    /// confirms before [`bootstrap_persons_handler`] actually creates rows.
    pub looks_like_person: bool,
    /// True when a `persons` row already exists for this name (any case).
    /// The UI hides these — re-running bootstrap is idempotent so it's fine
    /// either way, but the noise isn't worth showing.
    pub already_exists: bool,
}
/// Response body for `GET /tags/people-bootstrap-candidates`.
#[derive(Serialize, Debug)]
pub struct BootstrapCandidatesResponse {
    pub candidates: Vec<BootstrapCandidate>,
}
/// Request body for `POST /persons/bootstrap`: the display names the
/// operator ticked in the candidate list.
#[derive(Deserialize, Debug)]
pub struct BootstrapPersonsReq {
    pub names: Vec<String>,
}
/// Outcome of a bootstrap run: person rows actually created, plus a
/// per-name skip record for everything that wasn't.
#[derive(Serialize, Debug)]
pub struct BootstrapPersonsResponse {
    pub created: Vec<Person>,
    pub skipped: Vec<BootstrapSkipped>,
}
/// A name the bootstrap run did not create a person for, with a
/// human-readable reason (empty name, already exists, or an error).
#[derive(Serialize, Debug)]
pub struct BootstrapSkipped {
    pub name: String,
    pub reason: String,
}
/// Hard filter for the bootstrap candidate list. Returns true when the
/// tag could plausibly be a person name; false drops it from the
/// candidates entirely (not merely leaves `looks_like_person` unset).
///
/// Requirements — all must hold after trimming:
/// - At least 3 characters. Two-letter tags ("AB", "OK") are almost
///   always abbreviations or markers, not names.
/// - Only name-plausible characters: letters, whitespace, ASCII digits,
///   apostrophes (ASCII `'` or U+2019), dashes, periods, underscores.
///   Emoji, symbols, math operators, and control characters disqualify
///   the whole tag — they're chart-junk no operator would tick.
///
/// Digits deliberately survive this filter: "Sarah2" stays a candidate
/// (display-flagged not-a-person by `looks_like_person`) so the operator
/// can still spot and confirm it manually if it's an alias.
pub(crate) fn is_plausible_name_token(raw: &str) -> bool {
    let token = raw.trim();
    if token.chars().count() < 3 {
        return false;
    }
    token.chars().all(|c| {
        c.is_alphabetic()
            || c.is_ascii_digit()
            || c.is_whitespace()
            || matches!(c, '\'' | '-' | '.' | '_' | '\u{2019}')
    })
}
/// Conservative "this tag *might* be a person name" heuristic. It only
/// controls the default checked state in the bootstrap UI, so both false
/// negatives and false positives are acceptable — the operator confirms
/// every row before anything is created.
///
/// Accepts when:
/// - the trimmed tag is 1–2 whitespace-separated words,
/// - every word starts with an uppercase character and contains no ASCII
///   digits (rejects "Trip 2018", "2024", ...), and
/// - a single-word tag is not on a small denylist of common non-person
///   tags (cat, christmas, beach, ...). Two-word tags skip the denylist
///   because a real two-word person name is the dominant case
///   ("Sarah Smith") and false-blocking it is worse than false-accepting
///   "Sunset Walk".
pub(crate) fn looks_like_person(raw: &str) -> bool {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return false;
    }
    let words: Vec<&str> = trimmed.split_whitespace().collect();
    if words.is_empty() || words.len() > 2 {
        return false;
    }
    let name_shaped = words.iter().all(|w| {
        w.chars().next().is_some_and(|c| c.is_uppercase())
            && !w.chars().any(|c| c.is_ascii_digit())
    });
    if !name_shaped {
        return false;
    }
    if words.len() == 1 {
        const DENY: &[&str] = &[
            // Pets / animals
            "cat", "dog", "kitten", "puppy", "bird", "fish", "pet", "pets",
            // Events / occasions
            "birthday", "christmas", "halloween", "easter", "thanksgiving",
            "wedding", "anniversary", "vacation", "holiday", "party", "trip",
            "graduation", "concert",
            // Places (generic)
            "home", "work", "beach", "park", "hotel", "restaurant", "office",
            "house", "garden",
            // Subjects / styles
            "food", "sunset", "sunrise", "landscape", "portrait", "selfie",
            "nature", "flowers", "flower", "snow", "rain", "sky",
            // Buckets
            "untagged", "favorites", "favourites", "misc", "other", "random",
        ];
        let lower = trimmed.to_lowercase();
        if DENY.contains(&lower.as_str()) {
            return false;
        }
    }
    true
}
/// `GET /tags/people-bootstrap-candidates` — build the tag→person
/// bootstrap picker: group tags case-insensitively, pick the
/// most-frequent capitalization as the display name, and flag both
/// likely person names and names that already have a `persons` row.
async fn bootstrap_candidates_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    face_dao: web::Data<Mutex<D>>,
    tag_dao: web::Data<Mutex<crate::tags::SqliteTagDao>>,
) -> impl Responder {
    use std::collections::HashMap;
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.bootstrap_candidates", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    // All tags + their counts. Path filter unused — bootstrap is library-wide.
    // The tag-dao lock is scoped so it's released before the face-dao lock
    // is taken below.
    let tags_with_counts = {
        let mut td = tag_dao.lock().expect("tag dao lock");
        match crate::tags::TagDao::get_all_tags(&mut *td, &span_context, None) {
            Ok(t) => t,
            Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
        }
    };
    // Group by lowercase name. Pick the most-frequent capitalization
    // for the display name (ties broken by first-seen). Filter out
    // short tags and tags carrying non-name characters (emojis, symbols)
    // before grouping — they're noise no operator would tick, so showing
    // them just makes the candidate list harder to scan.
    struct Group {
        /// Current best display capitalization (most frequent so far).
        display: String,
        /// Count backing `display`; only a strictly larger count replaces it.
        display_freq: i64,
        /// Sum of counts across every capitalization in the group.
        total_count: i64,
    }
    let mut groups: HashMap<String, Group> = HashMap::new();
    for (count, tag) in tags_with_counts {
        if !is_plausible_name_token(&tag.name) {
            continue;
        }
        let lower = tag.name.to_lowercase();
        let g = groups.entry(lower).or_insert_with(|| Group {
            display: tag.name.clone(),
            display_freq: 0,
            total_count: 0,
        });
        g.total_count += count;
        if count > g.display_freq {
            g.display = tag.name.clone();
            g.display_freq = count;
        }
    }
    // Cross-reference against existing persons (bulk one-query lookup).
    let lower_names: Vec<String> = groups.keys().cloned().collect();
    let existing = {
        let mut fd = face_dao.lock().expect("face dao lock");
        match fd.find_persons_by_names_ci(&span_context, &lower_names) {
            Ok(m) => m,
            Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
        }
    };
    let mut candidates: Vec<BootstrapCandidate> = groups
        .into_iter()
        .map(|(lower, g)| BootstrapCandidate {
            looks_like_person: looks_like_person(&g.display),
            already_exists: existing.contains_key(&lower),
            name: g.display,
            normalized_name: lower,
            usage_count: g.total_count,
        })
        .collect();
    // Sort: persons-first heuristic by descending count, then alphabetical.
    // Persons-likely candidates surface near the top so the user doesn't
    // scroll past dozens of "vacation"-style tags to find them.
    candidates.sort_by(|a, b| {
        b.looks_like_person
            .cmp(&a.looks_like_person)
            .then(b.usage_count.cmp(&a.usage_count))
            .then(a.normalized_name.cmp(&b.normalized_name))
    });
    HttpResponse::Ok().json(BootstrapCandidatesResponse { candidates })
}
/// `POST /persons/bootstrap` — create person rows for the ticked names.
/// Idempotent: names that already exist (any case) come back as skipped,
/// not as errors, so re-running the bootstrap is safe.
async fn bootstrap_persons_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<BootstrapPersonsReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.bootstrap_persons", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    let mut dao = face_dao.lock().expect("face dao lock");
    // One bulk existence lookup up front: a duplicate request then reports
    // "already exists" (skipped) instead of firing N inserts that all trip
    // the UNIQUE COLLATE NOCASE constraint.
    let lower_names: Vec<String> = body.names.iter().map(|n| n.to_lowercase()).collect();
    let existing = match dao.find_persons_by_names_ci(&span_context, &lower_names) {
        Ok(m) => m,
        Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
    };
    let mut created: Vec<Person> = Vec::new();
    let mut skipped: Vec<BootstrapSkipped> = Vec::new();
    for raw in &body.names {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            skipped.push(BootstrapSkipped {
                name: raw.clone(),
                reason: "empty name".into(),
            });
            continue;
        }
        if existing.contains_key(&trimmed.to_lowercase()) {
            skipped.push(BootstrapSkipped {
                name: trimmed.to_string(),
                reason: "person already exists".into(),
            });
            continue;
        }
        let req = CreatePersonReq {
            name: trimmed.to_string(),
            notes: None,
            entity_id: None,
            is_ignored: false,
        };
        match dao.create_person(&span_context, &req, /*from_tag*/ true) {
            Ok(p) => created.push(p),
            Err(e) if is_unique_violation(&e) => {
                // Lost a race with a concurrent create — same as skipped.
                skipped.push(BootstrapSkipped {
                    name: trimmed.to_string(),
                    reason: "person already exists".into(),
                });
            }
            Err(e) => skipped.push(BootstrapSkipped {
                name: trimmed.to_string(),
                reason: format!("{:#}", e),
            }),
        }
    }
    HttpResponse::Ok().json(BootstrapPersonsResponse { created, skipped })
}
// ── Stats / list ────────────────────────────────────────────────────────────
/// Query parameters for library-scoped endpoints: an optional library
/// selector string, resolved via `libraries::resolve_library_param`.
#[derive(Deserialize)]
pub struct LibraryQuery {
    pub library: Option<String>,
}
/// `GET /persons` query: optional library scope, optional include of
/// the IGNORE/junk bucket. The bucket is hidden by default so the
/// management UI shows only "real" persons; the persons-management
/// screen requests it explicitly when it needs to surface ignored.
#[derive(Deserialize)]
pub struct ListPersonsQuery {
    // Optional library selector, resolved via resolve_library_param.
    pub library: Option<String>,
    // Defaults to false — ignored persons are omitted unless asked for.
    #[serde(default)]
    pub include_ignored: bool,
}
/// `GET /faces/stats` — scan-progress and person counters, optionally
/// scoped to one library. An unresolvable library parameter falls back
/// to the unscoped (instance-wide) view rather than erroring.
async fn stats_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    app_state: web::Data<AppState>,
    query: web::Query<LibraryQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.stats", &parent);
    let trace_ctx = opentelemetry::Context::current_with_span(span);
    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|l| l.id);
    let mut dao = face_dao.lock().expect("face dao lock");
    let result = dao.stats(&trace_ctx, library_id);
    result
        .map(|s| {
            trace_ctx.span().set_status(Status::Ok);
            HttpResponse::Ok().json(s)
        })
        .into_http_internal_err()
}
/// `GET /image/faces` — all face rows for one photo, resolved through
/// `image_exif.content_hash` so renames don't strand the overlay.
///
/// A photo with no content hash yet (watcher hasn't caught up) returns
/// an empty list rather than an error; the carousel simply renders no
/// overlay until the hash lands.
async fn list_faces_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    query: web::Query<ThumbnailRequest>,
    app_state: web::Data<AppState>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.list", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    let normalized_path = normalize_path(&query.path);
    // resolve_library_param returns Option<&Library>; clone so the result
    // is owned (matching the primary_library fallback's type).
    let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .cloned()
        .unwrap_or_else(|| app_state.primary_library().clone());
    let mut dao = face_dao.lock().expect("face dao lock");
    let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
        Ok(Some(h)) => h,
        Ok(None) => {
            // Photo not yet hashed — empty face list is a graceful answer.
            // The carousel falls back to "no overlay" which is fine until
            // the watcher catches up.
            return HttpResponse::Ok().json(Vec::<FaceWithPerson>::new());
        }
        // `{:#}` surfaces the full anyhow context chain, matching every
        // other handler in this module; `e.to_string()` only showed the
        // outermost context and made these two paths needlessly terse.
        Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)),
    };
    match dao.list_for_content_hash(&span_context, &hash) {
        Ok(faces) => HttpResponse::Ok().json(faces),
        Err(e) => HttpResponse::InternalServerError().body(format!("{:#}", e)),
    }
}
/// Bulk embedding export (Apollo's cluster-suggest input): paginated
/// rows paired with their base64-encoded embeddings.
async fn embeddings_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    query: web::Query<EmbeddingsQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.embeddings", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    // Clamp pagination to sane bounds before it reaches SQL.
    let limit = query.limit.clamp(1, 5_000);
    let offset = query.offset.max(0);
    let mut dao = face_dao.lock().expect("face dao lock");
    dao.list_embeddings(
        &span_ctx,
        query.library,
        query.unassigned,
        limit,
        offset,
    )
    .map(|rows| {
        // The DAO yields (row, base64 embedding) pairs; flatten each
        // pair into the wire shape.
        let mut out = Vec::with_capacity(rows.len());
        for (row, encoded) in rows {
            out.push(FaceEmbeddingRow {
                id: row.id,
                library_id: row.library_id,
                rel_path: row.rel_path,
                content_hash: row.content_hash,
                person_id: row.person_id,
                model_version: row.model_version,
                embedding: encoded,
                bbox_x: row.bbox_x,
                bbox_y: row.bbox_y,
                bbox_w: row.bbox_w,
                bbox_h: row.bbox_h,
            });
        }
        HttpResponse::Ok().json(out)
    })
    .into_http_internal_err()
}
// ── Manual face create / update / delete ────────────────────────────────────
/// Create a face row by hand for a photo.
///
/// Two paths:
/// * strict (`force == false`) — crop the user-drawn bbox, send the
///   crop to Apollo via `face_client.embed`, store the real vector;
/// * force (`force == true`) — skip Apollo and store a zero-vector
///   sentinel embedding under model_version `manual_no_embed` (see
///   the step 2+3 comment below for why that is safe).
///
/// Responses emitted here: 201 Created with the person-hydrated row;
/// 409 when the photo has no content hash yet; 400 on crop failure;
/// 422 on permanent detect errors or no face in the crop; 502 on an
/// undecodable embedding; 503 when the client is disabled or Apollo
/// is transiently down; 500 on DAO errors.
async fn create_face_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<CreateFaceReq>,
    app_state: web::Data<AppState>,
    face_client: web::Data<FaceClient>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.create_manual", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    // The force path doesn't need Apollo at all (no embed call); the
    // strict path does. Surface the disabled state only when we'd
    // actually use the client.
    if !body.force && !face_client.is_enabled() {
        return HttpResponse::ServiceUnavailable().body("face client disabled");
    }
    let normalized_path = normalize_path(&body.path);
    // Absent or unresolvable library param falls back to the primary
    // library (the `_` arm covers both Err and Ok(None)).
    let library: Library = match libraries::resolve_library_param(
        &app_state,
        body.library.as_ref().map(|i| i.to_string()).as_deref(),
    ) {
        Ok(Some(lib)) => lib.clone(),
        _ => app_state.primary_library().clone(),
    };
    // 1. Resolve content_hash for the photo.
    let hash = {
        let mut dao = face_dao.lock().expect("face dao lock");
        match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
            Ok(Some(h)) => h,
            Ok(None) => {
                return HttpResponse::Conflict()
                    .body("photo not yet hashed; wait for next watcher pass");
            }
            Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
        }
    };
    // 2 + 3. Crop + embed via Apollo (strict path), or skip both (force).
    //
    // Force is the "tag a face the detector can't see" path — back of
    // head, heavily-occluded profile, etc. We store a zero-vector
    // embedding under a sentinel model_version so the row participates
    // only as a browse-by-person tag: clustering filters norm<=0 (see
    // face_clustering._decode_b64_embedding) and auto-bind cosine
    // resolves to 0 / NaN, never crossing the threshold. Cluster
    // suggester also groups by model_version so this sentinel never
    // mixes with real buffalo_l rows.
    let (embedding_bytes, model_version, confidence) = if body.force {
        info!(
            "manual face (force): skipping detection for {:?} bbox=({},{},{},{})",
            normalized_path, body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h
        );
        // 2048 zero bytes = 512 × f32 — same width as a real embedding,
        // so downstream length checks still pass.
        (vec![0u8; 2048], "manual_no_embed".to_string(), 0.0_f32)
    } else {
        let abs_path = library.resolve(&normalized_path);
        let crop_bytes = match crop_image_to_bbox(
            &abs_path,
            body.bbox.x,
            body.bbox.y,
            body.bbox.w,
            body.bbox.h,
        ) {
            Ok(b) => b,
            Err(e) => {
                warn!("crop_image_to_bbox failed for {:?}: {:?}", abs_path, e);
                return HttpResponse::BadRequest().body(format!("cannot crop photo: {}", e));
            }
        };
        let meta = DetectMeta {
            content_hash: hash.clone(),
            library_id: library.id,
            rel_path: normalized_path.clone(),
            orientation: None,
            model_version: None,
        };
        let detect = match face_client.embed(crop_bytes, meta).await {
            Ok(r) => r,
            Err(FaceDetectError::Permanent(e)) => {
                return HttpResponse::UnprocessableEntity().body(format!("{}", e));
            }
            Err(FaceDetectError::Transient(e)) => {
                return HttpResponse::ServiceUnavailable().body(format!("{}", e));
            }
            Err(FaceDetectError::Disabled) => {
                return HttpResponse::ServiceUnavailable().body("face client disabled");
            }
        };
        let detected = match detect.faces.first() {
            Some(f) => f.clone(),
            None => {
                // Apollo would have returned 422 on no_face_in_crop; defensive.
                return HttpResponse::UnprocessableEntity().body("no face in crop");
            }
        };
        let bytes = match detected.decode_embedding() {
            Ok(b) => b,
            Err(e) => {
                warn!("manual face: decode embedding failed: {:?}", e);
                return HttpResponse::BadGateway().body("invalid embedding from face service");
            }
        };
        (bytes, detect.model_version, detected.confidence)
    };
    // 4. Insert the manual row using the bbox the user drew (NOT the
    // detector's tighter box around their drawing — they get what they
    // asked for; cluster matching uses the embedding which is from the
    // detector's true box anyway).
    let mut dao = face_dao.lock().expect("face dao lock");
    let row = match dao.store_detection(
        &span_context,
        InsertFaceDetectionInput {
            library_id: library.id,
            content_hash: hash,
            rel_path: normalized_path,
            bbox: Some((body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h)),
            embedding: Some(embedding_bytes),
            confidence: Some(confidence),
            source: "manual".to_string(),
            person_id: body.person_id,
            status: "detected".to_string(),
            model_version,
        },
    ) {
        Ok(r) => r,
        Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
    };
    info!(
        "Created manual face id={} library={} hash={} person_id={:?}",
        row.id, row.library_id, row.content_hash, row.person_id
    );
    // Join person_name so the 201 body matches the GET /image/faces shape.
    match hydrate_face_with_person(&mut *dao, &span_context, row) {
        Ok(joined) => HttpResponse::Created().json(joined),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
/// Patch a face row: person assignment and/or bounding box.
///
/// Person patch semantics: `clear_person` wins over `person_id` —
/// `Some(None)` clears the assignment, `Some(Some(id))` assigns,
/// `None` leaves it untouched.
///
/// A bbox patch triggers a re-embed through Apollo (rationale in the
/// inline comments); when detection fails softly the old embedding is
/// kept and the bbox is still applied.
async fn update_face_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    body: web::Json<UpdateFaceReq>,
    app_state: web::Data<AppState>,
    face_client: web::Data<FaceClient>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.update", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
    let id = path.into_inner();
    // Encode the tri-state person patch: Some(None) = clear,
    // Some(Some(id)) = assign, None = leave untouched.
    let person_patch: Option<Option<i32>> = if body.clear_person {
        Some(None)
    } else {
        body.person_id.map(Some)
    };
    let bbox_patch = body.bbox.as_ref().map(|b| (b.x, b.y, b.w, b.h));
    // Bbox change → re-embed. The embedding is what auto-bind and the
    // cluster suggester key on, so leaving it stale would silently
    // corrupt every downstream similarity match. We crop the new bbox,
    // pass it through face_client.embed, and store the fresh vector.
    // Net cost: one Apollo round-trip per bbox edit (~100-500ms on
    // CPU); acceptable for a manual operator action.
    let mut new_embedding: Option<Vec<u8>> = None;
    if let Some((bx, by, bw, bh)) = bbox_patch {
        if !face_client.is_enabled() {
            warn!(
                "PATCH /image/faces/{}: 503 — face client not enabled \
                (APOLLO_FACE_API_BASE_URL / APOLLO_API_BASE_URL both unset). \
                Bbox edit requires Apollo to re-embed.",
                id
            );
            return HttpResponse::ServiceUnavailable()
                .body("face client disabled — bbox edit requires Apollo");
        }
        // Look up the current row so we know which photo to crop.
        let current = {
            let mut dao = face_dao.lock().expect("face dao lock");
            match dao.get_face(&span_context, id) {
                Ok(Some(r)) => r,
                Ok(None) => return HttpResponse::NotFound().finish(),
                Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
            }
        };
        let library = match app_state.library_by_id(current.library_id) {
            Some(l) => l.clone(),
            None => {
                return HttpResponse::InternalServerError().body(format!(
                    "face row references unknown library_id {}",
                    current.library_id
                ));
            }
        };
        let abs_path = library.resolve(&current.rel_path);
        let crop_bytes = match crop_image_to_bbox(&abs_path, bx, by, bw, bh) {
            Ok(b) => b,
            Err(e) => {
                warn!(
                    "PATCH /image/faces/{}: crop failed for {:?}: {:?}",
                    id, abs_path, e
                );
                return HttpResponse::BadRequest().body(format!("cannot crop new bbox: {}", e));
            }
        };
        // Re-embed under the row's existing model_version so the fresh
        // vector stays comparable to its neighbours.
        let meta = DetectMeta {
            content_hash: current.content_hash.clone(),
            library_id: current.library_id,
            rel_path: current.rel_path.clone(),
            orientation: None,
            model_version: Some(current.model_version.clone()),
        };
        // Soft contract on the re-embed: we'd LIKE a fresh ArcFace
        // vector for the new crop, but the operator's bbox edit is
        // sacred. If detection finds no face in the new region (they
        // dragged the box slightly off-center, or moved it to a back-
        // of-head shot they've already manually tagged), or returns a
        // bad embedding, we keep the old embedding and apply the bbox
        // anyway. Cost: stale embedding for that row, which slightly
        // pollutes clustering for files re-detected against this
        // person — accepted because dropping the user's drag is a
        // worse UX. Transient failures (cuda_oom, engine unavailable)
        // still 503 so the operator can retry once Apollo recovers.
        match face_client.embed(crop_bytes, meta).await {
            Ok(resp) => {
                if let Some(face) = resp.faces.first() {
                    match face.decode_embedding() {
                        Ok(b) => new_embedding = Some(b),
                        Err(e) => {
                            warn!(
                                "PATCH /image/faces/{}: bad embedding from face service ({:?}); keeping old embedding, bbox still applied",
                                id, e
                            );
                        }
                    }
                } else {
                    info!(
                        "PATCH /image/faces/{}: no face detected in new bbox — keeping old embedding, bbox still applied",
                        id
                    );
                }
            }
            Err(FaceDetectError::Permanent(e)) => {
                info!(
                    "PATCH /image/faces/{}: embed permanent error ({}); keeping old embedding, bbox still applied",
                    id, e
                );
            }
            Err(FaceDetectError::Transient(e)) => {
                warn!(
                    "PATCH /image/faces/{}: 503 — Apollo face client transient \
                    error during re-embed: {}",
                    id, e
                );
                return HttpResponse::ServiceUnavailable().body(format!("{}", e));
            }
            Err(FaceDetectError::Disabled) => {
                warn!(
                    "PATCH /image/faces/{}: 503 — face client became disabled \
                    mid-flight",
                    id
                );
                return HttpResponse::ServiceUnavailable().body("face client disabled mid-flight");
            }
        }
    }
    // Persist the patch (person and/or bbox, plus the fresh embedding
    // when the re-embed above succeeded).
    let mut dao = face_dao.lock().expect("face dao lock");
    let row = match dao.update_face(&span_context, id, person_patch, bbox_patch, new_embedding) {
        Ok(r) => r,
        Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
    };
    // Hydrate person_name so the response shape matches GET /image/faces
    // — the carousel overlay does an optimistic replace on this row, and
    // a bare FaceDetectionRow with no person_name would visibly drop the
    // VFD label off the bbox even though the assignment didn't change.
    match hydrate_face_with_person(&mut *dao, &span_context, row) {
        Ok(joined) => HttpResponse::Ok().json(joined),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
/// Delete a single face row. 204 when a row was removed, 404 when the
/// id didn't exist.
async fn delete_face_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("faces.delete", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let face_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.delete_face(&span_ctx, face_id) {
        Ok(removed) => {
            if removed {
                HttpResponse::NoContent().finish()
            } else {
                HttpResponse::NotFound().finish()
            }
        }
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
// ── Persons ─────────────────────────────────────────────────────────────────
/// List persons, optionally scoped to a library and optionally
/// including ignored entries.
async fn list_persons_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    app_state: web::Data<AppState>,
    query: web::Query<ListPersonsQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.list", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    // An absent or unresolvable ?library= means "across all libraries".
    let library_filter = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|lib| lib.id);
    let mut dao = face_dao.lock().expect("face dao lock");
    dao.list_persons(&span_ctx, library_filter, query.include_ignored)
        .map(|persons| HttpResponse::Ok().json(persons))
        .into_http_internal_err()
}
/// Fetch (creating on first use) the singleton "ignored" person bucket.
async fn ignore_bucket_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.ignore_bucket", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let mut dao = face_dao.lock().expect("face dao lock");
    dao.get_or_create_ignored_person(&span_ctx)
        .map(|bucket| HttpResponse::Ok().json(bucket))
        .into_http_internal_err()
}
/// Create a person. 400 on a blank name, 409 on a case-insensitive
/// name collision, 201 with the new row otherwise.
async fn create_person_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    body: web::Json<CreatePersonReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.create", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    // Reject whitespace-only names before touching the DB.
    if body.name.trim().is_empty() {
        return HttpResponse::BadRequest().body("name required");
    }
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.create_person(&span_ctx, &body, /*from_tag*/ false) {
        Ok(person) => HttpResponse::Created().json(person),
        // SQLite UNIQUE(name COLLATE NOCASE) → 409 Conflict so the UI
        // can show "name already exists" without parsing the body.
        Err(e) if is_unique_violation(&e) => {
            HttpResponse::Conflict().body("person name already exists")
        }
        // {:#} walks anyhow's source chain — plain Display only shows
        // the outermost context ("insert person ...") and would hide
        // the underlying diesel detail.
        Err(e) => HttpResponse::InternalServerError().body(format!("{:#}", e)),
    }
}
/// Fetch one person by id; 404 when absent.
async fn get_person_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.get", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let person_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.get_person(&span_ctx, person_id) {
        Ok(Some(person)) => HttpResponse::Ok().json(person),
        Ok(None) => HttpResponse::NotFound().finish(),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
/// Patch a person's fields. 409 when a rename collides with an
/// existing name (case-insensitive).
async fn update_person_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    body: web::Json<UpdatePersonReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.update", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let person_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.update_person(&span_ctx, person_id, &body) {
        Ok(person) => HttpResponse::Ok().json(person),
        // Case-insensitive UNIQUE on the name → 409 so the UI can show
        // "name already exists" without parsing the body.
        Err(e) if is_unique_violation(&e) => {
            HttpResponse::Conflict().body("person name already exists")
        }
        // {:#} keeps the diesel detail in the 500 body.
        Err(e) => HttpResponse::InternalServerError().body(format!("{:#}", e)),
    }
}
/// Delete a person. Only an explicit `?cascade=delete` removes the
/// face rows too; the default detaches them (set_null) so face
/// history survives the identity being renamed/removed.
async fn delete_person_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    query: web::Query<DeletePersonQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.delete", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let cascade = query.cascade.as_deref() == Some("delete");
    let person_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.delete_person(&span_ctx, person_id, cascade) {
        Ok(deleted) if deleted => HttpResponse::NoContent().finish(),
        Ok(_) => HttpResponse::NotFound().finish(),
        Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
    }
}
/// Merge the person in the path into `body.into`.
async fn merge_persons_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    body: web::Json<MergePersonsReq>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.merge", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    let source_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    match dao.merge_persons(&span_ctx, source_id, body.into) {
        Ok(person) => HttpResponse::Ok().json(person),
        Err(e) => {
            // A self-merge error message mentions "itself" and is the
            // caller's fault (400); anything else is ours (500).
            let msg = format!("{:#}", e);
            if msg.contains("itself") {
                HttpResponse::BadRequest().body(msg)
            } else {
                HttpResponse::InternalServerError().body(msg)
            }
        }
    }
}
/// List every face assigned to a person, optionally scoped to one
/// library.
async fn person_faces_handler<D: FaceDao>(
    _: Claims,
    request: HttpRequest,
    path: web::Path<i32>,
    app_state: web::Data<AppState>,
    query: web::Query<LibraryQuery>,
    face_dao: web::Data<Mutex<D>>,
) -> impl Responder {
    let parent = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.faces", &parent);
    let span_ctx = opentelemetry::Context::current_with_span(span);
    // An absent or unresolvable ?library= means "across all libraries".
    let library_filter = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|lib| lib.id);
    let person_id = path.into_inner();
    let mut dao = face_dao.lock().expect("face dao lock");
    dao.list_for_person(&span_ctx, person_id, library_filter)
        .map(|faces| HttpResponse::Ok().json(faces))
        .into_http_internal_err()
}
// ── Helpers ─────────────────────────────────────────────────────────────────
/// Crop `abs_path` to the normalized bbox and re-encode as JPEG for the
/// face service. `image::open` decodes most photo formats Apollo will see;
/// HEIC/RAW are out of scope for the manual flow (the user can't draw a
/// face on a thumbnail of a non-decodable file anyway).
///
/// `nx`/`ny` are the bbox origin and `nw`/`nh` its size, normalized to
/// `[0, 1]` in display (post-EXIF-rotation) space.
///
/// # Errors
/// Fails when the bbox is out of range or degenerate, the file cannot
/// be decoded, the crop collapses to zero pixels, or JPEG encoding
/// fails.
fn crop_image_to_bbox(
    abs_path: &std::path::Path,
    nx: f32,
    ny: f32,
    nw: f32,
    nh: f32,
) -> anyhow::Result<Vec<u8>> {
    if !(0.0..=1.0).contains(&nx) || !(0.0..=1.0).contains(&ny) {
        return Err(anyhow!("bbox xy out of [0,1]"));
    }
    // The 1.001 tolerance absorbs client-side float rounding at the
    // right/bottom edges.
    if nw <= 0.0 || nh <= 0.0 || nx + nw > 1.001 || ny + nh > 1.001 {
        return Err(anyhow!("bbox wh out of bounds or zero"));
    }
    let raw = image::open(abs_path).with_context(|| format!("open {:?}", abs_path))?;
    // EXIF rotation: the bbox arrives in display space (the carousel /
    // overlay are rendered post-rotation by the browser), but the
    // `image` crate hands us raw pre-rotation pixels. For any phone
    // photo with Orientation 6/8/etc., applying the bbox without
    // rotating first lands the crop on a completely different region
    // of the image — which is why manually-drawn bboxes basically
    // never resolved a face on re-detection. Apply the orientation
    // first, then index into the canonical-oriented dims. Photos with
    // no EXIF rotation tag pay nothing (apply_orientation is a no-op).
    let orientation = exif::read_orientation(abs_path).unwrap_or(1);
    let img = exif::apply_orientation(raw, orientation);
    let (w, h) = img.dimensions();
    let px = (nx * w as f32).round().clamp(0.0, w as f32 - 1.0) as u32;
    let py = (ny * h as f32).round().clamp(0.0, h as f32 - 1.0) as u32;
    let pw = ((nw * w as f32).round() as u32).min(w.saturating_sub(px));
    let ph = ((nh * h as f32).round() as u32).min(h.saturating_sub(py));
    if pw == 0 || ph == 0 {
        return Err(anyhow!("crop produced zero-dim image"));
    }
    // Generous padding so RetinaFace has anchor-friendly context.
    // Insightface internally resizes to det_size=640 (square). A
    // tightly-drawn 200×250 face bbox + 10 % padding becomes ~240×300,
    // which after resize fills ~95 % of the input — near the upper
    // edge of RetinaFace's anchor scales, where it routinely returns
    // zero detections. Padding to 50 % on each side makes the crop
    // 2× the bbox dims (face occupies ~50 % of the input), where
    // anchors hit cleanly. Bbox is clamped to image bounds, so
    // edge-of-image bboxes just get less padding on the clipped side.
    let pad_x = (pw / 2).max(1);
    let pad_y = (ph / 2).max(1);
    let cx = px.saturating_sub(pad_x);
    let cy = py.saturating_sub(pad_y);
    let cw = (pw + 2 * pad_x).min(w - cx);
    let ch = (ph + 2 * pad_y).min(h - cy);
    let cropped = img.crop_imm(cx, cy, cw, ch);
    // JPEG has no alpha channel, and the `image` crate's JPEG encoder
    // rejects buffers it can't represent (e.g. RGBA8 from a PNG
    // screenshot, or 16-bit samples) with an "unsupported color type"
    // error instead of converting — which would fail the whole
    // manual-tag flow at the very last step. Flatten anything that
    // isn't already RGB8/L8 down to RGB8; plain photos pass through
    // without an extra copy.
    let jpeg_ready = if matches!(
        &cropped,
        image::DynamicImage::ImageRgb8(_) | image::DynamicImage::ImageLuma8(_)
    ) {
        cropped
    } else {
        image::DynamicImage::ImageRgb8(cropped.to_rgb8())
    };
    let mut out = std::io::Cursor::new(Vec::new());
    jpeg_ready
        .write_to(&mut out, image::ImageFormat::Jpeg)
        .with_context(|| "encode crop as JPEG")?;
    Ok(out.into_inner())
}
/// True when `err` — or any error beneath it in the source chain — is
/// a SQLite `UNIQUE constraint failed`.
///
/// The whole chain is walked because anyhow `with_context` plus
/// diesel's own error layering buries the database error a couple of
/// levels deep, so callers don't need to know the wrapping order.
///
/// We downcast to the concrete diesel kind rather than string-match
/// `format!("{:#}", e)`: message matching would be fragile
/// (locale-dependent SQLite text, false positives like "uniquely
/// identifies"); the downcast is the contract-stable check.
fn is_unique_violation(err: &anyhow::Error) -> bool {
    use diesel::result::{DatabaseErrorKind, Error as DieselError};
    for cause in err.chain() {
        if let Some(DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, _)) =
            cause.downcast_ref::<DieselError>()
        {
            return true;
        }
    }
    false
}
// ── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use crate::database::test::in_memory_db_connection;
    /// Build a `SqliteFaceDao` over a brand-new in-memory SQLite
    /// connection so every test runs against isolated state.
    fn fresh_dao() -> SqliteFaceDao {
        SqliteFaceDao::from_connection(Arc::new(Mutex::new(in_memory_db_connection())))
    }
    /// Shorthand for the ambient OpenTelemetry context — the DAO API
    /// takes a context parameter but these tests never inspect spans.
    fn ctx() -> opentelemetry::Context {
        opentelemetry::Context::current()
    }
#[test]
fn is_unique_violation_walks_chain() {
// The bug we hit in manual testing: anyhow's plain Display only
// shows the outermost context ("insert person Cameron"), so a
// naive `format!("{}", e).contains("unique")` check misses the
// diesel UNIQUE error nested below. Downcasting the source chain
// is the stable contract.
let mut dao = fresh_dao();
let _ = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Cameron".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.expect("first insert");
let dup_err = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Cameron".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.expect_err("second insert must fail");
// Plain Display hides the UNIQUE — that's the bug we're guarding
// against. We don't assert a specific outer message; we just
// confirm string-matching at the top level is unreliable.
let plain = format!("{}", dup_err);
assert!(
!plain.to_lowercase().contains("unique"),
"if Display starts surfacing UNIQUE we can drop the helper, but \
today it doesn't and the handler must downcast"
);
// Alt-Display walks the chain — useful for debug body content too.
let chained = format!("{:#}", dup_err);
assert!(
chained.to_uppercase().contains("UNIQUE"),
"chained display must surface the diesel error: {chained}"
);
// The contract-stable check the handler actually uses.
assert!(
is_unique_violation(&dup_err),
"is_unique_violation must downcast into the diesel chain"
);
}
// ── Phase 4: bootstrap heuristic + cosine + DAO support ─────────────
#[test]
fn is_plausible_name_token_filters_short_and_emoji() {
// Hard filter applied before grouping — emojis and tags shorter
// than 3 chars never make it into the candidate list, regardless
// of looks_like_person's later assessment.
assert!(is_plausible_name_token("Cameron"));
assert!(is_plausible_name_token("Sarah Smith"));
assert!(is_plausible_name_token("O'Brien"));
assert!(is_plausible_name_token("Jean-Luc"));
assert!(is_plausible_name_token("St. James"));
assert!(is_plausible_name_token("Renée"));
assert!(is_plausible_name_token("José"));
// Asian script names — the alphabetic/letter check covers any
// script, not just Latin.
assert!(is_plausible_name_token("田中太郎"));
// Below the 3-character floor.
assert!(!is_plausible_name_token(""));
assert!(!is_plausible_name_token(" "));
assert!(!is_plausible_name_token("Bo"));
assert!(!is_plausible_name_token("AB"));
// Trim before counting — surrounding whitespace doesn't count.
assert!(!is_plausible_name_token(" AB "));
// Emoji / symbol classes get the whole tag dropped.
assert!(!is_plausible_name_token("🐱cat"));
assert!(!is_plausible_name_token("Heart ❤"));
assert!(!is_plausible_name_token("📸Photo"));
assert!(!is_plausible_name_token("→ Trip"));
assert!(!is_plausible_name_token("★Vacation"));
// Digits are kept (handled by looks_like_person, not here).
assert!(is_plausible_name_token("Trip 2018"));
assert!(is_plausible_name_token("2024"));
}
#[test]
fn looks_like_person_accepts_typical_names() {
assert!(looks_like_person("Cameron"));
assert!(looks_like_person("Sarah Smith"));
assert!(looks_like_person("Mary Jane"));
// Non-ASCII title-cased single word still counts.
assert!(looks_like_person("Renée"));
}
#[test]
fn looks_like_person_rejects_obvious_non_people() {
// Digits, lowercase, three-or-more words, denylist hits.
assert!(!looks_like_person("2018"));
assert!(!looks_like_person("Trip 2018"));
assert!(!looks_like_person("trip"));
assert!(!looks_like_person("Birthday Party Cake"));
assert!(!looks_like_person("cat"));
assert!(!looks_like_person("Cat")); // denied even when title-cased
assert!(!looks_like_person("Christmas"));
assert!(!looks_like_person("home"));
assert!(!looks_like_person(""));
assert!(!looks_like_person(" "));
}
#[test]
fn looks_like_person_two_words_skips_denylist() {
// Two-word names get a pass on the single-word denylist —
// "Sunset Walk" is much more likely a real album than a person,
// but false-accepting is fine because the operator confirms.
// What matters is we don't false-reject "Sarah Smith".
assert!(looks_like_person("Sunset Walk"));
assert!(looks_like_person("Sarah Smith"));
}
#[test]
fn cosine_similarity_known_vectors() {
// Identical vectors → 1.0; orthogonal → 0.0; opposite → -1.0.
let a = vec![1.0, 0.0, 0.0];
let b = vec![1.0, 0.0, 0.0];
let c = vec![0.0, 1.0, 0.0];
let d = vec![-1.0, 0.0, 0.0];
assert!((cosine_similarity(&a, &b) - 1.0).abs() < 1e-6);
assert!(cosine_similarity(&a, &c).abs() < 1e-6);
assert!((cosine_similarity(&a, &d) - (-1.0)).abs() < 1e-6);
// Mismatched length → 0.0 (defensive, not NaN).
assert_eq!(cosine_similarity(&a, &[1.0, 0.0]), 0.0);
// Empty input → 0.0.
assert_eq!(cosine_similarity(&[], &[]), 0.0);
// Zero vector → 0.0 (denominator guard, not NaN).
let zero = vec![0.0, 0.0, 0.0];
assert_eq!(cosine_similarity(&a, &zero), 0.0);
}
#[test]
fn decode_embedding_bytes_round_trip() {
// 512×f32 LE = 2048 bytes. Anything else returns None.
let v: Vec<f32> = (0..512).map(|i| i as f32 * 0.001).collect();
let mut bytes = Vec::with_capacity(2048);
for f in &v {
bytes.extend_from_slice(&f.to_le_bytes());
}
let decoded = decode_embedding_bytes(&bytes).expect("decode");
assert_eq!(decoded.len(), 512);
for (a, b) in v.iter().zip(decoded.iter()) {
assert!((a - b).abs() < 1e-9);
}
assert_eq!(decode_embedding_bytes(&[0u8; 100]), None);
assert_eq!(decode_embedding_bytes(&[0u8; 4096]), None);
}
#[test]
fn find_persons_by_names_ci_groups_case() {
let mut dao = fresh_dao();
let _ = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
let _ = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Bob".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
// Mix of cases + a name that has no person row.
let m = dao
.find_persons_by_names_ci(&ctx(), &["alice".into(), "BOB".into(), "charlie".into()])
.expect("lookup");
assert!(m.contains_key("alice"));
assert!(m.contains_key("bob"));
assert!(!m.contains_key("charlie"));
// Empty input is a no-op (don't fire a SQL with zero binds).
assert!(
dao.find_persons_by_names_ci(&ctx(), &[])
.unwrap()
.is_empty()
);
}
#[test]
fn person_reference_embedding_filters_by_model_version() {
// A person with embeddings from buffalo_l shouldn't have its
// reference contaminated by a future buffalo_xl row. The auto-
// bind path passes the candidate's model_version so old rows
// never reach the average.
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
let p = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Subject".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
// 512-d unit vector along axis 0, written for buffalo_l.
let mut emb_l: Vec<f32> = vec![0.0; 512];
emb_l[0] = 1.0;
let mut emb_l_bytes = Vec::with_capacity(2048);
for f in &emb_l {
emb_l_bytes.extend_from_slice(&f.to_le_bytes());
}
// 512-d unit vector along axis 1, written for some-other model.
let mut emb_xl: Vec<f32> = vec![0.0; 512];
emb_xl[1] = 1.0;
let mut emb_xl_bytes = Vec::with_capacity(2048);
for f in &emb_xl {
emb_xl_bytes.extend_from_slice(&f.to_le_bytes());
}
for (bytes, mv) in [(emb_l_bytes, "buffalo_l"), (emb_xl_bytes, "buffalo_xl")] {
let _ = dao
.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: format!("h-{mv}"),
rel_path: format!("p-{mv}.jpg"),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(bytes),
confidence: Some(0.9),
source: "auto".into(),
person_id: Some(p.id),
status: "detected".into(),
model_version: mv.into(),
},
)
.unwrap();
}
let ref_l = dao
.person_reference_embedding(&ctx(), p.id, "buffalo_l")
.unwrap()
.expect("buffalo_l ref");
// Reference for buffalo_l should match emb_l (axis-0 unit).
assert!((ref_l[0] - 1.0).abs() < 1e-5, "axis 0 should be ~1.0");
assert!(ref_l[1].abs() < 1e-5, "axis 1 should be ~0.0");
// Unknown model_version → None, not a cross-version average.
assert!(
dao.person_reference_embedding(&ctx(), p.id, "buffalo_xxxl")
.unwrap()
.is_none()
);
}
#[test]
fn assign_face_to_person_sets_cover_when_unset() {
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
let p = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Cover".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
assert!(p.cover_face_id.is_none());
// Insert two faces unbound.
let face1 = dao
.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: "h1".into(),
rel_path: "p1.jpg".into(),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "auto".into(),
person_id: None,
status: "detected".into(),
model_version: "buffalo_l".into(),
},
)
.unwrap();
let face2 = dao
.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: "h2".into(),
rel_path: "p2.jpg".into(),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "auto".into(),
person_id: None,
status: "detected".into(),
model_version: "buffalo_l".into(),
},
)
.unwrap();
// First assignment claims the cover.
dao.assign_face_to_person(&ctx(), face1.id, p.id).unwrap();
let p_after_first = dao.get_person(&ctx(), p.id).unwrap().unwrap();
assert_eq!(p_after_first.cover_face_id, Some(face1.id));
// Second assignment must NOT overwrite — operator may have
// hand-picked the cover after the first auto-bind.
dao.assign_face_to_person(&ctx(), face2.id, p.id).unwrap();
let p_after_second = dao.get_person(&ctx(), p.id).unwrap().unwrap();
assert_eq!(
p_after_second.cover_face_id,
Some(face1.id),
"cover must remain face1 after second auto-bind"
);
}
#[test]
fn person_crud_roundtrip() {
let mut dao = fresh_dao();
let p = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: Some("the boss".into()),
entity_id: None,
is_ignored: false,
},
false,
)
.expect("create person");
assert_eq!(p.name, "Alice");
assert_eq!(p.notes.as_deref(), Some("the boss"));
assert!(!p.created_from_tag);
// Case-insensitive uniqueness — second create with same name in
// different case must fail with a UNIQUE violation, surfacing
// as 409 Conflict at the handler layer.
let dup = dao.create_person(
&ctx(),
&CreatePersonReq {
name: "alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
);
assert!(dup.is_err(), "case-insensitive UNIQUE must reject 'alice'");
// Update notes; verify updated_at moves forward.
let prev_updated = p.updated_at;
std::thread::sleep(std::time::Duration::from_millis(1100)); // boundary cross
let updated = dao
.update_person(
&ctx(),
p.id,
&UpdatePersonReq {
name: None,
notes: Some("a new note".into()),
cover_face_id: None,
entity_id: None,
is_ignored: None,
},
)
.expect("update");
assert_eq!(updated.notes.as_deref(), Some("a new note"));
assert!(updated.updated_at >= prev_updated);
// List + delete.
let listed = dao.list_persons(&ctx(), None, false).expect("list");
assert_eq!(listed.len(), 1);
assert_eq!(listed[0].face_count, 0);
assert!(dao.delete_person(&ctx(), p.id, false).expect("delete"));
assert!(
dao.list_persons(&ctx(), None, false)
.expect("list")
.is_empty()
);
}
#[test]
fn ignore_bucket_idempotent_and_filters_auto_bind() {
// First call creates the bucket; second returns the same row.
// Once it exists, find_persons_by_names_ci must skip it even if
// the search term matches its name — the auto-bind path must
// NEVER target the IGNORE/junk bucket.
let mut dao = fresh_dao();
let first = dao
.get_or_create_ignored_person(&ctx())
.expect("create bucket");
assert!(first.is_ignored);
let second = dao
.get_or_create_ignored_person(&ctx())
.expect("re-fetch bucket");
assert_eq!(first.id, second.id, "bucket must be idempotent");
// Searching by the bucket's name must return nothing — the
// auto-bind look-up filters is_ignored=true.
let m = dao
.find_persons_by_names_ci(&ctx(), &["ignored".into()])
.expect("name lookup");
assert!(
!m.contains_key("ignored"),
"find_persons_by_names_ci must skip the ignore bucket: {m:?}"
);
// Default list_persons hides it; include_ignored=true surfaces it.
let visible = dao.list_persons(&ctx(), None, false).expect("list");
assert!(visible.iter().all(|p| !p.is_ignored));
let all = dao.list_persons(&ctx(), None, true).expect("list all");
assert!(all.iter().any(|p| p.is_ignored && p.id == first.id));
}
#[test]
fn marker_rows_idempotent() {
let mut dao = fresh_dao();
// Need a libraries row to satisfy face_detections.library_id FK
// without DEFERRED — SQLite enforces FK immediately by default.
// The :memory: DB already has the libraries seed via
// seed_or_patch_from_env? No — in_memory_db_connection just runs
// migrations; the libraries seed is a runtime path. Insert one
// manually for the test.
// Migrations may seed libraries(id=1); INSERT OR IGNORE keeps the
// test runnable either way.
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
// Marker insert.
dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l")
.expect("first mark");
assert!(
dao.already_scanned(&ctx(), "abc123").expect("scan"),
"already_scanned should report true after marker"
);
// Second mark for the same hash is a no-op (the partial UNIQUE
// index would otherwise reject; the DAO short-circuits before the
// insert).
dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l")
.expect("second mark idempotent");
// Stats reflect the no_faces marker.
let stats = dao.stats(&ctx(), Some(1)).expect("stats");
assert_eq!(stats.no_faces, 1);
assert_eq!(stats.scanned, 1);
assert_eq!(stats.with_faces, 0);
}
#[test]
fn stats_total_photos_excludes_videos() {
// SCANNED counts content_hashes in face_detections; total_photos
// must apply the same image-extension filter as the watcher
// backlog query so the percentage can reach 100%. Without this,
// videos sit in image_exif but never produce a face_detections
// row (Apollo decodes images only) and the bar caps below 100%.
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
diesel::sql_query(
"INSERT INTO image_exif \
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
(1, 'a.jpg', 'h-a', 0, 0), \
(1, 'b.JPEG', 'h-b', 0, 0), \
(1, 'movie.mp4', 'h-mp4', 0, 0), \
(1, 'clip.MOV', 'h-mov', 0, 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed image_exif");
let stats = dao.stats(&ctx(), Some(1)).expect("stats");
assert_eq!(
stats.total_photos, 2,
"videos should not count toward total"
);
}
#[test]
fn merge_persons_repoints_faces() {
let mut dao = fresh_dao();
// Migrations may seed libraries(id=1); INSERT OR IGNORE keeps the
// test runnable either way.
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
let alice = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
let alyse = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alyse".into(),
notes: Some("dup of alice".into()),
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
// Insert a detected face row owned by `alyse`.
let _ = dao
.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: "h1".into(),
rel_path: "p1.jpg".into(),
bbox: Some((0.1, 0.1, 0.2, 0.2)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "auto".into(),
person_id: Some(alyse.id),
status: "detected".into(),
model_version: "buffalo_l".into(),
},
)
.unwrap();
// Merge alyse → alice. Notes from src copy when target empty.
let merged = dao.merge_persons(&ctx(), alyse.id, alice.id).unwrap();
assert_eq!(merged.id, alice.id);
assert_eq!(merged.notes.as_deref(), Some("dup of alice"));
// alyse is gone.
assert!(dao.get_person(&ctx(), alyse.id).unwrap().is_none());
// The face is now alice's.
let faces = dao.list_for_person(&ctx(), alice.id, Some(1)).unwrap();
assert_eq!(faces.len(), 1);
assert_eq!(faces[0].person_id, Some(alice.id));
}
// ── crop_image_to_bbox ──────────────────────────────────────────────
// Pure helper used by the manual face-create handler. Generate a tiny
// image in memory, write it to a temp file, then exercise the bbox
// validation + crop math.
fn write_solid_image(w: u32, h: u32) -> tempfile::NamedTempFile {
let mut img = image::RgbImage::new(w, h);
for p in img.pixels_mut() {
*p = image::Rgb([200, 200, 200]);
}
let f = tempfile::Builder::new()
.suffix(".jpg")
.tempfile()
.expect("tempfile");
image::DynamicImage::ImageRgb8(img)
.save(f.path())
.expect("save jpg");
f
}
#[test]
fn crop_rejects_invalid_bbox() {
let f = write_solid_image(64, 64);
// x out of [0,1]
assert!(crop_image_to_bbox(f.path(), -0.1, 0.0, 0.5, 0.5).is_err());
assert!(crop_image_to_bbox(f.path(), 1.5, 0.0, 0.5, 0.5).is_err());
// zero / negative dimensions
assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.0, 0.5).is_err());
assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.5, -0.1).is_err());
// overflows the image
assert!(crop_image_to_bbox(f.path(), 0.7, 0.0, 0.5, 0.5).is_err());
}
#[test]
fn crop_returns_decodable_jpeg() {
let f = write_solid_image(200, 200);
let bytes = crop_image_to_bbox(f.path(), 0.25, 0.25, 0.5, 0.5).expect("center crop");
// Re-decode to confirm the pipeline produced a valid JPEG. Exact
// dimensions depend on the 10% padding clamp, so just assert
// sanity bounds rather than pinning numbers (padding math can
// legitimately drift if we tweak the heuristic later).
let img = image::load_from_memory(&bytes).expect("decode crop");
let (w, h) = (img.width(), img.height());
assert!((80..=200).contains(&w), "unexpected crop width: {w}");
assert!((80..=200).contains(&h), "unexpected crop height: {h}");
}
#[test]
fn crop_padding_clamps_to_image_bounds() {
// A bbox right at the corner should pad inward as far as it can,
// never outside the image — otherwise we'd pass invalid coords
// to the embedding service.
let f = write_solid_image(100, 100);
let bytes = crop_image_to_bbox(f.path(), 0.9, 0.9, 0.1, 0.1).expect("corner crop");
let img = image::load_from_memory(&bytes).expect("decode corner crop");
// Padded crop must fit within the source's 100x100.
assert!(img.width() <= 100);
assert!(img.height() <= 100);
assert!(img.width() > 0 && img.height() > 0);
}
// ── hydrate_face_with_person — PATCH/POST /image/faces response shape ──
fn seed_library_and_face(dao: &mut SqliteFaceDao, person_id: Option<i32>) -> FaceDetectionRow {
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
dao.store_detection(
&ctx(),
InsertFaceDetectionInput {
library_id: 1,
content_hash: "h-hydrate".into(),
rel_path: "p.jpg".into(),
bbox: Some((0.1, 0.2, 0.3, 0.4)),
embedding: Some(vec![0u8; 2048]),
confidence: Some(0.9),
source: "manual".into(),
person_id,
status: "detected".into(),
model_version: "buffalo_l".into(),
},
)
.unwrap()
}
#[test]
fn hydrate_face_carries_person_name_when_assigned() {
// Regression guard for the bug where PATCH /image/faces/{id}
// returned a bare FaceDetectionRow (no person_name), causing
// the carousel overlay's optimistic replace to drop the VFD
// label off the bbox after every save. The handler hydrates
// via this helper; if anyone refactors the helper to skip the
// persons join, this test fails.
let mut dao = fresh_dao();
let p = dao
.create_person(
&ctx(),
&CreatePersonReq {
name: "Alice".into(),
notes: None,
entity_id: None,
is_ignored: false,
},
false,
)
.unwrap();
let row = seed_library_and_face(&mut dao, Some(p.id));
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned");
assert_eq!(joined.person_id, Some(p.id));
assert_eq!(joined.person_name.as_deref(), Some("Alice"));
// Bbox + confidence + source must round-trip — these are what
// the optimistic-replace also keys on.
assert!((joined.bbox_x - 0.1).abs() < 1e-6);
assert!((joined.bbox_y - 0.2).abs() < 1e-6);
assert!((joined.bbox_w - 0.3).abs() < 1e-6);
assert!((joined.bbox_h - 0.4).abs() < 1e-6);
assert_eq!(joined.source, "manual");
}
#[test]
fn hydrate_face_leaves_person_name_null_when_unassigned() {
// Mirror branch: an unassigned face must hydrate cleanly with
// person_name = None, not a stale value left over from a
// previously-assigned row's serialization.
let mut dao = fresh_dao();
let row = seed_library_and_face(&mut dao, None);
let joined = hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned");
assert!(joined.person_id.is_none());
assert!(joined.person_name.is_none());
}
#[test]
fn list_unscanned_candidates_filters_to_hashed_unscanned_in_library() {
// The watcher's per-tick backlog drain depends on this query
// returning *only* image_exif rows with a populated
// content_hash and no matching face_detections row in the
// requested library. A regression here would either silently
// re-scan files (waste of inference) or skip files that need
// scanning (the symptom we just shipped a fix for).
let mut dao = fresh_dao();
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (1, 'main', '/tmp', 0), (2, 'other', '/tmp2', 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed libraries");
// Seed image_exif: mix of hashed/unhashed/scanned/cross-library,
// plus a video and a mixed-case image extension. Videos register
// in image_exif but can never produce a face_detections row, so
// the SQL must filter them out — otherwise the per-tick backlog
// drain re-pulls them every tick (no marker is ever written, so
// they loop forever) and the SCANNED stat is permanently capped.
diesel::sql_query(
"INSERT INTO image_exif \
(library_id, rel_path, content_hash, created_time, last_modified) VALUES \
(1, 'a.jpg', 'h-a', 0, 0), \
(1, 'b.jpg', 'h-b', 0, 0), \
(1, 'c.jpg', NULL, 0, 0), \
(1, 'd.jpg', 'h-d', 0, 0), \
(1, 'movie.mp4', 'h-mp4', 0, 0), \
(1, 'clip.MOV', 'h-mov', 0, 0), \
(1, 'photo.JPG', 'h-jpg-upper', 0, 0), \
(2, 'e.jpg', 'h-e', 0, 0)",
)
.execute(dao.connection.lock().unwrap().deref_mut())
.expect("seed image_exif");
// 'b' has been scanned (no_faces marker) — expect it filtered out.
dao.mark_status(&ctx(), 1, "h-b", "b.jpg", "no_faces", "buffalo_l")
.expect("scanned marker");
let cands = dao
.list_unscanned_candidates(&ctx(), 1, 10)
.expect("list unscanned");
let hashes: std::collections::HashSet<_> = cands.iter().map(|(_, h)| h.clone()).collect();
// Should contain a, d, and the upper-case .JPG (image-extension
// match is case-insensitive).
assert!(hashes.contains("h-a"), "missing h-a: {:?}", hashes);
assert!(hashes.contains("h-d"), "missing h-d: {:?}", hashes);
assert!(
hashes.contains("h-jpg-upper"),
"missing h-jpg-upper: {:?}",
hashes
);
// Should NOT contain b (scanned), c (no hash), e (other library),
// or videos (mp4/mov are not image extensions).
assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)");
assert!(
!hashes.contains("h-e"),
"expected h-e filtered (other library)"
);
assert!(!hashes.contains("h-mp4"), "expected h-mp4 filtered (video)");
assert!(!hashes.contains("h-mov"), "expected h-mov filtered (video)");
assert_eq!(cands.len(), 3, "unexpected candidates: {:?}", cands);
}
}