diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b520940 --- /dev/null +++ b/.env.example @@ -0,0 +1,85 @@ +# ImageApi configuration template. Copy to `.env` and fill in for your +# deploy. Comments mirror the canonical docs in CLAUDE.md — see there +# for the full picture (especially the AI-Insights / Apollo / face +# integration sections). + +# ── Required ──────────────────────────────────────────────────────────── +DATABASE_URL=./database.db +BASE_PATH=/path/to/media +THUMBNAILS=/path/to/thumbnails +VIDEO_PATH=/path/to/video/hls +GIFS_DIRECTORY=/path/to/gifs +PREVIEW_CLIPS_DIRECTORY=/path/to/preview-clips +BIND_URL=0.0.0.0:8080 +CORS_ALLOWED_ORIGINS=http://localhost:3000 +SECRET_KEY=replace-me-with-a-long-random-secret +RUST_LOG=info + +# ── File watching ─────────────────────────────────────────────────────── +# Quick scan = recently-modified-files only; full scan = comprehensive walk. +WATCH_QUICK_INTERVAL_SECONDS=60 +WATCH_FULL_INTERVAL_SECONDS=3600 +# Comma-separated path prefixes / component names to skip in /memories +# AND in face detection (e.g. @eaDir, .thumbnails, /private). +EXCLUDED_DIRS= + +# ── Video / HLS ───────────────────────────────────────────────────────── +HLS_CONCURRENCY=2 +HLS_TIMEOUT_SECONDS=900 +PLAYLIST_CLEANUP_INTERVAL_SECONDS=86400 + +# ── Telemetry (release builds only) ───────────────────────────────────── +# OTLP_OTLS_ENDPOINT=http://localhost:4317 + +# ── AI Insights — Ollama (local LLM) ──────────────────────────────────── +OLLAMA_PRIMARY_URL=http://localhost:11434 +OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b +# Optional fallback server tried on connection failure. +# OLLAMA_FALLBACK_URL=http://server:11434 +# OLLAMA_FALLBACK_MODEL=llama3.2:3b +OLLAMA_REQUEST_TIMEOUT_SECONDS=120 +# Cap on tool-calling iterations per chat turn / agentic insight. 
+AGENTIC_MAX_ITERATIONS=6 +AGENTIC_CHAT_MAX_ITERATIONS=6 + +# ── AI Insights — OpenRouter (hybrid backend, optional) ───────────────── +# Set OPENROUTER_API_KEY to enable the hybrid backend (vision stays +# local on Ollama, chat routes to OpenRouter). +# OPENROUTER_API_KEY=sk-or-... +# OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4 +# OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash +# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 +# OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small +# OPENROUTER_HTTP_REFERER=https://your-site.example +# OPENROUTER_APP_TITLE=ImageApi + +# ── AI Insights — sibling services (optional) ─────────────────────────── +# Apollo (places + face inference). Single Apollo deploys typically set +# only APOLLO_API_BASE_URL and let the face client fall back to it. +# APOLLO_API_BASE_URL=http://apollo.lan:8000 +# APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000 +# SMS_API_URL=http://localhost:8000 +# SMS_API_TOKEN= + +# Display name used in agentic prompts when the LLM refers to "you". +USER_NAME= + +# ── Face detection (Phase 3+) ─────────────────────────────────────────── +# Cosine-sim floor for auto-binding a detected face to an existing +# same-named person on detection. 0.4 ≈ moderate-confidence match. +FACE_AUTOBIND_MIN_COS=0.4 +# Per-scan-tick fan-out into Apollo's detect endpoint. Apollo's GPU +# pool serializes server-side; this just overlaps file-IO with +# inference RTT. +FACE_DETECT_CONCURRENCY=8 +# Per-detect HTTP timeout. CPU-only Apollo deploys may need higher. +FACE_DETECT_TIMEOUT_SEC=60 +# Per-tick caps on the two backlog drains (independent of WATCH_* +# quick / full scans). Tune up if you have a large unscanned backlog +# and want it to clear faster; tune down if Apollo is overloaded. 
+FACE_BACKLOG_MAX_PER_TICK=64 +FACE_HASH_BACKFILL_MAX_PER_TICK=2000 + +# ── RAG / search ──────────────────────────────────────────────────────── +# Set to `1` to enable cross-encoder reranking on /search results. +SEARCH_RAG_RERANK=0 diff --git a/CLAUDE.md b/CLAUDE.md index 86515d2..6d4a751 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -210,7 +210,34 @@ Centralized in `file_types.rs` with constants `IMAGE_EXTENSIONS` and `VIDEO_EXTE All database operations and HTTP handlers wrapped in spans. In release builds, exports to OTLP endpoint via `OTLP_OTLS_ENDPOINT`. Debug builds use basic logger. **Memory Exclusion:** -`PathExcluder` in `memories.rs` filters out directories from memories API via `EXCLUDED_DIRS` environment variable (comma-separated paths or substring patterns). +`PathExcluder` in `memories.rs` filters out directories from memories API via `EXCLUDED_DIRS` environment variable (comma-separated paths or substring patterns). The same excluder is applied to face-detection candidates (`face_watch::filter_excluded`) so junk directories like `@eaDir` / `.thumbnails` don't burn detect calls on Apollo. + +### Face detection system + +ImageApi owns the face data; Apollo (sibling repo) hosts the insightface inference service. Inference is triggered automatically by the file watcher and persisted into two tables: + +- `persons(id, name UNIQUE COLLATE NOCASE, cover_face_id, entity_id, created_from_tag, notes, ...)` — operator-managed, name is the user-visible identity. +- `face_detections(id, library_id, content_hash, rel_path, bbox_*, embedding BLOB, confidence, source, person_id, status, model_version, ...)` — keyed on `content_hash` so a photo duplicated across libraries is detected once. Marker rows for `status IN ('no_faces','failed')` carry NULL bbox/embedding (CHECK constraint enforces this). + +**Why content_hash and not (library_id, rel_path):** ties face data to the bytes, not the path. 
A backup mount that copies files from the primary library naturally inherits the existing detections without re-running inference. + +**File-watch hook** (`src/main.rs::process_new_files`): for each photo with a populated `content_hash`, check `FaceDao::already_scanned(hash)`; if not, send bytes (or embedded JPEG preview for RAW via `exif::extract_embedded_jpeg_preview`) to Apollo's `/api/internal/faces/detect`. K=`FACE_DETECT_CONCURRENCY` (default 8) parallel calls per scan tick; Apollo serializes them via its single-worker GPU pool. `face_watch.rs` is the Tokio orchestration layer. + +**Per-tick backlog drain** (also `src/main.rs`): two passes that run on every watcher tick regardless of quick-vs-full scan: +- `backfill_unhashed_backlog` — populates `image_exif.content_hash` for photos that arrived before the hash field was retroactive. Capped by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000); errors don't burn the cap. +- `process_face_backlog` — runs detection on photos that have a hash but no `face_detections` row. Capped by `FACE_BACKLOG_MAX_PER_TICK` (default 64). Selected via a SQL anti-join (`FaceDao::list_unscanned_candidates`); videos and EXCLUDED_DIRS paths filtered out client-side via `face_watch::filter_excluded` so they never reach Apollo. + +**Auto-bind on detection:** when a photo carries a tag whose name matches a `persons.name` (case-insensitive), the new face binds automatically iff cosine similarity to the person's existing-face mean is ≥ `FACE_AUTOBIND_MIN_COS` (default 0.4). Persons with no existing faces bind unconditionally and the new face becomes the cover. 
+ +**Manual face create** (`POST /image/faces`): crops the image to the user-supplied bbox, applies EXIF orientation via `exif::apply_orientation` (the `image` crate hands raw pre-rotation pixels — without this, manually-drawn bboxes never resolved a face on re-detection), pads to ~50% of bbox dims (RetinaFace anchor scales need ~50% face-fill at det_size=640), then calls Apollo's embed endpoint. A `force` flag lets the operator save a face the detector couldn't see (e.g. profile shots, occluded faces) — the row gets a zero-vector embedding so it's manually-bound only and won't participate in clustering. + +**Rerun preserves manual rows** (`POST /image/faces/{id}/rerun`): only `source='auto'` rows are deleted before re-running detection. `already_scanned` returns true on ANY row, so a photo whose only faces are manually drawn never auto-redetects. + +Module map: +- `src/faces.rs` — `FaceDao` trait + `SqliteFaceDao` impl, route handlers for `/faces/*`, `/image/faces/*`, `/persons/*`. Mirror of `tags.rs` layout. +- `src/face_watch.rs` — Tokio orchestration for the file-watch detect pass; `filter_excluded` (PathExcluder + image-extension filter), `read_image_bytes_for_detect` (RAW preview fallback). +- `src/ai/face_client.rs` — HTTP client for Apollo's inference. Configured by `APOLLO_FACE_API_BASE_URL`, falls back to `APOLLO_API_BASE_URL`. Both unset → feature disabled, file-watch hook is a no-op. +- `migrations/2026-04-29-000000_add_faces/` — schema. ### Startup Sequence @@ -286,6 +313,15 @@ SMS_API_TOKEN=your-api-token # SMS API authentication token (o # `get_personal_place_at` tool. Unset = legacy Nominatim-only path. APOLLO_API_BASE_URL=http://apollo.lan:8000 # Base URL of the sibling Apollo backend +# Face inference (optional). Apollo also hosts the insightface inference +# service; ImageApi calls it from the file-watch hook (Phase 3) and from +# the manual face-create endpoint. Falls back to APOLLO_API_BASE_URL when +# unset (typical single-Apollo deploy). 
Both unset = feature disabled. +APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000 # Override if face service runs separately +FACE_AUTOBIND_MIN_COS=0.4 # Phase 3: cosine-sim floor for tag-name auto-bind +FACE_DETECT_CONCURRENCY=8 # Phase 3: per-scan-tick parallel detect calls +FACE_DETECT_TIMEOUT_SEC=60 # reqwest client timeout (CPU inference can be slow) + # OpenRouter (Hybrid Backend) - keeps embeddings + vision local, routes chat to OpenRouter OPENROUTER_API_KEY=sk-or-... # Required to enable hybrid backend OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4 # Used when client doesn't pick a model diff --git a/Cargo.lock b/Cargo.lock index 891a2f9..6f6575b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1913,7 +1913,7 @@ dependencies = [ [[package]] name = "image-api" -version = "1.0.0" +version = "1.1.0" dependencies = [ "actix", "actix-cors", @@ -3229,6 +3229,7 @@ dependencies = [ "js-sys", "log", "mime", + "mime_guess", "native-tls", "percent-encoding", "pin-project-lite", diff --git a/Cargo.toml b/Cargo.toml index 0a25252..2432869 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "image-api" -version = "1.0.0" +version = "1.1.0" authors = ["Cameron Cordes "] edition = "2024" @@ -49,7 +49,7 @@ opentelemetry-appender-log = "0.31.0" tempfile = "3.20.0" regex = "1.11.1" exif = { package = "kamadak-exif", version = "0.6.1" } -reqwest = { version = "0.12", features = ["json", "stream"] } +reqwest = { version = "0.12", features = ["json", "stream", "multipart"] } async-stream = "0.3" tokio-util = { version = "0.7", features = ["io"] } bytes = "1" diff --git a/README.md b/README.md index fceba81..b6d764b 100644 --- a/README.md +++ b/README.md @@ -159,3 +159,34 @@ Daily conversation summaries are generated automatically on server startup. 
Conf - Contacts to process - Model version used for embeddings: `nomic-embed-text:v1.5` +### Apollo + Face Recognition (Optional) + +Apollo (sibling project) hosts both the Places API and the local insightface +inference service. Both integrations are optional and degrade gracefully when +unset. + +- `APOLLO_API_BASE_URL` - Base URL of the sibling Apollo backend. + - When set, photo-insight enrichment folds the user's personal place name + (Home, Work, Cabin, ...) into the location string, and the agentic loop + gains a `get_personal_place_at` tool. Unset = legacy Nominatim-only path. +- `APOLLO_FACE_API_BASE_URL` - Base URL for the face-detection service. + - Falls back to `APOLLO_API_BASE_URL` when unset (typical single-Apollo + deploy). Both unset = face feature disabled (file-watch hook and + manual-face endpoints short-circuit silently). +- `FACE_AUTOBIND_MIN_COS` (Phase 3) - Cosine-sim floor for auto-binding a + detected face to an existing same-named person via people-tag bootstrap + [default: `0.4`]. +- `FACE_DETECT_CONCURRENCY` (Phase 3) - Per-scan-tick concurrent detect + calls fired by the file watcher [default: `8`]. Apollo serializes them + via its single-worker GPU pool. +- `FACE_DETECT_TIMEOUT_SEC` - reqwest client timeout per detect call + [default: `60`]. CPU inference on a backlog can take many seconds. +- `FACE_BACKLOG_MAX_PER_TICK` - Cap on the per-tick backlog drain (photos + with a content_hash but no face_detections row) [default: `64`]. Runs + every watcher tick regardless of quick-vs-full scan, so the unscanned + set drains independently of the file walk. +- `FACE_HASH_BACKFILL_MAX_PER_TICK` - Cap on the per-tick content_hash + backfill (photos that were registered before the hash field was + populated retroactively) [default: `2000`]. Errors don't burn the cap; + only successful hashes count. 
+ diff --git a/migrations/2026-04-29-000000_add_faces/down.sql b/migrations/2026-04-29-000000_add_faces/down.sql new file mode 100644 index 0000000..bae8303 --- /dev/null +++ b/migrations/2026-04-29-000000_add_faces/down.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS face_detections; +DROP TABLE IF EXISTS persons; diff --git a/migrations/2026-04-29-000000_add_faces/up.sql b/migrations/2026-04-29-000000_add_faces/up.sql new file mode 100644 index 0000000..4f4f4c8 --- /dev/null +++ b/migrations/2026-04-29-000000_add_faces/up.sql @@ -0,0 +1,67 @@ +-- Local face recognition tables. +-- +-- `persons` are visual identities (the "who" of a face). The optional +-- `entity_id` bridges to the existing knowledge graph `entities` table — +-- when set, this person is the visual side of an LLM-extracted entity. +-- Don't auto-create entities from persons; the entity table represents +-- LLM-extracted knowledge with its own confidence semantics, and silently +-- filling it from face detections muddies the provenance. +-- +-- `face_detections` carries one row per detected face on a content_hash, +-- plus marker rows with `status='no_faces'` or `status='failed'` so the +-- file watcher knows not to re-scan a hash. Keying on `content_hash` +-- (cross-library dedup) rather than `(library_id, rel_path)` means the +-- same JPEG in two libraries is scanned once. The denormalized `rel_path` +-- carries the most-recently-seen path — useful for cluster-thumb URL +-- generation; canonical path lookup goes through image_exif. 
+ +CREATE TABLE persons ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT NOT NULL, + cover_face_id INTEGER, -- backfilled when the first face binds + entity_id INTEGER, -- optional bridge to entities(id) + created_from_tag BOOLEAN NOT NULL DEFAULT 0, + notes TEXT, + created_at BIGINT NOT NULL, + updated_at BIGINT NOT NULL, + CONSTRAINT fk_persons_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE SET NULL, + UNIQUE(name COLLATE NOCASE) +); + +CREATE INDEX idx_persons_entity ON persons(entity_id); + +CREATE TABLE face_detections ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + library_id INTEGER NOT NULL, + content_hash TEXT NOT NULL, -- canonical key (cross-library dedup) + rel_path TEXT NOT NULL, -- denormalized; most recently seen + bbox_x REAL, -- normalized 0..1; NULL on marker rows + bbox_y REAL, + bbox_w REAL, + bbox_h REAL, + embedding BLOB, -- 512×f32 = 2048 bytes; NULL on marker rows + confidence REAL, -- detector score + source TEXT NOT NULL, -- 'auto' | 'manual' + person_id INTEGER, + status TEXT NOT NULL DEFAULT 'detected', -- 'detected' | 'no_faces' | 'failed' + model_version TEXT NOT NULL, -- e.g. 'buffalo_l'; embedding lineage + created_at BIGINT NOT NULL, + CONSTRAINT fk_fd_library FOREIGN KEY (library_id) REFERENCES libraries(id), + CONSTRAINT fk_fd_person FOREIGN KEY (person_id) REFERENCES persons(id) ON DELETE SET NULL, + -- Detected rows carry geometry + embedding; marker rows ('no_faces', + -- 'failed') carry neither. CHECK enforces the invariant so manual + -- inserts can't slip through with half a row. 
+ CONSTRAINT chk_marker CHECK ( + (status = 'detected' AND bbox_x IS NOT NULL AND embedding IS NOT NULL) + OR (status IN ('no_faces','failed') AND bbox_x IS NULL AND embedding IS NULL) + ) +); + +CREATE INDEX idx_face_detections_hash ON face_detections(content_hash); +CREATE INDEX idx_face_detections_lib_path ON face_detections(library_id, rel_path); +CREATE INDEX idx_face_detections_person ON face_detections(person_id); +CREATE INDEX idx_face_detections_status ON face_detections(status); +-- One marker row per (content_hash, status='no_faces') so the file watcher +-- doesn't double-mark when a hash is seen on multiple full-scan passes. +CREATE UNIQUE INDEX idx_face_detections_no_faces_unique + ON face_detections(content_hash) WHERE status = 'no_faces'; diff --git a/migrations/2026-04-29-000200_add_is_ignored/down.sql b/migrations/2026-04-29-000200_add_is_ignored/down.sql new file mode 100644 index 0000000..41f7c00 --- /dev/null +++ b/migrations/2026-04-29-000200_add_is_ignored/down.sql @@ -0,0 +1,2 @@ +DROP INDEX IF EXISTS idx_persons_is_ignored; +ALTER TABLE persons DROP COLUMN is_ignored; diff --git a/migrations/2026-04-29-000200_add_is_ignored/up.sql b/migrations/2026-04-29-000200_add_is_ignored/up.sql new file mode 100644 index 0000000..d8fdac9 --- /dev/null +++ b/migrations/2026-04-29-000200_add_is_ignored/up.sql @@ -0,0 +1,20 @@ +-- IGNORE / junk bucket for the face recognition feature. +-- +-- An "Ignored" person is the destination for strangers, faces the user +-- doesn't want tagged, and false detections. 
It looks like any other +-- person row (so face_detections.person_id stays a clean foreign key) +-- but `is_ignored=1` flags it for special UI treatment: +-- - hidden from the persons list by default +-- - excluded from `find_persons_by_names_ci` so a tag-name match +-- can never auto-bind a real face to the ignore bucket +-- - cluster-suggest already filters by `person_id IS NULL`, so faces +-- bound to an ignored person are naturally excluded from future +-- re-clustering +-- +-- Partial index because the WHERE-clause is small (typically 1 row), +-- and we only ever query for `is_ignored = 1` to find the bucket. + +ALTER TABLE persons ADD COLUMN is_ignored BOOLEAN NOT NULL DEFAULT 0; + +CREATE INDEX idx_persons_is_ignored + ON persons(is_ignored) WHERE is_ignored = 1; diff --git a/src/ai/face_client.rs b/src/ai/face_client.rs new file mode 100644 index 0000000..8a52812 --- /dev/null +++ b/src/ai/face_client.rs @@ -0,0 +1,370 @@ +//! Thin async HTTP client for Apollo's `/api/internal/faces/*` endpoints. +//! +//! Apollo (the personal location-history viewer at the sibling repo) hosts the +//! insightface inference service. This client is the ImageApi side of the +//! contract — it shoves image bytes through `/detect` and returns boxes + +//! 512-d ArcFace embeddings, plus a single-embedding `/embed` for the manual +//! face-create flow. +//! +//! Mirrors `apollo_client.rs` shape: optional base URL (None = disabled, the +//! file watcher and manual-create handlers no-op), reqwest client with a +//! generous timeout because CPU inference on a backlog can take many seconds +//! per photo. +//! +//! Configured via `APOLLO_FACE_API_BASE_URL`, falling back to +//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo +//! deploys are the common case). Both unset → `is_enabled()` returns false. +//! +//! Wire format: multipart/form-data with `file=` and `meta=`. +//! `meta` carries `{content_hash, library_id, rel_path, orientation?, +//! 
model_version?}` — useful for Apollo-side logging and idempotency, ignored +//! by Apollo today but part of the stable wire contract so future versions +//! can act on it without a client change. +//! +//! Error mapping (reflected in [`FaceDetectError`]): +//! - 422 `decode_failed` → permanent: ImageApi marks `status='failed'` and +//! doesn't retry until manual rerun. +//! - 200 with `faces:[]` → `status='no_faces'` marker row. +//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker +//! written. +//! - Any other 5xx / network error → defer. + +use anyhow::{Context, Result}; +use base64::Engine; +use reqwest::Client; +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +#[derive(Debug, Clone, Serialize)] +pub struct DetectMeta { + pub content_hash: String, + pub library_id: i32, + pub rel_path: String, + /// EXIF orientation int (1..8). Apollo applies `exif_transpose` on the + /// bytes before inference, so this is informational only — supply when + /// the bytes were extracted from a RAW preview that lost the tag. + #[serde(skip_serializing_if = "Option::is_none")] + pub orientation: Option, + /// Echoed back in the response. ImageApi stores it in + /// `face_detections.model_version`. + #[serde(skip_serializing_if = "Option::is_none")] + pub model_version: Option, +} + +// Wire shape for the bbox sub-object Apollo returns. Read by Phase 3's +// file-watch hook; silence the dead-code lint until then. +#[allow(dead_code)] +#[derive(Debug, Clone, Deserialize)] +pub struct DetectedBbox { + pub x: f32, + pub y: f32, + pub w: f32, + pub h: f32, +} + +#[allow(dead_code)] // bbox consumed by Phase 3 file-watch hook +#[derive(Debug, Clone, Deserialize)] +pub struct DetectedFace { + pub bbox: DetectedBbox, + pub confidence: f32, + /// base64 of 2048 bytes (512×f32 LE). ImageApi stores the raw bytes + /// verbatim as a BLOB — see `decode_embedding` for the unpack. 
+ pub embedding: String, +} + +impl DetectedFace { + /// Decode the wire-format embedding back into raw bytes for storage. + /// Returns the 2048-byte little-endian f32 buffer or an error if the + /// base64 is malformed or the wrong length. + pub fn decode_embedding(&self) -> Result> { + let bytes = base64::engine::general_purpose::STANDARD + .decode(self.embedding.as_bytes()) + .context("face embedding base64 decode")?; + if bytes.len() != 2048 { + anyhow::bail!( + "face embedding wrong size: got {} bytes, expected 2048", + bytes.len() + ); + } + Ok(bytes) + } +} + +#[allow(dead_code)] // duration_ms logged by Phase 3 file-watch hook +#[derive(Debug, Clone, Deserialize)] +pub struct DetectResponse { + pub model_version: String, + pub duration_ms: i64, + pub faces: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +#[allow(dead_code)] // Reported by Apollo; useful for future health-driven backoff +pub struct FaceHealth { + pub loaded: bool, + pub providers: Vec, + pub model_version: String, + pub det_size: i32, + #[serde(default)] + pub load_error: Option, +} + +/// Distinguishes permanent failures (don't retry) from transient ones +/// (defer and retry on next scan tick). The file-watch hook keys its +/// marker-row decision on this — a `Permanent` outcome writes +/// `status='failed'`, a `Transient` outcome writes nothing so the next +/// pass tries again. +#[derive(Debug)] +pub enum FaceDetectError { + /// Apollo refused the bytes for a reason that won't change on retry + /// (decode failure, zero-dim image). Mark `status='failed'`. + Permanent(anyhow::Error), + /// Apollo couldn't process this turn but might next time (CUDA OOM, + /// engine not loaded yet, network hiccup). Don't mark anything. + Transient(anyhow::Error), + /// Feature is disabled (no `APOLLO_FACE_API_BASE_URL`). Caller should + /// silently no-op — same shape as `apollo_client::is_enabled()` false. 
+ Disabled, +} + +impl std::fmt::Display for FaceDetectError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FaceDetectError::Permanent(e) => write!(f, "permanent: {e}"), + FaceDetectError::Transient(e) => write!(f, "transient: {e}"), + FaceDetectError::Disabled => write!(f, "face client disabled"), + } + } +} + +impl std::error::Error for FaceDetectError {} + +#[derive(Clone)] +pub struct FaceClient { + client: Client, + /// `None` → disabled. Trim trailing slash at construction so url + /// building doesn't double up. + base_url: Option, +} + +impl FaceClient { + pub fn new(base_url: Option) -> Self { + // 60 s timeout: CPU inference on a backlog can take many seconds + // per photo, especially the first call into a cold GPU. Apollo's + // bounded threadpool (1 worker on CUDA) means concurrent calls + // queue server-side; 60 s is enough headroom for a few items in + // the queue without surfacing a false transient. + let timeout_secs = std::env::var("FACE_DETECT_TIMEOUT_SEC") + .ok() + .and_then(|s| s.parse::().ok()) + .unwrap_or(60); + let client = Client::builder() + .timeout(Duration::from_secs(timeout_secs)) + .build() + .expect("reqwest client build"); + Self { + client, + base_url: base_url.map(|u| u.trim_end_matches('/').to_string()), + } + } + + pub fn is_enabled(&self) -> bool { + self.base_url.is_some() + } + + /// Detect every face in `bytes`. ImageApi calls this from the file-watch + /// hook (Phase 3) and from the manual rerun handler. Empty `faces[]` in + /// the response is the no-faces signal — caller writes a marker row. 
+ #[allow(dead_code)] // Phase 3 file-watch hook + rerun handler + pub async fn detect( + &self, + bytes: Vec, + meta: DetectMeta, + ) -> std::result::Result { + let Some(base) = self.base_url.as_deref() else { + return Err(FaceDetectError::Disabled); + }; + let url = format!("{}/api/internal/faces/detect", base); + self.post_multipart(&url, bytes, &meta).await + } + + /// Single-embedding endpoint for the manual face-create flow. Caller + /// crops the image to the user-drawn bbox and passes those bytes; we + /// run detection inside the crop and return the highest-confidence + /// face's embedding. Apollo returns 422 `no_face_in_crop` when the + /// box missed — surfaced here as `Permanent`. + pub async fn embed( + &self, + bytes: Vec, + meta: DetectMeta, + ) -> std::result::Result { + let Some(base) = self.base_url.as_deref() else { + return Err(FaceDetectError::Disabled); + }; + let url = format!("{}/api/internal/faces/embed", base); + self.post_multipart(&url, bytes, &meta).await + } + + /// Engine reachability + provider/model report. Used by ImageApi for a + /// startup sanity check; not on the hot path. 
+ #[allow(dead_code)] // Phase 3 startup probe + pub async fn health(&self) -> Result { + let base = self.base_url.as_deref().context("face client disabled")?; + let url = format!("{}/api/internal/faces/health", base); + let resp = self.client.get(&url).send().await?.error_for_status()?; + let body: FaceHealth = resp.json().await?; + Ok(body) + } + + async fn post_multipart( + &self, + url: &str, + bytes: Vec, + meta: &DetectMeta, + ) -> std::result::Result { + let meta_json = serde_json::to_string(meta) + .map_err(|e| FaceDetectError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?; + let form = reqwest::multipart::Form::new() + .text("meta", meta_json) + .part( + "file", + reqwest::multipart::Part::bytes(bytes) + .file_name(meta.rel_path.clone()) + .mime_str("application/octet-stream") + .unwrap_or_else(|_| reqwest::multipart::Part::bytes(Vec::new())), + ); + + let resp = match self.client.post(url).multipart(form).send().await { + Ok(r) => r, + Err(e) if e.is_timeout() || e.is_connect() => { + return Err(FaceDetectError::Transient(anyhow::anyhow!( + "face client network: {e}" + ))); + } + Err(e) => { + return Err(FaceDetectError::Transient(anyhow::anyhow!( + "face client request: {e}" + ))); + } + }; + + let status = resp.status(); + if status.is_success() { + let body: DetectResponse = resp.json().await.map_err(|e| { + FaceDetectError::Transient(anyhow::anyhow!("face response decode: {e}")) + })?; + return Ok(body); + } + + let body_text = resp.text().await.unwrap_or_default(); + Err(classify_error_response(status.as_u16(), &body_text)) + } +} + +/// Map an Apollo HTTP error response to a FaceDetectError. Pulled out as a +/// pure function so the marker-row contract (422 → Permanent, 503 → +/// Transient) is unit-testable without spinning up an HTTP server. +fn classify_error_response(status: u16, body_text: &str) -> FaceDetectError { + // Apollo encodes its error class in the JSON body's `detail`. 
Try to + // parse it; fall back to status-only classification. + let detail_code = serde_json::from_str::(body_text) + .ok() + .and_then(|v| { + // detail can be a string ("decode_failed") or an object + // ({"code": "cuda_oom", ...}) depending on the endpoint and + // Apollo's response shape — handle both. + v.get("detail") + .and_then(|d| d.as_str().map(str::to_string)) + .or_else(|| { + v.get("detail") + .and_then(|d| d.get("code")) + .and_then(|c| c.as_str()) + .map(str::to_string) + }) + }) + .unwrap_or_default(); + + if status == 422 { + return FaceDetectError::Permanent(anyhow::anyhow!( + "face detect 422 {}: {}", + detail_code, + body_text + )); + } + if status == 503 { + return FaceDetectError::Transient(anyhow::anyhow!( + "face detect 503 {}: {}", + detail_code, + body_text + )); + } + // Any other 4xx: be conservative and treat as Permanent so we don't + // loop forever on a stable rejection. Any other 5xx: Transient — + // likely intermittent. + if (400..500).contains(&status) { + FaceDetectError::Permanent(anyhow::anyhow!( + "face detect {} {}: {}", + status, + detail_code, + body_text + )) + } else { + FaceDetectError::Transient(anyhow::anyhow!( + "face detect {} {}: {}", + status, + detail_code, + body_text + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn is_permanent(e: &FaceDetectError) -> bool { + matches!(e, FaceDetectError::Permanent(_)) + } + fn is_transient(e: &FaceDetectError) -> bool { + matches!(e, FaceDetectError::Transient(_)) + } + + #[test] + fn classify_422_decode_failed_is_permanent() { + // Permanent → ImageApi marks status='failed' and stops retrying. + let e = classify_error_response(422, r#"{"detail":"decode_failed: bad bytes"}"#); + assert!(is_permanent(&e), "422 decode_failed must be Permanent"); + assert!(format!("{e}").contains("decode_failed")); + } + + #[test] + fn classify_503_cuda_oom_is_transient() { + // Transient → ImageApi must NOT write a marker so the next scan + // retries. 
The detail.code is nested in an object rather than a + // bare string; the parser handles both. + let e = classify_error_response( + 503, + r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#, + ); + assert!(is_transient(&e), "503 cuda_oom must be Transient"); + assert!(format!("{e}").contains("cuda_oom")); + } + + #[test] + fn classify_500_is_transient_other_4xx_is_permanent() { + // Conservative split: 5xx defers (intermittent), other 4xx + // is treated as a stable rejection so we don't loop forever. + assert!(is_transient(&classify_error_response(500, ""))); + assert!(is_transient(&classify_error_response(502, "{}"))); + assert!(is_permanent(&classify_error_response(400, "{}"))); + assert!(is_permanent(&classify_error_response(404, "{}"))); + } + + #[test] + fn classify_handles_unparseable_body() { + // Apollo can return non-JSON on misroute / proxy errors; the + // classifier must still produce a useful variant. + let e = classify_error_response(503, "nginx"); + assert!(is_transient(&e)); + } +} diff --git a/src/ai/mod.rs b/src/ai/mod.rs index a9d55bf..d6fda90 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -1,5 +1,6 @@ pub mod apollo_client; pub mod daily_summary_job; +pub mod face_client; pub mod handlers; pub mod insight_chat; pub mod insight_generator; diff --git a/src/database/mod.rs b/src/database/mod.rs index 07406d6..a4e348a 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -386,6 +386,16 @@ pub trait ExifDao: Sync + Send { hash: &str, ) -> Result, DbError>; + /// Batch version of [`get_rel_paths_by_hash`]. Returns a + /// `hash → Vec` map for every hash that has at least one + /// rel_path. Used by the batch tag lookup endpoint to expand + /// content-hash siblings without firing a query per hash. 
+ fn get_rel_paths_for_hashes( + &mut self, + context: &opentelemetry::Context, + hashes: &[String], + ) -> Result>, DbError>; + /// List `(library_id, rel_path)` pairs for the given libraries, optionally /// restricted to rows whose rel_path starts with `path_prefix`. When /// `library_ids` is empty, rows from every library are returned. Used by @@ -956,6 +966,40 @@ impl ExifDao for SqliteExifDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + fn get_rel_paths_for_hashes( + &mut self, + context: &opentelemetry::Context, + hashes: &[String], + ) -> Result>, DbError> { + use std::collections::HashMap; + let mut out: HashMap> = HashMap::new(); + if hashes.is_empty() { + return Ok(out); + } + trace_db_call(context, "query", "get_rel_paths_for_hashes", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + // Chunk the IN clause to stay safely under SQLite's + // SQLITE_LIMIT_VARIABLE_NUMBER (32766 modern, 999 legacy). + const CHUNK: usize = 500; + for chunk in hashes.chunks(CHUNK) { + let rows: Vec<(String, String)> = image_exif + .filter(content_hash.eq_any(chunk)) + .select((content_hash.assume_not_null(), rel_path)) + .distinct() + .load::<(String, String)>(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error"))?; + for (hash, path) in rows { + out.entry(hash).or_default().push(path); + } + } + Ok(out) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + fn list_rel_paths_for_libraries( &mut self, context: &opentelemetry::Context, diff --git a/src/database/schema.rs b/src/database/schema.rs index e49f21f..55ad9e5 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -70,6 +70,26 @@ diesel::table! { } } +diesel::table! 
{ + face_detections (id) { + id -> Integer, + library_id -> Integer, + content_hash -> Text, + rel_path -> Text, + bbox_x -> Nullable, + bbox_y -> Nullable, + bbox_w -> Nullable, + bbox_h -> Nullable, + embedding -> Nullable, + confidence -> Nullable, + source -> Text, + person_id -> Nullable, + status -> Text, + model_version -> Text, + created_at -> BigInt, + } +} + diesel::table! { favorites (id) { id -> Integer, @@ -130,6 +150,20 @@ diesel::table! { } } +diesel::table! { + persons (id) { + id -> Integer, + name -> Text, + cover_face_id -> Nullable, + entity_id -> Nullable, + created_from_tag -> Bool, + notes -> Nullable, + created_at -> BigInt, + updated_at -> BigInt, + is_ignored -> Bool, + } +} + diesel::table! { photo_insights (id) { id -> Integer, @@ -201,7 +235,10 @@ diesel::table! { diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_photo_links -> entities (entity_id)); diesel::joinable!(entity_photo_links -> libraries (library_id)); +diesel::joinable!(face_detections -> libraries (library_id)); +diesel::joinable!(face_detections -> persons (person_id)); diesel::joinable!(image_exif -> libraries (library_id)); +diesel::joinable!(persons -> entities (entity_id)); diesel::joinable!(photo_insights -> libraries (library_id)); diesel::joinable!(tagged_photo -> tags (tag_id)); diesel::joinable!(video_preview_clips -> libraries (library_id)); @@ -212,10 +249,12 @@ diesel::allow_tables_to_appear_in_same_query!( entities, entity_facts, entity_photo_links, + face_detections, favorites, image_exif, libraries, location_history, + persons, photo_insights, search_history, tagged_photo, diff --git a/src/face_watch.rs b/src/face_watch.rs new file mode 100644 index 0000000..42c6128 --- /dev/null +++ b/src/face_watch.rs @@ -0,0 +1,590 @@ +//! Face-detection pass for the file watcher. +//! +//! `process_new_files` calls [`run_face_detection_pass`] after the EXIF +//! registration loop. 
We walk the candidates (images, not yet face-scanned, +//! not excluded by EXCLUDED_DIRS), fan out parallel detect calls to Apollo, +//! and persist the results — detected faces, `no_faces` markers when Apollo +//! found nothing, `failed` markers on permanent decode errors, no marker on +//! transient failures so the next scan retries. +//! +//! The watcher runs in a plain `std::thread`, so we build a short-lived +//! tokio runtime per pass and `block_on` a join of K detect futures. K is +//! configurable via `FACE_DETECT_CONCURRENCY` (default 8). Apollo's +//! threadpool is bounded to 1–2 workers anyway, so the runs queue +//! server-side; the client-side fan-out is purely about overlapping IO +//! (file read + JSON encode) with someone else's inference. + +use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError}; +use crate::exif; +use crate::faces::{self, FaceDao, InsertFaceDetectionInput}; +use crate::file_types; +use crate::libraries::Library; +use crate::memories::PathExcluder; +use crate::tags::TagDao; +use log::{debug, info, warn}; +use std::path::Path; +use std::sync::{Arc, Mutex}; +use tokio::sync::Semaphore; + +/// One file the watcher would like to face-scan. Built by the caller from +/// the EXIF batch (we need `content_hash` to key everything against). +#[derive(Debug, Clone)] +pub struct FaceCandidate { + pub rel_path: String, + pub content_hash: String, +} + +/// Synchronous entry point. Returns once every candidate has been +/// processed (or definitively skipped). When `face_client.is_enabled()` +/// is false this is a no-op so the watcher can call unconditionally. 
+pub fn run_face_detection_pass( + library: &Library, + excluded_dirs: &[String], + face_client: &FaceClient, + face_dao: Arc>>, + tag_dao: Arc>>, + candidates: Vec, +) { + if !face_client.is_enabled() { + return; + } + if candidates.is_empty() { + return; + } + + let base = Path::new(&library.root_path); + let filtered = filter_excluded(base, excluded_dirs, candidates, Some(&library.name)); + if filtered.is_empty() { + return; + } + + let concurrency: usize = std::env::var("FACE_DETECT_CONCURRENCY") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|n: &usize| *n > 0) + .unwrap_or(8); + + info!( + "face_watch: running detection on {} candidates (library '{}', concurrency {})", + filtered.len(), + library.name, + concurrency + ); + + // Per-pass tokio runtime. The watcher thread isn't in any pre-existing + // async context — building one here keeps the rest of the watcher + // sync-only. Worker count is small; the parallelism we care about is + // task-level (semaphore) not thread-level. + let rt = match tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_all() + .build() + { + Ok(rt) => rt, + Err(e) => { + warn!("face_watch: failed to build tokio runtime: {e}"); + return; + } + }; + + let library_id = library.id; + let library_root = library.root_path.clone(); + rt.block_on(async move { + let sem = Arc::new(Semaphore::new(concurrency)); + let mut handles = Vec::with_capacity(filtered.len()); + for cand in filtered { + let permit_sem = sem.clone(); + let face_client = face_client.clone(); + let face_dao = face_dao.clone(); + let tag_dao = tag_dao.clone(); + let library_root = library_root.clone(); + handles.push(tokio::spawn(async move { + // acquire_owned would let us drop the permit explicitly + // before await points; for a one-shot call into Apollo + // the simpler bounded acquire is enough. 
+ let _permit = permit_sem.acquire().await.expect("face semaphore"); + process_one( + library_id, + &library_root, + cand, + &face_client, + face_dao, + tag_dao, + ) + .await; + })); + } + for h in handles { + // join; per-task panics are logged inside process_one before + // they reach here, so we don't propagate. + let _ = h.await; + } + }); +} + +async fn process_one( + library_id: i32, + library_root: &str, + cand: FaceCandidate, + face_client: &FaceClient, + face_dao: Arc>>, + tag_dao: Arc>>, +) { + let abs = Path::new(library_root).join(&cand.rel_path); + // Read the bytes off disk in a blocking-friendly task. Filesystem IO + // is sync but cheap; a small spawn_blocking would be overkill. + let bytes = match read_image_bytes_for_detect(&abs) { + Ok(b) => b, + Err(e) => { + // Don't mark — file may have been moved/renamed mid-scan; let + // the next pass try again. Future-bug check: a permanently + // unreadable file would loop forever; we accept that for v1 + // because process_new_files already prunes vanished rows on + // full scans. + warn!( + "face_watch: read failed for {} ({}): {}", + cand.rel_path, library_id, e + ); + return; + } + }; + + let meta = DetectMeta { + content_hash: cand.content_hash.clone(), + library_id, + rel_path: cand.rel_path.clone(), + orientation: None, + model_version: None, + }; + let ctx = opentelemetry::Context::current(); + + match face_client.detect(bytes, meta).await { + Ok(resp) => { + // Stage 1: persist detections, holding the dao lock only + // across synchronous DB writes. 
+ let mut stored_for_autobind: Vec<(i32, Vec)> = Vec::new(); + { + let mut dao = face_dao.lock().expect("face dao"); + if resp.faces.is_empty() { + if let Err(e) = dao.mark_status( + &ctx, + library_id, + &cand.content_hash, + &cand.rel_path, + "no_faces", + &resp.model_version, + ) { + warn!( + "face_watch: mark no_faces failed for {}: {:?}", + cand.rel_path, e + ); + } + debug!( + "face_watch: {} → no faces (model {})", + cand.rel_path, resp.model_version + ); + } else { + let face_count = resp.faces.len(); + for face in &resp.faces { + let emb = match face.decode_embedding() { + Ok(b) => b, + Err(e) => { + warn!("face_watch: bad embedding for {}: {:?}", cand.rel_path, e); + continue; + } + }; + // Decode the f32 vector once for auto-bind comparison. + let emb_floats = faces::decode_embedding_bytes(&emb); + match dao.store_detection( + &ctx, + InsertFaceDetectionInput { + library_id, + content_hash: cand.content_hash.clone(), + rel_path: cand.rel_path.clone(), + bbox: Some((face.bbox.x, face.bbox.y, face.bbox.w, face.bbox.h)), + embedding: Some(emb), + confidence: Some(face.confidence), + source: "auto".to_string(), + person_id: None, + status: "detected".to_string(), + model_version: resp.model_version.clone(), + }, + ) { + Ok(row) => { + if let Some(floats) = emb_floats { + stored_for_autobind.push((row.id, floats)); + } + } + Err(e) => warn!( + "face_watch: store_detection failed for {}: {:?}", + cand.rel_path, e + ), + } + } + info!( + "face_watch: {} → {} face(s) ({}ms, {})", + cand.rel_path, face_count, resp.duration_ms, resp.model_version + ); + } + } + + // Stage 2: auto-bind newly-stored faces against same-named + // people-tags. Done outside the dao lock so the lookups don't + // serialize with concurrent detect tasks. 
+ if !stored_for_autobind.is_empty() { + try_auto_bind( + &ctx, + &cand.rel_path, + &resp.model_version, + stored_for_autobind, + &tag_dao, + &face_dao, + ); + } + } + Err(FaceDetectError::Permanent(e)) => { + warn!( + "face_watch: permanent failure on {}: {} — marking failed", + cand.rel_path, e + ); + let mut dao = face_dao.lock().expect("face dao"); + // model_version is best-effort here — the engine that rejected + // the bytes may not have echoed one. Empty string is fine; this + // row is purely a "don't retry" sentinel. + if let Err(e) = dao.mark_status( + &ctx, + library_id, + &cand.content_hash, + &cand.rel_path, + "failed", + "", + ) { + warn!( + "face_watch: mark failed errored for {}: {:?}", + cand.rel_path, e + ); + } + } + Err(FaceDetectError::Transient(e)) => { + // Don't mark anything; next scan tick retries naturally. + // Demoted to debug because OOM and engine-not-ready are noisy + // and self-resolving. + debug!( + "face_watch: transient on {}: {} (will retry next pass)", + cand.rel_path, e + ); + } + Err(FaceDetectError::Disabled) => { + // Caller already checked is_enabled(); this branch is defensive. + } + } +} + +/// Auto-bind newly-detected faces to a same-named person, when a tag on the +/// photo unambiguously identifies one. Driven by `FACE_AUTOBIND_MIN_COS` +/// (default 0.4): the new face's embedding must reach this cosine +/// similarity against the L2-normalized mean of the person's existing +/// faces. The first face for a person binds unconditionally — there's +/// nothing to compare against, and the alternative ("never bind without +/// a reference") would mean bootstrap never kicks off. +/// +/// Multi-match (the photo carries tags for two different known persons) +/// is intentionally a no-op — we can't tell which face is which without +/// additional matching. Those faces stay unassigned for the cluster +/// suggester (Phase 6) to handle. 
+fn try_auto_bind( + ctx: &opentelemetry::Context, + rel_path: &str, + model_version: &str, + new_faces: Vec<(i32, Vec)>, // (face_id, decoded embedding) + tag_dao: &Arc>>, + face_dao: &Arc>>, +) { + // 1. Pull the photo's tags. + let tag_names: Vec = { + let mut td = tag_dao.lock().expect("tag dao"); + match td.get_tags_for_path(ctx, rel_path) { + Ok(tags) => tags.into_iter().map(|t| t.name).collect(), + Err(e) => { + warn!( + "face_watch: get_tags_for_path failed for {}: {:?}", + rel_path, e + ); + return; + } + } + }; + if tag_names.is_empty() { + return; + } + + // 2. Find tags that map to existing persons (case-insensitive). + let person_for_tag: std::collections::HashMap = { + let mut fd = face_dao.lock().expect("face dao"); + match fd.find_persons_by_names_ci(ctx, &tag_names) { + Ok(m) => m, + Err(e) => { + warn!( + "face_watch: find_persons_by_names_ci failed for {}: {:?}", + rel_path, e + ); + return; + } + } + }; + + // 3. Multi-match: ambiguous, skip. Single match: candidate person. + let unique_person_ids: std::collections::HashSet = + person_for_tag.values().copied().collect(); + if unique_person_ids.len() != 1 { + if !unique_person_ids.is_empty() { + debug!( + "face_watch: {} carries tags for {} different persons; skipping auto-bind", + rel_path, + unique_person_ids.len() + ); + } + return; + } + let person_id = *unique_person_ids.iter().next().expect("nonempty set"); + + let threshold: f32 = std::env::var("FACE_AUTOBIND_MIN_COS") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|t: &f32| *t >= 0.0 && *t <= 1.0) + .unwrap_or(0.4); + + // 4. Reference embedding (if any) under the same model_version. + let reference: Option> = { + let mut fd = face_dao.lock().expect("face dao"); + match fd.person_reference_embedding(ctx, person_id, model_version) { + Ok(r) => r, + Err(e) => { + warn!( + "face_watch: person_reference_embedding failed for person {}: {:?}", + person_id, e + ); + return; + } + } + }; + + // 5. Bind each new face that meets the criterion. 
Hold the lock once + // for the whole batch; assign_face_to_person uses its own short + // transaction internally. + let mut fd = face_dao.lock().expect("face dao"); + for (face_id, emb) in new_faces { + let bind = match &reference { + None => { + // Person has no faces yet — first one wins so bootstrap + // can ever produce a usable reference. After this row + // commits, future faces evaluate against it. + debug!( + "face_watch: auto-binding first face {} → person {} (no reference yet)", + face_id, person_id + ); + true + } + Some(ref_vec) => { + let sim = faces::cosine_similarity(&emb, ref_vec); + if sim >= threshold { + debug!( + "face_watch: auto-binding face {} → person {} (cos={:.3} ≥ {:.3})", + face_id, person_id, sim, threshold + ); + true + } else { + debug!( + "face_watch: leaving face {} unassigned (cos={:.3} < {:.3} for person {})", + face_id, sim, threshold, person_id + ); + false + } + } + }; + if bind && let Err(e) = fd.assign_face_to_person(ctx, face_id, person_id) { + warn!( + "face_watch: assign_face_to_person failed (face={}, person={}): {:?}", + face_id, person_id, e + ); + } + } +} + +/// Drop candidates whose path matches the watcher's `EXCLUDED_DIRS` rules. +/// Pulled out for unit testing — the same `PathExcluder` /memories uses, +/// just applied at the face-detect candidate set instead of the memories +/// listing. Skip @eaDir / .thumbnails / user-defined paths before we burn +/// a detect call (and Apollo's GPU memory) on junk. Also drops anything +/// that isn't an image file — the backlog drain pulls every hashed row in +/// `image_exif`, which includes videos; sending those to Apollo just +/// produces `failed` markers and inflates the FAILED stat. 
+pub(crate) fn filter_excluded( + base: &Path, + excluded_dirs: &[String], + candidates: Vec, + library_name: Option<&str>, +) -> Vec { + let excluder = if excluded_dirs.is_empty() { + None + } else { + Some(PathExcluder::new(base, excluded_dirs)) + }; + candidates + .into_iter() + .filter(|c| { + let abs = base.join(&c.rel_path); + if !file_types::is_image_file(&abs) { + debug!( + "face_watch: skipping non-image path {} (library {})", + c.rel_path, + library_name.unwrap_or("") + ); + return false; + } + if let Some(ex) = excluder.as_ref() + && ex.is_excluded(&abs) + { + debug!( + "face_watch: skipping excluded path {} (library {})", + c.rel_path, + library_name.unwrap_or("") + ); + return false; + } + true + }) + .collect() +} + +/// Read image bytes for face detection. Insightface (via opencv) can't +/// decode RAW or HEIC — for those we extract the embedded JPEG preview +/// the way the thumbnail pipeline does. Plain JPEG/PNG/WebP/etc. go +/// through a direct read. +pub(crate) fn read_image_bytes_for_detect(path: &Path) -> std::io::Result> { + if file_types::needs_ffmpeg_thumbnail(path) + && let Some(preview) = exif::extract_embedded_jpeg_preview(path) + { + return Ok(preview); + } + // Plain read for everything else. RAW/HEIC files without an embedded + // preview fall through here too; Apollo will then 422 and the caller + // marks the row failed. That's fine; we tried. + std::fs::read(path) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn cand(rel_path: &str) -> FaceCandidate { + FaceCandidate { + rel_path: rel_path.to_string(), + content_hash: format!("hash-{rel_path}"), + } + } + + #[test] + fn filter_excluded_pattern_drops_dir_components() { + // A pattern matches a path *component* under base, not a substring. + // Phase 3 needs this for @eaDir / .thumbnails skipping. 
+ let tmp = tempfile::tempdir().unwrap(); + let base = tmp.path(); + let candidates = vec![ + cand("photos/a.jpg"), // keep + cand("photos/@eaDir/SYNOPHOTO_THUMB"), // drop (component match) + cand("photos/eaDir-not-a-thing.jpg"), // keep (substring, not component) + ]; + let kept = filter_excluded(base, &["@eaDir".to_string()], candidates, Some("test")); + let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect(); + assert_eq!( + kept_paths, + vec!["photos/a.jpg", "photos/eaDir-not-a-thing.jpg"] + ); + } + + #[test] + fn filter_excluded_absolute_dir_drops_subtree() { + // Absolute (under-base) entries drop the whole subtree. + let tmp = tempfile::tempdir().unwrap(); + let base = tmp.path(); + let candidates = vec![ + cand("public/a.jpg"), + cand("private/a.jpg"), + cand("private/sub/b.jpg"), + ]; + let kept = filter_excluded(base, &["/private".to_string()], candidates, None); + let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect(); + assert_eq!(kept_paths, vec!["public/a.jpg"]); + } + + #[test] + fn filter_excluded_empty_rules_passes_all() { + // EXCLUDED_DIRS unset still lets every image through — only the + // PathExcluder is skipped, the image-extension gate still runs. + let tmp = tempfile::tempdir().unwrap(); + let base = tmp.path(); + let candidates = vec![cand("a.jpg"), cand("b.jpg")]; + let kept = filter_excluded(base, &[], candidates, None); + assert_eq!(kept.len(), 2); + } + + #[test] + fn filter_excluded_drops_videos_and_non_media() { + // Backlog drain pulls every hashed row in image_exif (videos + // included). Videos must never reach Apollo — opencv can't + // decode them, every call would 422 and write a `failed` marker. 
+ let tmp = tempfile::tempdir().unwrap(); + let base = tmp.path(); + let candidates = vec![ + cand("photos/a.jpg"), + cand("photos/clip.mp4"), + cand("photos/clip.MOV"), + cand("photos/notes.txt"), + cand("photos/b.heic"), + ]; + let kept = filter_excluded(base, &[], candidates, Some("test")); + let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect(); + assert_eq!(kept_paths, vec!["photos/a.jpg", "photos/b.heic"]); + } + + #[test] + fn read_bytes_passes_through_for_jpeg() { + // JPEG goes through plain read — we DON'T want to lose orientation + // metadata or re-encode here; insightface's exif_transpose handles + // orientation on its end. + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("test.jpg"); + let mut buf = Vec::new(); + // Tiny 4x4 grey JPEG — encoded by image crate so we know it round-trips. + let img = image::DynamicImage::ImageRgb8(image::RgbImage::from_pixel( + 4, + 4, + image::Rgb([128, 128, 128]), + )); + img.write_to( + &mut std::io::Cursor::new(&mut buf), + image::ImageFormat::Jpeg, + ) + .unwrap(); + fs::write(&path, &buf).unwrap(); + + let read = read_image_bytes_for_detect(&path).expect("read jpeg"); + assert_eq!(read, buf, "JPEG bytes must pass through verbatim"); + } + + #[test] + fn read_bytes_falls_back_when_raw_has_no_preview() { + // A `.nef` file with non-RAW bytes won't have an embedded preview — + // the helper falls through to plain read rather than refusing. This + // matches the docstring contract; Apollo will then 422 and we'll + // mark the row as failed. + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("not_really.nef"); + fs::write(&path, b"definitely-not-a-raw-file").unwrap(); + + let read = read_image_bytes_for_detect(&path).expect("fallback read"); + assert_eq!(read, b"definitely-not-a-raw-file"); + } +} diff --git a/src/faces.rs b/src/faces.rs new file mode 100644 index 0000000..20fd700 --- /dev/null +++ b/src/faces.rs @@ -0,0 +1,3403 @@ +//! 
Local face recognition: data layer + HTTP surface. +//! +//! Phase 2 ships the persistence model and the manual CRUD endpoints; the +//! file-watch hook that drives automatic detection lives in `process_new_files` +//! (Phase 3) and is not registered yet. Inference is delegated to Apollo over +//! HTTP via [`crate::ai::face_client`]; this module never imports onnxruntime. +//! +//! Data model: +//! - `persons` are visual identities (the "who" of a face). +//! - `face_detections` rows are either real detections (`status='detected'`) +//! or markers (`status='no_faces' | 'failed'`). Both are keyed on +//! `content_hash` so the same JPEG in two libraries is scanned once. +//! - The `(library_id, rel_path)` pair is the *display* lookup; we resolve +//! it through `image_exif.content_hash` on every read so renames don't +//! strand face rows. +//! +//! The `FaceDao` trait abstracts persistence; `SqliteFaceDao` is the +//! production impl. The Phase 2 endpoints use it directly. A test impl +//! (in-memory) lives at the bottom of the module behind `#[cfg(test)]`. 
+ +use crate::Claims; +use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError}; +use crate::exif; +use crate::database::schema::{face_detections, image_exif, persons}; +use crate::error::IntoHttpError; +use crate::libraries::{self, Library}; +use crate::otel::{extract_context_from_request, global_tracer, trace_db_call}; +use crate::state::AppState; +use crate::utils::normalize_path; +use crate::{ThumbnailRequest, connect}; +use actix_web::dev::{ServiceFactory, ServiceRequest}; +use actix_web::{App, HttpRequest, HttpResponse, Responder, web}; +use anyhow::{Context, anyhow}; +use chrono::Utc; +use diesel::prelude::*; +use image::GenericImageView; +use log::{info, warn}; +use opentelemetry::KeyValue; +use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer}; +use serde::{Deserialize, Serialize}; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +// ── Wire types ────────────────────────────────────────────────────────────── + +/// Visual identity. The optional `entity_id` bridges this person to an +/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT +/// auto-bridged at creation — only when the user explicitly links them in +/// the management UI, or when bootstrap finds an exact-name match. +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct Person { + pub id: i32, + pub name: String, + pub cover_face_id: Option, + pub entity_id: Option, + pub created_from_tag: bool, + pub notes: Option, + pub created_at: i64, + pub updated_at: i64, + /// True for the IGNORE / junk bucket. Hidden from the default + /// persons list, skipped by `find_persons_by_names_ci` (so a tag + /// match can never auto-bind a real face into the ignore bucket), + /// and excluded from cluster suggestions because cluster-suggest + /// already filters by `person_id IS NULL` and ignored faces have + /// a non-null person_id. 
+ pub is_ignored: bool, +} + +#[derive(Insertable, Debug)] +#[diesel(table_name = persons)] +struct InsertPerson { + name: String, + notes: Option, + created_from_tag: bool, + is_ignored: bool, + created_at: i64, + updated_at: i64, +} + +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct FaceDetectionRow { + pub id: i32, + pub library_id: i32, + pub content_hash: String, + pub rel_path: String, + pub bbox_x: Option, + pub bbox_y: Option, + pub bbox_w: Option, + pub bbox_h: Option, + /// Skip on the wire — clients call /faces/embeddings explicitly when + /// they need it. Saves ~2 KB per face on every list response. + #[serde(skip_serializing)] + pub embedding: Option>, + pub confidence: Option, + pub source: String, + pub person_id: Option, + pub status: String, + pub model_version: String, + pub created_at: i64, +} + +/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's +/// `sql_query` requires a `QueryableByName` row type with explicit +/// column SQL types; using a tuple isn't supported. +#[derive(diesel::QueryableByName, Debug)] +struct UnscannedRow { + #[diesel(sql_type = diesel::sql_types::Text)] + rel_path: String, + #[diesel(sql_type = diesel::sql_types::Text)] + content_hash: String, +} + +#[derive(Insertable, Debug)] +#[diesel(table_name = face_detections)] +struct InsertFaceDetection { + library_id: i32, + content_hash: String, + rel_path: String, + bbox_x: Option, + bbox_y: Option, + bbox_w: Option, + bbox_h: Option, + embedding: Option>, + confidence: Option, + source: String, + person_id: Option, + status: String, + model_version: String, + created_at: i64, +} + +/// Build a [`FaceWithPerson`] from a freshly-mutated row by resolving the +/// person name via [`FaceDao::get_person`]. 
Used by `create_face_handler` +/// and `update_face_handler` so PATCH/POST responses match the join shape +/// `/image/faces` returns — without this the carousel overlay's +/// optimistic-replace would clobber the rendered name (the bare +/// [`FaceDetectionRow`] doesn't carry it). +fn hydrate_face_with_person( + dao: &mut D, + ctx: &opentelemetry::Context, + row: FaceDetectionRow, +) -> anyhow::Result { + let person_name = match row.person_id { + Some(pid) => dao.get_person(ctx, pid)?.map(|p| p.name), + None => None, + }; + Ok(FaceWithPerson { + id: row.id, + bbox_x: row.bbox_x.unwrap_or(0.0), + bbox_y: row.bbox_y.unwrap_or(0.0), + bbox_w: row.bbox_w.unwrap_or(0.0), + bbox_h: row.bbox_h.unwrap_or(0.0), + confidence: row.confidence.unwrap_or(0.0), + source: row.source, + person_id: row.person_id, + person_name, + model_version: row.model_version, + }) +} + +/// Face row decorated with its assigned person's name. Returned by +/// `/image/faces` for the rendering side (carousel overlay, person chips). +#[derive(Serialize, Debug, Clone)] +pub struct FaceWithPerson { + pub id: i32, + pub bbox_x: f32, + pub bbox_y: f32, + pub bbox_w: f32, + pub bbox_h: f32, + pub confidence: f32, + pub source: String, + pub person_id: Option, + pub person_name: Option, + pub model_version: String, +} + +/// Face row plus the photo it lives on. Powers the per-person photo grid +/// (`GET /persons/{id}/faces`) and unassigned-cluster surfacing in Apollo. +#[derive(Serialize, Debug, Clone)] +pub struct FaceWithPath { + pub id: i32, + pub library_id: i32, + pub rel_path: String, + pub bbox_x: f32, + pub bbox_y: f32, + pub bbox_w: f32, + pub bbox_h: f32, + pub confidence: f32, + pub person_id: Option, + pub model_version: String, +} + +/// Embedding-bearing face row. Returned by `/faces/embeddings` for Apollo's +/// clustering layer; embedding is base64-encoded so the JSON payload is +/// self-contained (Apollo's DBSCAN runs over numpy arrays decoded from this). 
+#[derive(Serialize, Debug, Clone)] +pub struct FaceEmbeddingRow { + pub id: i32, + pub library_id: i32, + pub rel_path: String, + pub content_hash: String, + pub person_id: Option, + pub model_version: String, + /// base64 of 2048 bytes (512×f32 LE). + pub embedding: String, + /// Normalized bbox 0..1, included so the cluster suggester UI can + /// crop a face thumbnail without an extra round-trip per cluster. + /// Shouldn't be NULL for `status='detected'` rows (CHECK constraint + /// in the migration), but the DB type is nullable so we mirror it. + pub bbox_x: Option, + pub bbox_y: Option, + pub bbox_w: Option, + pub bbox_h: Option, +} + +#[derive(Serialize, Debug, Default)] +pub struct FaceStats { + pub library_id: Option, + pub total_photos: i64, + pub scanned: i64, + pub with_faces: i64, + pub no_faces: i64, + pub failed: i64, + pub persons_count: i64, + pub unassigned_faces: i64, +} + +#[derive(Serialize, Debug, Clone)] +pub struct PersonSummary { + pub id: i32, + pub name: String, + pub cover_face_id: Option, + pub entity_id: Option, + pub created_from_tag: bool, + pub notes: Option, + pub is_ignored: bool, + pub face_count: i64, +} + +// ── Request bodies ────────────────────────────────────────────────────────── + +#[derive(Deserialize, Debug)] +pub struct CreatePersonReq { + pub name: String, + #[serde(default)] + pub notes: Option, + /// Optional bridge to an existing entity. NULL/missing leaves it + /// unbridged; set explicitly to wire the person to LLM-extracted facts. + #[serde(default)] + pub entity_id: Option, + /// True for the IGNORE / junk bucket. The frontend sets this when + /// lazily creating the Ignored person via the dedicated endpoint; + /// hand-rolled callers leave it false. 
+ #[serde(default)] + pub is_ignored: bool, +} + +#[derive(Deserialize, Debug)] +pub struct UpdatePersonReq { + #[serde(default)] + pub name: Option, + #[serde(default)] + pub notes: Option, + #[serde(default)] + pub cover_face_id: Option, + #[serde(default)] + pub entity_id: Option, + /// Toggle the ignore flag. Mostly used by the UI to "un-ignore" a + /// person that was previously bound to the bucket. + #[serde(default)] + pub is_ignored: Option, +} + +#[derive(Deserialize, Debug)] +pub struct MergePersonsReq { + /// Person id to merge *into*. The source (`{id}` in the path) is + /// re-pointed to this id, then deleted. + pub into: i32, +} + +#[derive(Deserialize, Debug)] +pub struct DeletePersonQuery { + /// `set_null` (default) leaves face rows orphaned (person_id NULL); + /// `delete` cascades through and removes the face rows entirely. + /// Default is set_null because deleting the person almost never means + /// "delete every photo of them ever existed." + #[serde(default)] + pub cascade: Option, +} + +#[derive(Deserialize, Debug)] +pub struct CreateFaceReq { + /// Photo path (library-relative). Resolved to content_hash via + /// image_exif before any face row is inserted. + pub path: String, + pub library: Option, + pub bbox: BboxReq, + /// Optional initial person assignment. Use this when the user draws a + /// box and immediately picks a name from the autocomplete. + #[serde(default)] + pub person_id: Option, + /// Skip the embedding step. Set when the user wants to tag a region + /// the detector can't find a face in (back of head, profile partly + /// occluded, etc.). The row is stored with a zero-vector embedding, + /// which the cluster suggester filters on `norm <= 0` and auto-bind + /// cosine resolves to 0 against — so the row participates only as a + /// browse-by-person tag, not in similarity matching. The frontend + /// only sets this after a 422 from a strict create plus an explicit + /// operator confirmation. 
+ #[serde(default)] + pub force: bool, +} + +#[derive(Deserialize, Debug)] +pub struct BboxReq { + pub x: f32, + pub y: f32, + pub w: f32, + pub h: f32, +} + +#[derive(Deserialize, Debug)] +pub struct UpdateFaceReq { + /// `null` literally clears the assignment; missing leaves it alone. + /// Distinguish via `Option>` is tricky in serde without + /// custom deserialization; encode "clear" as `clear_person: true` + /// instead. + #[serde(default)] + pub person_id: Option, + #[serde(default)] + pub clear_person: bool, + #[serde(default)] + pub bbox: Option, +} + +#[derive(Deserialize, Debug)] +pub struct EmbeddingsQuery { + pub library: Option, + /// Default true — clustering only cares about unassigned faces. Set + /// false to dump all embeddings (e.g. for re-clustering everything). + #[serde(default = "default_unassigned")] + pub unassigned: bool, + #[serde(default = "default_embeddings_limit")] + pub limit: i64, + #[serde(default)] + pub offset: i64, +} + +fn default_unassigned() -> bool { + true +} +fn default_embeddings_limit() -> i64 { + 500 +} + +// ── DAO trait ─────────────────────────────────────────────────────────────── + +// File-watch hook (Phase 3) and the rerun handler (Phase 6) consume the +// methods the Phase 2 routes don't. Allow dead_code on the trait so we +// don't have to sprinkle attributes on every method that's wired up later. +#[allow(dead_code)] +pub trait FaceDao: Send + Sync { + fn already_scanned( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result; + /// Find image_exif rows in `library_id` that have a populated + /// content_hash but no matching face_detections row yet. Used by + /// the watcher's quick-scan path to drain the backlog without + /// re-walking the filesystem. Returns `(rel_path, content_hash)` + /// pairs, capped at `limit`. Distinct on content_hash so the same + /// hash that lives at multiple rel_paths only fires one detection. 
+ fn list_unscanned_candidates( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + limit: i64, + ) -> anyhow::Result>; + fn store_detection( + &mut self, + ctx: &opentelemetry::Context, + row: InsertFaceDetectionInput, + ) -> anyhow::Result; + fn mark_status( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + content_hash: &str, + rel_path: &str, + status: &str, + model_version: &str, + ) -> anyhow::Result<()>; + fn list_for_content_hash( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result>; + fn list_for_person( + &mut self, + ctx: &opentelemetry::Context, + person_id: i32, + library_id: Option, + ) -> anyhow::Result>; + fn list_embeddings( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + unassigned: bool, + limit: i64, + offset: i64, + ) -> anyhow::Result>; + fn get_face( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + ) -> anyhow::Result>; + fn update_face( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + person_id: Option>, // None=leave; Some(None)=clear; Some(Some(id))=set + bbox: Option<(f32, f32, f32, f32)>, + embedding: Option>, + ) -> anyhow::Result; + fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result; + fn delete_auto_for_hash( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result; + fn stats( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + ) -> anyhow::Result; + + // ── Persons ───────────────────────────────────────────────────────── + fn create_person( + &mut self, + ctx: &opentelemetry::Context, + req: &CreatePersonReq, + from_tag: bool, + ) -> anyhow::Result; + fn get_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + ) -> anyhow::Result>; + fn list_persons( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + include_ignored: bool, + ) -> anyhow::Result>; + /// Get the IGNORE/junk bucket, creating it lazily on first call. 
+ /// Idempotent — returns the same row across calls. Single global + /// bucket per database; the frontend never sees the literal name. + fn get_or_create_ignored_person( + &mut self, + ctx: &opentelemetry::Context, + ) -> anyhow::Result; + fn update_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + patch: &UpdatePersonReq, + ) -> anyhow::Result; + /// Delete a person. `cascade=true` removes face rows; otherwise the + /// rows have their `person_id` set NULL by the FK constraint. + fn delete_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + cascade_delete_faces: bool, + ) -> anyhow::Result; + fn merge_persons( + &mut self, + ctx: &opentelemetry::Context, + src: i32, + into: i32, + ) -> anyhow::Result; + + /// Resolve `(library_id, rel_path)` → `content_hash` via image_exif. + /// Returns None when the photo hasn't been EXIF-indexed yet (no row + /// in image_exif) or when the row exists but content_hash is NULL. + fn resolve_content_hash( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + ) -> anyhow::Result>; + + // ── Auto-bind support (Phase 4) ───────────────────────────────────── + + /// Map case-insensitive person names → person id. Used by the + /// auto-bind path to look up "is this tag a known person?". Names + /// passed in are matched LOWER(persons.name); collisions resolve to + /// the person with the lowest id (stable, but the UNIQUE constraint + /// on persons.name COLLATE NOCASE prevents collisions in practice). + fn find_persons_by_names_ci( + &mut self, + ctx: &opentelemetry::Context, + names: &[String], + ) -> anyhow::Result>; + + /// Mean of a person's existing face embeddings. Returns the L2- + /// normalized 512-d reference vector, or None when the person has + /// no detected faces yet (auto-bind treats that as "first face wins + /// unconditionally"). Filters by the same model_version that produced + /// the candidate embedding so cross-model averaging never happens. 
+    fn person_reference_embedding(
+        &mut self,
+        ctx: &opentelemetry::Context,
+        person_id: i32,
+        model_version: &str,
+    ) -> anyhow::Result<Option<Vec<f32>>>;
+
+    /// Set face_detections.person_id and, when the target person has no
+    /// cover_face_id yet, set it to this face. One transaction so a
+    /// half-bound state can't survive a SQLite write error.
+    fn assign_face_to_person(
+        &mut self,
+        ctx: &opentelemetry::Context,
+        face_id: i32,
+        person_id: i32,
+    ) -> anyhow::Result<()>;
+}
+
+/// Free-standing input struct; the DAO copies it into [`InsertFaceDetection`]
+/// so callers don't need to import the diesel-derived insertable.
+#[derive(Debug, Clone)]
+pub struct InsertFaceDetectionInput {
+    pub library_id: i32,
+    pub content_hash: String,
+    pub rel_path: String,
+    pub bbox: Option<(f32, f32, f32, f32)>,
+    pub embedding: Option<Vec<u8>>,
+    pub confidence: Option<f32>,
+    pub source: String,
+    pub person_id: Option<i32>,
+    pub status: String,
+    pub model_version: String,
+}
+
+// ── SqliteFaceDao impl ──────────────────────────────────────────────────────
+
+pub struct SqliteFaceDao {
+    connection: Arc<Mutex<SqliteConnection>>,
+}
+
+impl SqliteFaceDao {
+    pub fn new() -> Self {
+        Self {
+            connection: Arc::new(Mutex::new(connect())),
+        }
+    }
+
+    /// Test helper — bind to a pre-built (typically in-memory) connection.
+ #[cfg(test)] + pub fn from_connection(connection: Arc>) -> Self { + Self { connection } + } +} + +impl Default for SqliteFaceDao { + fn default() -> Self { + Self::new() + } +} + +impl FaceDao for SqliteFaceDao { + fn already_scanned( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "face_already_scanned", |span| { + span.set_attribute(KeyValue::new("content_hash", content_hash.to_string())); + face_detections::table + .filter(face_detections::content_hash.eq(content_hash)) + .select(face_detections::id) + .first::(conn.deref_mut()) + .optional() + .map(|x| x.is_some()) + .with_context(|| "already_scanned query") + }) + } + + fn list_unscanned_candidates( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + limit: i64, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "list_unscanned_candidates", |span| { + span.set_attribute(KeyValue::new("library_id", library_id as i64)); + // Pick the smallest-id rel_path per content_hash so we don't + // fire multiple detect calls for the same hash if it lives + // under several rel_paths in the same library. The + // anti-join (NOT EXISTS) drains hashes that have no row in + // face_detections at all. + let rows: Vec<(String, String)> = diesel::sql_query( + "SELECT rel_path, content_hash \ + FROM image_exif e \ + WHERE library_id = ? \ + AND content_hash IS NOT NULL \ + AND NOT EXISTS ( \ + SELECT 1 FROM face_detections f \ + WHERE f.content_hash = e.content_hash \ + ) \ + GROUP BY content_hash \ + LIMIT ?", + ) + .bind::(library_id) + .bind::(limit) + .load::(conn.deref_mut()) + .with_context(|| "list_unscanned_candidates")? 
+ .into_iter() + .map(|r| (r.rel_path, r.content_hash)) + .collect(); + Ok(rows) + }) + } + + fn store_detection( + &mut self, + ctx: &opentelemetry::Context, + row: InsertFaceDetectionInput, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "insert", "store_detection", |span| { + span.set_attribute(KeyValue::new("status", row.status.clone())); + span.set_attribute(KeyValue::new("source", row.source.clone())); + let now = Utc::now().timestamp(); + let (bx, by, bw, bh) = match row.bbox { + Some((x, y, w, h)) => (Some(x), Some(y), Some(w), Some(h)), + None => (None, None, None, None), + }; + let insert = InsertFaceDetection { + library_id: row.library_id, + content_hash: row.content_hash, + rel_path: row.rel_path, + bbox_x: bx, + bbox_y: by, + bbox_w: bw, + bbox_h: bh, + embedding: row.embedding, + confidence: row.confidence, + source: row.source, + person_id: row.person_id, + status: row.status, + model_version: row.model_version, + created_at: now, + }; + diesel::insert_into(face_detections::table) + .values(&insert) + .execute(conn.deref_mut()) + .with_context(|| "insert face_detection")?; + define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; } + let id = diesel::select(last_insert_rowid()) + .get_result::(conn.deref_mut()) + .with_context(|| "last_insert_rowid")?; + face_detections::table + .find(id) + .first::(conn.deref_mut()) + .with_context(|| "fetch inserted face") + }) + } + + fn mark_status( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + content_hash: &str, + rel_path: &str, + status: &str, + model_version: &str, + ) -> anyhow::Result<()> { + // Marker rows have NULL bbox + NULL embedding (CHECK enforces + // this). We let the UNIQUE partial index on (content_hash) WHERE + // status='no_faces' guard against double-marking; for 'failed' we + // do a manual exists-check. 
+ let exists = self.already_scanned(ctx, content_hash)?; + if exists { + // Don't write a second marker if any row already exists for + // this hash — that includes detected rows from a prior run + // that succeeded; the file watcher's already_scanned() check + // should have caught this, but stay idempotent. + return Ok(()); + } + self.store_detection( + ctx, + InsertFaceDetectionInput { + library_id, + content_hash: content_hash.to_string(), + rel_path: rel_path.to_string(), + bbox: None, + embedding: None, + confidence: None, + source: "auto".to_string(), + person_id: None, + status: status.to_string(), + model_version: model_version.to_string(), + }, + )?; + Ok(()) + } + + fn list_for_content_hash( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "faces_for_hash", |span| { + span.set_attribute(KeyValue::new("content_hash", content_hash.to_string())); + face_detections::table + .left_join(persons::table.on(persons::id.nullable().eq(face_detections::person_id))) + .filter(face_detections::content_hash.eq(content_hash)) + .filter(face_detections::status.eq("detected")) + .select(( + face_detections::id, + face_detections::bbox_x, + face_detections::bbox_y, + face_detections::bbox_w, + face_detections::bbox_h, + face_detections::confidence, + face_detections::source, + face_detections::person_id, + persons::name.nullable(), + face_detections::model_version, + )) + .load::<( + i32, + Option, + Option, + Option, + Option, + Option, + String, + Option, + Option, + String, + )>(conn.deref_mut()) + .with_context(|| "list faces for hash") + .map(|rows| { + rows.into_iter() + .map(|r| FaceWithPerson { + id: r.0, + bbox_x: r.1.unwrap_or(0.0), + bbox_y: r.2.unwrap_or(0.0), + bbox_w: r.3.unwrap_or(0.0), + bbox_h: r.4.unwrap_or(0.0), + confidence: r.5.unwrap_or(0.0), + source: r.6, + person_id: r.7, + person_name: r.8, + model_version: r.9, 
+ }) + .collect() + }) + }) + } + + fn list_for_person( + &mut self, + ctx: &opentelemetry::Context, + person_id: i32, + library_id: Option, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "faces_for_person", |span| { + span.set_attribute(KeyValue::new("person_id", person_id as i64)); + let mut query = face_detections::table + .filter(face_detections::person_id.eq(person_id)) + .filter(face_detections::status.eq("detected")) + .into_boxed(); + if let Some(lib) = library_id { + query = query.filter(face_detections::library_id.eq(lib)); + } + query + .select(( + face_detections::id, + face_detections::library_id, + face_detections::rel_path, + face_detections::bbox_x, + face_detections::bbox_y, + face_detections::bbox_w, + face_detections::bbox_h, + face_detections::confidence, + face_detections::person_id, + face_detections::model_version, + )) + .load::<( + i32, + i32, + String, + Option, + Option, + Option, + Option, + Option, + Option, + String, + )>(conn.deref_mut()) + .with_context(|| "list faces for person") + .map(|rows| { + rows.into_iter() + .map(|r| FaceWithPath { + id: r.0, + library_id: r.1, + rel_path: r.2, + bbox_x: r.3.unwrap_or(0.0), + bbox_y: r.4.unwrap_or(0.0), + bbox_w: r.5.unwrap_or(0.0), + bbox_h: r.6.unwrap_or(0.0), + confidence: r.7.unwrap_or(0.0), + person_id: r.8, + model_version: r.9, + }) + .collect() + }) + }) + } + + fn list_embeddings( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + unassigned: bool, + limit: i64, + offset: i64, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "list_embeddings", |span| { + span.set_attribute(KeyValue::new("limit", limit)); + span.set_attribute(KeyValue::new("offset", offset)); + let mut query = face_detections::table + .filter(face_detections::status.eq("detected")) + .into_boxed(); + if let Some(lib) = library_id { + query = 
query.filter(face_detections::library_id.eq(lib)); + } + if unassigned { + query = query.filter(face_detections::person_id.is_null()); + } + let rows = query + .order(face_detections::id.asc()) + .limit(limit) + .offset(offset) + .load::(conn.deref_mut()) + .with_context(|| "list embeddings")?; + // Pair with the base64-encoded embedding string so the handler + // doesn't need to know the wire format. Skip rows with NULL + // embedding (shouldn't happen on detected rows, but defensive). + use base64::Engine; + Ok(rows + .into_iter() + .filter_map(|r| { + r.embedding.as_ref().map(|bytes| { + let b64 = base64::engine::general_purpose::STANDARD.encode(bytes); + (r.clone(), b64) + }) + }) + .collect()) + }) + } + + fn get_face( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "get_face", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + face_detections::table + .find(id) + .first::(conn.deref_mut()) + .optional() + .with_context(|| "get_face") + }) + } + + fn update_face( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + person_id: Option>, + bbox: Option<(f32, f32, f32, f32)>, + embedding: Option>, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "update", "update_face", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + // Apply patches one at a time so each set() has the right type. + // Diesel's update DSL is type-driven and combining heterogeneous + // optional sets in one statement is awkward. 
+ if let Some(pid) = person_id { + diesel::update(face_detections::table.find(id)) + .set(face_detections::person_id.eq(pid)) + .execute(conn.deref_mut()) + .with_context(|| "update person_id")?; + } + if let Some((x, y, w, h)) = bbox { + diesel::update(face_detections::table.find(id)) + .set(( + face_detections::bbox_x.eq(x), + face_detections::bbox_y.eq(y), + face_detections::bbox_w.eq(w), + face_detections::bbox_h.eq(h), + )) + .execute(conn.deref_mut()) + .with_context(|| "update bbox")?; + } + if let Some(emb) = embedding { + diesel::update(face_detections::table.find(id)) + .set(face_detections::embedding.eq(emb)) + .execute(conn.deref_mut()) + .with_context(|| "update embedding")?; + } + face_detections::table + .find(id) + .first::(conn.deref_mut()) + .with_context(|| "fetch updated face") + }) + } + + fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "delete", "delete_face", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + let n = diesel::delete(face_detections::table.find(id)) + .execute(conn.deref_mut()) + .with_context(|| "delete face")?; + Ok(n > 0) + }) + } + + fn delete_auto_for_hash( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "delete", "delete_auto_for_hash", |span| { + span.set_attribute(KeyValue::new("content_hash", content_hash.to_string())); + diesel::delete( + face_detections::table + .filter(face_detections::content_hash.eq(content_hash)) + .filter(face_detections::source.eq("auto")), + ) + .execute(conn.deref_mut()) + .with_context(|| "delete auto rows") + }) + } + + fn stats( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "face_stats", 
|span| { + if let Some(lib) = library_id { + span.set_attribute(KeyValue::new("library_id", lib as i64)); + } + // Count distinct content_hashes per status by status — one + // hash can have many rows (multiple detected faces) but we + // want it counted once. + let scanned: i64 = { + let mut q = face_detections::table.into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_distinct(face_detections::content_hash)) + .first(conn.deref_mut()) + .with_context(|| "stats: scanned")? + }; + let with_faces: i64 = { + let mut q = face_detections::table + .filter(face_detections::status.eq("detected")) + .into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_distinct(face_detections::content_hash)) + .first(conn.deref_mut()) + .with_context(|| "stats: with_faces")? + }; + let no_faces: i64 = { + let mut q = face_detections::table + .filter(face_detections::status.eq("no_faces")) + .into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_distinct(face_detections::content_hash)) + .first(conn.deref_mut()) + .with_context(|| "stats: no_faces")? + }; + let failed: i64 = { + let mut q = face_detections::table + .filter(face_detections::status.eq("failed")) + .into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_distinct(face_detections::content_hash)) + .first(conn.deref_mut()) + .with_context(|| "stats: failed")? + }; + let total_photos: i64 = { + let mut q = image_exif::table.into_boxed(); + if let Some(lib) = library_id { + q = q.filter(image_exif::library_id.eq(lib)); + } + q.select(diesel::dsl::count_star()) + .first(conn.deref_mut()) + .with_context(|| "stats: total_photos")? 
+ }; + let persons_count: i64 = persons::table + .select(diesel::dsl::count_star()) + .first(conn.deref_mut()) + .with_context(|| "stats: persons")?; + let unassigned_faces: i64 = { + let mut q = face_detections::table + .filter(face_detections::status.eq("detected")) + .filter(face_detections::person_id.is_null()) + .into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_star()) + .first(conn.deref_mut()) + .with_context(|| "stats: unassigned")? + }; + + Ok(FaceStats { + library_id, + total_photos, + scanned, + with_faces, + no_faces, + failed, + persons_count, + unassigned_faces, + }) + }) + } + + fn create_person( + &mut self, + ctx: &opentelemetry::Context, + req: &CreatePersonReq, + from_tag: bool, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "insert", "create_person", |span| { + span.set_attribute(KeyValue::new("name", req.name.clone())); + let now = Utc::now().timestamp(); + let insert = InsertPerson { + name: req.name.clone(), + notes: req.notes.clone(), + created_from_tag: from_tag, + is_ignored: req.is_ignored, + created_at: now, + updated_at: now, + }; + diesel::insert_into(persons::table) + .values(&insert) + .execute(conn.deref_mut()) + .with_context(|| format!("insert person {}", req.name))?; + define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; } + let id = diesel::select(last_insert_rowid()) + .get_result::(conn.deref_mut()) + .with_context(|| "last_insert_rowid persons")?; + // Optional entity bridge — do this as a follow-up update so + // schema's UNIQUE(name COLLATE NOCASE) can fire on insert + // before we touch entity_id. 
+ if let Some(entity_id) = req.entity_id { + diesel::update(persons::table.find(id)) + .set(persons::entity_id.eq(entity_id)) + .execute(conn.deref_mut()) + .with_context(|| "set entity_id on new person")?; + } + persons::table + .find(id) + .first::(conn.deref_mut()) + .with_context(|| "fetch new person") + }) + } + + fn get_or_create_ignored_person( + &mut self, + ctx: &opentelemetry::Context, + ) -> anyhow::Result { + // Fast path: there's already an is_ignored row → return it. + // Slow path on first use: create one with a stable display name + // ("Ignored"). Race-safe because the UNIQUE(name COLLATE NOCASE) + // index forces only one ever to exist (we trip and look up). + { + let mut conn = self.connection.lock().expect("face dao lock"); + if let Some(p) = persons::table + .filter(persons::is_ignored.eq(true)) + .order(persons::id.asc()) + .first::(conn.deref_mut()) + .optional() + .with_context(|| "lookup ignored person")? + { + return Ok(p); + } + } + // Drop the lock before delegating to create_person — that + // method takes its own lock. + match self.create_person( + ctx, + &CreatePersonReq { + name: "Ignored".to_string(), + notes: Some( + "Bucket for strangers, false detections, and faces \ + you don't want bound to a real person." + .to_string(), + ), + entity_id: None, + is_ignored: true, + }, + /*from_tag*/ false, + ) { + Ok(p) => Ok(p), + Err(e) if is_unique_violation(&e) => { + // Race: someone else created the row. Re-read. 
+ let mut conn = self.connection.lock().expect("face dao lock"); + persons::table + .filter(persons::is_ignored.eq(true)) + .order(persons::id.asc()) + .first::(conn.deref_mut()) + .with_context(|| "load ignored person after race") + } + Err(e) => Err(e), + } + } + + fn get_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "get_person", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + persons::table + .find(id) + .first::(conn.deref_mut()) + .optional() + .with_context(|| "get_person") + }) + } + + fn list_persons( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + include_ignored: bool, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "list_persons", |_| { + // Two-step: load all persons, then a single grouped count + // query for face counts. Using a LEFT JOIN + GROUP BY in + // Diesel here gets noisy with the optional library filter; a + // second roundtrip is cheap and clearer. + let mut person_query = persons::table.into_boxed(); + if !include_ignored { + // Default — hide the IGNORE/junk bucket from the list. + // The frontend asks include_ignored=true explicitly when + // it needs to surface ignored persons (e.g. a "show + // ignored" toggle in the management UI). + person_query = person_query.filter(persons::is_ignored.eq(false)); + } + let person_rows: Vec = person_query + .order(persons::name.asc()) + .load::(conn.deref_mut()) + .with_context(|| "load persons")?; + + // Diesel's BoxedSelectStatement + group_by trips the trait + // resolver into recursion, so this aggregation goes through + // sql_query. The shape is small and the bind list is at most + // one parameter — readability isn't really worse than the DSL. 
+ let counts: Vec<(i32, i64)> = { + use diesel::sql_types::*; + #[derive(QueryableByName)] + struct PersonCountRow { + #[diesel(sql_type = Integer)] + person_id: i32, + #[diesel(sql_type = BigInt)] + count: i64, + } + let sql = if library_id.is_some() { + "SELECT person_id, COUNT(*) AS count FROM face_detections \ + WHERE status='detected' AND person_id IS NOT NULL AND library_id = ? \ + GROUP BY person_id" + } else { + "SELECT person_id, COUNT(*) AS count FROM face_detections \ + WHERE status='detected' AND person_id IS NOT NULL \ + GROUP BY person_id" + }; + let mut q = diesel::sql_query(sql).into_boxed(); + if let Some(lib) = library_id { + q = q.bind::(lib); + } + q.load::(conn.deref_mut()) + .with_context(|| "person face counts")? + .into_iter() + .map(|r| (r.person_id, r.count)) + .collect() + }; + use std::collections::HashMap; + let count_map: HashMap = counts.into_iter().collect(); + + Ok(person_rows + .into_iter() + .map(|p| { + let face_count = count_map.get(&p.id).copied().unwrap_or(0); + PersonSummary { + id: p.id, + name: p.name, + cover_face_id: p.cover_face_id, + entity_id: p.entity_id, + created_from_tag: p.created_from_tag, + notes: p.notes, + is_ignored: p.is_ignored, + face_count, + } + }) + .collect()) + }) + } + + fn update_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + patch: &UpdatePersonReq, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "update", "update_person", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + let now = Utc::now().timestamp(); + // Apply each patched column individually for the same + // reason as update_face — heterogeneous optional sets are + // painful in Diesel's type-driven update DSL. 
+ if let Some(name) = &patch.name { + diesel::update(persons::table.find(id)) + .set((persons::name.eq(name), persons::updated_at.eq(now))) + .execute(conn.deref_mut()) + .with_context(|| "update person name")?; + } + if let Some(notes) = &patch.notes { + diesel::update(persons::table.find(id)) + .set((persons::notes.eq(notes), persons::updated_at.eq(now))) + .execute(conn.deref_mut()) + .with_context(|| "update person notes")?; + } + if let Some(cover) = patch.cover_face_id { + diesel::update(persons::table.find(id)) + .set(( + persons::cover_face_id.eq(cover), + persons::updated_at.eq(now), + )) + .execute(conn.deref_mut()) + .with_context(|| "update person cover")?; + } + if let Some(eid) = patch.entity_id { + diesel::update(persons::table.find(id)) + .set((persons::entity_id.eq(eid), persons::updated_at.eq(now))) + .execute(conn.deref_mut()) + .with_context(|| "update person entity_id")?; + } + if let Some(flag) = patch.is_ignored { + diesel::update(persons::table.find(id)) + .set((persons::is_ignored.eq(flag), persons::updated_at.eq(now))) + .execute(conn.deref_mut()) + .with_context(|| "update person is_ignored")?; + } + persons::table + .find(id) + .first::(conn.deref_mut()) + .with_context(|| "fetch updated person") + }) + } + + fn delete_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + cascade_delete_faces: bool, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "delete", "delete_person", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + span.set_attribute(KeyValue::new("cascade", cascade_delete_faces)); + if cascade_delete_faces { + diesel::delete(face_detections::table.filter(face_detections::person_id.eq(id))) + .execute(conn.deref_mut()) + .with_context(|| "cascade delete faces for person")?; + } + // Always clear cover_face_id pointers that referenced this + // person's faces (otherwise the FK from persons.cover_face_id + // could hang). 
cover_face_id has no FK constraint in SQLite + // so this is documentation-only — the explicit nuke is on + // the face rows above. + let n = diesel::delete(persons::table.find(id)) + .execute(conn.deref_mut()) + .with_context(|| "delete person")?; + Ok(n > 0) + }) + } + + fn merge_persons( + &mut self, + ctx: &opentelemetry::Context, + src: i32, + into: i32, + ) -> anyhow::Result { + if src == into { + anyhow::bail!("cannot merge a person into itself"); + } + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "update", "merge_persons", |span| { + span.set_attribute(KeyValue::new("src", src as i64)); + span.set_attribute(KeyValue::new("into", into as i64)); + // Wrap in a transaction so a half-merged state can't survive + // a SQLite write error mid-operation. + conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| { + // Re-point face_detections. + diesel::update(face_detections::table.filter(face_detections::person_id.eq(src))) + .set(face_detections::person_id.eq(into)) + .execute(tx) + .with_context(|| "repoint faces on merge")?; + // Copy notes from src into target if the target is empty. 
+ let src_person: Person = persons::table + .find(src) + .first(tx) + .with_context(|| "load src person for merge")?; + let into_person: Person = persons::table + .find(into) + .first(tx) + .with_context(|| "load target person for merge")?; + if into_person.notes.as_deref().unwrap_or("").is_empty() + && src_person + .notes + .as_deref() + .map(|s| !s.is_empty()) + .unwrap_or(false) + { + diesel::update(persons::table.find(into)) + .set(persons::notes.eq(src_person.notes)) + .execute(tx) + .with_context(|| "copy notes on merge")?; + } + diesel::delete(persons::table.find(src)) + .execute(tx) + .with_context(|| "delete src person on merge")?; + persons::table + .find(into) + .first::(tx) + .with_context(|| "fetch merged person") + }) + }) + } + + fn resolve_content_hash( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "resolve_content_hash", |_| { + image_exif::table + .filter(image_exif::library_id.eq(library_id)) + .filter(image_exif::rel_path.eq(rel_path)) + .select(image_exif::content_hash) + .first::>(conn.deref_mut()) + .optional() + .map(|outer| outer.and_then(|inner| inner)) + .with_context(|| "resolve content_hash") + }) + } + + fn find_persons_by_names_ci( + &mut self, + ctx: &opentelemetry::Context, + names: &[String], + ) -> anyhow::Result> { + if names.is_empty() { + return Ok(std::collections::HashMap::new()); + } + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "find_persons_by_names_ci", |span| { + span.set_attribute(KeyValue::new("count", names.len() as i64)); + // Lowercase comparison both sides. Use sql_query to keep the + // bind list dynamic without fighting Diesel's type system on + // the LOWER() function. 
+ use diesel::sql_types::*; + let placeholders = std::iter::repeat_n("?", names.len()) + .collect::>() + .join(","); + // Filter out is_ignored persons so the auto-bind path can + // never target the IGNORE/junk bucket — even if a tag name + // happens to match it (e.g. someone tags photos as "Ignored" + // by hand). Ignore-bucket assignment is an explicit operator + // action through the dedicated endpoint, never a heuristic. + let sql = format!( + "SELECT id, LOWER(name) AS lower_name FROM persons \ + WHERE is_ignored = 0 AND LOWER(name) IN ({}) \ + ORDER BY id ASC", + placeholders + ); + #[derive(QueryableByName)] + struct Row { + #[diesel(sql_type = Integer)] + id: i32, + #[diesel(sql_type = Text)] + lower_name: String, + } + let mut q = diesel::sql_query(sql).into_boxed(); + for n in names { + q = q.bind::(n.to_lowercase()); + } + let rows = q + .load::(conn.deref_mut()) + .with_context(|| "find_persons_by_names_ci")?; + // Lowest id wins on collision (UNIQUE COLLATE NOCASE on the + // table prevents that today, but the deduplication is a + // defensive belt-and-braces). + let mut out = std::collections::HashMap::with_capacity(rows.len()); + for r in rows { + out.entry(r.lower_name).or_insert(r.id); + } + Ok(out) + }) + } + + fn person_reference_embedding( + &mut self, + ctx: &opentelemetry::Context, + person_id: i32, + model_version: &str, + ) -> anyhow::Result>> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "person_reference_embedding", |span| { + span.set_attribute(KeyValue::new("person_id", person_id as i64)); + span.set_attribute(KeyValue::new("model_version", model_version.to_string())); + // Pull only the embedding bytes; we average them in Rust. A + // SQL aggregate over 512-d vectors isn't meaningfully faster + // and would tie us to a specific embedding length. 
+            let blobs: Vec<Option<Vec<u8>>> = face_detections::table
+                .filter(face_detections::person_id.eq(person_id))
+                .filter(face_detections::status.eq("detected"))
+                .filter(face_detections::model_version.eq(model_version))
+                .select(face_detections::embedding)
+                .load(conn.deref_mut())
+                .with_context(|| "load person embeddings")?;
+            let vectors: Vec<Vec<f32>> = blobs
+                .into_iter()
+                .filter_map(|b| b.and_then(|bytes| decode_embedding_bytes(&bytes)))
+                .collect();
+            if vectors.is_empty() {
+                return Ok(None);
+            }
+            Ok(Some(mean_normalized(&vectors)))
+        })
+    }
+
+    fn assign_face_to_person(
+        &mut self,
+        ctx: &opentelemetry::Context,
+        face_id: i32,
+        person_id: i32,
+    ) -> anyhow::Result<()> {
+        let mut conn = self.connection.lock().expect("face dao lock");
+        trace_db_call(ctx, "update", "assign_face_to_person", |span| {
+            span.set_attribute(KeyValue::new("face_id", face_id as i64));
+            span.set_attribute(KeyValue::new("person_id", person_id as i64));
+            conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| {
+                diesel::update(face_detections::table.find(face_id))
+                    .set(face_detections::person_id.eq(person_id))
+                    .execute(tx)
+                    .with_context(|| "set face person_id")?;
+                // If this person has no cover yet, claim this face.
+                // Don't overwrite an existing cover — the user may have
+                // hand-picked one in the UI.
+                let cover: Option<i32> = persons::table
+                    .find(person_id)
+                    .select(persons::cover_face_id)
+                    .first::<Option<i32>>(tx)
+                    .with_context(|| "load person cover")?;
+                if cover.is_none() {
+                    diesel::update(persons::table.find(person_id))
+                        .set(persons::cover_face_id.eq(face_id))
+                        .execute(tx)
+                        .with_context(|| "set cover_face_id")?;
+                }
+                Ok(())
+            })
+        })
+    }
+}
+
+// ── Embedding helpers ───────────────────────────────────────────────────────
+
+/// Decode a 2048-byte little-endian f32 BLOB into a Vec<f32> of length 512.
+/// Returns None on malformed input rather than erroring — the caller treats
+/// "no usable embedding" the same as "no embedding at all" (skip averaging).
+pub(crate) fn decode_embedding_bytes(bytes: &[u8]) -> Option<Vec<f32>> {
+    if bytes.len() != 2048 {
+        return None;
+    }
+    let mut out = Vec::with_capacity(512);
+    for chunk in bytes.chunks_exact(4) {
+        out.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
+    }
+    Some(out)
+}
+
+/// Mean of L2-normalized vectors, then re-normalize. ArcFace embeddings
+/// from insightface are already L2-normalized, so re-normalizing the
+/// average is a one-step "average direction" operation.
+fn mean_normalized(vectors: &[Vec<f32>]) -> Vec<f32> {
+    debug_assert!(
+        !vectors.is_empty(),
+        "mean_normalized requires non-empty input"
+    );
+    let dim = vectors[0].len();
+    let mut acc = vec![0.0f32; dim];
+    for v in vectors {
+        debug_assert_eq!(v.len(), dim, "mismatched embedding dim");
+        for (i, x) in v.iter().enumerate() {
+            acc[i] += *x;
+        }
+    }
+    let n = vectors.len() as f32;
+    for x in &mut acc {
+        *x /= n;
+    }
+    let norm = acc.iter().map(|x| x * x).sum::<f32>().sqrt();
+    if norm > 0.0 {
+        for x in &mut acc {
+            *x /= norm;
+        }
+    }
+    acc
+}
+
+/// Cosine similarity of two embeddings. Both must be the same length;
+/// neither needs to be pre-normalized. Returns 0.0 on length mismatch
+/// or zero-magnitude input rather than NaN — the auto-bind path
+/// interprets 0.0 as "no useful similarity, leave unassigned".
+pub(crate) fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + if a.len() != b.len() || a.is_empty() { + return 0.0; + } + let mut dot = 0.0f32; + let mut na = 0.0f32; + let mut nb = 0.0f32; + for (x, y) in a.iter().zip(b.iter()) { + dot += x * y; + na += x * x; + nb += y * y; + } + let denom = na.sqrt() * nb.sqrt(); + if denom <= 0.0 { 0.0 } else { dot / denom } +} + +// ── Handlers ──────────────────────────────────────────────────────────────── + +pub fn add_face_services(app: App) -> App +where + T: ServiceFactory, +{ + app.service(web::resource("/faces/stats").route(web::get().to(stats_handler::))) + .service(web::resource("/faces/embeddings").route(web::get().to(embeddings_handler::))) + .service( + web::resource("/image/faces") + .route(web::get().to(list_faces_handler::)) + .route(web::post().to(create_face_handler::)), + ) + .service( + web::resource("/image/faces/{id}") + .route(web::patch().to(update_face_handler::)) + .route(web::delete().to(delete_face_handler::)), + ) + .service( + web::resource("/persons") + .route(web::get().to(list_persons_handler::)) + .route(web::post().to(create_person_handler::)), + ) + .service( + web::resource("/persons/bootstrap") + .route(web::post().to(bootstrap_persons_handler::)), + ) + .service( + web::resource("/persons/ignore-bucket") + .route(web::post().to(ignore_bucket_handler::)), + ) + .service( + web::resource("/tags/people-bootstrap-candidates") + .route(web::get().to(bootstrap_candidates_handler::)), + ) + .service( + web::resource("/persons/{id}") + .route(web::get().to(get_person_handler::)) + .route(web::patch().to(update_person_handler::)) + .route(web::delete().to(delete_person_handler::)), + ) + .service( + web::resource("/persons/{id}/merge").route(web::post().to(merge_persons_handler::)), + ) + .service( + web::resource("/persons/{id}/faces").route(web::get().to(person_faces_handler::)), + ) +} + +// ── Bootstrap (Phase 4) ───────────────────────────────────────────────────── + +#[derive(Serialize, 
Debug, Clone)] +pub struct BootstrapCandidate { + /// Display name — most-frequent capitalization across the case-insensitive + /// group, or simply the first one seen if it's a tie. + pub name: String, + /// Lowercased name; the stable key for grouping and the auto-bind path. + pub normalized_name: String, + /// Sum of `tagged_photo` counts across all capitalizations of this name. + pub usage_count: i64, + /// Heuristic suggestion; the UI defaults this to checked but the user + /// confirms before [`bootstrap_persons_handler`] actually creates rows. + pub looks_like_person: bool, + /// True when a `persons` row already exists for this name (any case). + /// The UI hides these — re-running bootstrap is idempotent so it's fine + /// either way, but the noise isn't worth showing. + pub already_exists: bool, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapCandidatesResponse { + pub candidates: Vec, +} + +#[derive(Deserialize, Debug)] +pub struct BootstrapPersonsReq { + pub names: Vec, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapPersonsResponse { + pub created: Vec, + pub skipped: Vec, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapSkipped { + pub name: String, + pub reason: String, +} + +/// Hard filter for the bootstrap candidate list. Returns true if the tag +/// could plausibly be a person name; returns false to drop it from the +/// candidates entirely (not just leave looks_like_person=false). +/// +/// Rules — all required: +/// - At least 3 characters after trimming. Two-letter tags ("AB", "OK") +/// are almost always abbreviations or markers, not names. +/// - No emoji or symbol-class characters. SQL-side string sort already +/// surfaces those at the top of the tag list; filtering them keeps +/// the candidate UI focused on names rather than chart-junk. +/// - No control characters or null bytes. 
+pub(crate) fn is_plausible_name_token(raw: &str) -> bool { + let trimmed = raw.trim(); + if trimmed.chars().count() < 3 { + return false; + } + for c in trimmed.chars() { + // Letter / mark / decimal-digit / connector-punctuation / + // dash / apostrophe / period / whitespace are all plausible in a + // name. Anything else (emoji, symbols, math operators, arrows, + // box drawing, control codes) disqualifies the whole tag. + if c.is_alphabetic() + || c.is_whitespace() + || matches!(c, '\'' | '-' | '.' | '_' | '\u{2019}') + { + continue; + } + if c.is_ascii_digit() { + // Digits don't disqualify here — `looks_like_person` rejects + // them later, but `is_plausible_name_token` is just about + // "could this be in the candidate list at all?". A tag like + // "Sarah2" stays as a candidate (display-flagged not-a-person + // by looks_like_person) so the operator can still spot and + // confirm it manually if it's an alias. + continue; + } + return false; + } + true +} + +/// Conservative "this tag *might* be a person name" heuristic. False +/// negatives are fine — the operator confirms in the UI before any row +/// is created. False positives are also fine for the same reason; the +/// goal is just to default sensible candidates to checked. +/// +/// Rules: +/// - 1–2 whitespace-separated words +/// - Each word starts with an uppercase character +/// - No digits anywhere (rejects "Trip 2018", "2024", etc.) +/// - Single-word names not on a small denylist of common non-person +/// tags (cat, christmas, beach, ...). Two-word names skip the +/// denylist because a real two-word person name is the dominant +/// case ("Sarah Smith") and false-blocking it is worse than false- +/// accepting "Sunset Walk". 
+pub(crate) fn looks_like_person(raw: &str) -> bool { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return false; + } + let words: Vec<&str> = trimmed.split_whitespace().collect(); + if !(1..=2).contains(&words.len()) { + return false; + } + for w in &words { + let Some(first) = w.chars().next() else { + return false; + }; + if !first.is_uppercase() { + return false; + } + if w.chars().any(|c| c.is_ascii_digit()) { + return false; + } + } + if words.len() == 1 { + const DENY: &[&str] = &[ + // Pets / animals + "cat", + "dog", + "kitten", + "puppy", + "bird", + "fish", + "pet", + "pets", + // Events / occasions + "birthday", + "christmas", + "halloween", + "easter", + "thanksgiving", + "wedding", + "anniversary", + "vacation", + "holiday", + "party", + "trip", + "graduation", + "concert", + // Places (generic) + "home", + "work", + "beach", + "park", + "hotel", + "restaurant", + "office", + "house", + "garden", + // Subjects / styles + "food", + "sunset", + "sunrise", + "landscape", + "portrait", + "selfie", + "nature", + "flowers", + "flower", + "snow", + "rain", + "sky", + // Buckets + "untagged", + "favorites", + "favourites", + "misc", + "other", + "random", + ]; + let lower = trimmed.to_lowercase(); + if DENY.iter().any(|w| *w == lower) { + return false; + } + } + true +} + +async fn bootstrap_candidates_handler( + _: Claims, + request: HttpRequest, + face_dao: web::Data>, + tag_dao: web::Data>, +) -> impl Responder { + use std::collections::HashMap; + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.bootstrap_candidates", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + // All tags + their counts. Path filter unused — bootstrap is library-wide. 
+ let tags_with_counts = { + let mut td = tag_dao.lock().expect("tag dao lock"); + match crate::tags::TagDao::get_all_tags(&mut *td, &span_context, None) { + Ok(t) => t, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + } + }; + + // Group by lowercase name. Pick the most-frequent capitalization + // for the display name (ties broken by first-seen). Filter out + // short tags and tags carrying non-name characters (emojis, symbols) + // before grouping — they're noise no operator would tick, so showing + // them just makes the candidate list harder to scan. + struct Group { + display: String, + display_freq: i64, + total_count: i64, + } + let mut groups: HashMap = HashMap::new(); + for (count, tag) in tags_with_counts { + if !is_plausible_name_token(&tag.name) { + continue; + } + let lower = tag.name.to_lowercase(); + let g = groups.entry(lower).or_insert_with(|| Group { + display: tag.name.clone(), + display_freq: 0, + total_count: 0, + }); + g.total_count += count; + if count > g.display_freq { + g.display = tag.name.clone(); + g.display_freq = count; + } + } + + // Cross-reference against existing persons (bulk one-query lookup). + let lower_names: Vec = groups.keys().cloned().collect(); + let existing = { + let mut fd = face_dao.lock().expect("face dao lock"); + match fd.find_persons_by_names_ci(&span_context, &lower_names) { + Ok(m) => m, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + } + }; + + let mut candidates: Vec = groups + .into_iter() + .map(|(lower, g)| BootstrapCandidate { + looks_like_person: looks_like_person(&g.display), + already_exists: existing.contains_key(&lower), + name: g.display, + normalized_name: lower, + usage_count: g.total_count, + }) + .collect(); + // Sort: persons-first heuristic by descending count, then alphabetical. + // Persons-likely candidates surface near the top so the user doesn't + // scroll past dozens of "vacation"-style tags to find them. 
+ candidates.sort_by(|a, b| { + b.looks_like_person + .cmp(&a.looks_like_person) + .then(b.usage_count.cmp(&a.usage_count)) + .then(a.normalized_name.cmp(&b.normalized_name)) + }); + + HttpResponse::Ok().json(BootstrapCandidatesResponse { candidates }) +} + +async fn bootstrap_persons_handler( + _: Claims, + request: HttpRequest, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.bootstrap_persons", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let mut created: Vec = Vec::new(); + let mut skipped: Vec = Vec::new(); + + let mut dao = face_dao.lock().expect("face dao lock"); + + // Pre-fetch the existing-name set so a duplicate request reports + // "already exists" (skipped) rather than firing N inserts that all + // 409 against the UNIQUE COLLATE NOCASE constraint. + let lower_names: Vec = body.names.iter().map(|n| n.to_lowercase()).collect(); + let existing = match dao.find_persons_by_names_ci(&span_context, &lower_names) { + Ok(m) => m, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + }; + + for name in &body.names { + let trimmed = name.trim(); + if trimmed.is_empty() { + skipped.push(BootstrapSkipped { + name: name.clone(), + reason: "empty name".into(), + }); + continue; + } + let lower = trimmed.to_lowercase(); + if existing.contains_key(&lower) { + skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: "person already exists".into(), + }); + continue; + } + match dao.create_person( + &span_context, + &CreatePersonReq { + name: trimmed.to_string(), + notes: None, + entity_id: None, + is_ignored: false, + }, + /*from_tag*/ true, + ) { + Ok(p) => created.push(p), + Err(e) => { + if is_unique_violation(&e) { + // Race with a concurrent create; treat as skipped. 
+ skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: "person already exists".into(), + }); + } else { + skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: format!("{:#}", e), + }); + } + } + } + } + + HttpResponse::Ok().json(BootstrapPersonsResponse { created, skipped }) +} + +// ── Stats / list ──────────────────────────────────────────────────────────── + +#[derive(Deserialize)] +pub struct LibraryQuery { + pub library: Option, +} + +/// `GET /persons` query: optional library scope, optional include of +/// the IGNORE/junk bucket. The bucket is hidden by default so the +/// management UI shows only "real" persons; the persons-management +/// screen requests it explicitly when it needs to surface ignored. +#[derive(Deserialize)] +pub struct ListPersonsQuery { + pub library: Option, + #[serde(default)] + pub include_ignored: bool, +} + +async fn stats_handler( + _: Claims, + request: HttpRequest, + app_state: web::Data, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.stats", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .map(|l| l.id); + let mut dao = face_dao.lock().expect("face dao lock"); + dao.stats(&span_context, library_id) + .map(|s| { + span_context.span().set_status(Status::Ok); + HttpResponse::Ok().json(s) + }) + .into_http_internal_err() +} + +async fn list_faces_handler( + _: Claims, + request: HttpRequest, + query: web::Query, + app_state: web::Data, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.list", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let normalized_path = 
normalize_path(&query.path); + // resolve_library_param returns Option<&Library>; clone so the result + // is owned (matching the primary_library fallback's type). + let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .cloned() + .unwrap_or_else(|| app_state.primary_library().clone()); + + let mut dao = face_dao.lock().expect("face dao lock"); + let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) { + Ok(Some(h)) => h, + Ok(None) => { + // Photo not yet hashed — empty face list is a graceful answer. + // The carousel falls back to "no overlay" which is fine until + // the watcher catches up. + return HttpResponse::Ok().json(Vec::::new()); + } + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + }; + match dao.list_for_content_hash(&span_context, &hash) { + Ok(faces) => HttpResponse::Ok().json(faces), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn embeddings_handler( + _: Claims, + request: HttpRequest, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.embeddings", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let limit = query.limit.clamp(1, 5_000); + let offset = query.offset.max(0); + let mut dao = face_dao.lock().expect("face dao lock"); + dao.list_embeddings( + &span_context, + query.library, + query.unassigned, + limit, + offset, + ) + .map(|rows| { + let out: Vec = rows + .into_iter() + .map(|(r, b64)| FaceEmbeddingRow { + id: r.id, + library_id: r.library_id, + rel_path: r.rel_path, + content_hash: r.content_hash, + person_id: r.person_id, + model_version: r.model_version, + embedding: b64, + bbox_x: r.bbox_x, + bbox_y: r.bbox_y, + bbox_w: r.bbox_w, + bbox_h: r.bbox_h, + }) + .collect(); + HttpResponse::Ok().json(out) + }) + 
.into_http_internal_err() +} + +// ── Manual face create / update / delete ──────────────────────────────────── + +async fn create_face_handler( + _: Claims, + request: HttpRequest, + body: web::Json, + app_state: web::Data, + face_client: web::Data, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.create_manual", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + // The force path doesn't need Apollo at all (no embed call); the + // strict path does. Surface the disabled state only when we'd + // actually use the client. + if !body.force && !face_client.is_enabled() { + return HttpResponse::ServiceUnavailable().body("face client disabled"); + } + + let normalized_path = normalize_path(&body.path); + let library: Library = match libraries::resolve_library_param( + &app_state, + body.library.as_ref().map(|i| i.to_string()).as_deref(), + ) { + Ok(Some(lib)) => lib.clone(), + _ => app_state.primary_library().clone(), + }; + + // 1. Resolve content_hash for the photo. + let hash = { + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.resolve_content_hash(&span_context, library.id, &normalized_path) { + Ok(Some(h)) => h, + Ok(None) => { + return HttpResponse::Conflict() + .body("photo not yet hashed; wait for next watcher pass"); + } + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + } + }; + + // 2 + 3. Crop + embed via Apollo (strict path), or skip both (force). + // + // Force is the "tag a face the detector can't see" path — back of + // head, heavily-occluded profile, etc. We store a zero-vector + // embedding under a sentinel model_version so the row participates + // only as a browse-by-person tag: clustering filters norm<=0 (see + // face_clustering._decode_b64_embedding) and auto-bind cosine + // resolves to 0 / NaN, never crossing the threshold. 
Cluster + // suggester also groups by model_version so this sentinel never + // mixes with real buffalo_l rows. + let (embedding_bytes, model_version, confidence) = if body.force { + info!( + "manual face (force): skipping detection for {:?} bbox=({},{},{},{})", + normalized_path, body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h + ); + (vec![0u8; 2048], "manual_no_embed".to_string(), 0.0_f32) + } else { + let abs_path = library.resolve(&normalized_path); + let crop_bytes = match crop_image_to_bbox( + &abs_path, + body.bbox.x, + body.bbox.y, + body.bbox.w, + body.bbox.h, + ) { + Ok(b) => b, + Err(e) => { + warn!("crop_image_to_bbox failed for {:?}: {:?}", abs_path, e); + return HttpResponse::BadRequest().body(format!("cannot crop photo: {}", e)); + } + }; + + let meta = DetectMeta { + content_hash: hash.clone(), + library_id: library.id, + rel_path: normalized_path.clone(), + orientation: None, + model_version: None, + }; + let detect = match face_client.embed(crop_bytes, meta).await { + Ok(r) => r, + Err(FaceDetectError::Permanent(e)) => { + return HttpResponse::UnprocessableEntity().body(format!("{}", e)); + } + Err(FaceDetectError::Transient(e)) => { + return HttpResponse::ServiceUnavailable().body(format!("{}", e)); + } + Err(FaceDetectError::Disabled) => { + return HttpResponse::ServiceUnavailable().body("face client disabled"); + } + }; + + let detected = match detect.faces.first() { + Some(f) => f.clone(), + None => { + // Apollo would have returned 422 on no_face_in_crop; defensive. + return HttpResponse::UnprocessableEntity().body("no face in crop"); + } + }; + let bytes = match detected.decode_embedding() { + Ok(b) => b, + Err(e) => { + warn!("manual face: decode embedding failed: {:?}", e); + return HttpResponse::BadGateway().body("invalid embedding from face service"); + } + }; + (bytes, detect.model_version, detected.confidence) + }; + + // 4. 
Insert the manual row using the bbox the user drew (NOT the + // detector's tighter box around their drawing — they get what they + // asked for; cluster matching uses the embedding which is from the + // detector's true box anyway). + let mut dao = face_dao.lock().expect("face dao lock"); + let row = match dao.store_detection( + &span_context, + InsertFaceDetectionInput { + library_id: library.id, + content_hash: hash, + rel_path: normalized_path, + bbox: Some((body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h)), + embedding: Some(embedding_bytes), + confidence: Some(confidence), + source: "manual".to_string(), + person_id: body.person_id, + status: "detected".to_string(), + model_version, + }, + ) { + Ok(r) => r, + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + }; + info!( + "Created manual face id={} library={} hash={} person_id={:?}", + row.id, row.library_id, row.content_hash, row.person_id + ); + match hydrate_face_with_person(&mut *dao, &span_context, row) { + Ok(joined) => HttpResponse::Created().json(joined), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn update_face_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + body: web::Json, + app_state: web::Data, + face_client: web::Data, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.update", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let id = path.into_inner(); + + let person_patch: Option> = if body.clear_person { + Some(None) + } else { + body.person_id.map(Some) + }; + let bbox_patch = body.bbox.as_ref().map(|b| (b.x, b.y, b.w, b.h)); + + // Bbox change → re-embed. The embedding is what auto-bind and the + // cluster suggester key on, so leaving it stale would silently + // corrupt every downstream similarity match. 
We crop the new bbox, + // pass it through face_client.embed, and store the fresh vector. + // Net cost: one Apollo round-trip per bbox edit (~100-500ms on + // CPU); acceptable for a manual operator action. + let mut new_embedding: Option> = None; + if let Some((bx, by, bw, bh)) = bbox_patch { + if !face_client.is_enabled() { + return HttpResponse::ServiceUnavailable() + .body("face client disabled — bbox edit requires Apollo"); + } + // Look up the current row so we know which photo to crop. + let current = { + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.get_face(&span_context, id) { + Ok(Some(r)) => r, + Ok(None) => return HttpResponse::NotFound().finish(), + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + } + }; + let library = match app_state.library_by_id(current.library_id) { + Some(l) => l.clone(), + None => { + return HttpResponse::InternalServerError().body(format!( + "face row references unknown library_id {}", + current.library_id + )); + } + }; + let abs_path = library.resolve(¤t.rel_path); + let crop_bytes = match crop_image_to_bbox(&abs_path, bx, by, bw, bh) { + Ok(b) => b, + Err(e) => { + warn!( + "PATCH /image/faces/{}: crop failed for {:?}: {:?}", + id, abs_path, e + ); + return HttpResponse::BadRequest() + .body(format!("cannot crop new bbox: {}", e)); + } + }; + let meta = DetectMeta { + content_hash: current.content_hash.clone(), + library_id: current.library_id, + rel_path: current.rel_path.clone(), + orientation: None, + model_version: Some(current.model_version.clone()), + }; + // Soft contract on the re-embed: we'd LIKE a fresh ArcFace + // vector for the new crop, but the operator's bbox edit is + // sacred. If detection finds no face in the new region (they + // dragged the box slightly off-center, or moved it to a back- + // of-head shot they've already manually tagged), or returns a + // bad embedding, we keep the old embedding and apply the bbox + // anyway. 
Cost: stale embedding for that row, which slightly + // pollutes clustering for files re-detected against this + // person — accepted because dropping the user's drag is a + // worse UX. Transient failures (cuda_oom, engine unavailable) + // still 503 so the operator can retry once Apollo recovers. + match face_client.embed(crop_bytes, meta).await { + Ok(resp) => { + if let Some(face) = resp.faces.first() { + match face.decode_embedding() { + Ok(b) => new_embedding = Some(b), + Err(e) => { + warn!( + "PATCH /image/faces/{}: bad embedding from face service ({:?}); keeping old embedding, bbox still applied", + id, e + ); + } + } + } else { + info!( + "PATCH /image/faces/{}: no face detected in new bbox — keeping old embedding, bbox still applied", + id + ); + } + } + Err(FaceDetectError::Permanent(e)) => { + info!( + "PATCH /image/faces/{}: embed permanent error ({}); keeping old embedding, bbox still applied", + id, e + ); + } + Err(FaceDetectError::Transient(e)) => { + return HttpResponse::ServiceUnavailable().body(format!("{}", e)); + } + Err(FaceDetectError::Disabled) => { + return HttpResponse::ServiceUnavailable() + .body("face client disabled mid-flight"); + } + } + } + + let mut dao = face_dao.lock().expect("face dao lock"); + let row = match dao.update_face(&span_context, id, person_patch, bbox_patch, new_embedding) { + Ok(r) => r, + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + }; + // Hydrate person_name so the response shape matches GET /image/faces + // — the carousel overlay does an optimistic replace on this row, and + // a bare FaceDetectionRow with no person_name would visibly drop the + // VFD label off the bbox even though the assignment didn't change. 
+ match hydrate_face_with_person(&mut *dao, &span_context, row) { + Ok(joined) => HttpResponse::Ok().json(joined), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn delete_face_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.delete", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.delete_face(&span_context, path.into_inner()) { + Ok(true) => HttpResponse::NoContent().finish(), + Ok(false) => HttpResponse::NotFound().finish(), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +// ── Persons ───────────────────────────────────────────────────────────────── + +async fn list_persons_handler( + _: Claims, + request: HttpRequest, + app_state: web::Data, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.list", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .map(|l| l.id); + let mut dao = face_dao.lock().expect("face dao lock"); + dao.list_persons(&span_context, library_id, query.include_ignored) + .map(|p| HttpResponse::Ok().json(p)) + .into_http_internal_err() +} + +async fn ignore_bucket_handler( + _: Claims, + request: HttpRequest, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.ignore_bucket", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let mut dao = face_dao.lock().expect("face dao lock"); + 
dao.get_or_create_ignored_person(&span_context) + .map(|p| HttpResponse::Ok().json(p)) + .into_http_internal_err() +} + +async fn create_person_handler( + _: Claims, + request: HttpRequest, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.create", &context); + let span_context = opentelemetry::Context::current_with_span(span); + if body.name.trim().is_empty() { + return HttpResponse::BadRequest().body("name required"); + } + + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.create_person(&span_context, &body, /*from_tag*/ false) { + Ok(p) => HttpResponse::Created().json(p), + Err(e) => { + // SQLite UNIQUE(name COLLATE NOCASE) → 409 Conflict so the UI + // can show "name already exists" without parsing. Use {:#} to + // include the source chain — anyhow's plain Display only shows + // the outermost context ("insert person ...") which hides the + // diesel "UNIQUE constraint failed" we're keying on. 
+ if is_unique_violation(&e) { + HttpResponse::Conflict().body("person name already exists") + } else { + HttpResponse::InternalServerError().body(format!("{:#}", e)) + } + } + } +} + +async fn get_person_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.get", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.get_person(&span_context, path.into_inner()) { + Ok(Some(p)) => HttpResponse::Ok().json(p), + Ok(None) => HttpResponse::NotFound().finish(), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn update_person_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.update", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.update_person(&span_context, path.into_inner(), &body) { + Ok(p) => HttpResponse::Ok().json(p), + Err(e) => { + if is_unique_violation(&e) { + HttpResponse::Conflict().body("person name already exists") + } else { + HttpResponse::InternalServerError().body(format!("{:#}", e)) + } + } + } +} + +async fn delete_person_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.delete", &context); + let span_context = opentelemetry::Context::current_with_span(span); + // Default cascade=set_null — don't destroy face history just because + // the user renamed/removed the 
identity. + let cascade = matches!(query.cascade.as_deref(), Some("delete")); + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.delete_person(&span_context, path.into_inner(), cascade) { + Ok(true) => HttpResponse::NoContent().finish(), + Ok(false) => HttpResponse::NotFound().finish(), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn merge_persons_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.merge", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let src = path.into_inner(); + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.merge_persons(&span_context, src, body.into) { + Ok(p) => HttpResponse::Ok().json(p), + Err(e) => { + let msg = format!("{:#}", e); + if msg.contains("itself") { + HttpResponse::BadRequest().body(msg) + } else { + HttpResponse::InternalServerError().body(msg) + } + } + } +} + +async fn person_faces_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + app_state: web::Data, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.faces", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .map(|l| l.id); + let mut dao = face_dao.lock().expect("face dao lock"); + dao.list_for_person(&span_context, path.into_inner(), library_id) + .map(|faces| HttpResponse::Ok().json(faces)) + .into_http_internal_err() +} + +// ── Helpers ───────────────────────────────────────────────────────────────── + +/// Crop `abs_path` to the normalized bbox and re-encode as JPEG for the +/// 
face service. `image::open` decodes most photo formats Apollo will see; +/// HEIC/RAW are out of scope for the manual flow (the user can't draw a +/// face on a thumbnail of a non-decodable file anyway). +fn crop_image_to_bbox( + abs_path: &std::path::Path, + nx: f32, + ny: f32, + nw: f32, + nh: f32, +) -> anyhow::Result> { + if !(0.0..=1.0).contains(&nx) || !(0.0..=1.0).contains(&ny) { + return Err(anyhow!("bbox xy out of [0,1]")); + } + if nw <= 0.0 || nh <= 0.0 || nx + nw > 1.001 || ny + nh > 1.001 { + return Err(anyhow!("bbox wh out of bounds or zero")); + } + let raw = image::open(abs_path).with_context(|| format!("open {:?}", abs_path))?; + // EXIF rotation: the bbox arrives in display space (the carousel / + // overlay are rendered post-rotation by the browser), but the + // `image` crate hands us raw pre-rotation pixels. For any phone + // photo with Orientation 6/8/etc., applying the bbox without + // rotating first lands the crop on a completely different region + // of the image — which is why manually-drawn bboxes basically + // never resolved a face on re-detection. Apply the orientation + // first, then index into the canonical-oriented dims. Photos with + // no EXIF rotation tag pay nothing (apply_orientation is a no-op). + let orientation = exif::read_orientation(abs_path).unwrap_or(1); + let img = exif::apply_orientation(raw, orientation); + let (w, h) = img.dimensions(); + let px = (nx * w as f32).round().clamp(0.0, w as f32 - 1.0) as u32; + let py = (ny * h as f32).round().clamp(0.0, h as f32 - 1.0) as u32; + let pw = ((nw * w as f32).round() as u32).min(w.saturating_sub(px)); + let ph = ((nh * h as f32).round() as u32).min(h.saturating_sub(py)); + if pw == 0 || ph == 0 { + return Err(anyhow!("crop produced zero-dim image")); + } + // Generous padding so RetinaFace has anchor-friendly context. + // Insightface internally resizes to det_size=640 (square). 
A + // tightly-drawn 200×250 face bbox + 10 % padding becomes ~240×300, + // which after resize fills ~95 % of the input — near the upper + // edge of RetinaFace's anchor scales, where it routinely returns + // zero detections. Padding to 50 % on each side makes the crop + // 2× the bbox dims (face occupies ~50 % of the input), where + // anchors hit cleanly. Bbox is clamped to image bounds, so + // edge-of-image bboxes just get less padding on the clipped side. + let pad_x = (pw / 2).max(1); + let pad_y = (ph / 2).max(1); + let cx = px.saturating_sub(pad_x); + let cy = py.saturating_sub(pad_y); + let cw = (pw + 2 * pad_x).min(w - cx); + let ch = (ph + 2 * pad_y).min(h - cy); + let cropped = img.crop_imm(cx, cy, cw, ch); + let mut out = std::io::Cursor::new(Vec::new()); + cropped + .write_to(&mut out, image::ImageFormat::Jpeg) + .with_context(|| "encode crop as JPEG")?; + Ok(out.into_inner()) +} + +/// Returns true if `err` (or anything in its source chain) is a SQLite +/// `UNIQUE constraint failed`. Walks the chain so callers don't have to +/// know the wrapping order — anyhow `with_context` plus diesel's own +/// error layering buries the database error two levels deep. +/// +/// String matching on `format!("{:#}", e)` would also work but is +/// fragile (locale-dependent SQLite messages, false positives like +/// "uniquely identifies"). Downcasting to the actual diesel kind is +/// the contract-stable check. 
+fn is_unique_violation(err: &anyhow::Error) -> bool { + use diesel::result::{DatabaseErrorKind, Error as DieselError}; + err.chain().any(|cause| { + cause.downcast_ref::().is_some_and(|de| { + matches!( + de, + DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, _) + ) + }) + }) +} + +// ── Tests ─────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::database::test::in_memory_db_connection; + + fn fresh_dao() -> SqliteFaceDao { + SqliteFaceDao::from_connection(Arc::new(Mutex::new(in_memory_db_connection()))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::current() + } + + #[test] + fn is_unique_violation_walks_chain() { + // The bug we hit in manual testing: anyhow's plain Display only + // shows the outermost context ("insert person Cameron"), so a + // naive `format!("{}", e).contains("unique")` check misses the + // diesel UNIQUE error nested below. Downcasting the source chain + // is the stable contract. + let mut dao = fresh_dao(); + let _ = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Cameron".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .expect("first insert"); + let dup_err = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Cameron".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .expect_err("second insert must fail"); + + // Plain Display hides the UNIQUE — that's the bug we're guarding + // against. We don't assert a specific outer message; we just + // confirm string-matching at the top level is unreliable. + let plain = format!("{}", dup_err); + assert!( + !plain.to_lowercase().contains("unique"), + "if Display starts surfacing UNIQUE we can drop the helper, but \ + today it doesn't and the handler must downcast" + ); + + // Alt-Display walks the chain — useful for debug body content too. 
+ let chained = format!("{:#}", dup_err); + assert!( + chained.to_uppercase().contains("UNIQUE"), + "chained display must surface the diesel error: {chained}" + ); + + // The contract-stable check the handler actually uses. + assert!( + is_unique_violation(&dup_err), + "is_unique_violation must downcast into the diesel chain" + ); + } + + // ── Phase 4: bootstrap heuristic + cosine + DAO support ───────────── + + #[test] + fn is_plausible_name_token_filters_short_and_emoji() { + // Hard filter applied before grouping — emojis and tags shorter + // than 3 chars never make it into the candidate list, regardless + // of looks_like_person's later assessment. + assert!(is_plausible_name_token("Cameron")); + assert!(is_plausible_name_token("Sarah Smith")); + assert!(is_plausible_name_token("O'Brien")); + assert!(is_plausible_name_token("Jean-Luc")); + assert!(is_plausible_name_token("St. James")); + assert!(is_plausible_name_token("Renée")); + assert!(is_plausible_name_token("José")); + // Asian script names — the alphabetic/letter check covers any + // script, not just Latin. + assert!(is_plausible_name_token("田中太郎")); + + // Below the 3-character floor. + assert!(!is_plausible_name_token("")); + assert!(!is_plausible_name_token(" ")); + assert!(!is_plausible_name_token("Bo")); + assert!(!is_plausible_name_token("AB")); + // Trim before counting — surrounding whitespace doesn't count. + assert!(!is_plausible_name_token(" AB ")); + + // Emoji / symbol classes get the whole tag dropped. + assert!(!is_plausible_name_token("🐱cat")); + assert!(!is_plausible_name_token("Heart ❤")); + assert!(!is_plausible_name_token("📸Photo")); + assert!(!is_plausible_name_token("→ Trip")); + assert!(!is_plausible_name_token("★Vacation")); + + // Digits are kept (handled by looks_like_person, not here). 
+ assert!(is_plausible_name_token("Trip 2018")); + assert!(is_plausible_name_token("2024")); + } + + #[test] + fn looks_like_person_accepts_typical_names() { + assert!(looks_like_person("Cameron")); + assert!(looks_like_person("Sarah Smith")); + assert!(looks_like_person("Mary Jane")); + // Non-ASCII title-cased single word still counts. + assert!(looks_like_person("Renée")); + } + + #[test] + fn looks_like_person_rejects_obvious_non_people() { + // Digits, lowercase, three-or-more words, denylist hits. + assert!(!looks_like_person("2018")); + assert!(!looks_like_person("Trip 2018")); + assert!(!looks_like_person("trip")); + assert!(!looks_like_person("Birthday Party Cake")); + assert!(!looks_like_person("cat")); + assert!(!looks_like_person("Cat")); // denied even when title-cased + assert!(!looks_like_person("Christmas")); + assert!(!looks_like_person("home")); + assert!(!looks_like_person("")); + assert!(!looks_like_person(" ")); + } + + #[test] + fn looks_like_person_two_words_skips_denylist() { + // Two-word names get a pass on the single-word denylist — + // "Sunset Walk" is much more likely a real album than a person, + // but false-accepting is fine because the operator confirms. + // What matters is we don't false-reject "Sarah Smith". + assert!(looks_like_person("Sunset Walk")); + assert!(looks_like_person("Sarah Smith")); + } + + #[test] + fn cosine_similarity_known_vectors() { + // Identical vectors → 1.0; orthogonal → 0.0; opposite → -1.0. + let a = vec![1.0, 0.0, 0.0]; + let b = vec![1.0, 0.0, 0.0]; + let c = vec![0.0, 1.0, 0.0]; + let d = vec![-1.0, 0.0, 0.0]; + assert!((cosine_similarity(&a, &b) - 1.0).abs() < 1e-6); + assert!(cosine_similarity(&a, &c).abs() < 1e-6); + assert!((cosine_similarity(&a, &d) - (-1.0)).abs() < 1e-6); + // Mismatched length → 0.0 (defensive, not NaN). + assert_eq!(cosine_similarity(&a, &[1.0, 0.0]), 0.0); + // Empty input → 0.0. 
+ assert_eq!(cosine_similarity(&[], &[]), 0.0); + // Zero vector → 0.0 (denominator guard, not NaN). + let zero = vec![0.0, 0.0, 0.0]; + assert_eq!(cosine_similarity(&a, &zero), 0.0); + } + + #[test] + fn decode_embedding_bytes_round_trip() { + // 512×f32 LE = 2048 bytes. Anything else returns None. + let v: Vec = (0..512).map(|i| i as f32 * 0.001).collect(); + let mut bytes = Vec::with_capacity(2048); + for f in &v { + bytes.extend_from_slice(&f.to_le_bytes()); + } + let decoded = decode_embedding_bytes(&bytes).expect("decode"); + assert_eq!(decoded.len(), 512); + for (a, b) in v.iter().zip(decoded.iter()) { + assert!((a - b).abs() < 1e-9); + } + assert_eq!(decode_embedding_bytes(&[0u8; 100]), None); + assert_eq!(decode_embedding_bytes(&[0u8; 4096]), None); + } + + #[test] + fn find_persons_by_names_ci_groups_case() { + let mut dao = fresh_dao(); + let _ = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + let _ = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Bob".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + + // Mix of cases + a name that has no person row. + let m = dao + .find_persons_by_names_ci(&ctx(), &["alice".into(), "BOB".into(), "charlie".into()]) + .expect("lookup"); + assert!(m.contains_key("alice")); + assert!(m.contains_key("bob")); + assert!(!m.contains_key("charlie")); + // Empty input is a no-op (don't fire a SQL with zero binds). + assert!( + dao.find_persons_by_names_ci(&ctx(), &[]) + .unwrap() + .is_empty() + ); + } + + #[test] + fn person_reference_embedding_filters_by_model_version() { + // A person with embeddings from buffalo_l shouldn't have its + // reference contaminated by a future buffalo_xl row. The auto- + // bind path passes the candidate's model_version so old rows + // never reach the average. 
+ let mut dao = fresh_dao(); + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Subject".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + + // 512-d unit vector along axis 0, written for buffalo_l. + let mut emb_l: Vec = vec![0.0; 512]; + emb_l[0] = 1.0; + let mut emb_l_bytes = Vec::with_capacity(2048); + for f in &emb_l { + emb_l_bytes.extend_from_slice(&f.to_le_bytes()); + } + // 512-d unit vector along axis 1, written for some-other model. + let mut emb_xl: Vec = vec![0.0; 512]; + emb_xl[1] = 1.0; + let mut emb_xl_bytes = Vec::with_capacity(2048); + for f in &emb_xl { + emb_xl_bytes.extend_from_slice(&f.to_le_bytes()); + } + + for (bytes, mv) in [(emb_l_bytes, "buffalo_l"), (emb_xl_bytes, "buffalo_xl")] { + let _ = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: format!("h-{mv}"), + rel_path: format!("p-{mv}.jpg"), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(bytes), + confidence: Some(0.9), + source: "auto".into(), + person_id: Some(p.id), + status: "detected".into(), + model_version: mv.into(), + }, + ) + .unwrap(); + } + + let ref_l = dao + .person_reference_embedding(&ctx(), p.id, "buffalo_l") + .unwrap() + .expect("buffalo_l ref"); + // Reference for buffalo_l should match emb_l (axis-0 unit). + assert!((ref_l[0] - 1.0).abs() < 1e-5, "axis 0 should be ~1.0"); + assert!(ref_l[1].abs() < 1e-5, "axis 1 should be ~0.0"); + + // Unknown model_version → None, not a cross-version average. 
+ assert!( + dao.person_reference_embedding(&ctx(), p.id, "buffalo_xxxl") + .unwrap() + .is_none() + ); + } + + #[test] + fn assign_face_to_person_sets_cover_when_unset() { + let mut dao = fresh_dao(); + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Cover".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + assert!(p.cover_face_id.is_none()); + + // Insert two faces unbound. + let face1 = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h1".into(), + rel_path: "p1.jpg".into(), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "auto".into(), + person_id: None, + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap(); + let face2 = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h2".into(), + rel_path: "p2.jpg".into(), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "auto".into(), + person_id: None, + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap(); + + // First assignment claims the cover. + dao.assign_face_to_person(&ctx(), face1.id, p.id).unwrap(); + let p_after_first = dao.get_person(&ctx(), p.id).unwrap().unwrap(); + assert_eq!(p_after_first.cover_face_id, Some(face1.id)); + + // Second assignment must NOT overwrite — operator may have + // hand-picked the cover after the first auto-bind. 
+ dao.assign_face_to_person(&ctx(), face2.id, p.id).unwrap(); + let p_after_second = dao.get_person(&ctx(), p.id).unwrap().unwrap(); + assert_eq!( + p_after_second.cover_face_id, + Some(face1.id), + "cover must remain face1 after second auto-bind" + ); + } + + #[test] + fn person_crud_roundtrip() { + let mut dao = fresh_dao(); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: Some("the boss".into()), + entity_id: None, + is_ignored: false, + }, + false, + ) + .expect("create person"); + assert_eq!(p.name, "Alice"); + assert_eq!(p.notes.as_deref(), Some("the boss")); + assert!(!p.created_from_tag); + + // Case-insensitive uniqueness — second create with same name in + // different case must fail with a UNIQUE violation, surfacing + // as 409 Conflict at the handler layer. + let dup = dao.create_person( + &ctx(), + &CreatePersonReq { + name: "alice".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ); + assert!(dup.is_err(), "case-insensitive UNIQUE must reject 'alice'"); + + // Update notes; verify updated_at moves forward. + let prev_updated = p.updated_at; + std::thread::sleep(std::time::Duration::from_millis(1100)); // boundary cross + let updated = dao + .update_person( + &ctx(), + p.id, + &UpdatePersonReq { + name: None, + notes: Some("a new note".into()), + cover_face_id: None, + entity_id: None, + is_ignored: None, + }, + ) + .expect("update"); + assert_eq!(updated.notes.as_deref(), Some("a new note")); + assert!(updated.updated_at >= prev_updated); + + // List + delete. 
+
+        let listed = dao.list_persons(&ctx(), None, false).expect("list");
+        assert_eq!(listed.len(), 1);
+        assert_eq!(listed[0].face_count, 0);
+        assert!(dao.delete_person(&ctx(), p.id, false).expect("delete"));
+        assert!(
+            dao.list_persons(&ctx(), None, false)
+                .expect("list")
+                .is_empty()
+        );
+    }
+
+    #[test]
+    fn ignore_bucket_idempotent_and_filters_auto_bind() {
+        // First call creates the bucket; second returns the same row.
+        // Once it exists, find_persons_by_names_ci must skip it even if
+        // the search term matches its name — the auto-bind path must
+        // NEVER target the IGNORE/junk bucket.
+        let mut dao = fresh_dao();
+        let first = dao
+            .get_or_create_ignored_person(&ctx())
+            .expect("create bucket");
+        assert!(first.is_ignored);
+        let second = dao
+            .get_or_create_ignored_person(&ctx())
+            .expect("re-fetch bucket");
+        assert_eq!(first.id, second.id, "bucket must be idempotent");
+
+        // Searching by the bucket's name must return nothing — the
+        // auto-bind look-up filters is_ignored=true.
+        let m = dao
+            .find_persons_by_names_ci(&ctx(), &["ignored".into()])
+            .expect("name lookup");
+        assert!(
+            !m.contains_key("ignored"),
+            "find_persons_by_names_ci must skip the ignore bucket: {m:?}"
+        );
+
+        // Default list_persons hides it; include_ignored=true surfaces it.
+        let visible = dao.list_persons(&ctx(), None, false).expect("list");
+        assert!(visible.iter().all(|p| !p.is_ignored));
+        let all = dao.list_persons(&ctx(), None, true).expect("list all");
+        assert!(all.iter().any(|p| p.is_ignored && p.id == first.id));
+    }
+
+    #[test]
+    fn marker_rows_idempotent() {
+        let mut dao = fresh_dao();
+        // Need a libraries row to satisfy face_detections.library_id FK
+        // without DEFERRED — SQLite enforces FK immediately by default.
+        // in_memory_db_connection only runs migrations and the runtime
+        // library-seeding path never executes in tests, so insert the
+        // row by hand; the INSERT OR IGNORE below keeps this working
+        // even if a future migration starts seeding libraries(id=1).
+ // Migrations may seed libraries(id=1); INSERT OR IGNORE keeps the + // test runnable either way. + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + + // Marker insert. + dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l") + .expect("first mark"); + assert!( + dao.already_scanned(&ctx(), "abc123").expect("scan"), + "already_scanned should report true after marker" + ); + + // Second mark for the same hash is a no-op (the partial UNIQUE + // index would otherwise reject; the DAO short-circuits before the + // insert). + dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l") + .expect("second mark idempotent"); + + // Stats reflect the no_faces marker. + let stats = dao.stats(&ctx(), Some(1)).expect("stats"); + assert_eq!(stats.no_faces, 1); + assert_eq!(stats.scanned, 1); + assert_eq!(stats.with_faces, 0); + } + + #[test] + fn merge_persons_repoints_faces() { + let mut dao = fresh_dao(); + // Migrations may seed libraries(id=1); INSERT OR IGNORE keeps the + // test runnable either way. + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + + let alice = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + let alyse = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alyse".into(), + notes: Some("dup of alice".into()), + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + + // Insert a detected face row owned by `alyse`. 
+ let _ = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h1".into(), + rel_path: "p1.jpg".into(), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "auto".into(), + person_id: Some(alyse.id), + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap(); + + // Merge alyse → alice. Notes from src copy when target empty. + let merged = dao.merge_persons(&ctx(), alyse.id, alice.id).unwrap(); + assert_eq!(merged.id, alice.id); + assert_eq!(merged.notes.as_deref(), Some("dup of alice")); + + // alyse is gone. + assert!(dao.get_person(&ctx(), alyse.id).unwrap().is_none()); + + // The face is now alice's. + let faces = dao.list_for_person(&ctx(), alice.id, Some(1)).unwrap(); + assert_eq!(faces.len(), 1); + assert_eq!(faces[0].person_id, Some(alice.id)); + } + + // ── crop_image_to_bbox ────────────────────────────────────────────── + // Pure helper used by the manual face-create handler. Generate a tiny + // image in memory, write it to a temp file, then exercise the bbox + // validation + crop math. 
+
+    fn write_solid_image(w: u32, h: u32) -> tempfile::NamedTempFile {
+        let mut img = image::RgbImage::new(w, h);
+        for p in img.pixels_mut() {
+            *p = image::Rgb([200, 200, 200]);
+        }
+        let f = tempfile::Builder::new()
+            .suffix(".jpg")
+            .tempfile()
+            .expect("tempfile");
+        image::DynamicImage::ImageRgb8(img)
+            .save(f.path())
+            .expect("save jpg");
+        f
+    }
+
+    #[test]
+    fn crop_rejects_invalid_bbox() {
+        let f = write_solid_image(64, 64);
+        // x out of [0,1]
+        assert!(crop_image_to_bbox(f.path(), -0.1, 0.0, 0.5, 0.5).is_err());
+        assert!(crop_image_to_bbox(f.path(), 1.5, 0.0, 0.5, 0.5).is_err());
+        // zero / negative dimensions
+        assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.0, 0.5).is_err());
+        assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.5, -0.1).is_err());
+        // overflows the image
+        assert!(crop_image_to_bbox(f.path(), 0.7, 0.0, 0.5, 0.5).is_err());
+    }
+
+    #[test]
+    fn crop_returns_decodable_jpeg() {
+        let f = write_solid_image(200, 200);
+        let bytes = crop_image_to_bbox(f.path(), 0.25, 0.25, 0.5, 0.5).expect("center crop");
+        // Re-decode to confirm the pipeline produced a valid JPEG. Exact
+        // dimensions depend on the 50% padding clamp, so just assert
+        // sanity bounds rather than pinning numbers (padding math can
+        // legitimately drift if we tweak the heuristic later).
+        let img = image::load_from_memory(&bytes).expect("decode crop");
+        let (w, h) = (img.width(), img.height());
+        assert!((80..=200).contains(&w), "unexpected crop width: {w}");
+        assert!((80..=200).contains(&h), "unexpected crop height: {h}");
+    }
+
+    #[test]
+    fn crop_padding_clamps_to_image_bounds() {
+        // A bbox right at the corner should pad inward as far as it can,
+        // never outside the image — otherwise we'd pass invalid coords
+        // to the embedding service.
+ let f = write_solid_image(100, 100); + let bytes = crop_image_to_bbox(f.path(), 0.9, 0.9, 0.1, 0.1).expect("corner crop"); + let img = image::load_from_memory(&bytes).expect("decode corner crop"); + // Padded crop must fit within the source's 100x100. + assert!(img.width() <= 100); + assert!(img.height() <= 100); + assert!(img.width() > 0 && img.height() > 0); + } + + // ── hydrate_face_with_person — PATCH/POST /image/faces response shape ── + + fn seed_library_and_face(dao: &mut SqliteFaceDao, person_id: Option) -> FaceDetectionRow { + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + dao.store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h-hydrate".into(), + rel_path: "p.jpg".into(), + bbox: Some((0.1, 0.2, 0.3, 0.4)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "manual".into(), + person_id, + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap() + } + + #[test] + fn hydrate_face_carries_person_name_when_assigned() { + // Regression guard for the bug where PATCH /image/faces/{id} + // returned a bare FaceDetectionRow (no person_name), causing + // the carousel overlay's optimistic replace to drop the VFD + // label off the bbox after every save. The handler hydrates + // via this helper; if anyone refactors the helper to skip the + // persons join, this test fails. 
+ let mut dao = fresh_dao(); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + let row = seed_library_and_face(&mut dao, Some(p.id)); + let joined = + hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned"); + assert_eq!(joined.person_id, Some(p.id)); + assert_eq!(joined.person_name.as_deref(), Some("Alice")); + // Bbox + confidence + source must round-trip — these are what + // the optimistic-replace also keys on. + assert!((joined.bbox_x - 0.1).abs() < 1e-6); + assert!((joined.bbox_y - 0.2).abs() < 1e-6); + assert!((joined.bbox_w - 0.3).abs() < 1e-6); + assert!((joined.bbox_h - 0.4).abs() < 1e-6); + assert_eq!(joined.source, "manual"); + } + + #[test] + fn hydrate_face_leaves_person_name_null_when_unassigned() { + // Mirror branch: an unassigned face must hydrate cleanly with + // person_name = None, not a stale value left over from a + // previously-assigned row's serialization. + let mut dao = fresh_dao(); + let row = seed_library_and_face(&mut dao, None); + let joined = + hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned"); + assert!(joined.person_id.is_none()); + assert!(joined.person_name.is_none()); + } + + #[test] + fn list_unscanned_candidates_filters_to_hashed_unscanned_in_library() { + // The watcher's per-tick backlog drain depends on this query + // returning *only* image_exif rows with a populated + // content_hash and no matching face_detections row in the + // requested library. A regression here would either silently + // re-scan files (waste of inference) or skip files that need + // scanning (the symptom we just shipped a fix for). 
+ let mut dao = fresh_dao(); + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0), (2, 'other', '/tmp2', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + + // Seed image_exif: mix of hashed/unhashed/scanned/cross-library. + diesel::sql_query( + "INSERT INTO image_exif \ + (library_id, rel_path, content_hash, created_time, last_modified) VALUES \ + (1, 'a.jpg', 'h-a', 0, 0), \ + (1, 'b.jpg', 'h-b', 0, 0), \ + (1, 'c.jpg', NULL, 0, 0), \ + (1, 'd.jpg', 'h-d', 0, 0), \ + (2, 'e.jpg', 'h-e', 0, 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed image_exif"); + + // 'b' has been scanned (no_faces marker) — expect it filtered out. + dao.mark_status(&ctx(), 1, "h-b", "b.jpg", "no_faces", "buffalo_l") + .expect("scanned marker"); + + let cands = dao + .list_unscanned_candidates(&ctx(), 1, 10) + .expect("list unscanned"); + + let hashes: std::collections::HashSet<_> = + cands.iter().map(|(_, h)| h.clone()).collect(); + + // Should contain a and d (hashed, unscanned, library 1). + assert!(hashes.contains("h-a"), "missing h-a: {:?}", hashes); + assert!(hashes.contains("h-d"), "missing h-d: {:?}", hashes); + // Should NOT contain b (scanned), c (no hash), e (other library). 
+ assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)"); + assert!(!hashes.contains("h-e"), "expected h-e filtered (other library)"); + assert_eq!(cands.len(), 2, "unexpected candidates: {:?}", cands); + } + +} diff --git a/src/files.rs b/src/files.rs index b4458c9..9ab1468 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1659,6 +1659,14 @@ mod tests { Ok(vec![]) } + fn get_rel_paths_for_hashes( + &mut self, + _context: &opentelemetry::Context, + _hashes: &[String], + ) -> Result>, DbError> { + Ok(std::collections::HashMap::new()) + } + fn list_rel_paths_for_libraries( &mut self, _context: &opentelemetry::Context, diff --git a/src/lib.rs b/src/lib.rs index e6d2cc1..12de818 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,8 @@ pub mod data; pub mod database; pub mod error; pub mod exif; +pub mod face_watch; +pub mod faces; pub mod file_types; pub mod files; pub mod geo; diff --git a/src/main.rs b/src/main.rs index ccdb14b..3e85cbd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,6 +66,8 @@ mod data; mod database; mod error; mod exif; +mod face_watch; +mod faces; mod file_types; mod files; mod geo; @@ -1459,6 +1461,8 @@ fn main() -> std::io::Result<()> { app_state.libraries.clone(), playlist_mgr_for_watcher, preview_gen_for_watcher, + app_state.face_client.clone(), + app_state.excluded_dirs.clone(), ); // Start orphaned playlist cleanup job @@ -1518,6 +1522,7 @@ fn main() -> std::io::Result<()> { let exif_dao = SqliteExifDao::new(); let insight_dao = SqliteInsightDao::new(); let preview_dao = SqlitePreviewDao::new(); + let face_dao = faces::SqliteFaceDao::new(); let cors = Cors::default() .allowed_origin_fn(|origin, _req_head| { // Allow all origins in development, or check against CORS_ALLOWED_ORIGINS env var @@ -1595,6 +1600,7 @@ fn main() -> std::io::Result<()> { .service(libraries::list_libraries) .add_feature(add_tag_services::<_, SqliteTagDao>) .add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>) + 
.add_feature(faces::add_face_services::<_, faces::SqliteFaceDao>) .app_data(app_data.clone()) .app_data::>(Data::new(RealFileSystem::new( app_data.base_path.clone(), @@ -1616,6 +1622,10 @@ fn main() -> std::io::Result<()> { .app_data::>>(Data::new(Mutex::new( SqliteKnowledgeDao::new(), ))) + .app_data::>>(Data::new(Mutex::new(face_dao))) + .app_data::>(Data::new( + app_data.face_client.clone(), + )) .app_data(mp::form::MultipartFormConfig::default().total_limit(1024 * 1024 * 1024)) // 1GB upload limit .app_data(web::JsonConfig::default().error_handler(|err, req| { let detail = err.to_string(); @@ -1780,6 +1790,8 @@ fn watch_files( libs: Vec, playlist_manager: Addr, preview_generator: Addr, + face_client: crate::ai::face_client::FaceClient, + excluded_dirs: Vec, ) { std::thread::spawn(move || { // Get polling intervals from environment variables @@ -1798,6 +1810,18 @@ fn watch_files( info!("Starting optimized file watcher"); info!(" Quick scan interval: {} seconds", quick_interval_secs); info!(" Full scan interval: {} seconds", full_interval_secs); + // Surface face-detection state at boot so it's obvious whether + // the watcher will hit Apollo. The branch silently no-ops when + // disabled (intentional for legacy deploys), which makes "why + // aren't faces being detected?" hard to diagnose otherwise. + if face_client.is_enabled() { + info!(" Face detection: ENABLED"); + } else { + info!( + " Face detection: DISABLED (set APOLLO_FACE_API_BASE_URL \ + or APOLLO_API_BASE_URL to enable)" + ); + } for lib in &libs { info!( " Watching library '{}' (id={}) at {}", @@ -1812,6 +1836,15 @@ fn watch_files( let preview_dao = Arc::new(Mutex::new( Box::new(SqlitePreviewDao::new()) as Box )); + let face_dao = Arc::new(Mutex::new( + Box::new(faces::SqliteFaceDao::new()) as Box + )); + // tag_dao for the watcher's auto-bind path. Independent of the + // request-handler tag_dao instance — both end up pointing at the + // same SQLite file via SqliteTagDao::default(). 
+ let watcher_tag_dao = Arc::new(Mutex::new( + Box::new(SqliteTagDao::default()) as Box + )); let mut last_quick_scan = SystemTime::now(); let mut last_full_scan = SystemTime::now(); @@ -1828,6 +1861,26 @@ fn watch_files( let is_full_scan = since_last_full.as_secs() >= full_interval_secs; for lib in &libs { + // Drain the unhashed-hash backlog AND the face-detection + // backlog every tick, regardless of quick/full. Quick + // scans only walk recently-modified files, so the + // pre-Phase-3 backlog never enters their candidate set + // — without these standalone passes, backfill + + // detection only progressed during full scans + // (default once an hour). + if face_client.is_enabled() { + let context = opentelemetry::Context::new(); + backfill_unhashed_backlog(&context, lib, &exif_dao); + process_face_backlog( + &context, + lib, + &face_client, + &face_dao, + &watcher_tag_dao, + &excluded_dirs, + ); + } + if is_full_scan { info!( "Running full scan for library '{}' (scan #{})", @@ -1837,6 +1890,10 @@ fn watch_files( lib, Arc::clone(&exif_dao), Arc::clone(&preview_dao), + Arc::clone(&face_dao), + Arc::clone(&watcher_tag_dao), + face_client.clone(), + &excluded_dirs, None, playlist_manager.clone(), preview_generator.clone(), @@ -1854,6 +1911,10 @@ fn watch_files( lib, Arc::clone(&exif_dao), Arc::clone(&preview_dao), + Arc::clone(&face_dao), + Arc::clone(&watcher_tag_dao), + face_client.clone(), + &excluded_dirs, Some(check_since), playlist_manager.clone(), preview_generator.clone(), @@ -1900,6 +1961,10 @@ fn process_new_files( library: &libraries::Library, exif_dao: Arc>>, preview_dao: Arc>>, + face_dao: Arc>>, + tag_dao: Arc>>, + face_client: crate::ai::face_client::FaceClient, + excluded_dirs: &[String], modified_since: Option, playlist_manager: Addr, preview_generator: Addr, @@ -2075,6 +2140,43 @@ fn process_new_files( } } + // ── Face detection pass ──────────────────────────────────────────── + // Run after EXIF writes so newly-registered files have their + // 
content_hash populated. Skipped wholesale when face_client is + disabled (no Apollo integration configured) — Phase 3 wires this + up; the watcher remains usable on legacy deploys. + if face_client.is_enabled() { + // Opportunistic content_hash backfill: photos indexed before + // content-hashing landed (or where the hash compute failed + // silently on insert) end up in image_exif with NULL + // content_hash. build_face_candidates keys on content_hash, so + // those files would never become candidates without backfill. + // Idempotent — subsequent scans see the populated hashes and + // no-op. The dedicated `backfill_hashes` binary is still the + // right tool for very large legacy libraries; this branch + // ensures small/medium deploys self-heal without operator + // action. + backfill_missing_content_hashes(&context, &files, library, &exif_dao); + let candidates = build_face_candidates(&context, &files, &exif_dao, &face_dao); + debug!( + "face_watch: scan tick — {} image file(s) walked, {} candidate(s) (library '{}', modified_since={})", + files.iter().filter(|(p, _)| !is_video_file(p)).count(), + candidates.len(), + library.name, + modified_since.is_some(), + ); + if !candidates.is_empty() { + face_watch::run_face_detection_pass( + library, + excluded_dirs, + &face_client, + Arc::clone(&face_dao), + Arc::clone(&tag_dao), + candidates, + ); + } + } + + // Check for videos that need HLS playlists let video_path_base = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set"); let mut videos_needing_playlists = Vec::new(); @@ -2199,6 +2301,312 @@ fn process_new_files( } } +/// Compute and persist content_hash for image_exif rows where it's NULL. +/// +/// Bounded per call by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000) so +/// a watcher tick on a large legacy library doesn't block for hours +/// blake3-ing every photo at once. Subsequent scans pick up the rest. 
+/// For 50k+ libraries the dedicated `cargo run --bin backfill_hashes` +/// is still faster (it doesn't fight a watcher loop for the DAO mutex). +/// Drain unhashed image_exif rows by querying them directly, independent +/// of the filesystem walk. Quick scans only walk recently-modified +/// files, so a backlog of pre-existing unhashed rows never enters +/// `process_new_files`'s candidate set — left alone, it would only +/// drain on full scans (default once an hour). Calling this every tick +/// keeps the face-detection backlog moving regardless. +/// +/// Returns the number of rows successfully backfilled this pass. +fn backfill_unhashed_backlog( + context: &opentelemetry::Context, + library: &libraries::Library, + exif_dao: &Arc>>, +) -> usize { + let cap: i64 = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|n: &i64| *n > 0) + .unwrap_or(2000); + + // Fetch up to cap+1 rows so we can tell "more remain" without a + // separate count query. Across libraries — there's no per-library + // filter on get_rows_missing_hash today — but we only ever update + // rows whose library_id matches the caller's library, so other + // libraries' rows just get skipped here and picked up on the next + // library's tick. Negligible cost given the cap. 
+ let rows: Vec<(i32, String)> = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_rows_missing_hash(context, cap + 1).unwrap_or_default() + }; + if rows.is_empty() { + return 0; + } + + let more_than_cap = rows.len() as i64 > cap; + let base_path = std::path::Path::new(&library.root_path); + + let mut backfilled = 0usize; + let mut errors = 0usize; + let mut skipped_other_lib = 0usize; + for (lib_id, rel_path) in rows.iter().take(cap as usize) { + if *lib_id != library.id { + skipped_other_lib += 1; + continue; + } + let abs = base_path.join(rel_path); + if !abs.exists() { + // File walked away — the watcher's reconciliation pass will + // remove the orphan exif row eventually. + continue; + } + match content_hash::compute(&abs) { + Ok(id) => { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + if let Err(e) = + dao.backfill_content_hash(context, library.id, rel_path, &id.content_hash, id.size_bytes) + { + warn!( + "face_watch: backfill_content_hash failed for {}: {:?}", + rel_path, e + ); + errors += 1; + } else { + backfilled += 1; + } + } + Err(e) => { + debug!("face_watch: hash compute failed for {} ({:?})", abs.display(), e); + errors += 1; + } + } + } + + if backfilled > 0 || errors > 0 || more_than_cap { + info!( + "face_watch: backfill pass for library '{}': hashed {} ({} error(s), {} skipped to other libraries; {} cap, more_remain={})", + library.name, backfilled, errors, skipped_other_lib, cap, more_than_cap + ); + } + backfilled +} + +/// Per-tick face-detection drain. Pulls a capped batch of hashed-but- +/// unscanned image_exif rows directly via the FaceDao anti-join and +/// hands them to the existing detection pass. Runs on every tick (not +/// just full scans) so the backlog moves at quick-scan cadence. 
+fn process_face_backlog( + context: &opentelemetry::Context, + library: &libraries::Library, + face_client: &crate::ai::face_client::FaceClient, + face_dao: &Arc>>, + tag_dao: &Arc>>, + excluded_dirs: &[String], +) { + let cap: i64 = dotenv::var("FACE_BACKLOG_MAX_PER_TICK") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|n: &i64| *n > 0) + .unwrap_or(64); + + let rows: Vec<(String, String)> = { + let mut dao = face_dao.lock().expect("face dao"); + match dao.list_unscanned_candidates(context, library.id, cap) { + Ok(r) => r, + Err(e) => { + warn!( + "face_watch: list_unscanned_candidates failed for library '{}': {:?}", + library.name, e + ); + return; + } + } + }; + if rows.is_empty() { + return; + } + + info!( + "face_watch: backlog drain — running detection on {} candidate(s) for library '{}' (cap={})", + rows.len(), + library.name, + cap + ); + + let candidates: Vec = rows + .into_iter() + .map(|(rel_path, content_hash)| face_watch::FaceCandidate { + rel_path, + content_hash, + }) + .collect(); + + face_watch::run_face_detection_pass( + library, + excluded_dirs, + face_client, + Arc::clone(face_dao), + Arc::clone(tag_dao), + candidates, + ); +} + +fn backfill_missing_content_hashes( + context: &opentelemetry::Context, + files: &[(PathBuf, String)], + library: &libraries::Library, + exif_dao: &Arc>>, +) { + let image_paths: Vec = files + .iter() + .filter(|(p, _)| !is_video_file(p)) + .map(|(_, rel)| rel.clone()) + .collect(); + if image_paths.is_empty() { + return; + } + + let exif_records = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_exif_batch(context, &image_paths) + .unwrap_or_default() + }; + // Cheap lookup back from rel_path → absolute file_path so + // content_hash::compute can read the bytes. 
+ let path_by_rel: HashMap = + files.iter().map(|(p, rel)| (rel.clone(), p)).collect(); + + let cap: usize = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|n: &usize| *n > 0) + .unwrap_or(2000); + + // Count the unhashed backlog up front so we can surface "still needs + // backfill: N" in the log — without it, a face-scan that's stuck at + // 44% looks stalled when really it's chipping through hashes. + let unhashed_total = exif_records + .iter() + .filter(|r| r.content_hash.is_none()) + .count(); + + let mut backfilled = 0usize; + let mut errors = 0usize; + for record in &exif_records { + // Cap on successes only — earlier this counted errors too, so a + // pocket of chronically-unhashable files at the front of the + // table (vanished mid-scan, permission denied, etc.) burned the + // budget every tick and the rest of the backlog never advanced. + // Errors are still bounded by `unhashed_total` (the loop walks + // each unhashed record at most once per tick). + if backfilled >= cap { + break; + } + if record.content_hash.is_some() { + continue; + } + let Some(file_path) = path_by_rel.get(&record.file_path) else { + // Walked file went missing between the directory scan and now; + // next tick will retry naturally. + continue; + }; + match content_hash::compute(file_path) { + Ok(id) => { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + if let Err(e) = dao.backfill_content_hash( + context, + library.id, + &record.file_path, + &id.content_hash, + id.size_bytes, + ) { + warn!( + "face_watch: backfill_content_hash failed for {}: {:?}", + record.file_path, e + ); + errors += 1; + } else { + backfilled += 1; + } + } + Err(e) => { + debug!( + "face_watch: hash compute failed for {} ({:?})", + file_path.display(), + e + ); + errors += 1; + } + } + } + // Always log when there's an unhashed backlog so an operator + // looking at "scan stuck at 44%" can see backfill is running and + // how much remains. 
Quiet only when there's nothing to do. + if unhashed_total > 0 || backfilled > 0 || errors > 0 { + let remaining = unhashed_total.saturating_sub(backfilled); + info!( + "face_watch: backfilled {}/{} content_hash for library '{}' ({} error(s); {} still need backfill; cap={})", + backfilled, unhashed_total, library.name, errors, remaining, cap + ); + } +} + +/// Build the face-detection candidate list for a scan tick. +/// +/// We need `(rel_path, content_hash)` for every image file that has a +/// content_hash recorded in image_exif but no row in face_detections yet. +/// Re-querying image_exif here picks up rows the EXIF write loop just +/// inserted alongside any pre-existing rows the watcher walked over — +/// covers both new uploads and the initial backlog scan. +fn build_face_candidates( + context: &opentelemetry::Context, + files: &[(PathBuf, String)], + exif_dao: &Arc>>, + face_dao: &Arc>>, +) -> Vec { + // Restrict to image files; videos aren't face-scanned in v1 (kamadak + // doesn't even register them in image_exif). + let image_paths: Vec = files + .iter() + .filter(|(p, _)| !is_video_file(p)) + .map(|(_, rel)| rel.clone()) + .collect(); + if image_paths.is_empty() { + return Vec::new(); + } + + let exif_records = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_exif_batch(context, &image_paths) + .unwrap_or_default() + }; + // rel_path → content_hash (only rows with a hash; without one we have + // nothing to key face data against). 
+ let mut hash_by_path: HashMap = HashMap::with_capacity(exif_records.len()); + for record in exif_records { + if let Some(h) = record.content_hash { + hash_by_path.insert(record.file_path, h); + } + } + + let mut candidates = Vec::new(); + let mut dao = face_dao.lock().expect("face dao"); + for rel_path in image_paths { + let Some(hash) = hash_by_path.get(&rel_path) else { + continue; + }; + match dao.already_scanned(context, hash) { + Ok(true) => continue, + Ok(false) => candidates.push(face_watch::FaceCandidate { + rel_path, + content_hash: hash.clone(), + }), + Err(e) => { + warn!("face_watch: already_scanned errored for {}: {:?}", hash, e); + } + } + } + candidates +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/memories.rs b/src/memories.rs index 0e2aad5..95de714 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -23,7 +23,8 @@ use crate::utils::earliest_fs_time; // Helper that encapsulates path-exclusion semantics #[derive(Debug)] -struct PathExcluder { +pub struct PathExcluder { + base: PathBuf, excluded_dirs: Vec, excluded_patterns: Vec, } @@ -34,9 +35,12 @@ impl PathExcluder { /// Rules: /// - Entries starting with '/' are interpreted as "absolute under base" /// (e.g. "/photos/private" -> base/photos/private). - /// - Entries without '/' are treated as substring patterns that match - /// anywhere in the full path string (still scoped under base). - fn new(base: &Path, raw_excluded: &[String]) -> Self { + /// - Entries without '/' are treated as path-component patterns that + /// match a directory or file name *under* `base`. The base prefix is + /// stripped before matching so a system-level component (e.g. the + /// `tmp` in `/tmp/...` when running tests) doesn't masquerade as a + /// user-defined exclude. + pub fn new(base: &Path, raw_excluded: &[String]) -> Self { let mut excluded_dirs = Vec::new(); let mut excluded_patterns = Vec::new(); @@ -53,18 +57,19 @@ impl PathExcluder { } debug!( - "PathExcluder created. 
dirs={:?}, patterns={:?}", - excluded_dirs, excluded_patterns + "PathExcluder created. base={:?}, dirs={:?}, patterns={:?}", + base, excluded_dirs, excluded_patterns ); Self { + base: base.to_path_buf(), excluded_dirs, excluded_patterns, } } /// Returns true if `path` should be excluded. - fn is_excluded(&self, path: &Path) -> bool { + pub fn is_excluded(&self, path: &Path) -> bool { // Directory-based exclusions for excluded in &self.excluded_dirs { if path.starts_with(excluded) { @@ -76,19 +81,24 @@ impl PathExcluder { } } - // Pattern-based exclusions: match whole path components (dir or file name), - // not substrings. - if !self.excluded_patterns.is_empty() { - for component in path.components() { - if let Some(comp_str) = component.as_os_str().to_str() - && self.excluded_patterns.iter().any(|pat| pat == comp_str) - { - trace!( - "PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})", - path, comp_str, self.excluded_patterns - ); - return true; - } + if self.excluded_patterns.is_empty() { + return false; + } + + // Strip the base prefix before scanning components. Without this, + // every path component above `base` (e.g. `tmp` in `/tmp/test123` + // under tempdir, or the user's `home` in `/home/user/Pictures`) + // would match user-defined patterns and produce false positives. 
+ let scan_root = path.strip_prefix(&self.base).unwrap_or(path); + for component in scan_root.components() { + if let Some(comp_str) = component.as_os_str().to_str() + && self.excluded_patterns.iter().any(|pat| pat == comp_str) + { + trace!( + "PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})", + path, comp_str, self.excluded_patterns + ); + return true; } } diff --git a/src/state.rs b/src/state.rs index 5682d43..18eab29 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,4 +1,5 @@ use crate::ai::apollo_client::ApolloClient; +use crate::ai::face_client::FaceClient; use crate::ai::insight_chat::{ChatLockMap, InsightChatService}; use crate::ai::openrouter::OpenRouterClient; use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient}; @@ -48,6 +49,11 @@ pub struct AppState { pub insight_generator: InsightGenerator, /// Chat continuation service. Hold an Arc so handlers can clone cheaply. pub insight_chat: Arc, + /// Face inference client (calls Apollo's `/api/internal/faces/*`). + /// Disabled (`is_enabled() == false`) when neither `APOLLO_FACE_API_BASE_URL` + /// nor `APOLLO_API_BASE_URL` is set; the file-watch hook (Phase 3) and + /// manual-face-create handler short-circuit in that case. + pub face_client: FaceClient, } impl AppState { @@ -82,6 +88,7 @@ impl AppState { insight_generator: InsightGenerator, insight_chat: Arc, preview_dao: Arc>>, + face_client: FaceClient, ) -> Self { assert!( !libraries_vec.is_empty(), @@ -115,6 +122,7 @@ impl AppState { sms_client, insight_generator, insight_chat, + face_client, } } @@ -161,6 +169,15 @@ impl Default for AppState { // generator silently falls through to the legacy Nominatim path. let apollo_client = ApolloClient::new(env::var("APOLLO_API_BASE_URL").ok()); + // Face inference client. Falls back to APOLLO_API_BASE_URL when + // APOLLO_FACE_API_BASE_URL is unset (single-Apollo deploys are the + // common case). 
Both unset = feature disabled, file-watch hook + // and manual-face handlers short-circuit silently. + let face_client_url = env::var("APOLLO_FACE_API_BASE_URL") + .ok() + .or_else(|| env::var("APOLLO_API_BASE_URL").ok()); + let face_client = FaceClient::new(face_client_url); + // Initialize DAOs let insight_dao: Arc>> = Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); @@ -244,6 +261,7 @@ impl Default for AppState { insight_generator, insight_chat, preview_dao, + face_client, ) } } @@ -382,6 +400,7 @@ impl AppState { insight_generator, insight_chat, preview_dao, + FaceClient::new(None), // disabled in test ) } } diff --git a/src/tags.rs b/src/tags.rs index b94cb3b..bdd5abd 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -32,6 +32,7 @@ where ) .service(web::resource("image/tags/all").route(web::get().to(get_all_tags::))) .service(web::resource("image/tags/batch").route(web::post().to(update_tags::))) + .service(web::resource("image/tags/lookup").route(web::post().to(lookup_tags_batch::))) } async fn add_tag( @@ -238,6 +239,149 @@ async fn update_tags( .into_http_internal_err() } +#[derive(Deserialize, Debug)] +pub struct LookupTagsBatchRequest { + pub paths: Vec, +} + +/// Bulk per-path tag lookup with cross-library content-hash sibling +/// expansion. Apollo's photo-match flow used to fan out one +/// ``GET /image/tags?path=`` per record (~4k for a wide window) — +/// each call locked the dao briefly and the round-trip cost dwarfed +/// the actual SQL. This collapses the whole fan-out into: +/// +/// 1. one ``image_exif`` batch lookup → query path → content_hash +/// 2. one ``image_exif`` JOIN by content_hash → all sibling rel_paths +/// (so a tag applied under library A surfaces under library B +/// when the content hashes match — important once a backup mount +/// holds copies of files from the primary library) +/// 3. 
one ``tagged_photo`` JOIN over the union of (query + sibling) +/// rel_paths +/// +/// Body: ``{paths: [...]}``; response: ``{path: [{id, name, ...}]}`` +/// with only paths that have at least one tag (caller treats absence +/// as empty). Each chunk is capped to stay under SQLite's variable +/// limit; eight queries per 4k photos is still ~500x cheaper than +/// per-path HTTP fan-out. +async fn lookup_tags_batch( + _: Claims, + http_request: HttpRequest, + body: web::Json, + tag_dao: web::Data>, + exif_dao: web::Data>>, +) -> impl Responder { + use std::collections::{HashMap, HashSet}; + let context = extract_context_from_request(&http_request); + let span = global_tracer().start_with_context("lookup_tags_batch", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + if body.paths.is_empty() { + return HttpResponse::Ok().json(HashMap::>::new()); + } + + let query_paths: Vec = body.paths.iter().map(|p| normalize_path(p)).collect(); + + // Stage 1: query → content_hash mapping. Files without a hash yet + // (just-indexed, hash compute failed, etc.) skip the sibling + // expansion and only get tags from their own rel_path. + let exif_records = { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + match dao.get_exif_batch(&span_context, &query_paths) { + Ok(rows) => rows, + Err(e) => { + return HttpResponse::InternalServerError() + .body(format!("exif batch lookup failed: {:?}", e)); + } + } + }; + let mut hash_by_path: HashMap = HashMap::with_capacity(exif_records.len()); + for record in exif_records { + if let Some(h) = record.content_hash { + hash_by_path.insert(record.file_path, h); + } + } + let unique_hashes: Vec = hash_by_path + .values() + .cloned() + .collect::>() + .into_iter() + .collect(); + + // Stage 2: hash → all sibling rel_paths. 
+ let paths_by_hash = if unique_hashes.is_empty() { + HashMap::new() + } else { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + match dao.get_rel_paths_for_hashes(&span_context, &unique_hashes) { + Ok(map) => map, + Err(e) => { + return HttpResponse::InternalServerError() + .body(format!("hash sibling lookup failed: {:?}", e)); + } + } + }; + + // Stage 3: build expanded path set and the reverse map + // sibling → [original query paths whose tag bucket should include + // the sibling's tags]. A query path always attributes to itself + // (covers the no-content-hash case). + let mut originals_by_sibling: HashMap> = HashMap::new(); + let mut all_paths: HashSet = HashSet::new(); + for query_path in &query_paths { + all_paths.insert(query_path.clone()); + originals_by_sibling + .entry(query_path.clone()) + .or_default() + .push(query_path.clone()); + if let Some(hash) = hash_by_path.get(query_path) + && let Some(siblings) = paths_by_hash.get(hash) + { + for sibling in siblings { + if sibling == query_path { + continue; + } + all_paths.insert(sibling.clone()); + originals_by_sibling + .entry(sibling.clone()) + .or_default() + .push(query_path.clone()); + } + } + } + + // Stage 4: tags grouped by rel_path for the union. + let all_paths_vec: Vec = all_paths.into_iter().collect(); + let tags_by_sibling = { + let mut dao = tag_dao.lock().expect("Unable to get TagDao"); + match dao.get_tags_grouped_by_paths(&span_context, &all_paths_vec) { + Ok(map) => map, + Err(e) => { + return HttpResponse::InternalServerError().body(format!("{}", e)); + } + } + }; + + // Stage 5: aggregate sibling tags back to original query paths, + // de-duped by tag id. Empty buckets stay out of the response so + // the caller's "missing key = []" contract holds. 
+ let mut result: HashMap> = HashMap::new(); + for (sibling_path, originals) in originals_by_sibling { + if let Some(tags) = tags_by_sibling.get(&sibling_path) { + for orig in originals { + let entry = result.entry(orig).or_default(); + for t in tags { + if !entry.iter().any(|e| e.id == t.id) { + entry.push(t.clone()); + } + } + } + } + } + + span_context.span().set_status(Status::Ok); + HttpResponse::Ok().json(result) +} + #[derive(Serialize, Queryable, Clone, Debug, PartialEq)] pub struct Tag { pub id: i32, @@ -317,6 +461,14 @@ pub trait TagDao: Send + Sync { context: &opentelemetry::Context, paths: &[String], ) -> anyhow::Result>; + /// Per-path grouped lookup: ``rel_path → [tags]``. Used by the + /// ``/image/tags/lookup`` batch endpoint. Returns only paths that + /// have at least one tag; the caller treats absence as empty. + fn get_tags_grouped_by_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>>; fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result; fn remove_tag( &mut self, @@ -470,6 +622,51 @@ impl TagDao for SqliteTagDao { }) } + fn get_tags_grouped_by_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>> { + use std::collections::HashMap; + let mut out: HashMap> = HashMap::new(); + if paths.is_empty() { + return Ok(out); + } + let mut conn = self + .connection + .lock() + .expect("Unable to lock SqliteTagDao connection"); + trace_db_call(context, "query", "get_tags_grouped_by_paths", |span| { + span.set_attribute(KeyValue::new("path_count", paths.len() as i64)); + // SQLite's default SQLITE_LIMIT_VARIABLE_NUMBER is 32766 in + // modern builds (999 in old ones). Chunk at 500 to stay + // safely under both — eight queries for a 4k-photo grid is + // still ~500x cheaper than 4k single-row HTTP calls. 
+ const CHUNK: usize = 500; + for chunk in paths.chunks(CHUNK) { + let rows: Vec<(String, i32, String, i64)> = tagged_photo::table + .inner_join(tags::table) + .filter(tagged_photo::rel_path.eq_any(chunk)) + .select(( + tagged_photo::rel_path, + tags::id, + tags::name, + tags::created_time, + )) + .get_results(conn.deref_mut()) + .with_context(|| "Unable to get tags grouped from Sqlite")?; + for (rel_path, id, name, created_time) in rows { + out.entry(rel_path).or_default().push(Tag { + id, + name, + created_time, + }); + } + } + Ok(out) + }) + } + fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result { let mut conn = self .connection @@ -893,6 +1090,23 @@ mod tests { Ok(out) } + fn get_tags_grouped_by_paths( + &mut self, + _context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>> { + let tagged = self.tagged_photos.borrow(); + let mut out = std::collections::HashMap::new(); + for p in paths { + if let Some(tags) = tagged.get(p) + && !tags.is_empty() + { + out.insert(p.clone(), tags.clone()); + } + } + Ok(out) + } + fn create_tag( &mut self, _context: &opentelemetry::Context, @@ -1026,6 +1240,42 @@ mod tests { } } + #[actix_rt::test] + async fn get_tags_grouped_by_paths_returns_per_path_buckets() { + // Backstop for the batch tag-lookup endpoint: confirms the + // grouped variant returns one bucket per path with at least + // one tag, and omits paths with no tags entirely (the caller + // treats absence as []). The handler stacks sibling expansion + // on top via image_exif content_hash; the DAO method itself + // just needs to honour rel_path → tags directly. + let mut dao = TestTagDao::new(); + let ctx = opentelemetry::Context::current(); + // Seed: two paths tagged, one path untagged. 
+ dao.tagged_photos.borrow_mut().insert( + "a.jpg".into(), + vec![Tag { id: 1, name: "alpha".into(), created_time: 0 }], + ); + dao.tagged_photos.borrow_mut().insert( + "b.jpg".into(), + vec![ + Tag { id: 2, name: "beta".into(), created_time: 0 }, + Tag { id: 3, name: "gamma".into(), created_time: 0 }, + ], + ); + let grouped = dao + .get_tags_grouped_by_paths( + &ctx, + &["a.jpg".into(), "b.jpg".into(), "c.jpg".into()], + ) + .unwrap(); + assert_eq!(grouped.get("a.jpg").map(|v| v.len()), Some(1)); + assert_eq!(grouped.get("b.jpg").map(|v| v.len()), Some(2)); + assert!( + !grouped.contains_key("c.jpg"), + "untagged paths must be absent so caller's missing-key=[] contract holds" + ); + } + #[actix_rt::test] async fn add_new_tag_test() { let tag_dao = TestTagDao::new();