diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b520940 --- /dev/null +++ b/.env.example @@ -0,0 +1,85 @@ +# ImageApi configuration template. Copy to `.env` and fill in for your +# deploy. Comments mirror the canonical docs in CLAUDE.md — see there +# for the full picture (especially the AI-Insights / Apollo / face +# integration sections). + +# ── Required ──────────────────────────────────────────────────────────── +DATABASE_URL=./database.db +BASE_PATH=/path/to/media +THUMBNAILS=/path/to/thumbnails +VIDEO_PATH=/path/to/video/hls +GIFS_DIRECTORY=/path/to/gifs +PREVIEW_CLIPS_DIRECTORY=/path/to/preview-clips +BIND_URL=0.0.0.0:8080 +CORS_ALLOWED_ORIGINS=http://localhost:3000 +SECRET_KEY=replace-me-with-a-long-random-secret +RUST_LOG=info + +# ── File watching ─────────────────────────────────────────────────────── +# Quick scan = recently-modified-files only; full scan = comprehensive walk. +WATCH_QUICK_INTERVAL_SECONDS=60 +WATCH_FULL_INTERVAL_SECONDS=3600 +# Comma-separated path prefixes / component names to skip in /memories +# AND in face detection (e.g. @eaDir, .thumbnails, /private). +EXCLUDED_DIRS= + +# ── Video / HLS ───────────────────────────────────────────────────────── +HLS_CONCURRENCY=2 +HLS_TIMEOUT_SECONDS=900 +PLAYLIST_CLEANUP_INTERVAL_SECONDS=86400 + +# ── Telemetry (release builds only) ───────────────────────────────────── +# OTLP_OTLS_ENDPOINT=http://localhost:4317 + +# ── AI Insights — Ollama (local LLM) ──────────────────────────────────── +OLLAMA_PRIMARY_URL=http://localhost:11434 +OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b +# Optional fallback server tried on connection failure. +# OLLAMA_FALLBACK_URL=http://server:11434 +# OLLAMA_FALLBACK_MODEL=llama3.2:3b +OLLAMA_REQUEST_TIMEOUT_SECONDS=120 +# Cap on tool-calling iterations per chat turn / agentic insight. 
+AGENTIC_MAX_ITERATIONS=6 +AGENTIC_CHAT_MAX_ITERATIONS=6 + +# ── AI Insights — OpenRouter (hybrid backend, optional) ───────────────── +# Set OPENROUTER_API_KEY to enable the hybrid backend (vision stays +# local on Ollama, chat routes to OpenRouter). +# OPENROUTER_API_KEY=sk-or-... +# OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4 +# OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash +# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 +# OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small +# OPENROUTER_HTTP_REFERER=https://your-site.example +# OPENROUTER_APP_TITLE=ImageApi + +# ── AI Insights — sibling services (optional) ─────────────────────────── +# Apollo (places + face inference). Single Apollo deploys typically set +# only APOLLO_API_BASE_URL and let the face client fall back to it. +# APOLLO_API_BASE_URL=http://apollo.lan:8000 +# APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000 +# SMS_API_URL=http://localhost:8000 +# SMS_API_TOKEN= + +# Display name used in agentic prompts when the LLM refers to "you". +USER_NAME= + +# ── Face detection (Phase 3+) ─────────────────────────────────────────── +# Cosine-sim floor for auto-binding a detected face to an existing +# same-named person on detection. 0.4 ≈ moderate-confidence match. +FACE_AUTOBIND_MIN_COS=0.4 +# Per-scan-tick fan-out into Apollo's detect endpoint. Apollo's GPU +# pool serializes server-side; this just overlaps file-IO with +# inference RTT. +FACE_DETECT_CONCURRENCY=8 +# Per-detect HTTP timeout. CPU-only Apollo deploys may need higher. +FACE_DETECT_TIMEOUT_SEC=60 +# Per-tick caps on the two backlog drains (independent of WATCH_* +# quick / full scans). Tune up if you have a large unscanned backlog +# and want it to clear faster; tune down if Apollo is overloaded. 
+FACE_BACKLOG_MAX_PER_TICK=64 +FACE_HASH_BACKFILL_MAX_PER_TICK=2000 + +# ── RAG / search ──────────────────────────────────────────────────────── +# Set to `1` to enable cross-encoder reranking on /search results. +SEARCH_RAG_RERANK=0 diff --git a/CLAUDE.md b/CLAUDE.md index 86515d2..6d4a751 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -210,7 +210,34 @@ Centralized in `file_types.rs` with constants `IMAGE_EXTENSIONS` and `VIDEO_EXTE All database operations and HTTP handlers wrapped in spans. In release builds, exports to OTLP endpoint via `OTLP_OTLS_ENDPOINT`. Debug builds use basic logger. **Memory Exclusion:** -`PathExcluder` in `memories.rs` filters out directories from memories API via `EXCLUDED_DIRS` environment variable (comma-separated paths or substring patterns). +`PathExcluder` in `memories.rs` filters out directories from memories API via `EXCLUDED_DIRS` environment variable (comma-separated paths or substring patterns). The same excluder is applied to face-detection candidates (`face_watch::filter_excluded`) so junk directories like `@eaDir` / `.thumbnails` don't burn detect calls on Apollo. + +### Face detection system + +ImageApi owns the face data; Apollo (sibling repo) hosts the insightface inference service. Inference is triggered automatically by the file watcher and persisted into two tables: + +- `persons(id, name UNIQUE COLLATE NOCASE, cover_face_id, entity_id, created_from_tag, notes, ...)` — operator-managed, name is the user-visible identity. +- `face_detections(id, library_id, content_hash, rel_path, bbox_*, embedding BLOB, confidence, source, person_id, status, model_version, ...)` — keyed on `content_hash` so a photo duplicated across libraries is detected once. Marker rows for `status IN ('no_faces','failed')` carry NULL bbox/embedding (CHECK constraint enforces this). + +**Why content_hash and not (library_id, rel_path):** ties face data to the bytes, not the path. 
A backup mount that copies files from the primary library naturally inherits the existing detections without re-running inference. + +**File-watch hook** (`src/main.rs::process_new_files`): for each photo with a populated `content_hash`, check `FaceDao::already_scanned(hash)`; if not, send bytes (or embedded JPEG preview for RAW via `exif::extract_embedded_jpeg_preview`) to Apollo's `/api/internal/faces/detect`. K=`FACE_DETECT_CONCURRENCY` (default 8) parallel calls per scan tick; Apollo serializes them via its single-worker GPU pool. `face_watch.rs` is the Tokio orchestration layer. + +**Per-tick backlog drain** (also `src/main.rs`): two passes that run on every watcher tick regardless of quick-vs-full scan: +- `backfill_unhashed_backlog` — populates `image_exif.content_hash` for photos that arrived before the hash field was retroactive. Capped by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000); errors don't burn the cap. +- `process_face_backlog` — runs detection on photos that have a hash but no `face_detections` row. Capped by `FACE_BACKLOG_MAX_PER_TICK` (default 64). Selected via a SQL anti-join (`FaceDao::list_unscanned_candidates`); videos and EXCLUDED_DIRS paths filtered out client-side via `face_watch::filter_excluded` so they never reach Apollo. + +**Auto-bind on detection:** when a photo carries a tag whose name matches a `persons.name` (case-insensitive), the new face binds automatically iff cosine similarity to the person's existing-face mean is ≥ `FACE_AUTOBIND_MIN_COS` (default 0.4). Persons with no existing faces bind unconditionally and the new face becomes the cover. 
+ +**Manual face create** (`POST /image/faces`): crops the image to the user-supplied bbox, applies EXIF orientation via `exif::apply_orientation` (the `image` crate hands raw pre-rotation pixels — without this, manually-drawn bboxes never resolved a face on re-detection), pads to ~50% of bbox dims (RetinaFace anchor scales need ~50% face-fill at det_size=640), then calls Apollo's embed endpoint. A `force` flag lets the operator save a face the detector couldn't see (e.g. profile shots, occluded faces) — the row gets a zero-vector embedding so it's manually-bound only and won't participate in clustering. + +**Rerun preserves manual rows** (`POST /image/faces/{id}/rerun`): only `source='auto'` rows are deleted before re-running detection. `already_scanned` returns true on ANY row, so a photo whose only faces are manually drawn never auto-redetects. + +Module map: +- `src/faces.rs` — `FaceDao` trait + `SqliteFaceDao` impl, route handlers for `/faces/*`, `/image/faces/*`, `/persons/*`. Mirror of `tags.rs` layout. +- `src/face_watch.rs` — Tokio orchestration for the file-watch detect pass; `filter_excluded` (PathExcluder + image-extension filter), `read_image_bytes_for_detect` (RAW preview fallback). +- `src/ai/face_client.rs` — HTTP client for Apollo's inference. Configured by `APOLLO_FACE_API_BASE_URL`, falls back to `APOLLO_API_BASE_URL`. Both unset → feature disabled, file-watch hook is a no-op. +- `migrations/2026-04-29-000000_add_faces/` — schema. ### Startup Sequence @@ -286,6 +313,15 @@ SMS_API_TOKEN=your-api-token # SMS API authentication token (o # `get_personal_place_at` tool. Unset = legacy Nominatim-only path. APOLLO_API_BASE_URL=http://apollo.lan:8000 # Base URL of the sibling Apollo backend +# Face inference (optional). Apollo also hosts the insightface inference +# service; ImageApi calls it from the file-watch hook (Phase 3) and from +# the manual face-create endpoint. Falls back to APOLLO_API_BASE_URL when +# unset (typical single-Apollo deploy). 
Both unset = feature disabled. +APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000 # Override if face service runs separately +FACE_AUTOBIND_MIN_COS=0.4 # Phase 3: cosine-sim floor for tag-name auto-bind +FACE_DETECT_CONCURRENCY=8 # Phase 3: per-scan-tick parallel detect calls +FACE_DETECT_TIMEOUT_SEC=60 # reqwest client timeout (CPU inference can be slow) + # OpenRouter (Hybrid Backend) - keeps embeddings + vision local, routes chat to OpenRouter OPENROUTER_API_KEY=sk-or-... # Required to enable hybrid backend OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4 # Used when client doesn't pick a model diff --git a/Cargo.lock b/Cargo.lock index 891a2f9..6f6575b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1913,7 +1913,7 @@ dependencies = [ [[package]] name = "image-api" -version = "1.0.0" +version = "1.1.0" dependencies = [ "actix", "actix-cors", @@ -3229,6 +3229,7 @@ dependencies = [ "js-sys", "log", "mime", + "mime_guess", "native-tls", "percent-encoding", "pin-project-lite", diff --git a/Cargo.toml b/Cargo.toml index 0a25252..2432869 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "image-api" -version = "1.0.0" +version = "1.1.0" authors = ["Cameron Cordes "] edition = "2024" @@ -49,7 +49,7 @@ opentelemetry-appender-log = "0.31.0" tempfile = "3.20.0" regex = "1.11.1" exif = { package = "kamadak-exif", version = "0.6.1" } -reqwest = { version = "0.12", features = ["json", "stream"] } +reqwest = { version = "0.12", features = ["json", "stream", "multipart"] } async-stream = "0.3" tokio-util = { version = "0.7", features = ["io"] } bytes = "1" diff --git a/README.md b/README.md index fceba81..b6d764b 100644 --- a/README.md +++ b/README.md @@ -159,3 +159,34 @@ Daily conversation summaries are generated automatically on server startup. 
Conf - Contacts to process - Model version used for embeddings: `nomic-embed-text:v1.5` +### Apollo + Face Recognition (Optional) + +Apollo (sibling project) hosts both the Places API and the local insightface +inference service. Both integrations are optional and degrade gracefully when +unset. + +- `APOLLO_API_BASE_URL` - Base URL of the sibling Apollo backend. + - When set, photo-insight enrichment folds the user's personal place name + (Home, Work, Cabin, ...) into the location string, and the agentic loop + gains a `get_personal_place_at` tool. Unset = legacy Nominatim-only path. +- `APOLLO_FACE_API_BASE_URL` - Base URL for the face-detection service. + - Falls back to `APOLLO_API_BASE_URL` when unset (typical single-Apollo + deploy). Both unset = face feature disabled (file-watch hook and + manual-face endpoints short-circuit silently). +- `FACE_AUTOBIND_MIN_COS` (Phase 3) - Cosine-sim floor for auto-binding a + detected face to an existing same-named person via people-tag bootstrap + [default: `0.4`]. +- `FACE_DETECT_CONCURRENCY` (Phase 3) - Per-scan-tick concurrent detect + calls fired by the file watcher [default: `8`]. Apollo serializes them + via its single-worker GPU pool. +- `FACE_DETECT_TIMEOUT_SEC` - reqwest client timeout per detect call + [default: `60`]. CPU inference on a backlog can take many seconds. +- `FACE_BACKLOG_MAX_PER_TICK` - Cap on the per-tick backlog drain (photos + with a content_hash but no face_detections row) [default: `64`]. Runs + every watcher tick regardless of quick-vs-full scan, so the unscanned + set drains independently of the file walk. +- `FACE_HASH_BACKFILL_MAX_PER_TICK` - Cap on the per-tick content_hash + backfill (photos that were registered before the hash field was + populated retroactively) [default: `2000`]. Errors don't burn the cap; + only successful hashes count. 
+ diff --git a/migrations/2026-04-29-000000_add_faces/down.sql b/migrations/2026-04-29-000000_add_faces/down.sql new file mode 100644 index 0000000..bae8303 --- /dev/null +++ b/migrations/2026-04-29-000000_add_faces/down.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS face_detections; +DROP TABLE IF EXISTS persons; diff --git a/migrations/2026-04-29-000000_add_faces/up.sql b/migrations/2026-04-29-000000_add_faces/up.sql new file mode 100644 index 0000000..4f4f4c8 --- /dev/null +++ b/migrations/2026-04-29-000000_add_faces/up.sql @@ -0,0 +1,67 @@ +-- Local face recognition tables. +-- +-- `persons` are visual identities (the "who" of a face). The optional +-- `entity_id` bridges to the existing knowledge graph `entities` table — +-- when set, this person is the visual side of an LLM-extracted entity. +-- Don't auto-create entities from persons; the entity table represents +-- LLM-extracted knowledge with its own confidence semantics, and silently +-- filling it from face detections muddies the provenance. +-- +-- `face_detections` carries one row per detected face on a content_hash, +-- plus marker rows with `status='no_faces'` or `status='failed'` so the +-- file watcher knows not to re-scan a hash. Keying on `content_hash` +-- (cross-library dedup) rather than `(library_id, rel_path)` means the +-- same JPEG in two libraries is scanned once. The denormalized `rel_path` +-- carries the most-recently-seen path — useful for cluster-thumb URL +-- generation; canonical path lookup goes through image_exif. 
+ +CREATE TABLE persons ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT NOT NULL, + cover_face_id INTEGER, -- backfilled when the first face binds + entity_id INTEGER, -- optional bridge to entities(id) + created_from_tag BOOLEAN NOT NULL DEFAULT 0, + notes TEXT, + created_at BIGINT NOT NULL, + updated_at BIGINT NOT NULL, + CONSTRAINT fk_persons_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE SET NULL, + UNIQUE(name COLLATE NOCASE) +); + +CREATE INDEX idx_persons_entity ON persons(entity_id); + +CREATE TABLE face_detections ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + library_id INTEGER NOT NULL, + content_hash TEXT NOT NULL, -- canonical key (cross-library dedup) + rel_path TEXT NOT NULL, -- denormalized; most recently seen + bbox_x REAL, -- normalized 0..1; NULL on marker rows + bbox_y REAL, + bbox_w REAL, + bbox_h REAL, + embedding BLOB, -- 512×f32 = 2048 bytes; NULL on marker rows + confidence REAL, -- detector score + source TEXT NOT NULL, -- 'auto' | 'manual' + person_id INTEGER, + status TEXT NOT NULL DEFAULT 'detected', -- 'detected' | 'no_faces' | 'failed' + model_version TEXT NOT NULL, -- e.g. 'buffalo_l'; embedding lineage + created_at BIGINT NOT NULL, + CONSTRAINT fk_fd_library FOREIGN KEY (library_id) REFERENCES libraries(id), + CONSTRAINT fk_fd_person FOREIGN KEY (person_id) REFERENCES persons(id) ON DELETE SET NULL, + -- Detected rows carry geometry + embedding; marker rows ('no_faces', + -- 'failed') carry neither. CHECK enforces the invariant so manual + -- inserts can't slip through with half a row. 
+ CONSTRAINT chk_marker CHECK ( + (status = 'detected' AND bbox_x IS NOT NULL AND embedding IS NOT NULL) + OR (status IN ('no_faces','failed') AND bbox_x IS NULL AND embedding IS NULL) + ) +); + +CREATE INDEX idx_face_detections_hash ON face_detections(content_hash); +CREATE INDEX idx_face_detections_lib_path ON face_detections(library_id, rel_path); +CREATE INDEX idx_face_detections_person ON face_detections(person_id); +CREATE INDEX idx_face_detections_status ON face_detections(status); +-- One marker row per (content_hash, status='no_faces') so the file watcher +-- doesn't double-mark when a hash is seen on multiple full-scan passes. +CREATE UNIQUE INDEX idx_face_detections_no_faces_unique + ON face_detections(content_hash) WHERE status = 'no_faces'; diff --git a/migrations/2026-04-29-000200_add_is_ignored/down.sql b/migrations/2026-04-29-000200_add_is_ignored/down.sql new file mode 100644 index 0000000..41f7c00 --- /dev/null +++ b/migrations/2026-04-29-000200_add_is_ignored/down.sql @@ -0,0 +1,2 @@ +DROP INDEX IF EXISTS idx_persons_is_ignored; +ALTER TABLE persons DROP COLUMN is_ignored; diff --git a/migrations/2026-04-29-000200_add_is_ignored/up.sql b/migrations/2026-04-29-000200_add_is_ignored/up.sql new file mode 100644 index 0000000..d8fdac9 --- /dev/null +++ b/migrations/2026-04-29-000200_add_is_ignored/up.sql @@ -0,0 +1,20 @@ +-- IGNORE / junk bucket for the face recognition feature. +-- +-- An "Ignored" person is the destination for strangers, faces the user +-- doesn't want tagged, and false detections. 
It looks like any other +-- person row (so face_detections.person_id stays a clean foreign key) +-- but `is_ignored=1` flags it for special UI treatment: +-- - hidden from the persons list by default +-- - excluded from `find_persons_by_names_ci` so a tag-name match +-- can never auto-bind a real face to the ignore bucket +-- - cluster-suggest already filters by `person_id IS NULL`, so faces +-- bound to an ignored person are naturally excluded from future +-- re-clustering +-- +-- Partial index because the WHERE-clause is small (typically 1 row), +-- and we only ever query for `is_ignored = 1` to find the bucket. + +ALTER TABLE persons ADD COLUMN is_ignored BOOLEAN NOT NULL DEFAULT 0; + +CREATE INDEX idx_persons_is_ignored + ON persons(is_ignored) WHERE is_ignored = 1; diff --git a/src/ai/face_client.rs b/src/ai/face_client.rs new file mode 100644 index 0000000..8a52812 --- /dev/null +++ b/src/ai/face_client.rs @@ -0,0 +1,370 @@ +//! Thin async HTTP client for Apollo's `/api/internal/faces/*` endpoints. +//! +//! Apollo (the personal location-history viewer at the sibling repo) hosts the +//! insightface inference service. This client is the ImageApi side of the +//! contract — it shoves image bytes through `/detect` and returns boxes + +//! 512-d ArcFace embeddings, plus a single-embedding `/embed` for the manual +//! face-create flow. +//! +//! Mirrors `apollo_client.rs` shape: optional base URL (None = disabled, the +//! file watcher and manual-create handlers no-op), reqwest client with a +//! generous timeout because CPU inference on a backlog can take many seconds +//! per photo. +//! +//! Configured via `APOLLO_FACE_API_BASE_URL`, falling back to +//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo +//! deploys are the common case). Both unset → `is_enabled()` returns false. +//! +//! Wire format: multipart/form-data with `file=` and `meta=`. +//! `meta` carries `{content_hash, library_id, rel_path, orientation?, +//! 
model_version?}` — useful for Apollo-side logging and idempotency, ignored +//! by Apollo today but part of the stable wire contract so future versions +//! can act on it without a client change. +//! +//! Error mapping (reflected in [`FaceDetectError`]): +//! - 422 `decode_failed` → permanent: ImageApi marks `status='failed'` and +//! doesn't retry until manual rerun. +//! - 200 with `faces:[]` → `status='no_faces'` marker row. +//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker +//! written. +//! - Any other 5xx / network error → defer. + +use anyhow::{Context, Result}; +use base64::Engine; +use reqwest::Client; +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +#[derive(Debug, Clone, Serialize)] +pub struct DetectMeta { + pub content_hash: String, + pub library_id: i32, + pub rel_path: String, + /// EXIF orientation int (1..8). Apollo applies `exif_transpose` on the + /// bytes before inference, so this is informational only — supply when + /// the bytes were extracted from a RAW preview that lost the tag. + #[serde(skip_serializing_if = "Option::is_none")] + pub orientation: Option, + /// Echoed back in the response. ImageApi stores it in + /// `face_detections.model_version`. + #[serde(skip_serializing_if = "Option::is_none")] + pub model_version: Option, +} + +// Wire shape for the bbox sub-object Apollo returns. Read by Phase 3's +// file-watch hook; silence the dead-code lint until then. +#[allow(dead_code)] +#[derive(Debug, Clone, Deserialize)] +pub struct DetectedBbox { + pub x: f32, + pub y: f32, + pub w: f32, + pub h: f32, +} + +#[allow(dead_code)] // bbox consumed by Phase 3 file-watch hook +#[derive(Debug, Clone, Deserialize)] +pub struct DetectedFace { + pub bbox: DetectedBbox, + pub confidence: f32, + /// base64 of 2048 bytes (512×f32 LE). ImageApi stores the raw bytes + /// verbatim as a BLOB — see `decode_embedding` for the unpack. 
+ pub embedding: String, +} + +impl DetectedFace { + /// Decode the wire-format embedding back into raw bytes for storage. + /// Returns the 2048-byte little-endian f32 buffer or an error if the + /// base64 is malformed or the wrong length. + pub fn decode_embedding(&self) -> Result> { + let bytes = base64::engine::general_purpose::STANDARD + .decode(self.embedding.as_bytes()) + .context("face embedding base64 decode")?; + if bytes.len() != 2048 { + anyhow::bail!( + "face embedding wrong size: got {} bytes, expected 2048", + bytes.len() + ); + } + Ok(bytes) + } +} + +#[allow(dead_code)] // duration_ms logged by Phase 3 file-watch hook +#[derive(Debug, Clone, Deserialize)] +pub struct DetectResponse { + pub model_version: String, + pub duration_ms: i64, + pub faces: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +#[allow(dead_code)] // Reported by Apollo; useful for future health-driven backoff +pub struct FaceHealth { + pub loaded: bool, + pub providers: Vec, + pub model_version: String, + pub det_size: i32, + #[serde(default)] + pub load_error: Option, +} + +/// Distinguishes permanent failures (don't retry) from transient ones +/// (defer and retry on next scan tick). The file-watch hook keys its +/// marker-row decision on this — a `Permanent` outcome writes +/// `status='failed'`, a `Transient` outcome writes nothing so the next +/// pass tries again. +#[derive(Debug)] +pub enum FaceDetectError { + /// Apollo refused the bytes for a reason that won't change on retry + /// (decode failure, zero-dim image). Mark `status='failed'`. + Permanent(anyhow::Error), + /// Apollo couldn't process this turn but might next time (CUDA OOM, + /// engine not loaded yet, network hiccup). Don't mark anything. + Transient(anyhow::Error), + /// Feature is disabled (no `APOLLO_FACE_API_BASE_URL`). Caller should + /// silently no-op — same shape as `apollo_client::is_enabled()` false. 
+ Disabled, +} + +impl std::fmt::Display for FaceDetectError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FaceDetectError::Permanent(e) => write!(f, "permanent: {e}"), + FaceDetectError::Transient(e) => write!(f, "transient: {e}"), + FaceDetectError::Disabled => write!(f, "face client disabled"), + } + } +} + +impl std::error::Error for FaceDetectError {} + +#[derive(Clone)] +pub struct FaceClient { + client: Client, + /// `None` → disabled. Trim trailing slash at construction so url + /// building doesn't double up. + base_url: Option, +} + +impl FaceClient { + pub fn new(base_url: Option) -> Self { + // 60 s timeout: CPU inference on a backlog can take many seconds + // per photo, especially the first call into a cold GPU. Apollo's + // bounded threadpool (1 worker on CUDA) means concurrent calls + // queue server-side; 60 s is enough headroom for a few items in + // the queue without surfacing a false transient. + let timeout_secs = std::env::var("FACE_DETECT_TIMEOUT_SEC") + .ok() + .and_then(|s| s.parse::().ok()) + .unwrap_or(60); + let client = Client::builder() + .timeout(Duration::from_secs(timeout_secs)) + .build() + .expect("reqwest client build"); + Self { + client, + base_url: base_url.map(|u| u.trim_end_matches('/').to_string()), + } + } + + pub fn is_enabled(&self) -> bool { + self.base_url.is_some() + } + + /// Detect every face in `bytes`. ImageApi calls this from the file-watch + /// hook (Phase 3) and from the manual rerun handler. Empty `faces[]` in + /// the response is the no-faces signal — caller writes a marker row. 
+ #[allow(dead_code)] // Phase 3 file-watch hook + rerun handler + pub async fn detect( + &self, + bytes: Vec, + meta: DetectMeta, + ) -> std::result::Result { + let Some(base) = self.base_url.as_deref() else { + return Err(FaceDetectError::Disabled); + }; + let url = format!("{}/api/internal/faces/detect", base); + self.post_multipart(&url, bytes, &meta).await + } + + /// Single-embedding endpoint for the manual face-create flow. Caller + /// crops the image to the user-drawn bbox and passes those bytes; we + /// run detection inside the crop and return the highest-confidence + /// face's embedding. Apollo returns 422 `no_face_in_crop` when the + /// box missed — surfaced here as `Permanent`. + pub async fn embed( + &self, + bytes: Vec, + meta: DetectMeta, + ) -> std::result::Result { + let Some(base) = self.base_url.as_deref() else { + return Err(FaceDetectError::Disabled); + }; + let url = format!("{}/api/internal/faces/embed", base); + self.post_multipart(&url, bytes, &meta).await + } + + /// Engine reachability + provider/model report. Used by ImageApi for a + /// startup sanity check; not on the hot path. 
+ #[allow(dead_code)] // Phase 3 startup probe + pub async fn health(&self) -> Result { + let base = self.base_url.as_deref().context("face client disabled")?; + let url = format!("{}/api/internal/faces/health", base); + let resp = self.client.get(&url).send().await?.error_for_status()?; + let body: FaceHealth = resp.json().await?; + Ok(body) + } + + async fn post_multipart( + &self, + url: &str, + bytes: Vec, + meta: &DetectMeta, + ) -> std::result::Result { + let meta_json = serde_json::to_string(meta) + .map_err(|e| FaceDetectError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?; + let form = reqwest::multipart::Form::new() + .text("meta", meta_json) + .part( + "file", + reqwest::multipart::Part::bytes(bytes) + .file_name(meta.rel_path.clone()) + .mime_str("application/octet-stream") + .unwrap_or_else(|_| reqwest::multipart::Part::bytes(Vec::new())), + ); + + let resp = match self.client.post(url).multipart(form).send().await { + Ok(r) => r, + Err(e) if e.is_timeout() || e.is_connect() => { + return Err(FaceDetectError::Transient(anyhow::anyhow!( + "face client network: {e}" + ))); + } + Err(e) => { + return Err(FaceDetectError::Transient(anyhow::anyhow!( + "face client request: {e}" + ))); + } + }; + + let status = resp.status(); + if status.is_success() { + let body: DetectResponse = resp.json().await.map_err(|e| { + FaceDetectError::Transient(anyhow::anyhow!("face response decode: {e}")) + })?; + return Ok(body); + } + + let body_text = resp.text().await.unwrap_or_default(); + Err(classify_error_response(status.as_u16(), &body_text)) + } +} + +/// Map an Apollo HTTP error response to a FaceDetectError. Pulled out as a +/// pure function so the marker-row contract (422 → Permanent, 503 → +/// Transient) is unit-testable without spinning up an HTTP server. +fn classify_error_response(status: u16, body_text: &str) -> FaceDetectError { + // Apollo encodes its error class in the JSON body's `detail`. 
Try to + // parse it; fall back to status-only classification. + let detail_code = serde_json::from_str::(body_text) + .ok() + .and_then(|v| { + // detail can be a string ("decode_failed") or an object + // ({"code": "cuda_oom", ...}) depending on the endpoint and + // Apollo's response shape — handle both. + v.get("detail") + .and_then(|d| d.as_str().map(str::to_string)) + .or_else(|| { + v.get("detail") + .and_then(|d| d.get("code")) + .and_then(|c| c.as_str()) + .map(str::to_string) + }) + }) + .unwrap_or_default(); + + if status == 422 { + return FaceDetectError::Permanent(anyhow::anyhow!( + "face detect 422 {}: {}", + detail_code, + body_text + )); + } + if status == 503 { + return FaceDetectError::Transient(anyhow::anyhow!( + "face detect 503 {}: {}", + detail_code, + body_text + )); + } + // Any other 4xx: be conservative and treat as Permanent so we don't + // loop forever on a stable rejection. Any other 5xx: Transient — + // likely intermittent. + if (400..500).contains(&status) { + FaceDetectError::Permanent(anyhow::anyhow!( + "face detect {} {}: {}", + status, + detail_code, + body_text + )) + } else { + FaceDetectError::Transient(anyhow::anyhow!( + "face detect {} {}: {}", + status, + detail_code, + body_text + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn is_permanent(e: &FaceDetectError) -> bool { + matches!(e, FaceDetectError::Permanent(_)) + } + fn is_transient(e: &FaceDetectError) -> bool { + matches!(e, FaceDetectError::Transient(_)) + } + + #[test] + fn classify_422_decode_failed_is_permanent() { + // Permanent → ImageApi marks status='failed' and stops retrying. + let e = classify_error_response(422, r#"{"detail":"decode_failed: bad bytes"}"#); + assert!(is_permanent(&e), "422 decode_failed must be Permanent"); + assert!(format!("{e}").contains("decode_failed")); + } + + #[test] + fn classify_503_cuda_oom_is_transient() { + // Transient → ImageApi must NOT write a marker so the next scan + // retries. 
The detail.code is nested in an object rather than a + // bare string; the parser handles both. + let e = classify_error_response( + 503, + r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#, + ); + assert!(is_transient(&e), "503 cuda_oom must be Transient"); + assert!(format!("{e}").contains("cuda_oom")); + } + + #[test] + fn classify_500_is_transient_other_4xx_is_permanent() { + // Conservative split: 5xx defers (intermittent), other 4xx + // is treated as a stable rejection so we don't loop forever. + assert!(is_transient(&classify_error_response(500, ""))); + assert!(is_transient(&classify_error_response(502, "{}"))); + assert!(is_permanent(&classify_error_response(400, "{}"))); + assert!(is_permanent(&classify_error_response(404, "{}"))); + } + + #[test] + fn classify_handles_unparseable_body() { + // Apollo can return non-JSON on misroute / proxy errors; the + // classifier must still produce a useful variant. + let e = classify_error_response(503, "nginx"); + assert!(is_transient(&e)); + } +} diff --git a/src/ai/mod.rs b/src/ai/mod.rs index a9d55bf..d6fda90 100644 --- a/src/ai/mod.rs +++ b/src/ai/mod.rs @@ -1,5 +1,6 @@ pub mod apollo_client; pub mod daily_summary_job; +pub mod face_client; pub mod handlers; pub mod insight_chat; pub mod insight_generator; diff --git a/src/database/mod.rs b/src/database/mod.rs index 07406d6..a4e348a 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -386,6 +386,16 @@ pub trait ExifDao: Sync + Send { hash: &str, ) -> Result, DbError>; + /// Batch version of [`get_rel_paths_by_hash`]. Returns a + /// `hash → Vec` map for every hash that has at least one + /// rel_path. Used by the batch tag lookup endpoint to expand + /// content-hash siblings without firing a query per hash. 
+ fn get_rel_paths_for_hashes( + &mut self, + context: &opentelemetry::Context, + hashes: &[String], + ) -> Result>, DbError>; + /// List `(library_id, rel_path)` pairs for the given libraries, optionally /// restricted to rows whose rel_path starts with `path_prefix`. When /// `library_ids` is empty, rows from every library are returned. Used by @@ -956,6 +966,40 @@ impl ExifDao for SqliteExifDao { .map_err(|_| DbError::new(DbErrorKind::QueryError)) } + fn get_rel_paths_for_hashes( + &mut self, + context: &opentelemetry::Context, + hashes: &[String], + ) -> Result>, DbError> { + use std::collections::HashMap; + let mut out: HashMap> = HashMap::new(); + if hashes.is_empty() { + return Ok(out); + } + trace_db_call(context, "query", "get_rel_paths_for_hashes", |_span| { + use schema::image_exif::dsl::*; + + let mut connection = self.connection.lock().expect("Unable to get ExifDao"); + + // Chunk the IN clause to stay safely under SQLite's + // SQLITE_LIMIT_VARIABLE_NUMBER (32766 modern, 999 legacy). + const CHUNK: usize = 500; + for chunk in hashes.chunks(CHUNK) { + let rows: Vec<(String, String)> = image_exif + .filter(content_hash.eq_any(chunk)) + .select((content_hash.assume_not_null(), rel_path)) + .distinct() + .load::<(String, String)>(connection.deref_mut()) + .map_err(|_| anyhow::anyhow!("Query error"))?; + for (hash, path) in rows { + out.entry(hash).or_default().push(path); + } + } + Ok(out) + }) + .map_err(|_| DbError::new(DbErrorKind::QueryError)) + } + fn list_rel_paths_for_libraries( &mut self, context: &opentelemetry::Context, diff --git a/src/database/schema.rs b/src/database/schema.rs index e49f21f..55ad9e5 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -70,6 +70,26 @@ diesel::table! { } } +diesel::table! 
{ + face_detections (id) { + id -> Integer, + library_id -> Integer, + content_hash -> Text, + rel_path -> Text, + bbox_x -> Nullable, + bbox_y -> Nullable, + bbox_w -> Nullable, + bbox_h -> Nullable, + embedding -> Nullable, + confidence -> Nullable, + source -> Text, + person_id -> Nullable, + status -> Text, + model_version -> Text, + created_at -> BigInt, + } +} + diesel::table! { favorites (id) { id -> Integer, @@ -130,6 +150,20 @@ diesel::table! { } } +diesel::table! { + persons (id) { + id -> Integer, + name -> Text, + cover_face_id -> Nullable, + entity_id -> Nullable, + created_from_tag -> Bool, + notes -> Nullable, + created_at -> BigInt, + updated_at -> BigInt, + is_ignored -> Bool, + } +} + diesel::table! { photo_insights (id) { id -> Integer, @@ -201,7 +235,10 @@ diesel::table! { diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_photo_links -> entities (entity_id)); diesel::joinable!(entity_photo_links -> libraries (library_id)); +diesel::joinable!(face_detections -> libraries (library_id)); +diesel::joinable!(face_detections -> persons (person_id)); diesel::joinable!(image_exif -> libraries (library_id)); +diesel::joinable!(persons -> entities (entity_id)); diesel::joinable!(photo_insights -> libraries (library_id)); diesel::joinable!(tagged_photo -> tags (tag_id)); diesel::joinable!(video_preview_clips -> libraries (library_id)); @@ -212,10 +249,12 @@ diesel::allow_tables_to_appear_in_same_query!( entities, entity_facts, entity_photo_links, + face_detections, favorites, image_exif, libraries, location_history, + persons, photo_insights, search_history, tagged_photo, diff --git a/src/face_watch.rs b/src/face_watch.rs new file mode 100644 index 0000000..42c6128 --- /dev/null +++ b/src/face_watch.rs @@ -0,0 +1,590 @@ +//! Face-detection pass for the file watcher. +//! +//! `process_new_files` calls [`run_face_detection_pass`] after the EXIF +//! registration loop. 
We walk the candidates (images, not yet face-scanned, +//! not excluded by EXCLUDED_DIRS), fan out parallel detect calls to Apollo, +//! and persist the results — detected faces, `no_faces` markers when Apollo +//! found nothing, `failed` markers on permanent decode errors, no marker on +//! transient failures so the next scan retries. +//! +//! The watcher runs in a plain `std::thread`, so we build a short-lived +//! tokio runtime per pass and `block_on` a join of K detect futures. K is +//! configurable via `FACE_DETECT_CONCURRENCY` (default 8). Apollo's +//! threadpool is bounded to 1–2 workers anyway, so the runs queue +//! server-side; the client-side fan-out is purely about overlapping IO +//! (file read + JSON encode) with someone else's inference. + +use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError}; +use crate::exif; +use crate::faces::{self, FaceDao, InsertFaceDetectionInput}; +use crate::file_types; +use crate::libraries::Library; +use crate::memories::PathExcluder; +use crate::tags::TagDao; +use log::{debug, info, warn}; +use std::path::Path; +use std::sync::{Arc, Mutex}; +use tokio::sync::Semaphore; + +/// One file the watcher would like to face-scan. Built by the caller from +/// the EXIF batch (we need `content_hash` to key everything against). +#[derive(Debug, Clone)] +pub struct FaceCandidate { + pub rel_path: String, + pub content_hash: String, +} + +/// Synchronous entry point. Returns once every candidate has been +/// processed (or definitively skipped). When `face_client.is_enabled()` +/// is false this is a no-op so the watcher can call unconditionally. 
+pub fn run_face_detection_pass( + library: &Library, + excluded_dirs: &[String], + face_client: &FaceClient, + face_dao: Arc>>, + tag_dao: Arc>>, + candidates: Vec, +) { + if !face_client.is_enabled() { + return; + } + if candidates.is_empty() { + return; + } + + let base = Path::new(&library.root_path); + let filtered = filter_excluded(base, excluded_dirs, candidates, Some(&library.name)); + if filtered.is_empty() { + return; + } + + let concurrency: usize = std::env::var("FACE_DETECT_CONCURRENCY") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|n: &usize| *n > 0) + .unwrap_or(8); + + info!( + "face_watch: running detection on {} candidates (library '{}', concurrency {})", + filtered.len(), + library.name, + concurrency + ); + + // Per-pass tokio runtime. The watcher thread isn't in any pre-existing + // async context — building one here keeps the rest of the watcher + // sync-only. Worker count is small; the parallelism we care about is + // task-level (semaphore) not thread-level. + let rt = match tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_all() + .build() + { + Ok(rt) => rt, + Err(e) => { + warn!("face_watch: failed to build tokio runtime: {e}"); + return; + } + }; + + let library_id = library.id; + let library_root = library.root_path.clone(); + rt.block_on(async move { + let sem = Arc::new(Semaphore::new(concurrency)); + let mut handles = Vec::with_capacity(filtered.len()); + for cand in filtered { + let permit_sem = sem.clone(); + let face_client = face_client.clone(); + let face_dao = face_dao.clone(); + let tag_dao = tag_dao.clone(); + let library_root = library_root.clone(); + handles.push(tokio::spawn(async move { + // acquire_owned would let us drop the permit explicitly + // before await points; for a one-shot call into Apollo + // the simpler bounded acquire is enough. 
+ let _permit = permit_sem.acquire().await.expect("face semaphore"); + process_one( + library_id, + &library_root, + cand, + &face_client, + face_dao, + tag_dao, + ) + .await; + })); + } + for h in handles { + // join; per-task panics are logged inside process_one before + // they reach here, so we don't propagate. + let _ = h.await; + } + }); +} + +async fn process_one( + library_id: i32, + library_root: &str, + cand: FaceCandidate, + face_client: &FaceClient, + face_dao: Arc>>, + tag_dao: Arc>>, +) { + let abs = Path::new(library_root).join(&cand.rel_path); + // Read the bytes off disk in a blocking-friendly task. Filesystem IO + // is sync but cheap; a small spawn_blocking would be overkill. + let bytes = match read_image_bytes_for_detect(&abs) { + Ok(b) => b, + Err(e) => { + // Don't mark — file may have been moved/renamed mid-scan; let + // the next pass try again. Future-bug check: a permanently + // unreadable file would loop forever; we accept that for v1 + // because process_new_files already prunes vanished rows on + // full scans. + warn!( + "face_watch: read failed for {} ({}): {}", + cand.rel_path, library_id, e + ); + return; + } + }; + + let meta = DetectMeta { + content_hash: cand.content_hash.clone(), + library_id, + rel_path: cand.rel_path.clone(), + orientation: None, + model_version: None, + }; + let ctx = opentelemetry::Context::current(); + + match face_client.detect(bytes, meta).await { + Ok(resp) => { + // Stage 1: persist detections, holding the dao lock only + // across synchronous DB writes. 
+ let mut stored_for_autobind: Vec<(i32, Vec)> = Vec::new(); + { + let mut dao = face_dao.lock().expect("face dao"); + if resp.faces.is_empty() { + if let Err(e) = dao.mark_status( + &ctx, + library_id, + &cand.content_hash, + &cand.rel_path, + "no_faces", + &resp.model_version, + ) { + warn!( + "face_watch: mark no_faces failed for {}: {:?}", + cand.rel_path, e + ); + } + debug!( + "face_watch: {} → no faces (model {})", + cand.rel_path, resp.model_version + ); + } else { + let face_count = resp.faces.len(); + for face in &resp.faces { + let emb = match face.decode_embedding() { + Ok(b) => b, + Err(e) => { + warn!("face_watch: bad embedding for {}: {:?}", cand.rel_path, e); + continue; + } + }; + // Decode the f32 vector once for auto-bind comparison. + let emb_floats = faces::decode_embedding_bytes(&emb); + match dao.store_detection( + &ctx, + InsertFaceDetectionInput { + library_id, + content_hash: cand.content_hash.clone(), + rel_path: cand.rel_path.clone(), + bbox: Some((face.bbox.x, face.bbox.y, face.bbox.w, face.bbox.h)), + embedding: Some(emb), + confidence: Some(face.confidence), + source: "auto".to_string(), + person_id: None, + status: "detected".to_string(), + model_version: resp.model_version.clone(), + }, + ) { + Ok(row) => { + if let Some(floats) = emb_floats { + stored_for_autobind.push((row.id, floats)); + } + } + Err(e) => warn!( + "face_watch: store_detection failed for {}: {:?}", + cand.rel_path, e + ), + } + } + info!( + "face_watch: {} → {} face(s) ({}ms, {})", + cand.rel_path, face_count, resp.duration_ms, resp.model_version + ); + } + } + + // Stage 2: auto-bind newly-stored faces against same-named + // people-tags. Done outside the dao lock so the lookups don't + // serialize with concurrent detect tasks. 
+ if !stored_for_autobind.is_empty() { + try_auto_bind( + &ctx, + &cand.rel_path, + &resp.model_version, + stored_for_autobind, + &tag_dao, + &face_dao, + ); + } + } + Err(FaceDetectError::Permanent(e)) => { + warn!( + "face_watch: permanent failure on {}: {} — marking failed", + cand.rel_path, e + ); + let mut dao = face_dao.lock().expect("face dao"); + // model_version is best-effort here — the engine that rejected + // the bytes may not have echoed one. Empty string is fine; this + // row is purely a "don't retry" sentinel. + if let Err(e) = dao.mark_status( + &ctx, + library_id, + &cand.content_hash, + &cand.rel_path, + "failed", + "", + ) { + warn!( + "face_watch: mark failed errored for {}: {:?}", + cand.rel_path, e + ); + } + } + Err(FaceDetectError::Transient(e)) => { + // Don't mark anything; next scan tick retries naturally. + // Demoted to debug because OOM and engine-not-ready are noisy + // and self-resolving. + debug!( + "face_watch: transient on {}: {} (will retry next pass)", + cand.rel_path, e + ); + } + Err(FaceDetectError::Disabled) => { + // Caller already checked is_enabled(); this branch is defensive. + } + } +} + +/// Auto-bind newly-detected faces to a same-named person, when a tag on the +/// photo unambiguously identifies one. Driven by `FACE_AUTOBIND_MIN_COS` +/// (default 0.4): the new face's embedding must reach this cosine +/// similarity against the L2-normalized mean of the person's existing +/// faces. The first face for a person binds unconditionally — there's +/// nothing to compare against, and the alternative ("never bind without +/// a reference") would mean bootstrap never kicks off. +/// +/// Multi-match (the photo carries tags for two different known persons) +/// is intentionally a no-op — we can't tell which face is which without +/// additional matching. Those faces stay unassigned for the cluster +/// suggester (Phase 6) to handle. 
+fn try_auto_bind( + ctx: &opentelemetry::Context, + rel_path: &str, + model_version: &str, + new_faces: Vec<(i32, Vec)>, // (face_id, decoded embedding) + tag_dao: &Arc>>, + face_dao: &Arc>>, +) { + // 1. Pull the photo's tags. + let tag_names: Vec = { + let mut td = tag_dao.lock().expect("tag dao"); + match td.get_tags_for_path(ctx, rel_path) { + Ok(tags) => tags.into_iter().map(|t| t.name).collect(), + Err(e) => { + warn!( + "face_watch: get_tags_for_path failed for {}: {:?}", + rel_path, e + ); + return; + } + } + }; + if tag_names.is_empty() { + return; + } + + // 2. Find tags that map to existing persons (case-insensitive). + let person_for_tag: std::collections::HashMap = { + let mut fd = face_dao.lock().expect("face dao"); + match fd.find_persons_by_names_ci(ctx, &tag_names) { + Ok(m) => m, + Err(e) => { + warn!( + "face_watch: find_persons_by_names_ci failed for {}: {:?}", + rel_path, e + ); + return; + } + } + }; + + // 3. Multi-match: ambiguous, skip. Single match: candidate person. + let unique_person_ids: std::collections::HashSet = + person_for_tag.values().copied().collect(); + if unique_person_ids.len() != 1 { + if !unique_person_ids.is_empty() { + debug!( + "face_watch: {} carries tags for {} different persons; skipping auto-bind", + rel_path, + unique_person_ids.len() + ); + } + return; + } + let person_id = *unique_person_ids.iter().next().expect("nonempty set"); + + let threshold: f32 = std::env::var("FACE_AUTOBIND_MIN_COS") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|t: &f32| *t >= 0.0 && *t <= 1.0) + .unwrap_or(0.4); + + // 4. Reference embedding (if any) under the same model_version. + let reference: Option> = { + let mut fd = face_dao.lock().expect("face dao"); + match fd.person_reference_embedding(ctx, person_id, model_version) { + Ok(r) => r, + Err(e) => { + warn!( + "face_watch: person_reference_embedding failed for person {}: {:?}", + person_id, e + ); + return; + } + } + }; + + // 5. Bind each new face that meets the criterion. 
Hold the lock once + // for the whole batch; assign_face_to_person uses its own short + // transaction internally. + let mut fd = face_dao.lock().expect("face dao"); + for (face_id, emb) in new_faces { + let bind = match &reference { + None => { + // Person has no faces yet — first one wins so bootstrap + // can ever produce a usable reference. After this row + // commits, future faces evaluate against it. + debug!( + "face_watch: auto-binding first face {} → person {} (no reference yet)", + face_id, person_id + ); + true + } + Some(ref_vec) => { + let sim = faces::cosine_similarity(&emb, ref_vec); + if sim >= threshold { + debug!( + "face_watch: auto-binding face {} → person {} (cos={:.3} ≥ {:.3})", + face_id, person_id, sim, threshold + ); + true + } else { + debug!( + "face_watch: leaving face {} unassigned (cos={:.3} < {:.3} for person {})", + face_id, sim, threshold, person_id + ); + false + } + } + }; + if bind && let Err(e) = fd.assign_face_to_person(ctx, face_id, person_id) { + warn!( + "face_watch: assign_face_to_person failed (face={}, person={}): {:?}", + face_id, person_id, e + ); + } + } +} + +/// Drop candidates whose path matches the watcher's `EXCLUDED_DIRS` rules. +/// Pulled out for unit testing — the same `PathExcluder` /memories uses, +/// just applied at the face-detect candidate set instead of the memories +/// listing. Skip @eaDir / .thumbnails / user-defined paths before we burn +/// a detect call (and Apollo's GPU memory) on junk. Also drops anything +/// that isn't an image file — the backlog drain pulls every hashed row in +/// `image_exif`, which includes videos; sending those to Apollo just +/// produces `failed` markers and inflates the FAILED stat. 
+pub(crate) fn filter_excluded( + base: &Path, + excluded_dirs: &[String], + candidates: Vec, + library_name: Option<&str>, +) -> Vec { + let excluder = if excluded_dirs.is_empty() { + None + } else { + Some(PathExcluder::new(base, excluded_dirs)) + }; + candidates + .into_iter() + .filter(|c| { + let abs = base.join(&c.rel_path); + if !file_types::is_image_file(&abs) { + debug!( + "face_watch: skipping non-image path {} (library {})", + c.rel_path, + library_name.unwrap_or("") + ); + return false; + } + if let Some(ex) = excluder.as_ref() + && ex.is_excluded(&abs) + { + debug!( + "face_watch: skipping excluded path {} (library {})", + c.rel_path, + library_name.unwrap_or("") + ); + return false; + } + true + }) + .collect() +} + +/// Read image bytes for face detection. Insightface (via opencv) can't +/// decode RAW or HEIC — for those we extract the embedded JPEG preview +/// the way the thumbnail pipeline does. Plain JPEG/PNG/WebP/etc. go +/// through a direct read. +pub(crate) fn read_image_bytes_for_detect(path: &Path) -> std::io::Result> { + if file_types::needs_ffmpeg_thumbnail(path) + && let Some(preview) = exif::extract_embedded_jpeg_preview(path) + { + return Ok(preview); + } + // Plain read for everything else. RAW/HEIC files without an embedded + // preview fall through here too; Apollo will then 422 and the caller + // marks the row failed. That's fine; we tried. + std::fs::read(path) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn cand(rel_path: &str) -> FaceCandidate { + FaceCandidate { + rel_path: rel_path.to_string(), + content_hash: format!("hash-{rel_path}"), + } + } + + #[test] + fn filter_excluded_pattern_drops_dir_components() { + // A pattern matches a path *component* under base, not a substring. + // Phase 3 needs this for @eaDir / .thumbnails skipping. 
+ let tmp = tempfile::tempdir().unwrap(); + let base = tmp.path(); + let candidates = vec![ + cand("photos/a.jpg"), // keep + cand("photos/@eaDir/SYNOPHOTO_THUMB"), // drop (component match) + cand("photos/eaDir-not-a-thing.jpg"), // keep (substring, not component) + ]; + let kept = filter_excluded(base, &["@eaDir".to_string()], candidates, Some("test")); + let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect(); + assert_eq!( + kept_paths, + vec!["photos/a.jpg", "photos/eaDir-not-a-thing.jpg"] + ); + } + + #[test] + fn filter_excluded_absolute_dir_drops_subtree() { + // Absolute (under-base) entries drop the whole subtree. + let tmp = tempfile::tempdir().unwrap(); + let base = tmp.path(); + let candidates = vec![ + cand("public/a.jpg"), + cand("private/a.jpg"), + cand("private/sub/b.jpg"), + ]; + let kept = filter_excluded(base, &["/private".to_string()], candidates, None); + let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect(); + assert_eq!(kept_paths, vec!["public/a.jpg"]); + } + + #[test] + fn filter_excluded_empty_rules_passes_all() { + // EXCLUDED_DIRS unset still lets every image through — only the + // PathExcluder is skipped, the image-extension gate still runs. + let tmp = tempfile::tempdir().unwrap(); + let base = tmp.path(); + let candidates = vec![cand("a.jpg"), cand("b.jpg")]; + let kept = filter_excluded(base, &[], candidates, None); + assert_eq!(kept.len(), 2); + } + + #[test] + fn filter_excluded_drops_videos_and_non_media() { + // Backlog drain pulls every hashed row in image_exif (videos + // included). Videos must never reach Apollo — opencv can't + // decode them, every call would 422 and write a `failed` marker. 
+ let tmp = tempfile::tempdir().unwrap(); + let base = tmp.path(); + let candidates = vec![ + cand("photos/a.jpg"), + cand("photos/clip.mp4"), + cand("photos/clip.MOV"), + cand("photos/notes.txt"), + cand("photos/b.heic"), + ]; + let kept = filter_excluded(base, &[], candidates, Some("test")); + let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect(); + assert_eq!(kept_paths, vec!["photos/a.jpg", "photos/b.heic"]); + } + + #[test] + fn read_bytes_passes_through_for_jpeg() { + // JPEG goes through plain read — we DON'T want to lose orientation + // metadata or re-encode here; insightface's exif_transpose handles + // orientation on its end. + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("test.jpg"); + let mut buf = Vec::new(); + // Tiny 4x4 grey JPEG — encoded by image crate so we know it round-trips. + let img = image::DynamicImage::ImageRgb8(image::RgbImage::from_pixel( + 4, + 4, + image::Rgb([128, 128, 128]), + )); + img.write_to( + &mut std::io::Cursor::new(&mut buf), + image::ImageFormat::Jpeg, + ) + .unwrap(); + fs::write(&path, &buf).unwrap(); + + let read = read_image_bytes_for_detect(&path).expect("read jpeg"); + assert_eq!(read, buf, "JPEG bytes must pass through verbatim"); + } + + #[test] + fn read_bytes_falls_back_when_raw_has_no_preview() { + // A `.nef` file with non-RAW bytes won't have an embedded preview — + // the helper falls through to plain read rather than refusing. This + // matches the docstring contract; Apollo will then 422 and we'll + // mark the row as failed. + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("not_really.nef"); + fs::write(&path, b"definitely-not-a-raw-file").unwrap(); + + let read = read_image_bytes_for_detect(&path).expect("fallback read"); + assert_eq!(read, b"definitely-not-a-raw-file"); + } +} diff --git a/src/faces.rs b/src/faces.rs new file mode 100644 index 0000000..20fd700 --- /dev/null +++ b/src/faces.rs @@ -0,0 +1,3403 @@ +//! 
Local face recognition: data layer + HTTP surface. +//! +//! Phase 2 ships the persistence model and the manual CRUD endpoints; the +//! file-watch hook that drives automatic detection lives in `process_new_files` +//! (Phase 3) and is not registered yet. Inference is delegated to Apollo over +//! HTTP via [`crate::ai::face_client`]; this module never imports onnxruntime. +//! +//! Data model: +//! - `persons` are visual identities (the "who" of a face). +//! - `face_detections` rows are either real detections (`status='detected'`) +//! or markers (`status='no_faces' | 'failed'`). Both are keyed on +//! `content_hash` so the same JPEG in two libraries is scanned once. +//! - The `(library_id, rel_path)` pair is the *display* lookup; we resolve +//! it through `image_exif.content_hash` on every read so renames don't +//! strand face rows. +//! +//! The `FaceDao` trait abstracts persistence; `SqliteFaceDao` is the +//! production impl. The Phase 2 endpoints use it directly. A test impl +//! (in-memory) lives at the bottom of the module behind `#[cfg(test)]`. 
+ +use crate::Claims; +use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError}; +use crate::exif; +use crate::database::schema::{face_detections, image_exif, persons}; +use crate::error::IntoHttpError; +use crate::libraries::{self, Library}; +use crate::otel::{extract_context_from_request, global_tracer, trace_db_call}; +use crate::state::AppState; +use crate::utils::normalize_path; +use crate::{ThumbnailRequest, connect}; +use actix_web::dev::{ServiceFactory, ServiceRequest}; +use actix_web::{App, HttpRequest, HttpResponse, Responder, web}; +use anyhow::{Context, anyhow}; +use chrono::Utc; +use diesel::prelude::*; +use image::GenericImageView; +use log::{info, warn}; +use opentelemetry::KeyValue; +use opentelemetry::trace::{Span, Status, TraceContextExt, Tracer}; +use serde::{Deserialize, Serialize}; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +// ── Wire types ────────────────────────────────────────────────────────────── + +/// Visual identity. The optional `entity_id` bridges this person to an +/// LLM-extracted knowledge-graph entity (textual side). Persons are NOT +/// auto-bridged at creation — only when the user explicitly links them in +/// the management UI, or when bootstrap finds an exact-name match. +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct Person { + pub id: i32, + pub name: String, + pub cover_face_id: Option, + pub entity_id: Option, + pub created_from_tag: bool, + pub notes: Option, + pub created_at: i64, + pub updated_at: i64, + /// True for the IGNORE / junk bucket. Hidden from the default + /// persons list, skipped by `find_persons_by_names_ci` (so a tag + /// match can never auto-bind a real face into the ignore bucket), + /// and excluded from cluster suggestions because cluster-suggest + /// already filters by `person_id IS NULL` and ignored faces have + /// a non-null person_id. 
+ pub is_ignored: bool, +} + +#[derive(Insertable, Debug)] +#[diesel(table_name = persons)] +struct InsertPerson { + name: String, + notes: Option, + created_from_tag: bool, + is_ignored: bool, + created_at: i64, + updated_at: i64, +} + +#[derive(Serialize, Queryable, Clone, Debug)] +pub struct FaceDetectionRow { + pub id: i32, + pub library_id: i32, + pub content_hash: String, + pub rel_path: String, + pub bbox_x: Option, + pub bbox_y: Option, + pub bbox_w: Option, + pub bbox_h: Option, + /// Skip on the wire — clients call /faces/embeddings explicitly when + /// they need it. Saves ~2 KB per face on every list response. + #[serde(skip_serializing)] + pub embedding: Option>, + pub confidence: Option, + pub source: String, + pub person_id: Option, + pub status: String, + pub model_version: String, + pub created_at: i64, +} + +/// Row shape for `list_unscanned_candidates`'s raw SQL. Diesel's +/// `sql_query` requires a `QueryableByName` row type with explicit +/// column SQL types; using a tuple isn't supported. +#[derive(diesel::QueryableByName, Debug)] +struct UnscannedRow { + #[diesel(sql_type = diesel::sql_types::Text)] + rel_path: String, + #[diesel(sql_type = diesel::sql_types::Text)] + content_hash: String, +} + +#[derive(Insertable, Debug)] +#[diesel(table_name = face_detections)] +struct InsertFaceDetection { + library_id: i32, + content_hash: String, + rel_path: String, + bbox_x: Option, + bbox_y: Option, + bbox_w: Option, + bbox_h: Option, + embedding: Option>, + confidence: Option, + source: String, + person_id: Option, + status: String, + model_version: String, + created_at: i64, +} + +/// Build a [`FaceWithPerson`] from a freshly-mutated row by resolving the +/// person name via [`FaceDao::get_person`]. 
Used by `create_face_handler` +/// and `update_face_handler` so PATCH/POST responses match the join shape +/// `/image/faces` returns — without this the carousel overlay's +/// optimistic-replace would clobber the rendered name (the bare +/// [`FaceDetectionRow`] doesn't carry it). +fn hydrate_face_with_person( + dao: &mut D, + ctx: &opentelemetry::Context, + row: FaceDetectionRow, +) -> anyhow::Result { + let person_name = match row.person_id { + Some(pid) => dao.get_person(ctx, pid)?.map(|p| p.name), + None => None, + }; + Ok(FaceWithPerson { + id: row.id, + bbox_x: row.bbox_x.unwrap_or(0.0), + bbox_y: row.bbox_y.unwrap_or(0.0), + bbox_w: row.bbox_w.unwrap_or(0.0), + bbox_h: row.bbox_h.unwrap_or(0.0), + confidence: row.confidence.unwrap_or(0.0), + source: row.source, + person_id: row.person_id, + person_name, + model_version: row.model_version, + }) +} + +/// Face row decorated with its assigned person's name. Returned by +/// `/image/faces` for the rendering side (carousel overlay, person chips). +#[derive(Serialize, Debug, Clone)] +pub struct FaceWithPerson { + pub id: i32, + pub bbox_x: f32, + pub bbox_y: f32, + pub bbox_w: f32, + pub bbox_h: f32, + pub confidence: f32, + pub source: String, + pub person_id: Option, + pub person_name: Option, + pub model_version: String, +} + +/// Face row plus the photo it lives on. Powers the per-person photo grid +/// (`GET /persons/{id}/faces`) and unassigned-cluster surfacing in Apollo. +#[derive(Serialize, Debug, Clone)] +pub struct FaceWithPath { + pub id: i32, + pub library_id: i32, + pub rel_path: String, + pub bbox_x: f32, + pub bbox_y: f32, + pub bbox_w: f32, + pub bbox_h: f32, + pub confidence: f32, + pub person_id: Option, + pub model_version: String, +} + +/// Embedding-bearing face row. Returned by `/faces/embeddings` for Apollo's +/// clustering layer; embedding is base64-encoded so the JSON payload is +/// self-contained (Apollo's DBSCAN runs over numpy arrays decoded from this). 
+#[derive(Serialize, Debug, Clone)] +pub struct FaceEmbeddingRow { + pub id: i32, + pub library_id: i32, + pub rel_path: String, + pub content_hash: String, + pub person_id: Option, + pub model_version: String, + /// base64 of 2048 bytes (512×f32 LE). + pub embedding: String, + /// Normalized bbox 0..1, included so the cluster suggester UI can + /// crop a face thumbnail without an extra round-trip per cluster. + /// Shouldn't be NULL for `status='detected'` rows (CHECK constraint + /// in the migration), but the DB type is nullable so we mirror it. + pub bbox_x: Option, + pub bbox_y: Option, + pub bbox_w: Option, + pub bbox_h: Option, +} + +#[derive(Serialize, Debug, Default)] +pub struct FaceStats { + pub library_id: Option, + pub total_photos: i64, + pub scanned: i64, + pub with_faces: i64, + pub no_faces: i64, + pub failed: i64, + pub persons_count: i64, + pub unassigned_faces: i64, +} + +#[derive(Serialize, Debug, Clone)] +pub struct PersonSummary { + pub id: i32, + pub name: String, + pub cover_face_id: Option, + pub entity_id: Option, + pub created_from_tag: bool, + pub notes: Option, + pub is_ignored: bool, + pub face_count: i64, +} + +// ── Request bodies ────────────────────────────────────────────────────────── + +#[derive(Deserialize, Debug)] +pub struct CreatePersonReq { + pub name: String, + #[serde(default)] + pub notes: Option, + /// Optional bridge to an existing entity. NULL/missing leaves it + /// unbridged; set explicitly to wire the person to LLM-extracted facts. + #[serde(default)] + pub entity_id: Option, + /// True for the IGNORE / junk bucket. The frontend sets this when + /// lazily creating the Ignored person via the dedicated endpoint; + /// hand-rolled callers leave it false. 
+ #[serde(default)] + pub is_ignored: bool, +} + +#[derive(Deserialize, Debug)] +pub struct UpdatePersonReq { + #[serde(default)] + pub name: Option, + #[serde(default)] + pub notes: Option, + #[serde(default)] + pub cover_face_id: Option, + #[serde(default)] + pub entity_id: Option, + /// Toggle the ignore flag. Mostly used by the UI to "un-ignore" a + /// person that was previously bound to the bucket. + #[serde(default)] + pub is_ignored: Option, +} + +#[derive(Deserialize, Debug)] +pub struct MergePersonsReq { + /// Person id to merge *into*. The source (`{id}` in the path) is + /// re-pointed to this id, then deleted. + pub into: i32, +} + +#[derive(Deserialize, Debug)] +pub struct DeletePersonQuery { + /// `set_null` (default) leaves face rows orphaned (person_id NULL); + /// `delete` cascades through and removes the face rows entirely. + /// Default is set_null because deleting the person almost never means + /// "delete every photo of them ever existed." + #[serde(default)] + pub cascade: Option, +} + +#[derive(Deserialize, Debug)] +pub struct CreateFaceReq { + /// Photo path (library-relative). Resolved to content_hash via + /// image_exif before any face row is inserted. + pub path: String, + pub library: Option, + pub bbox: BboxReq, + /// Optional initial person assignment. Use this when the user draws a + /// box and immediately picks a name from the autocomplete. + #[serde(default)] + pub person_id: Option, + /// Skip the embedding step. Set when the user wants to tag a region + /// the detector can't find a face in (back of head, profile partly + /// occluded, etc.). The row is stored with a zero-vector embedding, + /// which the cluster suggester filters on `norm <= 0` and auto-bind + /// cosine resolves to 0 against — so the row participates only as a + /// browse-by-person tag, not in similarity matching. The frontend + /// only sets this after a 422 from a strict create plus an explicit + /// operator confirmation. 
+ #[serde(default)] + pub force: bool, +} + +#[derive(Deserialize, Debug)] +pub struct BboxReq { + pub x: f32, + pub y: f32, + pub w: f32, + pub h: f32, +} + +#[derive(Deserialize, Debug)] +pub struct UpdateFaceReq { + /// `null` literally clears the assignment; missing leaves it alone. + /// Distinguish via `Option>` is tricky in serde without + /// custom deserialization; encode "clear" as `clear_person: true` + /// instead. + #[serde(default)] + pub person_id: Option, + #[serde(default)] + pub clear_person: bool, + #[serde(default)] + pub bbox: Option, +} + +#[derive(Deserialize, Debug)] +pub struct EmbeddingsQuery { + pub library: Option, + /// Default true — clustering only cares about unassigned faces. Set + /// false to dump all embeddings (e.g. for re-clustering everything). + #[serde(default = "default_unassigned")] + pub unassigned: bool, + #[serde(default = "default_embeddings_limit")] + pub limit: i64, + #[serde(default)] + pub offset: i64, +} + +fn default_unassigned() -> bool { + true +} +fn default_embeddings_limit() -> i64 { + 500 +} + +// ── DAO trait ─────────────────────────────────────────────────────────────── + +// File-watch hook (Phase 3) and the rerun handler (Phase 6) consume the +// methods the Phase 2 routes don't. Allow dead_code on the trait so we +// don't have to sprinkle attributes on every method that's wired up later. +#[allow(dead_code)] +pub trait FaceDao: Send + Sync { + fn already_scanned( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result; + /// Find image_exif rows in `library_id` that have a populated + /// content_hash but no matching face_detections row yet. Used by + /// the watcher's quick-scan path to drain the backlog without + /// re-walking the filesystem. Returns `(rel_path, content_hash)` + /// pairs, capped at `limit`. Distinct on content_hash so the same + /// hash that lives at multiple rel_paths only fires one detection. 
+ fn list_unscanned_candidates( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + limit: i64, + ) -> anyhow::Result>; + fn store_detection( + &mut self, + ctx: &opentelemetry::Context, + row: InsertFaceDetectionInput, + ) -> anyhow::Result; + fn mark_status( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + content_hash: &str, + rel_path: &str, + status: &str, + model_version: &str, + ) -> anyhow::Result<()>; + fn list_for_content_hash( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result>; + fn list_for_person( + &mut self, + ctx: &opentelemetry::Context, + person_id: i32, + library_id: Option, + ) -> anyhow::Result>; + fn list_embeddings( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + unassigned: bool, + limit: i64, + offset: i64, + ) -> anyhow::Result>; + fn get_face( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + ) -> anyhow::Result>; + fn update_face( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + person_id: Option>, // None=leave; Some(None)=clear; Some(Some(id))=set + bbox: Option<(f32, f32, f32, f32)>, + embedding: Option>, + ) -> anyhow::Result; + fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result; + fn delete_auto_for_hash( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result; + fn stats( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + ) -> anyhow::Result; + + // ── Persons ───────────────────────────────────────────────────────── + fn create_person( + &mut self, + ctx: &opentelemetry::Context, + req: &CreatePersonReq, + from_tag: bool, + ) -> anyhow::Result; + fn get_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + ) -> anyhow::Result>; + fn list_persons( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + include_ignored: bool, + ) -> anyhow::Result>; + /// Get the IGNORE/junk bucket, creating it lazily on first call. 
+ /// Idempotent — returns the same row across calls. Single global + /// bucket per database; the frontend never sees the literal name. + fn get_or_create_ignored_person( + &mut self, + ctx: &opentelemetry::Context, + ) -> anyhow::Result; + fn update_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + patch: &UpdatePersonReq, + ) -> anyhow::Result; + /// Delete a person. `cascade=true` removes face rows; otherwise the + /// rows have their `person_id` set NULL by the FK constraint. + fn delete_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + cascade_delete_faces: bool, + ) -> anyhow::Result; + fn merge_persons( + &mut self, + ctx: &opentelemetry::Context, + src: i32, + into: i32, + ) -> anyhow::Result; + + /// Resolve `(library_id, rel_path)` → `content_hash` via image_exif. + /// Returns None when the photo hasn't been EXIF-indexed yet (no row + /// in image_exif) or when the row exists but content_hash is NULL. + fn resolve_content_hash( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + ) -> anyhow::Result>; + + // ── Auto-bind support (Phase 4) ───────────────────────────────────── + + /// Map case-insensitive person names → person id. Used by the + /// auto-bind path to look up "is this tag a known person?". Names + /// passed in are matched LOWER(persons.name); collisions resolve to + /// the person with the lowest id (stable, but the UNIQUE constraint + /// on persons.name COLLATE NOCASE prevents collisions in practice). + fn find_persons_by_names_ci( + &mut self, + ctx: &opentelemetry::Context, + names: &[String], + ) -> anyhow::Result>; + + /// Mean of a person's existing face embeddings. Returns the L2- + /// normalized 512-d reference vector, or None when the person has + /// no detected faces yet (auto-bind treats that as "first face wins + /// unconditionally"). Filters by the same model_version that produced + /// the candidate embedding so cross-model averaging never happens. 
+    fn person_reference_embedding(
+        &mut self,
+        ctx: &opentelemetry::Context,
+        person_id: i32,
+        model_version: &str,
+    ) -> anyhow::Result<Option<Vec<f32>>>;
+
+    /// Set face_detections.person_id and, when the target person has no
+    /// cover_face_id yet, set it to this face. One transaction so a
+    /// half-bound state can't survive a SQLite write error.
+    fn assign_face_to_person(
+        &mut self,
+        ctx: &opentelemetry::Context,
+        face_id: i32,
+        person_id: i32,
+    ) -> anyhow::Result<()>;
+}
+
+/// Free-standing input struct; the DAO copies it into [`InsertFaceDetection`]
+/// so callers don't need to import the diesel-derived insertable.
+#[derive(Debug, Clone)]
+pub struct InsertFaceDetectionInput {
+    pub library_id: i32,
+    pub content_hash: String,
+    pub rel_path: String,
+    pub bbox: Option<(f32, f32, f32, f32)>,
+    pub embedding: Option<Vec<u8>>,
+    pub confidence: Option<f32>,
+    pub source: String,
+    pub person_id: Option<i32>,
+    pub status: String,
+    pub model_version: String,
+}
+
+// ── SqliteFaceDao impl ──────────────────────────────────────────────────────
+
+pub struct SqliteFaceDao {
+    connection: Arc<Mutex<SqliteConnection>>,
+}
+
+impl SqliteFaceDao {
+    pub fn new() -> Self {
+        Self {
+            connection: Arc::new(Mutex::new(connect())),
+        }
+    }
+
+    /// Test helper — bind to a pre-built (typically in-memory) connection.
+ #[cfg(test)] + pub fn from_connection(connection: Arc>) -> Self { + Self { connection } + } +} + +impl Default for SqliteFaceDao { + fn default() -> Self { + Self::new() + } +} + +impl FaceDao for SqliteFaceDao { + fn already_scanned( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "face_already_scanned", |span| { + span.set_attribute(KeyValue::new("content_hash", content_hash.to_string())); + face_detections::table + .filter(face_detections::content_hash.eq(content_hash)) + .select(face_detections::id) + .first::(conn.deref_mut()) + .optional() + .map(|x| x.is_some()) + .with_context(|| "already_scanned query") + }) + } + + fn list_unscanned_candidates( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + limit: i64, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "list_unscanned_candidates", |span| { + span.set_attribute(KeyValue::new("library_id", library_id as i64)); + // Pick the smallest-id rel_path per content_hash so we don't + // fire multiple detect calls for the same hash if it lives + // under several rel_paths in the same library. The + // anti-join (NOT EXISTS) drains hashes that have no row in + // face_detections at all. + let rows: Vec<(String, String)> = diesel::sql_query( + "SELECT rel_path, content_hash \ + FROM image_exif e \ + WHERE library_id = ? \ + AND content_hash IS NOT NULL \ + AND NOT EXISTS ( \ + SELECT 1 FROM face_detections f \ + WHERE f.content_hash = e.content_hash \ + ) \ + GROUP BY content_hash \ + LIMIT ?", + ) + .bind::(library_id) + .bind::(limit) + .load::(conn.deref_mut()) + .with_context(|| "list_unscanned_candidates")? 
+ .into_iter() + .map(|r| (r.rel_path, r.content_hash)) + .collect(); + Ok(rows) + }) + } + + fn store_detection( + &mut self, + ctx: &opentelemetry::Context, + row: InsertFaceDetectionInput, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "insert", "store_detection", |span| { + span.set_attribute(KeyValue::new("status", row.status.clone())); + span.set_attribute(KeyValue::new("source", row.source.clone())); + let now = Utc::now().timestamp(); + let (bx, by, bw, bh) = match row.bbox { + Some((x, y, w, h)) => (Some(x), Some(y), Some(w), Some(h)), + None => (None, None, None, None), + }; + let insert = InsertFaceDetection { + library_id: row.library_id, + content_hash: row.content_hash, + rel_path: row.rel_path, + bbox_x: bx, + bbox_y: by, + bbox_w: bw, + bbox_h: bh, + embedding: row.embedding, + confidence: row.confidence, + source: row.source, + person_id: row.person_id, + status: row.status, + model_version: row.model_version, + created_at: now, + }; + diesel::insert_into(face_detections::table) + .values(&insert) + .execute(conn.deref_mut()) + .with_context(|| "insert face_detection")?; + define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; } + let id = diesel::select(last_insert_rowid()) + .get_result::(conn.deref_mut()) + .with_context(|| "last_insert_rowid")?; + face_detections::table + .find(id) + .first::(conn.deref_mut()) + .with_context(|| "fetch inserted face") + }) + } + + fn mark_status( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + content_hash: &str, + rel_path: &str, + status: &str, + model_version: &str, + ) -> anyhow::Result<()> { + // Marker rows have NULL bbox + NULL embedding (CHECK enforces + // this). We let the UNIQUE partial index on (content_hash) WHERE + // status='no_faces' guard against double-marking; for 'failed' we + // do a manual exists-check. 
+ let exists = self.already_scanned(ctx, content_hash)?; + if exists { + // Don't write a second marker if any row already exists for + // this hash — that includes detected rows from a prior run + // that succeeded; the file watcher's already_scanned() check + // should have caught this, but stay idempotent. + return Ok(()); + } + self.store_detection( + ctx, + InsertFaceDetectionInput { + library_id, + content_hash: content_hash.to_string(), + rel_path: rel_path.to_string(), + bbox: None, + embedding: None, + confidence: None, + source: "auto".to_string(), + person_id: None, + status: status.to_string(), + model_version: model_version.to_string(), + }, + )?; + Ok(()) + } + + fn list_for_content_hash( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "faces_for_hash", |span| { + span.set_attribute(KeyValue::new("content_hash", content_hash.to_string())); + face_detections::table + .left_join(persons::table.on(persons::id.nullable().eq(face_detections::person_id))) + .filter(face_detections::content_hash.eq(content_hash)) + .filter(face_detections::status.eq("detected")) + .select(( + face_detections::id, + face_detections::bbox_x, + face_detections::bbox_y, + face_detections::bbox_w, + face_detections::bbox_h, + face_detections::confidence, + face_detections::source, + face_detections::person_id, + persons::name.nullable(), + face_detections::model_version, + )) + .load::<( + i32, + Option, + Option, + Option, + Option, + Option, + String, + Option, + Option, + String, + )>(conn.deref_mut()) + .with_context(|| "list faces for hash") + .map(|rows| { + rows.into_iter() + .map(|r| FaceWithPerson { + id: r.0, + bbox_x: r.1.unwrap_or(0.0), + bbox_y: r.2.unwrap_or(0.0), + bbox_w: r.3.unwrap_or(0.0), + bbox_h: r.4.unwrap_or(0.0), + confidence: r.5.unwrap_or(0.0), + source: r.6, + person_id: r.7, + person_name: r.8, + model_version: r.9, 
+ }) + .collect() + }) + }) + } + + fn list_for_person( + &mut self, + ctx: &opentelemetry::Context, + person_id: i32, + library_id: Option, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "faces_for_person", |span| { + span.set_attribute(KeyValue::new("person_id", person_id as i64)); + let mut query = face_detections::table + .filter(face_detections::person_id.eq(person_id)) + .filter(face_detections::status.eq("detected")) + .into_boxed(); + if let Some(lib) = library_id { + query = query.filter(face_detections::library_id.eq(lib)); + } + query + .select(( + face_detections::id, + face_detections::library_id, + face_detections::rel_path, + face_detections::bbox_x, + face_detections::bbox_y, + face_detections::bbox_w, + face_detections::bbox_h, + face_detections::confidence, + face_detections::person_id, + face_detections::model_version, + )) + .load::<( + i32, + i32, + String, + Option, + Option, + Option, + Option, + Option, + Option, + String, + )>(conn.deref_mut()) + .with_context(|| "list faces for person") + .map(|rows| { + rows.into_iter() + .map(|r| FaceWithPath { + id: r.0, + library_id: r.1, + rel_path: r.2, + bbox_x: r.3.unwrap_or(0.0), + bbox_y: r.4.unwrap_or(0.0), + bbox_w: r.5.unwrap_or(0.0), + bbox_h: r.6.unwrap_or(0.0), + confidence: r.7.unwrap_or(0.0), + person_id: r.8, + model_version: r.9, + }) + .collect() + }) + }) + } + + fn list_embeddings( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + unassigned: bool, + limit: i64, + offset: i64, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "list_embeddings", |span| { + span.set_attribute(KeyValue::new("limit", limit)); + span.set_attribute(KeyValue::new("offset", offset)); + let mut query = face_detections::table + .filter(face_detections::status.eq("detected")) + .into_boxed(); + if let Some(lib) = library_id { + query = 
query.filter(face_detections::library_id.eq(lib)); + } + if unassigned { + query = query.filter(face_detections::person_id.is_null()); + } + let rows = query + .order(face_detections::id.asc()) + .limit(limit) + .offset(offset) + .load::(conn.deref_mut()) + .with_context(|| "list embeddings")?; + // Pair with the base64-encoded embedding string so the handler + // doesn't need to know the wire format. Skip rows with NULL + // embedding (shouldn't happen on detected rows, but defensive). + use base64::Engine; + Ok(rows + .into_iter() + .filter_map(|r| { + r.embedding.as_ref().map(|bytes| { + let b64 = base64::engine::general_purpose::STANDARD.encode(bytes); + (r.clone(), b64) + }) + }) + .collect()) + }) + } + + fn get_face( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "get_face", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + face_detections::table + .find(id) + .first::(conn.deref_mut()) + .optional() + .with_context(|| "get_face") + }) + } + + fn update_face( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + person_id: Option>, + bbox: Option<(f32, f32, f32, f32)>, + embedding: Option>, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "update", "update_face", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + // Apply patches one at a time so each set() has the right type. + // Diesel's update DSL is type-driven and combining heterogeneous + // optional sets in one statement is awkward. 
+ if let Some(pid) = person_id { + diesel::update(face_detections::table.find(id)) + .set(face_detections::person_id.eq(pid)) + .execute(conn.deref_mut()) + .with_context(|| "update person_id")?; + } + if let Some((x, y, w, h)) = bbox { + diesel::update(face_detections::table.find(id)) + .set(( + face_detections::bbox_x.eq(x), + face_detections::bbox_y.eq(y), + face_detections::bbox_w.eq(w), + face_detections::bbox_h.eq(h), + )) + .execute(conn.deref_mut()) + .with_context(|| "update bbox")?; + } + if let Some(emb) = embedding { + diesel::update(face_detections::table.find(id)) + .set(face_detections::embedding.eq(emb)) + .execute(conn.deref_mut()) + .with_context(|| "update embedding")?; + } + face_detections::table + .find(id) + .first::(conn.deref_mut()) + .with_context(|| "fetch updated face") + }) + } + + fn delete_face(&mut self, ctx: &opentelemetry::Context, id: i32) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "delete", "delete_face", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + let n = diesel::delete(face_detections::table.find(id)) + .execute(conn.deref_mut()) + .with_context(|| "delete face")?; + Ok(n > 0) + }) + } + + fn delete_auto_for_hash( + &mut self, + ctx: &opentelemetry::Context, + content_hash: &str, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "delete", "delete_auto_for_hash", |span| { + span.set_attribute(KeyValue::new("content_hash", content_hash.to_string())); + diesel::delete( + face_detections::table + .filter(face_detections::content_hash.eq(content_hash)) + .filter(face_detections::source.eq("auto")), + ) + .execute(conn.deref_mut()) + .with_context(|| "delete auto rows") + }) + } + + fn stats( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "face_stats", 
|span| { + if let Some(lib) = library_id { + span.set_attribute(KeyValue::new("library_id", lib as i64)); + } + // Count distinct content_hashes per status by status — one + // hash can have many rows (multiple detected faces) but we + // want it counted once. + let scanned: i64 = { + let mut q = face_detections::table.into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_distinct(face_detections::content_hash)) + .first(conn.deref_mut()) + .with_context(|| "stats: scanned")? + }; + let with_faces: i64 = { + let mut q = face_detections::table + .filter(face_detections::status.eq("detected")) + .into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_distinct(face_detections::content_hash)) + .first(conn.deref_mut()) + .with_context(|| "stats: with_faces")? + }; + let no_faces: i64 = { + let mut q = face_detections::table + .filter(face_detections::status.eq("no_faces")) + .into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_distinct(face_detections::content_hash)) + .first(conn.deref_mut()) + .with_context(|| "stats: no_faces")? + }; + let failed: i64 = { + let mut q = face_detections::table + .filter(face_detections::status.eq("failed")) + .into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_distinct(face_detections::content_hash)) + .first(conn.deref_mut()) + .with_context(|| "stats: failed")? + }; + let total_photos: i64 = { + let mut q = image_exif::table.into_boxed(); + if let Some(lib) = library_id { + q = q.filter(image_exif::library_id.eq(lib)); + } + q.select(diesel::dsl::count_star()) + .first(conn.deref_mut()) + .with_context(|| "stats: total_photos")? 
+ }; + let persons_count: i64 = persons::table + .select(diesel::dsl::count_star()) + .first(conn.deref_mut()) + .with_context(|| "stats: persons")?; + let unassigned_faces: i64 = { + let mut q = face_detections::table + .filter(face_detections::status.eq("detected")) + .filter(face_detections::person_id.is_null()) + .into_boxed(); + if let Some(lib) = library_id { + q = q.filter(face_detections::library_id.eq(lib)); + } + q.select(diesel::dsl::count_star()) + .first(conn.deref_mut()) + .with_context(|| "stats: unassigned")? + }; + + Ok(FaceStats { + library_id, + total_photos, + scanned, + with_faces, + no_faces, + failed, + persons_count, + unassigned_faces, + }) + }) + } + + fn create_person( + &mut self, + ctx: &opentelemetry::Context, + req: &CreatePersonReq, + from_tag: bool, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "insert", "create_person", |span| { + span.set_attribute(KeyValue::new("name", req.name.clone())); + let now = Utc::now().timestamp(); + let insert = InsertPerson { + name: req.name.clone(), + notes: req.notes.clone(), + created_from_tag: from_tag, + is_ignored: req.is_ignored, + created_at: now, + updated_at: now, + }; + diesel::insert_into(persons::table) + .values(&insert) + .execute(conn.deref_mut()) + .with_context(|| format!("insert person {}", req.name))?; + define_sql_function! { fn last_insert_rowid() -> diesel::sql_types::Integer; } + let id = diesel::select(last_insert_rowid()) + .get_result::(conn.deref_mut()) + .with_context(|| "last_insert_rowid persons")?; + // Optional entity bridge — do this as a follow-up update so + // schema's UNIQUE(name COLLATE NOCASE) can fire on insert + // before we touch entity_id. 
+ if let Some(entity_id) = req.entity_id { + diesel::update(persons::table.find(id)) + .set(persons::entity_id.eq(entity_id)) + .execute(conn.deref_mut()) + .with_context(|| "set entity_id on new person")?; + } + persons::table + .find(id) + .first::(conn.deref_mut()) + .with_context(|| "fetch new person") + }) + } + + fn get_or_create_ignored_person( + &mut self, + ctx: &opentelemetry::Context, + ) -> anyhow::Result { + // Fast path: there's already an is_ignored row → return it. + // Slow path on first use: create one with a stable display name + // ("Ignored"). Race-safe because the UNIQUE(name COLLATE NOCASE) + // index forces only one ever to exist (we trip and look up). + { + let mut conn = self.connection.lock().expect("face dao lock"); + if let Some(p) = persons::table + .filter(persons::is_ignored.eq(true)) + .order(persons::id.asc()) + .first::(conn.deref_mut()) + .optional() + .with_context(|| "lookup ignored person")? + { + return Ok(p); + } + } + // Drop the lock before delegating to create_person — that + // method takes its own lock. + match self.create_person( + ctx, + &CreatePersonReq { + name: "Ignored".to_string(), + notes: Some( + "Bucket for strangers, false detections, and faces \ + you don't want bound to a real person." + .to_string(), + ), + entity_id: None, + is_ignored: true, + }, + /*from_tag*/ false, + ) { + Ok(p) => Ok(p), + Err(e) if is_unique_violation(&e) => { + // Race: someone else created the row. Re-read. 
+ let mut conn = self.connection.lock().expect("face dao lock"); + persons::table + .filter(persons::is_ignored.eq(true)) + .order(persons::id.asc()) + .first::(conn.deref_mut()) + .with_context(|| "load ignored person after race") + } + Err(e) => Err(e), + } + } + + fn get_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "get_person", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + persons::table + .find(id) + .first::(conn.deref_mut()) + .optional() + .with_context(|| "get_person") + }) + } + + fn list_persons( + &mut self, + ctx: &opentelemetry::Context, + library_id: Option, + include_ignored: bool, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "list_persons", |_| { + // Two-step: load all persons, then a single grouped count + // query for face counts. Using a LEFT JOIN + GROUP BY in + // Diesel here gets noisy with the optional library filter; a + // second roundtrip is cheap and clearer. + let mut person_query = persons::table.into_boxed(); + if !include_ignored { + // Default — hide the IGNORE/junk bucket from the list. + // The frontend asks include_ignored=true explicitly when + // it needs to surface ignored persons (e.g. a "show + // ignored" toggle in the management UI). + person_query = person_query.filter(persons::is_ignored.eq(false)); + } + let person_rows: Vec = person_query + .order(persons::name.asc()) + .load::(conn.deref_mut()) + .with_context(|| "load persons")?; + + // Diesel's BoxedSelectStatement + group_by trips the trait + // resolver into recursion, so this aggregation goes through + // sql_query. The shape is small and the bind list is at most + // one parameter — readability isn't really worse than the DSL. 
+ let counts: Vec<(i32, i64)> = { + use diesel::sql_types::*; + #[derive(QueryableByName)] + struct PersonCountRow { + #[diesel(sql_type = Integer)] + person_id: i32, + #[diesel(sql_type = BigInt)] + count: i64, + } + let sql = if library_id.is_some() { + "SELECT person_id, COUNT(*) AS count FROM face_detections \ + WHERE status='detected' AND person_id IS NOT NULL AND library_id = ? \ + GROUP BY person_id" + } else { + "SELECT person_id, COUNT(*) AS count FROM face_detections \ + WHERE status='detected' AND person_id IS NOT NULL \ + GROUP BY person_id" + }; + let mut q = diesel::sql_query(sql).into_boxed(); + if let Some(lib) = library_id { + q = q.bind::(lib); + } + q.load::(conn.deref_mut()) + .with_context(|| "person face counts")? + .into_iter() + .map(|r| (r.person_id, r.count)) + .collect() + }; + use std::collections::HashMap; + let count_map: HashMap = counts.into_iter().collect(); + + Ok(person_rows + .into_iter() + .map(|p| { + let face_count = count_map.get(&p.id).copied().unwrap_or(0); + PersonSummary { + id: p.id, + name: p.name, + cover_face_id: p.cover_face_id, + entity_id: p.entity_id, + created_from_tag: p.created_from_tag, + notes: p.notes, + is_ignored: p.is_ignored, + face_count, + } + }) + .collect()) + }) + } + + fn update_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + patch: &UpdatePersonReq, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "update", "update_person", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + let now = Utc::now().timestamp(); + // Apply each patched column individually for the same + // reason as update_face — heterogeneous optional sets are + // painful in Diesel's type-driven update DSL. 
+ if let Some(name) = &patch.name { + diesel::update(persons::table.find(id)) + .set((persons::name.eq(name), persons::updated_at.eq(now))) + .execute(conn.deref_mut()) + .with_context(|| "update person name")?; + } + if let Some(notes) = &patch.notes { + diesel::update(persons::table.find(id)) + .set((persons::notes.eq(notes), persons::updated_at.eq(now))) + .execute(conn.deref_mut()) + .with_context(|| "update person notes")?; + } + if let Some(cover) = patch.cover_face_id { + diesel::update(persons::table.find(id)) + .set(( + persons::cover_face_id.eq(cover), + persons::updated_at.eq(now), + )) + .execute(conn.deref_mut()) + .with_context(|| "update person cover")?; + } + if let Some(eid) = patch.entity_id { + diesel::update(persons::table.find(id)) + .set((persons::entity_id.eq(eid), persons::updated_at.eq(now))) + .execute(conn.deref_mut()) + .with_context(|| "update person entity_id")?; + } + if let Some(flag) = patch.is_ignored { + diesel::update(persons::table.find(id)) + .set((persons::is_ignored.eq(flag), persons::updated_at.eq(now))) + .execute(conn.deref_mut()) + .with_context(|| "update person is_ignored")?; + } + persons::table + .find(id) + .first::(conn.deref_mut()) + .with_context(|| "fetch updated person") + }) + } + + fn delete_person( + &mut self, + ctx: &opentelemetry::Context, + id: i32, + cascade_delete_faces: bool, + ) -> anyhow::Result { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "delete", "delete_person", |span| { + span.set_attribute(KeyValue::new("id", id as i64)); + span.set_attribute(KeyValue::new("cascade", cascade_delete_faces)); + if cascade_delete_faces { + diesel::delete(face_detections::table.filter(face_detections::person_id.eq(id))) + .execute(conn.deref_mut()) + .with_context(|| "cascade delete faces for person")?; + } + // Always clear cover_face_id pointers that referenced this + // person's faces (otherwise the FK from persons.cover_face_id + // could hang). 
cover_face_id has no FK constraint in SQLite + // so this is documentation-only — the explicit nuke is on + // the face rows above. + let n = diesel::delete(persons::table.find(id)) + .execute(conn.deref_mut()) + .with_context(|| "delete person")?; + Ok(n > 0) + }) + } + + fn merge_persons( + &mut self, + ctx: &opentelemetry::Context, + src: i32, + into: i32, + ) -> anyhow::Result { + if src == into { + anyhow::bail!("cannot merge a person into itself"); + } + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "update", "merge_persons", |span| { + span.set_attribute(KeyValue::new("src", src as i64)); + span.set_attribute(KeyValue::new("into", into as i64)); + // Wrap in a transaction so a half-merged state can't survive + // a SQLite write error mid-operation. + conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| { + // Re-point face_detections. + diesel::update(face_detections::table.filter(face_detections::person_id.eq(src))) + .set(face_detections::person_id.eq(into)) + .execute(tx) + .with_context(|| "repoint faces on merge")?; + // Copy notes from src into target if the target is empty. 
+ let src_person: Person = persons::table + .find(src) + .first(tx) + .with_context(|| "load src person for merge")?; + let into_person: Person = persons::table + .find(into) + .first(tx) + .with_context(|| "load target person for merge")?; + if into_person.notes.as_deref().unwrap_or("").is_empty() + && src_person + .notes + .as_deref() + .map(|s| !s.is_empty()) + .unwrap_or(false) + { + diesel::update(persons::table.find(into)) + .set(persons::notes.eq(src_person.notes)) + .execute(tx) + .with_context(|| "copy notes on merge")?; + } + diesel::delete(persons::table.find(src)) + .execute(tx) + .with_context(|| "delete src person on merge")?; + persons::table + .find(into) + .first::(tx) + .with_context(|| "fetch merged person") + }) + }) + } + + fn resolve_content_hash( + &mut self, + ctx: &opentelemetry::Context, + library_id: i32, + rel_path: &str, + ) -> anyhow::Result> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "resolve_content_hash", |_| { + image_exif::table + .filter(image_exif::library_id.eq(library_id)) + .filter(image_exif::rel_path.eq(rel_path)) + .select(image_exif::content_hash) + .first::>(conn.deref_mut()) + .optional() + .map(|outer| outer.and_then(|inner| inner)) + .with_context(|| "resolve content_hash") + }) + } + + fn find_persons_by_names_ci( + &mut self, + ctx: &opentelemetry::Context, + names: &[String], + ) -> anyhow::Result> { + if names.is_empty() { + return Ok(std::collections::HashMap::new()); + } + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "find_persons_by_names_ci", |span| { + span.set_attribute(KeyValue::new("count", names.len() as i64)); + // Lowercase comparison both sides. Use sql_query to keep the + // bind list dynamic without fighting Diesel's type system on + // the LOWER() function. 
+ use diesel::sql_types::*; + let placeholders = std::iter::repeat_n("?", names.len()) + .collect::>() + .join(","); + // Filter out is_ignored persons so the auto-bind path can + // never target the IGNORE/junk bucket — even if a tag name + // happens to match it (e.g. someone tags photos as "Ignored" + // by hand). Ignore-bucket assignment is an explicit operator + // action through the dedicated endpoint, never a heuristic. + let sql = format!( + "SELECT id, LOWER(name) AS lower_name FROM persons \ + WHERE is_ignored = 0 AND LOWER(name) IN ({}) \ + ORDER BY id ASC", + placeholders + ); + #[derive(QueryableByName)] + struct Row { + #[diesel(sql_type = Integer)] + id: i32, + #[diesel(sql_type = Text)] + lower_name: String, + } + let mut q = diesel::sql_query(sql).into_boxed(); + for n in names { + q = q.bind::(n.to_lowercase()); + } + let rows = q + .load::(conn.deref_mut()) + .with_context(|| "find_persons_by_names_ci")?; + // Lowest id wins on collision (UNIQUE COLLATE NOCASE on the + // table prevents that today, but the deduplication is a + // defensive belt-and-braces). + let mut out = std::collections::HashMap::with_capacity(rows.len()); + for r in rows { + out.entry(r.lower_name).or_insert(r.id); + } + Ok(out) + }) + } + + fn person_reference_embedding( + &mut self, + ctx: &opentelemetry::Context, + person_id: i32, + model_version: &str, + ) -> anyhow::Result>> { + let mut conn = self.connection.lock().expect("face dao lock"); + trace_db_call(ctx, "query", "person_reference_embedding", |span| { + span.set_attribute(KeyValue::new("person_id", person_id as i64)); + span.set_attribute(KeyValue::new("model_version", model_version.to_string())); + // Pull only the embedding bytes; we average them in Rust. A + // SQL aggregate over 512-d vectors isn't meaningfully faster + // and would tie us to a specific embedding length. 
+            let blobs: Vec<Option<Vec<u8>>> = face_detections::table
+                .filter(face_detections::person_id.eq(person_id))
+                .filter(face_detections::status.eq("detected"))
+                .filter(face_detections::model_version.eq(model_version))
+                .select(face_detections::embedding)
+                .load(conn.deref_mut())
+                .with_context(|| "load person embeddings")?;
+            let vectors: Vec<Vec<f32>> = blobs
+                .into_iter()
+                .filter_map(|b| b.and_then(|bytes| decode_embedding_bytes(&bytes)))
+                .collect();
+            if vectors.is_empty() {
+                return Ok(None);
+            }
+            Ok(Some(mean_normalized(&vectors)))
+        })
+    }
+
+    fn assign_face_to_person(
+        &mut self,
+        ctx: &opentelemetry::Context,
+        face_id: i32,
+        person_id: i32,
+    ) -> anyhow::Result<()> {
+        let mut conn = self.connection.lock().expect("face dao lock");
+        trace_db_call(ctx, "update", "assign_face_to_person", |span| {
+            span.set_attribute(KeyValue::new("face_id", face_id as i64));
+            span.set_attribute(KeyValue::new("person_id", person_id as i64));
+            conn.deref_mut().transaction::<_, anyhow::Error, _>(|tx| {
+                diesel::update(face_detections::table.find(face_id))
+                    .set(face_detections::person_id.eq(person_id))
+                    .execute(tx)
+                    .with_context(|| "set face person_id")?;
+                // If this person has no cover yet, claim this face.
+                // Don't overwrite an existing cover — the user may have
+                // hand-picked one in the UI.
+                let cover: Option<i32> = persons::table
+                    .find(person_id)
+                    .select(persons::cover_face_id)
+                    .first::<Option<i32>>(tx)
+                    .with_context(|| "load person cover")?;
+                if cover.is_none() {
+                    diesel::update(persons::table.find(person_id))
+                        .set(persons::cover_face_id.eq(face_id))
+                        .execute(tx)
+                        .with_context(|| "set cover_face_id")?;
+                }
+                Ok(())
+            })
+        })
+    }
+}
+
+// ── Embedding helpers ───────────────────────────────────────────────────────
+
+/// Decode a 2048-byte little-endian f32 BLOB into a Vec<f32> of length 512.
+/// Returns None on malformed input rather than erroring — the caller treats
+/// "no usable embedding" the same as "no embedding at all" (skip averaging).
+pub(crate) fn decode_embedding_bytes(bytes: &[u8]) -> Option<Vec<f32>> {
+    if bytes.len() != 2048 {
+        return None;
+    }
+    let mut out = Vec::with_capacity(512);
+    for chunk in bytes.chunks_exact(4) {
+        out.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
+    }
+    Some(out)
+}
+
+/// Mean of L2-normalized vectors, then re-normalize. ArcFace embeddings
+/// from insightface are already L2-normalized, so re-normalizing the
+/// average is a one-step "average direction" operation.
+fn mean_normalized(vectors: &[Vec<f32>]) -> Vec<f32> {
+    debug_assert!(
+        !vectors.is_empty(),
+        "mean_normalized requires non-empty input"
+    );
+    let dim = vectors[0].len();
+    let mut acc = vec![0.0f32; dim];
+    for v in vectors {
+        debug_assert_eq!(v.len(), dim, "mismatched embedding dim");
+        for (i, x) in v.iter().enumerate() {
+            acc[i] += *x;
+        }
+    }
+    let n = vectors.len() as f32;
+    for x in &mut acc {
+        *x /= n;
+    }
+    let norm = acc.iter().map(|x| x * x).sum::<f32>().sqrt();
+    if norm > 0.0 {
+        for x in &mut acc {
+            *x /= norm;
+        }
+    }
+    acc
+}
+
+/// Cosine similarity of two embeddings. Both must be the same length;
+/// neither needs to be pre-normalized. Returns 0.0 on length mismatch
+/// or zero-magnitude input rather than NaN — the auto-bind path
+/// interprets 0.0 as "no useful similarity, leave unassigned".
+pub(crate) fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + if a.len() != b.len() || a.is_empty() { + return 0.0; + } + let mut dot = 0.0f32; + let mut na = 0.0f32; + let mut nb = 0.0f32; + for (x, y) in a.iter().zip(b.iter()) { + dot += x * y; + na += x * x; + nb += y * y; + } + let denom = na.sqrt() * nb.sqrt(); + if denom <= 0.0 { 0.0 } else { dot / denom } +} + +// ── Handlers ──────────────────────────────────────────────────────────────── + +pub fn add_face_services(app: App) -> App +where + T: ServiceFactory, +{ + app.service(web::resource("/faces/stats").route(web::get().to(stats_handler::))) + .service(web::resource("/faces/embeddings").route(web::get().to(embeddings_handler::))) + .service( + web::resource("/image/faces") + .route(web::get().to(list_faces_handler::)) + .route(web::post().to(create_face_handler::)), + ) + .service( + web::resource("/image/faces/{id}") + .route(web::patch().to(update_face_handler::)) + .route(web::delete().to(delete_face_handler::)), + ) + .service( + web::resource("/persons") + .route(web::get().to(list_persons_handler::)) + .route(web::post().to(create_person_handler::)), + ) + .service( + web::resource("/persons/bootstrap") + .route(web::post().to(bootstrap_persons_handler::)), + ) + .service( + web::resource("/persons/ignore-bucket") + .route(web::post().to(ignore_bucket_handler::)), + ) + .service( + web::resource("/tags/people-bootstrap-candidates") + .route(web::get().to(bootstrap_candidates_handler::)), + ) + .service( + web::resource("/persons/{id}") + .route(web::get().to(get_person_handler::)) + .route(web::patch().to(update_person_handler::)) + .route(web::delete().to(delete_person_handler::)), + ) + .service( + web::resource("/persons/{id}/merge").route(web::post().to(merge_persons_handler::)), + ) + .service( + web::resource("/persons/{id}/faces").route(web::get().to(person_faces_handler::)), + ) +} + +// ── Bootstrap (Phase 4) ───────────────────────────────────────────────────── + +#[derive(Serialize, 
Debug, Clone)] +pub struct BootstrapCandidate { + /// Display name — most-frequent capitalization across the case-insensitive + /// group, or simply the first one seen if it's a tie. + pub name: String, + /// Lowercased name; the stable key for grouping and the auto-bind path. + pub normalized_name: String, + /// Sum of `tagged_photo` counts across all capitalizations of this name. + pub usage_count: i64, + /// Heuristic suggestion; the UI defaults this to checked but the user + /// confirms before [`bootstrap_persons_handler`] actually creates rows. + pub looks_like_person: bool, + /// True when a `persons` row already exists for this name (any case). + /// The UI hides these — re-running bootstrap is idempotent so it's fine + /// either way, but the noise isn't worth showing. + pub already_exists: bool, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapCandidatesResponse { + pub candidates: Vec, +} + +#[derive(Deserialize, Debug)] +pub struct BootstrapPersonsReq { + pub names: Vec, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapPersonsResponse { + pub created: Vec, + pub skipped: Vec, +} + +#[derive(Serialize, Debug)] +pub struct BootstrapSkipped { + pub name: String, + pub reason: String, +} + +/// Hard filter for the bootstrap candidate list. Returns true if the tag +/// could plausibly be a person name; returns false to drop it from the +/// candidates entirely (not just leave looks_like_person=false). +/// +/// Rules — all required: +/// - At least 3 characters after trimming. Two-letter tags ("AB", "OK") +/// are almost always abbreviations or markers, not names. +/// - No emoji or symbol-class characters. SQL-side string sort already +/// surfaces those at the top of the tag list; filtering them keeps +/// the candidate UI focused on names rather than chart-junk. +/// - No control characters or null bytes. 
+pub(crate) fn is_plausible_name_token(raw: &str) -> bool { + let trimmed = raw.trim(); + if trimmed.chars().count() < 3 { + return false; + } + for c in trimmed.chars() { + // Letter / mark / decimal-digit / connector-punctuation / + // dash / apostrophe / period / whitespace are all plausible in a + // name. Anything else (emoji, symbols, math operators, arrows, + // box drawing, control codes) disqualifies the whole tag. + if c.is_alphabetic() + || c.is_whitespace() + || matches!(c, '\'' | '-' | '.' | '_' | '\u{2019}') + { + continue; + } + if c.is_ascii_digit() { + // Digits don't disqualify here — `looks_like_person` rejects + // them later, but `is_plausible_name_token` is just about + // "could this be in the candidate list at all?". A tag like + // "Sarah2" stays as a candidate (display-flagged not-a-person + // by looks_like_person) so the operator can still spot and + // confirm it manually if it's an alias. + continue; + } + return false; + } + true +} + +/// Conservative "this tag *might* be a person name" heuristic. False +/// negatives are fine — the operator confirms in the UI before any row +/// is created. False positives are also fine for the same reason; the +/// goal is just to default sensible candidates to checked. +/// +/// Rules: +/// - 1–2 whitespace-separated words +/// - Each word starts with an uppercase character +/// - No digits anywhere (rejects "Trip 2018", "2024", etc.) +/// - Single-word names not on a small denylist of common non-person +/// tags (cat, christmas, beach, ...). Two-word names skip the +/// denylist because a real two-word person name is the dominant +/// case ("Sarah Smith") and false-blocking it is worse than false- +/// accepting "Sunset Walk". 
+pub(crate) fn looks_like_person(raw: &str) -> bool { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return false; + } + let words: Vec<&str> = trimmed.split_whitespace().collect(); + if !(1..=2).contains(&words.len()) { + return false; + } + for w in &words { + let Some(first) = w.chars().next() else { + return false; + }; + if !first.is_uppercase() { + return false; + } + if w.chars().any(|c| c.is_ascii_digit()) { + return false; + } + } + if words.len() == 1 { + const DENY: &[&str] = &[ + // Pets / animals + "cat", + "dog", + "kitten", + "puppy", + "bird", + "fish", + "pet", + "pets", + // Events / occasions + "birthday", + "christmas", + "halloween", + "easter", + "thanksgiving", + "wedding", + "anniversary", + "vacation", + "holiday", + "party", + "trip", + "graduation", + "concert", + // Places (generic) + "home", + "work", + "beach", + "park", + "hotel", + "restaurant", + "office", + "house", + "garden", + // Subjects / styles + "food", + "sunset", + "sunrise", + "landscape", + "portrait", + "selfie", + "nature", + "flowers", + "flower", + "snow", + "rain", + "sky", + // Buckets + "untagged", + "favorites", + "favourites", + "misc", + "other", + "random", + ]; + let lower = trimmed.to_lowercase(); + if DENY.iter().any(|w| *w == lower) { + return false; + } + } + true +} + +async fn bootstrap_candidates_handler( + _: Claims, + request: HttpRequest, + face_dao: web::Data>, + tag_dao: web::Data>, +) -> impl Responder { + use std::collections::HashMap; + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.bootstrap_candidates", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + // All tags + their counts. Path filter unused — bootstrap is library-wide. 
+ let tags_with_counts = { + let mut td = tag_dao.lock().expect("tag dao lock"); + match crate::tags::TagDao::get_all_tags(&mut *td, &span_context, None) { + Ok(t) => t, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + } + }; + + // Group by lowercase name. Pick the most-frequent capitalization + // for the display name (ties broken by first-seen). Filter out + // short tags and tags carrying non-name characters (emojis, symbols) + // before grouping — they're noise no operator would tick, so showing + // them just makes the candidate list harder to scan. + struct Group { + display: String, + display_freq: i64, + total_count: i64, + } + let mut groups: HashMap = HashMap::new(); + for (count, tag) in tags_with_counts { + if !is_plausible_name_token(&tag.name) { + continue; + } + let lower = tag.name.to_lowercase(); + let g = groups.entry(lower).or_insert_with(|| Group { + display: tag.name.clone(), + display_freq: 0, + total_count: 0, + }); + g.total_count += count; + if count > g.display_freq { + g.display = tag.name.clone(); + g.display_freq = count; + } + } + + // Cross-reference against existing persons (bulk one-query lookup). + let lower_names: Vec = groups.keys().cloned().collect(); + let existing = { + let mut fd = face_dao.lock().expect("face dao lock"); + match fd.find_persons_by_names_ci(&span_context, &lower_names) { + Ok(m) => m, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + } + }; + + let mut candidates: Vec = groups + .into_iter() + .map(|(lower, g)| BootstrapCandidate { + looks_like_person: looks_like_person(&g.display), + already_exists: existing.contains_key(&lower), + name: g.display, + normalized_name: lower, + usage_count: g.total_count, + }) + .collect(); + // Sort: persons-first heuristic by descending count, then alphabetical. + // Persons-likely candidates surface near the top so the user doesn't + // scroll past dozens of "vacation"-style tags to find them. 
+ candidates.sort_by(|a, b| { + b.looks_like_person + .cmp(&a.looks_like_person) + .then(b.usage_count.cmp(&a.usage_count)) + .then(a.normalized_name.cmp(&b.normalized_name)) + }); + + HttpResponse::Ok().json(BootstrapCandidatesResponse { candidates }) +} + +async fn bootstrap_persons_handler( + _: Claims, + request: HttpRequest, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.bootstrap_persons", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let mut created: Vec = Vec::new(); + let mut skipped: Vec = Vec::new(); + + let mut dao = face_dao.lock().expect("face dao lock"); + + // Pre-fetch the existing-name set so a duplicate request reports + // "already exists" (skipped) rather than firing N inserts that all + // 409 against the UNIQUE COLLATE NOCASE constraint. + let lower_names: Vec = body.names.iter().map(|n| n.to_lowercase()).collect(); + let existing = match dao.find_persons_by_names_ci(&span_context, &lower_names) { + Ok(m) => m, + Err(e) => return HttpResponse::InternalServerError().body(format!("{:#}", e)), + }; + + for name in &body.names { + let trimmed = name.trim(); + if trimmed.is_empty() { + skipped.push(BootstrapSkipped { + name: name.clone(), + reason: "empty name".into(), + }); + continue; + } + let lower = trimmed.to_lowercase(); + if existing.contains_key(&lower) { + skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: "person already exists".into(), + }); + continue; + } + match dao.create_person( + &span_context, + &CreatePersonReq { + name: trimmed.to_string(), + notes: None, + entity_id: None, + is_ignored: false, + }, + /*from_tag*/ true, + ) { + Ok(p) => created.push(p), + Err(e) => { + if is_unique_violation(&e) { + // Race with a concurrent create; treat as skipped. 
+ skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: "person already exists".into(), + }); + } else { + skipped.push(BootstrapSkipped { + name: trimmed.to_string(), + reason: format!("{:#}", e), + }); + } + } + } + } + + HttpResponse::Ok().json(BootstrapPersonsResponse { created, skipped }) +} + +// ── Stats / list ──────────────────────────────────────────────────────────── + +#[derive(Deserialize)] +pub struct LibraryQuery { + pub library: Option, +} + +/// `GET /persons` query: optional library scope, optional include of +/// the IGNORE/junk bucket. The bucket is hidden by default so the +/// management UI shows only "real" persons; the persons-management +/// screen requests it explicitly when it needs to surface ignored. +#[derive(Deserialize)] +pub struct ListPersonsQuery { + pub library: Option, + #[serde(default)] + pub include_ignored: bool, +} + +async fn stats_handler( + _: Claims, + request: HttpRequest, + app_state: web::Data, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.stats", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .map(|l| l.id); + let mut dao = face_dao.lock().expect("face dao lock"); + dao.stats(&span_context, library_id) + .map(|s| { + span_context.span().set_status(Status::Ok); + HttpResponse::Ok().json(s) + }) + .into_http_internal_err() +} + +async fn list_faces_handler( + _: Claims, + request: HttpRequest, + query: web::Query, + app_state: web::Data, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.list", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let normalized_path = 
normalize_path(&query.path); + // resolve_library_param returns Option<&Library>; clone so the result + // is owned (matching the primary_library fallback's type). + let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .cloned() + .unwrap_or_else(|| app_state.primary_library().clone()); + + let mut dao = face_dao.lock().expect("face dao lock"); + let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) { + Ok(Some(h)) => h, + Ok(None) => { + // Photo not yet hashed — empty face list is a graceful answer. + // The carousel falls back to "no overlay" which is fine until + // the watcher catches up. + return HttpResponse::Ok().json(Vec::::new()); + } + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + }; + match dao.list_for_content_hash(&span_context, &hash) { + Ok(faces) => HttpResponse::Ok().json(faces), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn embeddings_handler( + _: Claims, + request: HttpRequest, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.embeddings", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let limit = query.limit.clamp(1, 5_000); + let offset = query.offset.max(0); + let mut dao = face_dao.lock().expect("face dao lock"); + dao.list_embeddings( + &span_context, + query.library, + query.unassigned, + limit, + offset, + ) + .map(|rows| { + let out: Vec = rows + .into_iter() + .map(|(r, b64)| FaceEmbeddingRow { + id: r.id, + library_id: r.library_id, + rel_path: r.rel_path, + content_hash: r.content_hash, + person_id: r.person_id, + model_version: r.model_version, + embedding: b64, + bbox_x: r.bbox_x, + bbox_y: r.bbox_y, + bbox_w: r.bbox_w, + bbox_h: r.bbox_h, + }) + .collect(); + HttpResponse::Ok().json(out) + }) + 
.into_http_internal_err() +} + +// ── Manual face create / update / delete ──────────────────────────────────── + +async fn create_face_handler( + _: Claims, + request: HttpRequest, + body: web::Json, + app_state: web::Data, + face_client: web::Data, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.create_manual", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + // The force path doesn't need Apollo at all (no embed call); the + // strict path does. Surface the disabled state only when we'd + // actually use the client. + if !body.force && !face_client.is_enabled() { + return HttpResponse::ServiceUnavailable().body("face client disabled"); + } + + let normalized_path = normalize_path(&body.path); + let library: Library = match libraries::resolve_library_param( + &app_state, + body.library.as_ref().map(|i| i.to_string()).as_deref(), + ) { + Ok(Some(lib)) => lib.clone(), + _ => app_state.primary_library().clone(), + }; + + // 1. Resolve content_hash for the photo. + let hash = { + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.resolve_content_hash(&span_context, library.id, &normalized_path) { + Ok(Some(h)) => h, + Ok(None) => { + return HttpResponse::Conflict() + .body("photo not yet hashed; wait for next watcher pass"); + } + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + } + }; + + // 2 + 3. Crop + embed via Apollo (strict path), or skip both (force). + // + // Force is the "tag a face the detector can't see" path — back of + // head, heavily-occluded profile, etc. We store a zero-vector + // embedding under a sentinel model_version so the row participates + // only as a browse-by-person tag: clustering filters norm<=0 (see + // face_clustering._decode_b64_embedding) and auto-bind cosine + // resolves to 0 / NaN, never crossing the threshold. 
Cluster + // suggester also groups by model_version so this sentinel never + // mixes with real buffalo_l rows. + let (embedding_bytes, model_version, confidence) = if body.force { + info!( + "manual face (force): skipping detection for {:?} bbox=({},{},{},{})", + normalized_path, body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h + ); + (vec![0u8; 2048], "manual_no_embed".to_string(), 0.0_f32) + } else { + let abs_path = library.resolve(&normalized_path); + let crop_bytes = match crop_image_to_bbox( + &abs_path, + body.bbox.x, + body.bbox.y, + body.bbox.w, + body.bbox.h, + ) { + Ok(b) => b, + Err(e) => { + warn!("crop_image_to_bbox failed for {:?}: {:?}", abs_path, e); + return HttpResponse::BadRequest().body(format!("cannot crop photo: {}", e)); + } + }; + + let meta = DetectMeta { + content_hash: hash.clone(), + library_id: library.id, + rel_path: normalized_path.clone(), + orientation: None, + model_version: None, + }; + let detect = match face_client.embed(crop_bytes, meta).await { + Ok(r) => r, + Err(FaceDetectError::Permanent(e)) => { + return HttpResponse::UnprocessableEntity().body(format!("{}", e)); + } + Err(FaceDetectError::Transient(e)) => { + return HttpResponse::ServiceUnavailable().body(format!("{}", e)); + } + Err(FaceDetectError::Disabled) => { + return HttpResponse::ServiceUnavailable().body("face client disabled"); + } + }; + + let detected = match detect.faces.first() { + Some(f) => f.clone(), + None => { + // Apollo would have returned 422 on no_face_in_crop; defensive. + return HttpResponse::UnprocessableEntity().body("no face in crop"); + } + }; + let bytes = match detected.decode_embedding() { + Ok(b) => b, + Err(e) => { + warn!("manual face: decode embedding failed: {:?}", e); + return HttpResponse::BadGateway().body("invalid embedding from face service"); + } + }; + (bytes, detect.model_version, detected.confidence) + }; + + // 4. 
Insert the manual row using the bbox the user drew (NOT the + // detector's tighter box around their drawing — they get what they + // asked for; cluster matching uses the embedding which is from the + // detector's true box anyway). + let mut dao = face_dao.lock().expect("face dao lock"); + let row = match dao.store_detection( + &span_context, + InsertFaceDetectionInput { + library_id: library.id, + content_hash: hash, + rel_path: normalized_path, + bbox: Some((body.bbox.x, body.bbox.y, body.bbox.w, body.bbox.h)), + embedding: Some(embedding_bytes), + confidence: Some(confidence), + source: "manual".to_string(), + person_id: body.person_id, + status: "detected".to_string(), + model_version, + }, + ) { + Ok(r) => r, + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + }; + info!( + "Created manual face id={} library={} hash={} person_id={:?}", + row.id, row.library_id, row.content_hash, row.person_id + ); + match hydrate_face_with_person(&mut *dao, &span_context, row) { + Ok(joined) => HttpResponse::Created().json(joined), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn update_face_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + body: web::Json, + app_state: web::Data, + face_client: web::Data, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.update", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let id = path.into_inner(); + + let person_patch: Option> = if body.clear_person { + Some(None) + } else { + body.person_id.map(Some) + }; + let bbox_patch = body.bbox.as_ref().map(|b| (b.x, b.y, b.w, b.h)); + + // Bbox change → re-embed. The embedding is what auto-bind and the + // cluster suggester key on, so leaving it stale would silently + // corrupt every downstream similarity match. 
We crop the new bbox, + // pass it through face_client.embed, and store the fresh vector. + // Net cost: one Apollo round-trip per bbox edit (~100-500ms on + // CPU); acceptable for a manual operator action. + let mut new_embedding: Option> = None; + if let Some((bx, by, bw, bh)) = bbox_patch { + if !face_client.is_enabled() { + return HttpResponse::ServiceUnavailable() + .body("face client disabled — bbox edit requires Apollo"); + } + // Look up the current row so we know which photo to crop. + let current = { + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.get_face(&span_context, id) { + Ok(Some(r)) => r, + Ok(None) => return HttpResponse::NotFound().finish(), + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + } + }; + let library = match app_state.library_by_id(current.library_id) { + Some(l) => l.clone(), + None => { + return HttpResponse::InternalServerError().body(format!( + "face row references unknown library_id {}", + current.library_id + )); + } + }; + let abs_path = library.resolve(¤t.rel_path); + let crop_bytes = match crop_image_to_bbox(&abs_path, bx, by, bw, bh) { + Ok(b) => b, + Err(e) => { + warn!( + "PATCH /image/faces/{}: crop failed for {:?}: {:?}", + id, abs_path, e + ); + return HttpResponse::BadRequest() + .body(format!("cannot crop new bbox: {}", e)); + } + }; + let meta = DetectMeta { + content_hash: current.content_hash.clone(), + library_id: current.library_id, + rel_path: current.rel_path.clone(), + orientation: None, + model_version: Some(current.model_version.clone()), + }; + // Soft contract on the re-embed: we'd LIKE a fresh ArcFace + // vector for the new crop, but the operator's bbox edit is + // sacred. If detection finds no face in the new region (they + // dragged the box slightly off-center, or moved it to a back- + // of-head shot they've already manually tagged), or returns a + // bad embedding, we keep the old embedding and apply the bbox + // anyway. 
Cost: stale embedding for that row, which slightly + // pollutes clustering for files re-detected against this + // person — accepted because dropping the user's drag is a + // worse UX. Transient failures (cuda_oom, engine unavailable) + // still 503 so the operator can retry once Apollo recovers. + match face_client.embed(crop_bytes, meta).await { + Ok(resp) => { + if let Some(face) = resp.faces.first() { + match face.decode_embedding() { + Ok(b) => new_embedding = Some(b), + Err(e) => { + warn!( + "PATCH /image/faces/{}: bad embedding from face service ({:?}); keeping old embedding, bbox still applied", + id, e + ); + } + } + } else { + info!( + "PATCH /image/faces/{}: no face detected in new bbox — keeping old embedding, bbox still applied", + id + ); + } + } + Err(FaceDetectError::Permanent(e)) => { + info!( + "PATCH /image/faces/{}: embed permanent error ({}); keeping old embedding, bbox still applied", + id, e + ); + } + Err(FaceDetectError::Transient(e)) => { + return HttpResponse::ServiceUnavailable().body(format!("{}", e)); + } + Err(FaceDetectError::Disabled) => { + return HttpResponse::ServiceUnavailable() + .body("face client disabled mid-flight"); + } + } + } + + let mut dao = face_dao.lock().expect("face dao lock"); + let row = match dao.update_face(&span_context, id, person_patch, bbox_patch, new_embedding) { + Ok(r) => r, + Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), + }; + // Hydrate person_name so the response shape matches GET /image/faces + // — the carousel overlay does an optimistic replace on this row, and + // a bare FaceDetectionRow with no person_name would visibly drop the + // VFD label off the bbox even though the assignment didn't change. 
+ match hydrate_face_with_person(&mut *dao, &span_context, row) { + Ok(joined) => HttpResponse::Ok().json(joined), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn delete_face_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("faces.delete", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.delete_face(&span_context, path.into_inner()) { + Ok(true) => HttpResponse::NoContent().finish(), + Ok(false) => HttpResponse::NotFound().finish(), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +// ── Persons ───────────────────────────────────────────────────────────────── + +async fn list_persons_handler( + _: Claims, + request: HttpRequest, + app_state: web::Data, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.list", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .map(|l| l.id); + let mut dao = face_dao.lock().expect("face dao lock"); + dao.list_persons(&span_context, library_id, query.include_ignored) + .map(|p| HttpResponse::Ok().json(p)) + .into_http_internal_err() +} + +async fn ignore_bucket_handler( + _: Claims, + request: HttpRequest, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.ignore_bucket", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let mut dao = face_dao.lock().expect("face dao lock"); + 
dao.get_or_create_ignored_person(&span_context) + .map(|p| HttpResponse::Ok().json(p)) + .into_http_internal_err() +} + +async fn create_person_handler( + _: Claims, + request: HttpRequest, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.create", &context); + let span_context = opentelemetry::Context::current_with_span(span); + if body.name.trim().is_empty() { + return HttpResponse::BadRequest().body("name required"); + } + + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.create_person(&span_context, &body, /*from_tag*/ false) { + Ok(p) => HttpResponse::Created().json(p), + Err(e) => { + // SQLite UNIQUE(name COLLATE NOCASE) → 409 Conflict so the UI + // can show "name already exists" without parsing. Use {:#} to + // include the source chain — anyhow's plain Display only shows + // the outermost context ("insert person ...") which hides the + // diesel "UNIQUE constraint failed" we're keying on. 
+ if is_unique_violation(&e) { + HttpResponse::Conflict().body("person name already exists") + } else { + HttpResponse::InternalServerError().body(format!("{:#}", e)) + } + } + } +} + +async fn get_person_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.get", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.get_person(&span_context, path.into_inner()) { + Ok(Some(p)) => HttpResponse::Ok().json(p), + Ok(None) => HttpResponse::NotFound().finish(), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn update_person_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.update", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.update_person(&span_context, path.into_inner(), &body) { + Ok(p) => HttpResponse::Ok().json(p), + Err(e) => { + if is_unique_violation(&e) { + HttpResponse::Conflict().body("person name already exists") + } else { + HttpResponse::InternalServerError().body(format!("{:#}", e)) + } + } + } +} + +async fn delete_person_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.delete", &context); + let span_context = opentelemetry::Context::current_with_span(span); + // Default cascade=set_null — don't destroy face history just because + // the user renamed/removed the 
identity. + let cascade = matches!(query.cascade.as_deref(), Some("delete")); + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.delete_person(&span_context, path.into_inner(), cascade) { + Ok(true) => HttpResponse::NoContent().finish(), + Ok(false) => HttpResponse::NotFound().finish(), + Err(e) => HttpResponse::InternalServerError().body(e.to_string()), + } +} + +async fn merge_persons_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + body: web::Json, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.merge", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let src = path.into_inner(); + let mut dao = face_dao.lock().expect("face dao lock"); + match dao.merge_persons(&span_context, src, body.into) { + Ok(p) => HttpResponse::Ok().json(p), + Err(e) => { + let msg = format!("{:#}", e); + if msg.contains("itself") { + HttpResponse::BadRequest().body(msg) + } else { + HttpResponse::InternalServerError().body(msg) + } + } + } +} + +async fn person_faces_handler( + _: Claims, + request: HttpRequest, + path: web::Path, + app_state: web::Data, + query: web::Query, + face_dao: web::Data>, +) -> impl Responder { + let context = extract_context_from_request(&request); + let span = global_tracer().start_with_context("persons.faces", &context); + let span_context = opentelemetry::Context::current_with_span(span); + let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref()) + .ok() + .flatten() + .map(|l| l.id); + let mut dao = face_dao.lock().expect("face dao lock"); + dao.list_for_person(&span_context, path.into_inner(), library_id) + .map(|faces| HttpResponse::Ok().json(faces)) + .into_http_internal_err() +} + +// ── Helpers ───────────────────────────────────────────────────────────────── + +/// Crop `abs_path` to the normalized bbox and re-encode as JPEG for the +/// 
face service. `image::open` decodes most photo formats Apollo will see; +/// HEIC/RAW are out of scope for the manual flow (the user can't draw a +/// face on a thumbnail of a non-decodable file anyway). +fn crop_image_to_bbox( + abs_path: &std::path::Path, + nx: f32, + ny: f32, + nw: f32, + nh: f32, +) -> anyhow::Result> { + if !(0.0..=1.0).contains(&nx) || !(0.0..=1.0).contains(&ny) { + return Err(anyhow!("bbox xy out of [0,1]")); + } + if nw <= 0.0 || nh <= 0.0 || nx + nw > 1.001 || ny + nh > 1.001 { + return Err(anyhow!("bbox wh out of bounds or zero")); + } + let raw = image::open(abs_path).with_context(|| format!("open {:?}", abs_path))?; + // EXIF rotation: the bbox arrives in display space (the carousel / + // overlay are rendered post-rotation by the browser), but the + // `image` crate hands us raw pre-rotation pixels. For any phone + // photo with Orientation 6/8/etc., applying the bbox without + // rotating first lands the crop on a completely different region + // of the image — which is why manually-drawn bboxes basically + // never resolved a face on re-detection. Apply the orientation + // first, then index into the canonical-oriented dims. Photos with + // no EXIF rotation tag pay nothing (apply_orientation is a no-op). + let orientation = exif::read_orientation(abs_path).unwrap_or(1); + let img = exif::apply_orientation(raw, orientation); + let (w, h) = img.dimensions(); + let px = (nx * w as f32).round().clamp(0.0, w as f32 - 1.0) as u32; + let py = (ny * h as f32).round().clamp(0.0, h as f32 - 1.0) as u32; + let pw = ((nw * w as f32).round() as u32).min(w.saturating_sub(px)); + let ph = ((nh * h as f32).round() as u32).min(h.saturating_sub(py)); + if pw == 0 || ph == 0 { + return Err(anyhow!("crop produced zero-dim image")); + } + // Generous padding so RetinaFace has anchor-friendly context. + // Insightface internally resizes to det_size=640 (square). 
A + // tightly-drawn 200×250 face bbox + 10 % padding becomes ~240×300, + // which after resize fills ~95 % of the input — near the upper + // edge of RetinaFace's anchor scales, where it routinely returns + // zero detections. Padding to 50 % on each side makes the crop + // 2× the bbox dims (face occupies ~50 % of the input), where + // anchors hit cleanly. Bbox is clamped to image bounds, so + // edge-of-image bboxes just get less padding on the clipped side. + let pad_x = (pw / 2).max(1); + let pad_y = (ph / 2).max(1); + let cx = px.saturating_sub(pad_x); + let cy = py.saturating_sub(pad_y); + let cw = (pw + 2 * pad_x).min(w - cx); + let ch = (ph + 2 * pad_y).min(h - cy); + let cropped = img.crop_imm(cx, cy, cw, ch); + let mut out = std::io::Cursor::new(Vec::new()); + cropped + .write_to(&mut out, image::ImageFormat::Jpeg) + .with_context(|| "encode crop as JPEG")?; + Ok(out.into_inner()) +} + +/// Returns true if `err` (or anything in its source chain) is a SQLite +/// `UNIQUE constraint failed`. Walks the chain so callers don't have to +/// know the wrapping order — anyhow `with_context` plus diesel's own +/// error layering buries the database error two levels deep. +/// +/// String matching on `format!("{:#}", e)` would also work but is +/// fragile (locale-dependent SQLite messages, false positives like +/// "uniquely identifies"). Downcasting to the actual diesel kind is +/// the contract-stable check. 
+fn is_unique_violation(err: &anyhow::Error) -> bool { + use diesel::result::{DatabaseErrorKind, Error as DieselError}; + err.chain().any(|cause| { + cause.downcast_ref::().is_some_and(|de| { + matches!( + de, + DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, _) + ) + }) + }) +} + +// ── Tests ─────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::database::test::in_memory_db_connection; + + fn fresh_dao() -> SqliteFaceDao { + SqliteFaceDao::from_connection(Arc::new(Mutex::new(in_memory_db_connection()))) + } + + fn ctx() -> opentelemetry::Context { + opentelemetry::Context::current() + } + + #[test] + fn is_unique_violation_walks_chain() { + // The bug we hit in manual testing: anyhow's plain Display only + // shows the outermost context ("insert person Cameron"), so a + // naive `format!("{}", e).contains("unique")` check misses the + // diesel UNIQUE error nested below. Downcasting the source chain + // is the stable contract. + let mut dao = fresh_dao(); + let _ = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Cameron".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .expect("first insert"); + let dup_err = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Cameron".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .expect_err("second insert must fail"); + + // Plain Display hides the UNIQUE — that's the bug we're guarding + // against. We don't assert a specific outer message; we just + // confirm string-matching at the top level is unreliable. + let plain = format!("{}", dup_err); + assert!( + !plain.to_lowercase().contains("unique"), + "if Display starts surfacing UNIQUE we can drop the helper, but \ + today it doesn't and the handler must downcast" + ); + + // Alt-Display walks the chain — useful for debug body content too. 
+ let chained = format!("{:#}", dup_err); + assert!( + chained.to_uppercase().contains("UNIQUE"), + "chained display must surface the diesel error: {chained}" + ); + + // The contract-stable check the handler actually uses. + assert!( + is_unique_violation(&dup_err), + "is_unique_violation must downcast into the diesel chain" + ); + } + + // ── Phase 4: bootstrap heuristic + cosine + DAO support ───────────── + + #[test] + fn is_plausible_name_token_filters_short_and_emoji() { + // Hard filter applied before grouping — emojis and tags shorter + // than 3 chars never make it into the candidate list, regardless + // of looks_like_person's later assessment. + assert!(is_plausible_name_token("Cameron")); + assert!(is_plausible_name_token("Sarah Smith")); + assert!(is_plausible_name_token("O'Brien")); + assert!(is_plausible_name_token("Jean-Luc")); + assert!(is_plausible_name_token("St. James")); + assert!(is_plausible_name_token("Renée")); + assert!(is_plausible_name_token("José")); + // Asian script names — the alphabetic/letter check covers any + // script, not just Latin. + assert!(is_plausible_name_token("田中太郎")); + + // Below the 3-character floor. + assert!(!is_plausible_name_token("")); + assert!(!is_plausible_name_token(" ")); + assert!(!is_plausible_name_token("Bo")); + assert!(!is_plausible_name_token("AB")); + // Trim before counting — surrounding whitespace doesn't count. + assert!(!is_plausible_name_token(" AB ")); + + // Emoji / symbol classes get the whole tag dropped. + assert!(!is_plausible_name_token("🐱cat")); + assert!(!is_plausible_name_token("Heart ❤")); + assert!(!is_plausible_name_token("📸Photo")); + assert!(!is_plausible_name_token("→ Trip")); + assert!(!is_plausible_name_token("★Vacation")); + + // Digits are kept (handled by looks_like_person, not here). 
+ assert!(is_plausible_name_token("Trip 2018")); + assert!(is_plausible_name_token("2024")); + } + + #[test] + fn looks_like_person_accepts_typical_names() { + assert!(looks_like_person("Cameron")); + assert!(looks_like_person("Sarah Smith")); + assert!(looks_like_person("Mary Jane")); + // Non-ASCII title-cased single word still counts. + assert!(looks_like_person("Renée")); + } + + #[test] + fn looks_like_person_rejects_obvious_non_people() { + // Digits, lowercase, three-or-more words, denylist hits. + assert!(!looks_like_person("2018")); + assert!(!looks_like_person("Trip 2018")); + assert!(!looks_like_person("trip")); + assert!(!looks_like_person("Birthday Party Cake")); + assert!(!looks_like_person("cat")); + assert!(!looks_like_person("Cat")); // denied even when title-cased + assert!(!looks_like_person("Christmas")); + assert!(!looks_like_person("home")); + assert!(!looks_like_person("")); + assert!(!looks_like_person(" ")); + } + + #[test] + fn looks_like_person_two_words_skips_denylist() { + // Two-word names get a pass on the single-word denylist — + // "Sunset Walk" is much more likely a real album than a person, + // but false-accepting is fine because the operator confirms. + // What matters is we don't false-reject "Sarah Smith". + assert!(looks_like_person("Sunset Walk")); + assert!(looks_like_person("Sarah Smith")); + } + + #[test] + fn cosine_similarity_known_vectors() { + // Identical vectors → 1.0; orthogonal → 0.0; opposite → -1.0. + let a = vec![1.0, 0.0, 0.0]; + let b = vec![1.0, 0.0, 0.0]; + let c = vec![0.0, 1.0, 0.0]; + let d = vec![-1.0, 0.0, 0.0]; + assert!((cosine_similarity(&a, &b) - 1.0).abs() < 1e-6); + assert!(cosine_similarity(&a, &c).abs() < 1e-6); + assert!((cosine_similarity(&a, &d) - (-1.0)).abs() < 1e-6); + // Mismatched length → 0.0 (defensive, not NaN). + assert_eq!(cosine_similarity(&a, &[1.0, 0.0]), 0.0); + // Empty input → 0.0. 
+ assert_eq!(cosine_similarity(&[], &[]), 0.0); + // Zero vector → 0.0 (denominator guard, not NaN). + let zero = vec![0.0, 0.0, 0.0]; + assert_eq!(cosine_similarity(&a, &zero), 0.0); + } + + #[test] + fn decode_embedding_bytes_round_trip() { + // 512×f32 LE = 2048 bytes. Anything else returns None. + let v: Vec = (0..512).map(|i| i as f32 * 0.001).collect(); + let mut bytes = Vec::with_capacity(2048); + for f in &v { + bytes.extend_from_slice(&f.to_le_bytes()); + } + let decoded = decode_embedding_bytes(&bytes).expect("decode"); + assert_eq!(decoded.len(), 512); + for (a, b) in v.iter().zip(decoded.iter()) { + assert!((a - b).abs() < 1e-9); + } + assert_eq!(decode_embedding_bytes(&[0u8; 100]), None); + assert_eq!(decode_embedding_bytes(&[0u8; 4096]), None); + } + + #[test] + fn find_persons_by_names_ci_groups_case() { + let mut dao = fresh_dao(); + let _ = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + let _ = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Bob".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + + // Mix of cases + a name that has no person row. + let m = dao + .find_persons_by_names_ci(&ctx(), &["alice".into(), "BOB".into(), "charlie".into()]) + .expect("lookup"); + assert!(m.contains_key("alice")); + assert!(m.contains_key("bob")); + assert!(!m.contains_key("charlie")); + // Empty input is a no-op (don't fire a SQL with zero binds). + assert!( + dao.find_persons_by_names_ci(&ctx(), &[]) + .unwrap() + .is_empty() + ); + } + + #[test] + fn person_reference_embedding_filters_by_model_version() { + // A person with embeddings from buffalo_l shouldn't have its + // reference contaminated by a future buffalo_xl row. The auto- + // bind path passes the candidate's model_version so old rows + // never reach the average. 
+ let mut dao = fresh_dao(); + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Subject".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + + // 512-d unit vector along axis 0, written for buffalo_l. + let mut emb_l: Vec = vec![0.0; 512]; + emb_l[0] = 1.0; + let mut emb_l_bytes = Vec::with_capacity(2048); + for f in &emb_l { + emb_l_bytes.extend_from_slice(&f.to_le_bytes()); + } + // 512-d unit vector along axis 1, written for some-other model. + let mut emb_xl: Vec = vec![0.0; 512]; + emb_xl[1] = 1.0; + let mut emb_xl_bytes = Vec::with_capacity(2048); + for f in &emb_xl { + emb_xl_bytes.extend_from_slice(&f.to_le_bytes()); + } + + for (bytes, mv) in [(emb_l_bytes, "buffalo_l"), (emb_xl_bytes, "buffalo_xl")] { + let _ = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: format!("h-{mv}"), + rel_path: format!("p-{mv}.jpg"), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(bytes), + confidence: Some(0.9), + source: "auto".into(), + person_id: Some(p.id), + status: "detected".into(), + model_version: mv.into(), + }, + ) + .unwrap(); + } + + let ref_l = dao + .person_reference_embedding(&ctx(), p.id, "buffalo_l") + .unwrap() + .expect("buffalo_l ref"); + // Reference for buffalo_l should match emb_l (axis-0 unit). + assert!((ref_l[0] - 1.0).abs() < 1e-5, "axis 0 should be ~1.0"); + assert!(ref_l[1].abs() < 1e-5, "axis 1 should be ~0.0"); + + // Unknown model_version → None, not a cross-version average. 
+ assert!( + dao.person_reference_embedding(&ctx(), p.id, "buffalo_xxxl") + .unwrap() + .is_none() + ); + } + + #[test] + fn assign_face_to_person_sets_cover_when_unset() { + let mut dao = fresh_dao(); + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Cover".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + assert!(p.cover_face_id.is_none()); + + // Insert two faces unbound. + let face1 = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h1".into(), + rel_path: "p1.jpg".into(), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "auto".into(), + person_id: None, + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap(); + let face2 = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h2".into(), + rel_path: "p2.jpg".into(), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "auto".into(), + person_id: None, + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap(); + + // First assignment claims the cover. + dao.assign_face_to_person(&ctx(), face1.id, p.id).unwrap(); + let p_after_first = dao.get_person(&ctx(), p.id).unwrap().unwrap(); + assert_eq!(p_after_first.cover_face_id, Some(face1.id)); + + // Second assignment must NOT overwrite — operator may have + // hand-picked the cover after the first auto-bind. 
+ dao.assign_face_to_person(&ctx(), face2.id, p.id).unwrap(); + let p_after_second = dao.get_person(&ctx(), p.id).unwrap().unwrap(); + assert_eq!( + p_after_second.cover_face_id, + Some(face1.id), + "cover must remain face1 after second auto-bind" + ); + } + + #[test] + fn person_crud_roundtrip() { + let mut dao = fresh_dao(); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: Some("the boss".into()), + entity_id: None, + is_ignored: false, + }, + false, + ) + .expect("create person"); + assert_eq!(p.name, "Alice"); + assert_eq!(p.notes.as_deref(), Some("the boss")); + assert!(!p.created_from_tag); + + // Case-insensitive uniqueness — second create with same name in + // different case must fail with a UNIQUE violation, surfacing + // as 409 Conflict at the handler layer. + let dup = dao.create_person( + &ctx(), + &CreatePersonReq { + name: "alice".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ); + assert!(dup.is_err(), "case-insensitive UNIQUE must reject 'alice'"); + + // Update notes; verify updated_at moves forward. + let prev_updated = p.updated_at; + std::thread::sleep(std::time::Duration::from_millis(1100)); // boundary cross + let updated = dao + .update_person( + &ctx(), + p.id, + &UpdatePersonReq { + name: None, + notes: Some("a new note".into()), + cover_face_id: None, + entity_id: None, + is_ignored: None, + }, + ) + .expect("update"); + assert_eq!(updated.notes.as_deref(), Some("a new note")); + assert!(updated.updated_at >= prev_updated); + + // List + delete. 
+
+        let listed = dao.list_persons(&ctx(), None, false).expect("list");
+        assert_eq!(listed.len(), 1);
+        assert_eq!(listed[0].face_count, 0);
+        assert!(dao.delete_person(&ctx(), p.id, false).expect("delete"));
+        assert!(
+            dao.list_persons(&ctx(), None, false)
+                .expect("list")
+                .is_empty()
+        );
+    }
+
+    #[test]
+    fn ignore_bucket_idempotent_and_filters_auto_bind() {
+        // First call creates the bucket; second returns the same row.
+        // Once it exists, find_persons_by_names_ci must skip it even if
+        // the search term matches its name — the auto-bind path must
+        // NEVER target the IGNORE/junk bucket.
+        let mut dao = fresh_dao();
+        let first = dao
+            .get_or_create_ignored_person(&ctx())
+            .expect("create bucket");
+        assert!(first.is_ignored);
+        let second = dao
+            .get_or_create_ignored_person(&ctx())
+            .expect("re-fetch bucket");
+        assert_eq!(first.id, second.id, "bucket must be idempotent");
+
+        // Searching by the bucket's name must return nothing — the
+        // auto-bind look-up filters is_ignored=true.
+        let m = dao
+            .find_persons_by_names_ci(&ctx(), &["ignored".into()])
+            .expect("name lookup");
+        assert!(
+            !m.contains_key("ignored"),
+            "find_persons_by_names_ci must skip the ignore bucket: {m:?}"
+        );
+
+        // Default list_persons hides it; include_ignored=true surfaces it.
+        let visible = dao.list_persons(&ctx(), None, false).expect("list");
+        assert!(visible.iter().all(|p| !p.is_ignored));
+        let all = dao.list_persons(&ctx(), None, true).expect("list all");
+        assert!(all.iter().any(|p| p.is_ignored && p.id == first.id));
+    }
+
+    #[test]
+    fn marker_rows_idempotent() {
+        let mut dao = fresh_dao();
+        // Need a libraries row to satisfy face_detections.library_id FK
+        // without DEFERRED — SQLite enforces FK immediately by default.
+        // in_memory_db_connection only runs migrations and the runtime
+        // library-seeding path never executes in tests, so insert the
+        // row by hand; the INSERT OR IGNORE below keeps this working
+        // even if a future migration starts seeding libraries(id=1).
+ // Migrations may seed libraries(id=1); INSERT OR IGNORE keeps the + // test runnable either way. + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + + // Marker insert. + dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l") + .expect("first mark"); + assert!( + dao.already_scanned(&ctx(), "abc123").expect("scan"), + "already_scanned should report true after marker" + ); + + // Second mark for the same hash is a no-op (the partial UNIQUE + // index would otherwise reject; the DAO short-circuits before the + // insert). + dao.mark_status(&ctx(), 1, "abc123", "x.jpg", "no_faces", "buffalo_l") + .expect("second mark idempotent"); + + // Stats reflect the no_faces marker. + let stats = dao.stats(&ctx(), Some(1)).expect("stats"); + assert_eq!(stats.no_faces, 1); + assert_eq!(stats.scanned, 1); + assert_eq!(stats.with_faces, 0); + } + + #[test] + fn merge_persons_repoints_faces() { + let mut dao = fresh_dao(); + // Migrations may seed libraries(id=1); INSERT OR IGNORE keeps the + // test runnable either way. + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + + let alice = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + let alyse = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alyse".into(), + notes: Some("dup of alice".into()), + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + + // Insert a detected face row owned by `alyse`. 
+ let _ = dao + .store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h1".into(), + rel_path: "p1.jpg".into(), + bbox: Some((0.1, 0.1, 0.2, 0.2)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "auto".into(), + person_id: Some(alyse.id), + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap(); + + // Merge alyse → alice. Notes from src copy when target empty. + let merged = dao.merge_persons(&ctx(), alyse.id, alice.id).unwrap(); + assert_eq!(merged.id, alice.id); + assert_eq!(merged.notes.as_deref(), Some("dup of alice")); + + // alyse is gone. + assert!(dao.get_person(&ctx(), alyse.id).unwrap().is_none()); + + // The face is now alice's. + let faces = dao.list_for_person(&ctx(), alice.id, Some(1)).unwrap(); + assert_eq!(faces.len(), 1); + assert_eq!(faces[0].person_id, Some(alice.id)); + } + + // ── crop_image_to_bbox ────────────────────────────────────────────── + // Pure helper used by the manual face-create handler. Generate a tiny + // image in memory, write it to a temp file, then exercise the bbox + // validation + crop math. 
+
+    fn write_solid_image(w: u32, h: u32) -> tempfile::NamedTempFile {
+        let mut img = image::RgbImage::new(w, h);
+        for p in img.pixels_mut() {
+            *p = image::Rgb([200, 200, 200]);
+        }
+        let f = tempfile::Builder::new()
+            .suffix(".jpg")
+            .tempfile()
+            .expect("tempfile");
+        image::DynamicImage::ImageRgb8(img)
+            .save(f.path())
+            .expect("save jpg");
+        f
+    }
+
+    #[test]
+    fn crop_rejects_invalid_bbox() {
+        let f = write_solid_image(64, 64);
+        // x out of [0,1]
+        assert!(crop_image_to_bbox(f.path(), -0.1, 0.0, 0.5, 0.5).is_err());
+        assert!(crop_image_to_bbox(f.path(), 1.5, 0.0, 0.5, 0.5).is_err());
+        // zero / negative dimensions
+        assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.0, 0.5).is_err());
+        assert!(crop_image_to_bbox(f.path(), 0.0, 0.0, 0.5, -0.1).is_err());
+        // overflows the image
+        assert!(crop_image_to_bbox(f.path(), 0.7, 0.0, 0.5, 0.5).is_err());
+    }
+
+    #[test]
+    fn crop_returns_decodable_jpeg() {
+        let f = write_solid_image(200, 200);
+        let bytes = crop_image_to_bbox(f.path(), 0.25, 0.25, 0.5, 0.5).expect("center crop");
+        // Re-decode to confirm the pipeline produced a valid JPEG. Exact
+        // dimensions depend on the 50% padding clamp, so just assert
+        // sanity bounds rather than pinning numbers (padding math can
+        // legitimately drift if we tweak the heuristic later).
+        let img = image::load_from_memory(&bytes).expect("decode crop");
+        let (w, h) = (img.width(), img.height());
+        assert!((80..=200).contains(&w), "unexpected crop width: {w}");
+        assert!((80..=200).contains(&h), "unexpected crop height: {h}");
+    }
+
+    #[test]
+    fn crop_padding_clamps_to_image_bounds() {
+        // A bbox right at the corner should pad inward as far as it can,
+        // never outside the image — otherwise we'd pass invalid coords
+        // to the embedding service.
+ let f = write_solid_image(100, 100); + let bytes = crop_image_to_bbox(f.path(), 0.9, 0.9, 0.1, 0.1).expect("corner crop"); + let img = image::load_from_memory(&bytes).expect("decode corner crop"); + // Padded crop must fit within the source's 100x100. + assert!(img.width() <= 100); + assert!(img.height() <= 100); + assert!(img.width() > 0 && img.height() > 0); + } + + // ── hydrate_face_with_person — PATCH/POST /image/faces response shape ── + + fn seed_library_and_face(dao: &mut SqliteFaceDao, person_id: Option) -> FaceDetectionRow { + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + dao.store_detection( + &ctx(), + InsertFaceDetectionInput { + library_id: 1, + content_hash: "h-hydrate".into(), + rel_path: "p.jpg".into(), + bbox: Some((0.1, 0.2, 0.3, 0.4)), + embedding: Some(vec![0u8; 2048]), + confidence: Some(0.9), + source: "manual".into(), + person_id, + status: "detected".into(), + model_version: "buffalo_l".into(), + }, + ) + .unwrap() + } + + #[test] + fn hydrate_face_carries_person_name_when_assigned() { + // Regression guard for the bug where PATCH /image/faces/{id} + // returned a bare FaceDetectionRow (no person_name), causing + // the carousel overlay's optimistic replace to drop the VFD + // label off the bbox after every save. The handler hydrates + // via this helper; if anyone refactors the helper to skip the + // persons join, this test fails. 
+ let mut dao = fresh_dao(); + let p = dao + .create_person( + &ctx(), + &CreatePersonReq { + name: "Alice".into(), + notes: None, + entity_id: None, + is_ignored: false, + }, + false, + ) + .unwrap(); + let row = seed_library_and_face(&mut dao, Some(p.id)); + let joined = + hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate assigned"); + assert_eq!(joined.person_id, Some(p.id)); + assert_eq!(joined.person_name.as_deref(), Some("Alice")); + // Bbox + confidence + source must round-trip — these are what + // the optimistic-replace also keys on. + assert!((joined.bbox_x - 0.1).abs() < 1e-6); + assert!((joined.bbox_y - 0.2).abs() < 1e-6); + assert!((joined.bbox_w - 0.3).abs() < 1e-6); + assert!((joined.bbox_h - 0.4).abs() < 1e-6); + assert_eq!(joined.source, "manual"); + } + + #[test] + fn hydrate_face_leaves_person_name_null_when_unassigned() { + // Mirror branch: an unassigned face must hydrate cleanly with + // person_name = None, not a stale value left over from a + // previously-assigned row's serialization. + let mut dao = fresh_dao(); + let row = seed_library_and_face(&mut dao, None); + let joined = + hydrate_face_with_person(&mut dao, &ctx(), row).expect("hydrate unassigned"); + assert!(joined.person_id.is_none()); + assert!(joined.person_name.is_none()); + } + + #[test] + fn list_unscanned_candidates_filters_to_hashed_unscanned_in_library() { + // The watcher's per-tick backlog drain depends on this query + // returning *only* image_exif rows with a populated + // content_hash and no matching face_detections row in the + // requested library. A regression here would either silently + // re-scan files (waste of inference) or skip files that need + // scanning (the symptom we just shipped a fix for). 
+ let mut dao = fresh_dao(); + diesel::sql_query( + "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \ + VALUES (1, 'main', '/tmp', 0), (2, 'other', '/tmp2', 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed libraries"); + + // Seed image_exif: mix of hashed/unhashed/scanned/cross-library. + diesel::sql_query( + "INSERT INTO image_exif \ + (library_id, rel_path, content_hash, created_time, last_modified) VALUES \ + (1, 'a.jpg', 'h-a', 0, 0), \ + (1, 'b.jpg', 'h-b', 0, 0), \ + (1, 'c.jpg', NULL, 0, 0), \ + (1, 'd.jpg', 'h-d', 0, 0), \ + (2, 'e.jpg', 'h-e', 0, 0)", + ) + .execute(dao.connection.lock().unwrap().deref_mut()) + .expect("seed image_exif"); + + // 'b' has been scanned (no_faces marker) — expect it filtered out. + dao.mark_status(&ctx(), 1, "h-b", "b.jpg", "no_faces", "buffalo_l") + .expect("scanned marker"); + + let cands = dao + .list_unscanned_candidates(&ctx(), 1, 10) + .expect("list unscanned"); + + let hashes: std::collections::HashSet<_> = + cands.iter().map(|(_, h)| h.clone()).collect(); + + // Should contain a and d (hashed, unscanned, library 1). + assert!(hashes.contains("h-a"), "missing h-a: {:?}", hashes); + assert!(hashes.contains("h-d"), "missing h-d: {:?}", hashes); + // Should NOT contain b (scanned), c (no hash), e (other library). 
+ assert!(!hashes.contains("h-b"), "expected h-b filtered (scanned)"); + assert!(!hashes.contains("h-e"), "expected h-e filtered (other library)"); + assert_eq!(cands.len(), 2, "unexpected candidates: {:?}", cands); + } + +} diff --git a/src/files.rs b/src/files.rs index b4458c9..9ab1468 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1659,6 +1659,14 @@ mod tests { Ok(vec![]) } + fn get_rel_paths_for_hashes( + &mut self, + _context: &opentelemetry::Context, + _hashes: &[String], + ) -> Result>, DbError> { + Ok(std::collections::HashMap::new()) + } + fn list_rel_paths_for_libraries( &mut self, _context: &opentelemetry::Context, diff --git a/src/lib.rs b/src/lib.rs index e6d2cc1..12de818 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,8 @@ pub mod data; pub mod database; pub mod error; pub mod exif; +pub mod face_watch; +pub mod faces; pub mod file_types; pub mod files; pub mod geo; diff --git a/src/main.rs b/src/main.rs index ccdb14b..3e85cbd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,6 +66,8 @@ mod data; mod database; mod error; mod exif; +mod face_watch; +mod faces; mod file_types; mod files; mod geo; @@ -1459,6 +1461,8 @@ fn main() -> std::io::Result<()> { app_state.libraries.clone(), playlist_mgr_for_watcher, preview_gen_for_watcher, + app_state.face_client.clone(), + app_state.excluded_dirs.clone(), ); // Start orphaned playlist cleanup job @@ -1518,6 +1522,7 @@ fn main() -> std::io::Result<()> { let exif_dao = SqliteExifDao::new(); let insight_dao = SqliteInsightDao::new(); let preview_dao = SqlitePreviewDao::new(); + let face_dao = faces::SqliteFaceDao::new(); let cors = Cors::default() .allowed_origin_fn(|origin, _req_head| { // Allow all origins in development, or check against CORS_ALLOWED_ORIGINS env var @@ -1595,6 +1600,7 @@ fn main() -> std::io::Result<()> { .service(libraries::list_libraries) .add_feature(add_tag_services::<_, SqliteTagDao>) .add_feature(knowledge::add_knowledge_services::<_, SqliteKnowledgeDao>) + 
.add_feature(faces::add_face_services::<_, faces::SqliteFaceDao>) .app_data(app_data.clone()) .app_data::>(Data::new(RealFileSystem::new( app_data.base_path.clone(), @@ -1616,6 +1622,10 @@ fn main() -> std::io::Result<()> { .app_data::>>(Data::new(Mutex::new( SqliteKnowledgeDao::new(), ))) + .app_data::>>(Data::new(Mutex::new(face_dao))) + .app_data::>(Data::new( + app_data.face_client.clone(), + )) .app_data(mp::form::MultipartFormConfig::default().total_limit(1024 * 1024 * 1024)) // 1GB upload limit .app_data(web::JsonConfig::default().error_handler(|err, req| { let detail = err.to_string(); @@ -1780,6 +1790,8 @@ fn watch_files( libs: Vec, playlist_manager: Addr, preview_generator: Addr, + face_client: crate::ai::face_client::FaceClient, + excluded_dirs: Vec, ) { std::thread::spawn(move || { // Get polling intervals from environment variables @@ -1798,6 +1810,18 @@ fn watch_files( info!("Starting optimized file watcher"); info!(" Quick scan interval: {} seconds", quick_interval_secs); info!(" Full scan interval: {} seconds", full_interval_secs); + // Surface face-detection state at boot so it's obvious whether + // the watcher will hit Apollo. The branch silently no-ops when + // disabled (intentional for legacy deploys), which makes "why + // aren't faces being detected?" hard to diagnose otherwise. + if face_client.is_enabled() { + info!(" Face detection: ENABLED"); + } else { + info!( + " Face detection: DISABLED (set APOLLO_FACE_API_BASE_URL \ + or APOLLO_API_BASE_URL to enable)" + ); + } for lib in &libs { info!( " Watching library '{}' (id={}) at {}", @@ -1812,6 +1836,15 @@ fn watch_files( let preview_dao = Arc::new(Mutex::new( Box::new(SqlitePreviewDao::new()) as Box )); + let face_dao = Arc::new(Mutex::new( + Box::new(faces::SqliteFaceDao::new()) as Box + )); + // tag_dao for the watcher's auto-bind path. Independent of the + // request-handler tag_dao instance — both end up pointing at the + // same SQLite file via SqliteTagDao::default(). 
+ let watcher_tag_dao = Arc::new(Mutex::new( + Box::new(SqliteTagDao::default()) as Box + )); let mut last_quick_scan = SystemTime::now(); let mut last_full_scan = SystemTime::now(); @@ -1828,6 +1861,26 @@ fn watch_files( let is_full_scan = since_last_full.as_secs() >= full_interval_secs; for lib in &libs { + // Drain the unhashed-hash backlog AND the face-detection + // backlog every tick, regardless of quick/full. Quick + // scans only walk recently-modified files, so the + // pre-Phase-3 backlog never enters their candidate set + // — without these standalone passes, backfill + + // detection only progressed during full scans + // (default once an hour). + if face_client.is_enabled() { + let context = opentelemetry::Context::new(); + backfill_unhashed_backlog(&context, lib, &exif_dao); + process_face_backlog( + &context, + lib, + &face_client, + &face_dao, + &watcher_tag_dao, + &excluded_dirs, + ); + } + if is_full_scan { info!( "Running full scan for library '{}' (scan #{})", @@ -1837,6 +1890,10 @@ fn watch_files( lib, Arc::clone(&exif_dao), Arc::clone(&preview_dao), + Arc::clone(&face_dao), + Arc::clone(&watcher_tag_dao), + face_client.clone(), + &excluded_dirs, None, playlist_manager.clone(), preview_generator.clone(), @@ -1854,6 +1911,10 @@ fn watch_files( lib, Arc::clone(&exif_dao), Arc::clone(&preview_dao), + Arc::clone(&face_dao), + Arc::clone(&watcher_tag_dao), + face_client.clone(), + &excluded_dirs, Some(check_since), playlist_manager.clone(), preview_generator.clone(), @@ -1900,6 +1961,10 @@ fn process_new_files( library: &libraries::Library, exif_dao: Arc>>, preview_dao: Arc>>, + face_dao: Arc>>, + tag_dao: Arc>>, + face_client: crate::ai::face_client::FaceClient, + excluded_dirs: &[String], modified_since: Option, playlist_manager: Addr, preview_generator: Addr, @@ -2075,6 +2140,43 @@ fn process_new_files( } } + // ── Face detection pass ──────────────────────────────────────────── + // Run after EXIF writes so newly-registered files have their + // 
content_hash populated. Skipped wholesale when face_client is + disabled (no Apollo integration configured) — Phase 3 wires this + up; the watcher remains usable on legacy deploys. + if face_client.is_enabled() { + // Opportunistic content_hash backfill: photos indexed before + // content-hashing landed (or where the hash compute failed + // silently on insert) end up in image_exif with NULL + // content_hash. build_face_candidates keys on content_hash, so + // those files would never become candidates without backfill. + // Idempotent — subsequent scans see the populated hashes and + // no-op. The dedicated `backfill_hashes` binary is still the + // right tool for very large legacy libraries; this branch + // ensures small/medium deploys self-heal without operator + // action. + backfill_missing_content_hashes(&context, &files, library, &exif_dao); + let candidates = build_face_candidates(&context, &files, &exif_dao, &face_dao); + debug!( + "face_watch: scan tick — {} image file(s) walked, {} candidate(s) (library '{}', modified_since={})", + files.iter().filter(|(p, _)| !is_video_file(p)).count(), + candidates.len(), + library.name, + modified_since.is_some(), + ); + if !candidates.is_empty() { + face_watch::run_face_detection_pass( + library, + excluded_dirs, + &face_client, + Arc::clone(&face_dao), + Arc::clone(&tag_dao), + candidates, + ); + } + } + + // Check for videos that need HLS playlists let video_path_base = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set"); let mut videos_needing_playlists = Vec::new(); @@ -2199,6 +2301,312 @@ fn process_new_files( } } +/// Compute and persist content_hash for image_exif rows where it's NULL. +/// +/// Bounded per call by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000) so +/// a watcher tick on a large legacy library doesn't block for hours +/// blake3-ing every photo at once. Subsequent scans pick up the rest. 
+/// For 50k+ libraries the dedicated `cargo run --bin backfill_hashes` +/// is still faster (it doesn't fight a watcher loop for the DAO mutex). +/// Drain unhashed image_exif rows by querying them directly, independent +/// of the filesystem walk. Quick scans only walk recently-modified +/// files, so a backlog of pre-existing unhashed rows never enters +/// `process_new_files`'s candidate set — left alone, it would only +/// drain on full scans (default once an hour). Calling this every tick +/// keeps the face-detection backlog moving regardless. +/// +/// Returns the number of rows successfully backfilled this pass. +fn backfill_unhashed_backlog( + context: &opentelemetry::Context, + library: &libraries::Library, + exif_dao: &Arc>>, +) -> usize { + let cap: i64 = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|n: &i64| *n > 0) + .unwrap_or(2000); + + // Fetch up to cap+1 rows so we can tell "more remain" without a + // separate count query. Across libraries — there's no per-library + // filter on get_rows_missing_hash today — but we only ever update + // rows whose library_id matches the caller's library, so other + // libraries' rows just get skipped here and picked up on the next + // library's tick. Negligible cost given the cap. 
+ let rows: Vec<(i32, String)> = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_rows_missing_hash(context, cap + 1).unwrap_or_default() + }; + if rows.is_empty() { + return 0; + } + + let more_than_cap = rows.len() as i64 > cap; + let base_path = std::path::Path::new(&library.root_path); + + let mut backfilled = 0usize; + let mut errors = 0usize; + let mut skipped_other_lib = 0usize; + for (lib_id, rel_path) in rows.iter().take(cap as usize) { + if *lib_id != library.id { + skipped_other_lib += 1; + continue; + } + let abs = base_path.join(rel_path); + if !abs.exists() { + // File walked away — the watcher's reconciliation pass will + // remove the orphan exif row eventually. + continue; + } + match content_hash::compute(&abs) { + Ok(id) => { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + if let Err(e) = + dao.backfill_content_hash(context, library.id, rel_path, &id.content_hash, id.size_bytes) + { + warn!( + "face_watch: backfill_content_hash failed for {}: {:?}", + rel_path, e + ); + errors += 1; + } else { + backfilled += 1; + } + } + Err(e) => { + debug!("face_watch: hash compute failed for {} ({:?})", abs.display(), e); + errors += 1; + } + } + } + + if backfilled > 0 || errors > 0 || more_than_cap { + info!( + "face_watch: backfill pass for library '{}': hashed {} ({} error(s), {} skipped to other libraries; {} cap, more_remain={})", + library.name, backfilled, errors, skipped_other_lib, cap, more_than_cap + ); + } + backfilled +} + +/// Per-tick face-detection drain. Pulls a capped batch of hashed-but- +/// unscanned image_exif rows directly via the FaceDao anti-join and +/// hands them to the existing detection pass. Runs on every tick (not +/// just full scans) so the backlog moves at quick-scan cadence. 
+fn process_face_backlog( + context: &opentelemetry::Context, + library: &libraries::Library, + face_client: &crate::ai::face_client::FaceClient, + face_dao: &Arc>>, + tag_dao: &Arc>>, + excluded_dirs: &[String], +) { + let cap: i64 = dotenv::var("FACE_BACKLOG_MAX_PER_TICK") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|n: &i64| *n > 0) + .unwrap_or(64); + + let rows: Vec<(String, String)> = { + let mut dao = face_dao.lock().expect("face dao"); + match dao.list_unscanned_candidates(context, library.id, cap) { + Ok(r) => r, + Err(e) => { + warn!( + "face_watch: list_unscanned_candidates failed for library '{}': {:?}", + library.name, e + ); + return; + } + } + }; + if rows.is_empty() { + return; + } + + info!( + "face_watch: backlog drain — running detection on {} candidate(s) for library '{}' (cap={})", + rows.len(), + library.name, + cap + ); + + let candidates: Vec = rows + .into_iter() + .map(|(rel_path, content_hash)| face_watch::FaceCandidate { + rel_path, + content_hash, + }) + .collect(); + + face_watch::run_face_detection_pass( + library, + excluded_dirs, + face_client, + Arc::clone(face_dao), + Arc::clone(tag_dao), + candidates, + ); +} + +fn backfill_missing_content_hashes( + context: &opentelemetry::Context, + files: &[(PathBuf, String)], + library: &libraries::Library, + exif_dao: &Arc>>, +) { + let image_paths: Vec = files + .iter() + .filter(|(p, _)| !is_video_file(p)) + .map(|(_, rel)| rel.clone()) + .collect(); + if image_paths.is_empty() { + return; + } + + let exif_records = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_exif_batch(context, &image_paths) + .unwrap_or_default() + }; + // Cheap lookup back from rel_path → absolute file_path so + // content_hash::compute can read the bytes. 
+ let path_by_rel: HashMap = + files.iter().map(|(p, rel)| (rel.clone(), p)).collect(); + + let cap: usize = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK") + .ok() + .and_then(|s| s.parse().ok()) + .filter(|n: &usize| *n > 0) + .unwrap_or(2000); + + // Count the unhashed backlog up front so we can surface "still needs + // backfill: N" in the log — without it, a face-scan that's stuck at + // 44% looks stalled when really it's chipping through hashes. + let unhashed_total = exif_records + .iter() + .filter(|r| r.content_hash.is_none()) + .count(); + + let mut backfilled = 0usize; + let mut errors = 0usize; + for record in &exif_records { + // Cap on successes only — earlier this counted errors too, so a + // pocket of chronically-unhashable files at the front of the + // table (vanished mid-scan, permission denied, etc.) burned the + // budget every tick and the rest of the backlog never advanced. + // Errors are still bounded by `unhashed_total` (the loop walks + // each unhashed record at most once per tick). + if backfilled >= cap { + break; + } + if record.content_hash.is_some() { + continue; + } + let Some(file_path) = path_by_rel.get(&record.file_path) else { + // Walked file went missing between the directory scan and now; + // next tick will retry naturally. + continue; + }; + match content_hash::compute(file_path) { + Ok(id) => { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + if let Err(e) = dao.backfill_content_hash( + context, + library.id, + &record.file_path, + &id.content_hash, + id.size_bytes, + ) { + warn!( + "face_watch: backfill_content_hash failed for {}: {:?}", + record.file_path, e + ); + errors += 1; + } else { + backfilled += 1; + } + } + Err(e) => { + debug!( + "face_watch: hash compute failed for {} ({:?})", + file_path.display(), + e + ); + errors += 1; + } + } + } + // Always log when there's an unhashed backlog so an operator + // looking at "scan stuck at 44%" can see backfill is running and + // how much remains. 
Quiet only when there's nothing to do. + if unhashed_total > 0 || backfilled > 0 || errors > 0 { + let remaining = unhashed_total.saturating_sub(backfilled); + info!( + "face_watch: backfilled {}/{} content_hash for library '{}' ({} error(s); {} still need backfill; cap={})", + backfilled, unhashed_total, library.name, errors, remaining, cap + ); + } +} + +/// Build the face-detection candidate list for a scan tick. +/// +/// We need `(rel_path, content_hash)` for every image file that has a +/// content_hash recorded in image_exif but no row in face_detections yet. +/// Re-querying image_exif here picks up rows the EXIF write loop just +/// inserted alongside any pre-existing rows the watcher walked over — +/// covers both new uploads and the initial backlog scan. +fn build_face_candidates( + context: &opentelemetry::Context, + files: &[(PathBuf, String)], + exif_dao: &Arc>>, + face_dao: &Arc>>, +) -> Vec { + // Restrict to image files; videos aren't face-scanned in v1 (kamadak + // doesn't even register them in image_exif). + let image_paths: Vec = files + .iter() + .filter(|(p, _)| !is_video_file(p)) + .map(|(_, rel)| rel.clone()) + .collect(); + if image_paths.is_empty() { + return Vec::new(); + } + + let exif_records = { + let mut dao = exif_dao.lock().expect("Unable to lock ExifDao"); + dao.get_exif_batch(context, &image_paths) + .unwrap_or_default() + }; + // rel_path → content_hash (only rows with a hash; without one we have + // nothing to key face data against). 
+ let mut hash_by_path: HashMap = HashMap::with_capacity(exif_records.len()); + for record in exif_records { + if let Some(h) = record.content_hash { + hash_by_path.insert(record.file_path, h); + } + } + + let mut candidates = Vec::new(); + let mut dao = face_dao.lock().expect("face dao"); + for rel_path in image_paths { + let Some(hash) = hash_by_path.get(&rel_path) else { + continue; + }; + match dao.already_scanned(context, hash) { + Ok(true) => continue, + Ok(false) => candidates.push(face_watch::FaceCandidate { + rel_path, + content_hash: hash.clone(), + }), + Err(e) => { + warn!("face_watch: already_scanned errored for {}: {:?}", hash, e); + } + } + } + candidates +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/memories.rs b/src/memories.rs index 0e2aad5..95de714 100644 --- a/src/memories.rs +++ b/src/memories.rs @@ -23,7 +23,8 @@ use crate::utils::earliest_fs_time; // Helper that encapsulates path-exclusion semantics #[derive(Debug)] -struct PathExcluder { +pub struct PathExcluder { + base: PathBuf, excluded_dirs: Vec, excluded_patterns: Vec, } @@ -34,9 +35,12 @@ impl PathExcluder { /// Rules: /// - Entries starting with '/' are interpreted as "absolute under base" /// (e.g. "/photos/private" -> base/photos/private). - /// - Entries without '/' are treated as substring patterns that match - /// anywhere in the full path string (still scoped under base). - fn new(base: &Path, raw_excluded: &[String]) -> Self { + /// - Entries without '/' are treated as path-component patterns that + /// match a directory or file name *under* `base`. The base prefix is + /// stripped before matching so a system-level component (e.g. the + /// `tmp` in `/tmp/...` when running tests) doesn't masquerade as a + /// user-defined exclude. + pub fn new(base: &Path, raw_excluded: &[String]) -> Self { let mut excluded_dirs = Vec::new(); let mut excluded_patterns = Vec::new(); @@ -53,18 +57,19 @@ impl PathExcluder { } debug!( - "PathExcluder created. 
dirs={:?}, patterns={:?}", - excluded_dirs, excluded_patterns + "PathExcluder created. base={:?}, dirs={:?}, patterns={:?}", + base, excluded_dirs, excluded_patterns ); Self { + base: base.to_path_buf(), excluded_dirs, excluded_patterns, } } /// Returns true if `path` should be excluded. - fn is_excluded(&self, path: &Path) -> bool { + pub fn is_excluded(&self, path: &Path) -> bool { // Directory-based exclusions for excluded in &self.excluded_dirs { if path.starts_with(excluded) { @@ -76,19 +81,24 @@ impl PathExcluder { } } - // Pattern-based exclusions: match whole path components (dir or file name), - // not substrings. - if !self.excluded_patterns.is_empty() { - for component in path.components() { - if let Some(comp_str) = component.as_os_str().to_str() - && self.excluded_patterns.iter().any(|pat| pat == comp_str) - { - trace!( - "PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})", - path, comp_str, self.excluded_patterns - ); - return true; - } + if self.excluded_patterns.is_empty() { + return false; + } + + // Strip the base prefix before scanning components. Without this, + // every path component above `base` (e.g. `tmp` in `/tmp/test123` + // under tempdir, or the user's `home` in `/home/user/Pictures`) + // would match user-defined patterns and produce false positives. 
+ let scan_root = path.strip_prefix(&self.base).unwrap_or(path); + for component in scan_root.components() { + if let Some(comp_str) = component.as_os_str().to_str() + && self.excluded_patterns.iter().any(|pat| pat == comp_str) + { + trace!( + "PathExcluder: excluded by component pattern: {:?} (component: {:?}, patterns: {:?})", + path, comp_str, self.excluded_patterns + ); + return true; } } diff --git a/src/state.rs b/src/state.rs index 5682d43..18eab29 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,4 +1,5 @@ use crate::ai::apollo_client::ApolloClient; +use crate::ai::face_client::FaceClient; use crate::ai::insight_chat::{ChatLockMap, InsightChatService}; use crate::ai::openrouter::OpenRouterClient; use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient}; @@ -48,6 +49,11 @@ pub struct AppState { pub insight_generator: InsightGenerator, /// Chat continuation service. Hold an Arc so handlers can clone cheaply. pub insight_chat: Arc, + /// Face inference client (calls Apollo's `/api/internal/faces/*`). + /// Disabled (`is_enabled() == false`) when neither `APOLLO_FACE_API_BASE_URL` + /// nor `APOLLO_API_BASE_URL` is set; the file-watch hook (Phase 3) and + /// manual-face-create handler short-circuit in that case. + pub face_client: FaceClient, } impl AppState { @@ -82,6 +88,7 @@ impl AppState { insight_generator: InsightGenerator, insight_chat: Arc, preview_dao: Arc>>, + face_client: FaceClient, ) -> Self { assert!( !libraries_vec.is_empty(), @@ -115,6 +122,7 @@ impl AppState { sms_client, insight_generator, insight_chat, + face_client, } } @@ -161,6 +169,15 @@ impl Default for AppState { // generator silently falls through to the legacy Nominatim path. let apollo_client = ApolloClient::new(env::var("APOLLO_API_BASE_URL").ok()); + // Face inference client. Falls back to APOLLO_API_BASE_URL when + // APOLLO_FACE_API_BASE_URL is unset (single-Apollo deploys are the + // common case). 
Both unset = feature disabled, file-watch hook + // and manual-face handlers short-circuit silently. + let face_client_url = env::var("APOLLO_FACE_API_BASE_URL") + .ok() + .or_else(|| env::var("APOLLO_API_BASE_URL").ok()); + let face_client = FaceClient::new(face_client_url); + // Initialize DAOs let insight_dao: Arc>> = Arc::new(Mutex::new(Box::new(SqliteInsightDao::new()))); @@ -244,6 +261,7 @@ impl Default for AppState { insight_generator, insight_chat, preview_dao, + face_client, ) } } @@ -382,6 +400,7 @@ impl AppState { insight_generator, insight_chat, preview_dao, + FaceClient::new(None), // disabled in test ) } } diff --git a/src/tags.rs b/src/tags.rs index b94cb3b..bdd5abd 100644 --- a/src/tags.rs +++ b/src/tags.rs @@ -32,6 +32,7 @@ where ) .service(web::resource("image/tags/all").route(web::get().to(get_all_tags::))) .service(web::resource("image/tags/batch").route(web::post().to(update_tags::))) + .service(web::resource("image/tags/lookup").route(web::post().to(lookup_tags_batch::))) } async fn add_tag( @@ -238,6 +239,149 @@ async fn update_tags( .into_http_internal_err() } +#[derive(Deserialize, Debug)] +pub struct LookupTagsBatchRequest { + pub paths: Vec, +} + +/// Bulk per-path tag lookup with cross-library content-hash sibling +/// expansion. Apollo's photo-match flow used to fan out one +/// ``GET /image/tags?path=`` per record (~4k for a wide window) — +/// each call locked the dao briefly and the round-trip cost dwarfed +/// the actual SQL. This collapses the whole fan-out into: +/// +/// 1. one ``image_exif`` batch lookup → query path → content_hash +/// 2. one ``image_exif`` JOIN by content_hash → all sibling rel_paths +/// (so a tag applied under library A surfaces under library B +/// when the content hashes match — important once a backup mount +/// holds copies of files from the primary library) +/// 3. 
one ``tagged_photo`` JOIN over the union of (query + sibling) +/// rel_paths +/// +/// Body: ``{paths: [...]}``; response: ``{path: [{id, name, ...}]}`` +/// with only paths that have at least one tag (caller treats absence +/// as empty). Each chunk is capped to stay under SQLite's variable +/// limit; eight queries per 4k photos is still ~500x cheaper than +/// per-path HTTP fan-out. +async fn lookup_tags_batch( + _: Claims, + http_request: HttpRequest, + body: web::Json, + tag_dao: web::Data>, + exif_dao: web::Data>>, +) -> impl Responder { + use std::collections::{HashMap, HashSet}; + let context = extract_context_from_request(&http_request); + let span = global_tracer().start_with_context("lookup_tags_batch", &context); + let span_context = opentelemetry::Context::current_with_span(span); + + if body.paths.is_empty() { + return HttpResponse::Ok().json(HashMap::>::new()); + } + + let query_paths: Vec = body.paths.iter().map(|p| normalize_path(p)).collect(); + + // Stage 1: query → content_hash mapping. Files without a hash yet + // (just-indexed, hash compute failed, etc.) skip the sibling + // expansion and only get tags from their own rel_path. + let exif_records = { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + match dao.get_exif_batch(&span_context, &query_paths) { + Ok(rows) => rows, + Err(e) => { + return HttpResponse::InternalServerError() + .body(format!("exif batch lookup failed: {:?}", e)); + } + } + }; + let mut hash_by_path: HashMap = HashMap::with_capacity(exif_records.len()); + for record in exif_records { + if let Some(h) = record.content_hash { + hash_by_path.insert(record.file_path, h); + } + } + let unique_hashes: Vec = hash_by_path + .values() + .cloned() + .collect::>() + .into_iter() + .collect(); + + // Stage 2: hash → all sibling rel_paths. 
+ let paths_by_hash = if unique_hashes.is_empty() { + HashMap::new() + } else { + let mut dao = exif_dao.lock().expect("Unable to get ExifDao"); + match dao.get_rel_paths_for_hashes(&span_context, &unique_hashes) { + Ok(map) => map, + Err(e) => { + return HttpResponse::InternalServerError() + .body(format!("hash sibling lookup failed: {:?}", e)); + } + } + }; + + // Stage 3: build expanded path set and the reverse map + // sibling → [original query paths whose tag bucket should include + // the sibling's tags]. A query path always attributes to itself + // (covers the no-content-hash case). + let mut originals_by_sibling: HashMap> = HashMap::new(); + let mut all_paths: HashSet = HashSet::new(); + for query_path in &query_paths { + all_paths.insert(query_path.clone()); + originals_by_sibling + .entry(query_path.clone()) + .or_default() + .push(query_path.clone()); + if let Some(hash) = hash_by_path.get(query_path) + && let Some(siblings) = paths_by_hash.get(hash) + { + for sibling in siblings { + if sibling == query_path { + continue; + } + all_paths.insert(sibling.clone()); + originals_by_sibling + .entry(sibling.clone()) + .or_default() + .push(query_path.clone()); + } + } + } + + // Stage 4: tags grouped by rel_path for the union. + let all_paths_vec: Vec = all_paths.into_iter().collect(); + let tags_by_sibling = { + let mut dao = tag_dao.lock().expect("Unable to get TagDao"); + match dao.get_tags_grouped_by_paths(&span_context, &all_paths_vec) { + Ok(map) => map, + Err(e) => { + return HttpResponse::InternalServerError().body(format!("{}", e)); + } + } + }; + + // Stage 5: aggregate sibling tags back to original query paths, + // de-duped by tag id. Empty buckets stay out of the response so + // the caller's "missing key = []" contract holds. 
+ let mut result: HashMap> = HashMap::new(); + for (sibling_path, originals) in originals_by_sibling { + if let Some(tags) = tags_by_sibling.get(&sibling_path) { + for orig in originals { + let entry = result.entry(orig).or_default(); + for t in tags { + if !entry.iter().any(|e| e.id == t.id) { + entry.push(t.clone()); + } + } + } + } + } + + span_context.span().set_status(Status::Ok); + HttpResponse::Ok().json(result) +} + #[derive(Serialize, Queryable, Clone, Debug, PartialEq)] pub struct Tag { pub id: i32, @@ -317,6 +461,14 @@ pub trait TagDao: Send + Sync { context: &opentelemetry::Context, paths: &[String], ) -> anyhow::Result>; + /// Per-path grouped lookup: ``rel_path → [tags]``. Used by the + /// ``/image/tags/lookup`` batch endpoint. Returns only paths that + /// have at least one tag; the caller treats absence as empty. + fn get_tags_grouped_by_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>>; fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result; fn remove_tag( &mut self, @@ -470,6 +622,51 @@ impl TagDao for SqliteTagDao { }) } + fn get_tags_grouped_by_paths( + &mut self, + context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>> { + use std::collections::HashMap; + let mut out: HashMap> = HashMap::new(); + if paths.is_empty() { + return Ok(out); + } + let mut conn = self + .connection + .lock() + .expect("Unable to lock SqliteTagDao connection"); + trace_db_call(context, "query", "get_tags_grouped_by_paths", |span| { + span.set_attribute(KeyValue::new("path_count", paths.len() as i64)); + // SQLite's default SQLITE_LIMIT_VARIABLE_NUMBER is 32766 in + // modern builds (999 in old ones). Chunk at 500 to stay + // safely under both — eight queries for a 4k-photo grid is + // still ~500x cheaper than 4k single-row HTTP calls. 
+ const CHUNK: usize = 500; + for chunk in paths.chunks(CHUNK) { + let rows: Vec<(String, i32, String, i64)> = tagged_photo::table + .inner_join(tags::table) + .filter(tagged_photo::rel_path.eq_any(chunk)) + .select(( + tagged_photo::rel_path, + tags::id, + tags::name, + tags::created_time, + )) + .get_results(conn.deref_mut()) + .with_context(|| "Unable to get tags grouped from Sqlite")?; + for (rel_path, id, name, created_time) in rows { + out.entry(rel_path).or_default().push(Tag { + id, + name, + created_time, + }); + } + } + Ok(out) + }) + } + fn create_tag(&mut self, context: &opentelemetry::Context, name: &str) -> anyhow::Result { let mut conn = self .connection @@ -893,6 +1090,23 @@ mod tests { Ok(out) } + fn get_tags_grouped_by_paths( + &mut self, + _context: &opentelemetry::Context, + paths: &[String], + ) -> anyhow::Result>> { + let tagged = self.tagged_photos.borrow(); + let mut out = std::collections::HashMap::new(); + for p in paths { + if let Some(tags) = tagged.get(p) + && !tags.is_empty() + { + out.insert(p.clone(), tags.clone()); + } + } + Ok(out) + } + fn create_tag( &mut self, _context: &opentelemetry::Context, @@ -1026,6 +1240,42 @@ mod tests { } } + #[actix_rt::test] + async fn get_tags_grouped_by_paths_returns_per_path_buckets() { + // Backstop for the batch tag-lookup endpoint: confirms the + // grouped variant returns one bucket per path with at least + // one tag, and omits paths with no tags entirely (the caller + // treats absence as []). The handler stacks sibling expansion + // on top via image_exif content_hash; the DAO method itself + // just needs to honour rel_path → tags directly. + let mut dao = TestTagDao::new(); + let ctx = opentelemetry::Context::current(); + // Seed: two paths tagged, one path untagged. 
+ dao.tagged_photos.borrow_mut().insert( + "a.jpg".into(), + vec![Tag { id: 1, name: "alpha".into(), created_time: 0 }], + ); + dao.tagged_photos.borrow_mut().insert( + "b.jpg".into(), + vec![ + Tag { id: 2, name: "beta".into(), created_time: 0 }, + Tag { id: 3, name: "gamma".into(), created_time: 0 }, + ], + ); + let grouped = dao + .get_tags_grouped_by_paths( + &ctx, + &["a.jpg".into(), "b.jpg".into(), "c.jpg".into()], + ) + .unwrap(); + assert_eq!(grouped.get("a.jpg").map(|v| v.len()), Some(1)); + assert_eq!(grouped.get("b.jpg").map(|v| v.len()), Some(2)); + assert!( + !grouped.contains_key("c.jpg"), + "untagged paths must be absent so caller's missing-key=[] contract holds" + ); + } + #[actix_rt::test] async fn add_new_tag_test() { let tag_dao = TestTagDao::new();