auto-tag: Apollo tag client + probe binary

Adds ai::tag_client mirroring face_client for Apollo's RAM++ endpoint (APOLLO_TAG_API_BASE_URL falling back to APOLLO_API_BASE_URL), and a throwaway probe_auto_tags binary that walks image_exif and prints tags without writing the DB. Lets us eyeball RAM++ output quality + threshold before committing to a schema and per-tick drain. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Merge pull request 'feature/library-patch-endpoint' (#94) from feature/library-patch-endpoint into master
2026-05-13 20:01:55 -04:00 · 2026-05-13 13:44:36 +00:00 · 2026-05-13 09:23:51 -04:00 · 2026-05-13 09:02:29 -04:00 · 2026-05-13 08:58:04 -04:00 · 2026-05-13 08:47:35 -04:00
122 changed files with 34233 additions and 4837 deletions
@@ -0,0 +1,3 @@
+[target.x86_64-unknown-linux-gnu]
+linker = "/usr/bin/gcc"
+rustflags = ["-C", "link-arg=-fuse-ld=mold"]
@@ -0,0 +1,85 @@
+# ImageApi configuration template. Copy to `.env` and fill in for your
+# deploy. Comments mirror the canonical docs in CLAUDE.md — see there
+# for the full picture (especially the AI-Insights / Apollo / face
+# integration sections).
+
+# ── Required ────────────────────────────────────────────────────────────
+DATABASE_URL=./database.db
+BASE_PATH=/path/to/media
+THUMBNAILS=/path/to/thumbnails
+VIDEO_PATH=/path/to/video/hls
+GIFS_DIRECTORY=/path/to/gifs
+PREVIEW_CLIPS_DIRECTORY=/path/to/preview-clips
+BIND_URL=0.0.0.0:8080
+CORS_ALLOWED_ORIGINS=http://localhost:3000
+SECRET_KEY=replace-me-with-a-long-random-secret
+RUST_LOG=info
+
+# ── File watching ───────────────────────────────────────────────────────
+# Quick scan = recently-modified-files only; full scan = comprehensive walk.
+WATCH_QUICK_INTERVAL_SECONDS=60
+WATCH_FULL_INTERVAL_SECONDS=3600
+# Comma-separated path prefixes / component names to skip in /memories
+# AND in face detection (e.g. @eaDir, .thumbnails, /private).
+EXCLUDED_DIRS=
+
+# ── Video / HLS ─────────────────────────────────────────────────────────
+HLS_CONCURRENCY=2
+HLS_TIMEOUT_SECONDS=900
+PLAYLIST_CLEANUP_INTERVAL_SECONDS=86400
+
+# ── Telemetry (release builds only) ─────────────────────────────────────
+# OTLP_OTLS_ENDPOINT=http://localhost:4317
+
+# ── AI Insights — Ollama (local LLM) ────────────────────────────────────
+OLLAMA_PRIMARY_URL=http://localhost:11434
+OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b
+# Optional fallback server tried on connection failure.
+# OLLAMA_FALLBACK_URL=http://server:11434
+# OLLAMA_FALLBACK_MODEL=llama3.2:3b
+OLLAMA_REQUEST_TIMEOUT_SECONDS=120
+# Cap on tool-calling iterations per chat turn / agentic insight.
+AGENTIC_MAX_ITERATIONS=6
+AGENTIC_CHAT_MAX_ITERATIONS=6
+
+# ── AI Insights — OpenRouter (hybrid backend, optional) ─────────────────
+# Set OPENROUTER_API_KEY to enable the hybrid backend (vision stays
+# local on Ollama, chat routes to OpenRouter).
+# OPENROUTER_API_KEY=sk-or-...
+# OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4
+# OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash
+# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
+# OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small
+# OPENROUTER_HTTP_REFERER=https://your-site.example
+# OPENROUTER_APP_TITLE=ImageApi
+
+# ── AI Insights — sibling services (optional) ───────────────────────────
+# Apollo (places + face inference). Single Apollo deploys typically set
+# only APOLLO_API_BASE_URL and let the face client fall back to it.
+# APOLLO_API_BASE_URL=http://apollo.lan:8000
+# APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000
+# SMS_API_URL=http://localhost:8000
+# SMS_API_TOKEN=
+
+# Display name used in agentic prompts when the LLM refers to "you".
+USER_NAME=
+
+# ── Face detection (Phase 3+) ───────────────────────────────────────────
+# Cosine-sim floor for auto-binding a detected face to an existing
+# same-named person on detection. 0.4 ≈ moderate-confidence match.
+FACE_AUTOBIND_MIN_COS=0.4
+# Per-scan-tick fan-out into Apollo's detect endpoint. Apollo's GPU
+# pool serializes server-side; this just overlaps file-IO with
+# inference RTT.
+FACE_DETECT_CONCURRENCY=8
+# Per-detect HTTP timeout. CPU-only Apollo deploys may need higher.
+FACE_DETECT_TIMEOUT_SEC=60
+# Per-tick caps on the two backlog drains (independent of WATCH_*
+# quick / full scans). Tune up if you have a large unscanned backlog
+# and want it to clear faster; tune down if Apollo is overloaded.
+FACE_BACKLOG_MAX_PER_TICK=64
+FACE_HASH_BACKFILL_MAX_PER_TICK=2000
+
+# ── RAG / search ────────────────────────────────────────────────────────
+# Set to `1` to enable cross-encoder reranking on /search results.
+SEARCH_RAG_RERANK=0
@@ -1,12 +1,19 @@
 /target
 database/target
 *.db
+*.db.bak
+*.db-shm
+*.db-wal
 .env
 /tmp
+/docs
+/specs

 # Default ignored files
 .idea/shelf/
 .idea/workspace.xml
+.idea/inspectionProfiles/
+.idea/markdown.xml
 # Datasource local storage ignored files
 .idea/dataSources*
 .idea/dataSources.local.xml
@@ -69,9 +69,6 @@ cargo fix
 ```bash
 # Two-phase cleanup: resolve missing files and validate file types
 cargo run --bin cleanup_files -- --base-path /path/to/media --database-url ./database.db
-
-# Batch extract EXIF for existing files
-cargo run --bin migrate_exif
 ```

 ## Architecture Overview
@@ -79,7 +76,10 @@ cargo run --bin migrate_exif
 ### Core Components

 **Layered Architecture:**
- **HTTP Layer** (`main.rs`): Route handlers for images, videos, metadata, tags, favorites, memories
+- **Startup wiring** (`main.rs`): only ~350 lines — env load, migrations, AppState, route registration, server bind. Background jobs are kicked off here but defined elsewhere.
+- **HTTP Layer** (`handlers/{image,video,favorites}.rs`, `files.rs`, `tags.rs`, `faces.rs`, `memories.rs`, `ai/handlers.rs`): the route handlers, grouped by domain.
+- **Background loops** (`watcher.rs`): the file-watcher tick (`watch_files`, `process_new_files`) and the orphaned-playlist cleanup (`cleanup_orphaned_playlists`). Per-tick drains are factored into `backfill.rs` (`backfill_unhashed_backlog`, `backfill_missing_date_taken`, `backfill_missing_content_hashes`, `process_face_backlog`, `build_face_candidates`).
+- **Thumbnails** (`thumbnails.rs`): generation pipeline + the `IMAGE_GAUGE` / `VIDEO_GAUGE` Prometheus metrics.
 - **Auth Layer** (`auth.rs`): JWT token validation, Claims extraction via FromRequest trait
 - **Service Layer** (`files.rs`, `exif.rs`, `memories.rs`): Business logic for file operations and EXIF extraction
 - **DAO Layer** (`database/mod.rs`): Trait-based data access (ExifDao, UserDao, FavoriteDao, TagDao)
@@ -107,6 +107,242 @@ All database access goes through trait-based DAOs (e.g., `ExifDao`, `SqliteExifD
 - `query_by_exif()`: Complex filtering by camera, GPS bounds, date ranges
 - Batch operations minimize DB hits during file watching

+### Multi-library data model
+
+ImageApi supports more than one library (a library = a `(name, root_path)`
+row in the `libraries` table that maps to a mounted directory tree). The
+same bytes may exist under more than one library — typical case is an
+"active" library plus an "archive" library that ingests files as they age
+out — and the data model is designed so that both derived data and
+user-managed data follow the **bytes**, not the path.
+
+**The principle.** A photo's identity is its `content_hash` (blake3, see
+`src/content_hash.rs`). Anything we compute from or attach to a photo is
+keyed on that hash so it survives:
+- the same file appearing in a second library (backup / archive / mirror),
+- the file moving between libraries (recent → archive handoff),
+- the file moving within a library (re-organized rel_path),
+- intra-library duplicates (same bytes at two paths).
+
+**Table classification.** Three categories drive the keying decision:
+
+| Category | Key | Rationale | Tables |
+|---|---|---|---|
+| Intrinsic to bytes | `content_hash` | Rerunning is wasted work (or LLM cost) | `face_detections` ✓, `image_exif` (target), `photo_insights` (target), `video_preview_clips` (target) |
+| User intent about a photo | `content_hash` | "Tag this photo" means the bytes, not a path | `tagged_photo` (target), `favorites` (target) |
+| Library administrative | `(library_id, rel_path)` | Tied to a specific filesystem location | `libraries`, `entity_photo_links`, the `rel_path` back-ref columns on hash-keyed tables |
+
+✓ = already implemented this way. *(target)* = today still keyed on
+`(library_id, rel_path)` and slated for migration. The migration adds a
+nullable `content_hash` column and populates it from `image_exif` where
+known; read paths fall back to rel_path while the hash is null.
+
+**Carrying a `rel_path` even when hash-keyed.** Hash-keyed tables retain
+`(library_id, rel_path)` columns as a denormalized **back-reference**, not
+as the key. This lets a single query answer "what is at this path right
+now" without joining through `image_exif`, and supports the path-only
+endpoints that predate the hash. `face_detections` is the reference
+implementation: hash is the truth, path is a hint.
+
+**Merge semantics on read.** When the same hash has rows under more than
+one library:
+- Set-valued data (tags, favorites, faces, entity links) → **union**.
+- Scalar data (current insight, EXIF row, video preview clip) → earliest
+  `generated_at` / `created_time` wins. The historical lib1 row beats a
+  re-generated lib2 row, so the user's curated insight isn't shadowed by
+  a re-run on archive ingest.
+
+**Write attribution.** A new tag/favorite/insight created while viewing
+under lib2 binds to the bytes, not to lib2 — so it shows up under lib1
+too. This is by design, but it's the most surprising rule on first
+encounter; clients should not assume tags are library-scoped.
+
+**Hash-less rows (transitional state).** During and immediately after a
+new mount, `image_exif.content_hash` is being populated by
+`backfill_unhashed_backlog` (capped per tick). Rules during this window:
+- Writes: if the hash is known, write hash-keyed. If not, write
+  `(library_id, rel_path)`-keyed and let the reconciliation job collapse
+  duplicates once the hash lands.
+- Reads: prefer hash key, fall back to `(library_id, rel_path)`.
+- Reconciliation: a single pass after every backfill tick collapses
+  rows that now share a hash, applying the merge semantics above.
+  Idempotent — safe to re-run.
+
+**Library handoff (recent → archive).** When a file moves between
+libraries (e.g. operator moves `~/photos/2024/IMG.nef` to the archive
+mount), the file watcher sees the disappearance under lib1 and the
+appearance under lib2. Hash-keyed rows don't need migration; the
+`(library_id, rel_path)` back-ref columns are updated to point to the new
+location. Library administrative rows (`entity_photo_links`,
+`(library_id, rel_path)` rows in `image_exif` for hash-less items) are
+re-keyed by the move detector, which matches a disappearance to an
+appearance by `content_hash` within a configurable window.
+
+**Orphans (source deleted while a copy survives).** When an
+`image_exif` row for a hash is deleted (file removed from disk), the
+hash-keyed derived rows survive **as long as another `image_exif` row
+references the same hash**. If the last reference is gone, derived rows
+are eligible for GC (deferred — the GC job runs on a slow schedule so
+that a brief unmount or rename doesn't wipe history).
+
+**Stats and counts.** When reporting "how many photos do you have," count
+`DISTINCT content_hash` over `image_exif`, not row count. Faces stats
+already does this (`FaceDao::stats` in `src/faces.rs`); other counters
+should follow suit. Numerator and denominator must live in the same
+domain — see the face-stats commentary below for the cautionary tale.
+
+**Per-library scoping when the user asks for it.** A request scoped to
+`?library=N` filters the `image_exif` view to that library, and the
+hash-keyed derived data is joined through that view. The user sees only
+photos that have a copy under lib N, but the derived data attached to
+those photos is the merged hash-keyed view. This is the answer to "show
+me archive photos with their original tags."
+
+**Operator kill switch (`libraries.enabled`).** Setting `enabled=0` on a
+library is a hard pause: the watcher skips it entirely — before the
+probe, before ingest, before any maintenance pass — and the orphan-GC
+all-online consensus check filters disabled libraries out (they don't
+keep the GC window closed). Reads / serving are unaffected; nothing
+prevents `/image?path=...` from resolving against a disabled library's
+root if the file is on disk. The existing `image_exif` rows for a
+disabled library are **not deleted** — they continue to anchor
+hash-keyed derived data, so cross-library duplicates survive the
+disable. Toggle via SQL; there is intentionally no HTTP endpoint for
+library mutation (single-user tool, no role / permission story).
+Typical workflows: stage a new mount with `enabled=0` then flip to `1`;
+quiet a flaky NAS during maintenance without disturbing the rest of
+the system.
+
+**Per-library excludes (`libraries.excluded_dirs`).** A
+comma-separated column, same shape as the global `EXCLUDED_DIRS` env
+var, that's applied **in union** with the env-var globals when a
+walker scans this library. Use case: mount a parent directory as a
+new library while a sibling library covers a child subtree, and
+exclude that child subtree from the parent so the two libraries
+don't double-walk and double-write `image_exif`. Two entry forms
+(parsed by `memories::PathExcluder`):
+- `/sub/path` — leading slash flags it as a path under the library
+  root. Joins to root + matches by `path.starts_with(...)`. Works
+  at any depth (`/photos`, `/media/2024/raw`).
+- `name` — no leading slash flags it as a component name to skip
+  anywhere in the tree (`@eaDir`, `.thumbnails`). Single segment
+  only — `media/photos/a` without a leading slash never matches
+  anything.
+
+Hash-keyed derived data (faces, tags, insights) is unaffected either
+way — those follow the bytes — but `image_exif` row count, walker CPU,
+and thumbnail disk usage all drop to 1× instead of 2× for the overlap.
+Affects: file-watch ingest (`process_new_files`), thumbnail generation,
+media-count gauges, the orphaned-playlist cleanup walk, and the
+`/memories` endpoint. The face-detection backlog drain inherits via
+`face_watch::filter_excluded`. NULL = no extras (only the global env var applies).
+
+**Library availability and safety.** Libraries can be on network shares
+or removable media; the file watcher must not interpret a temporary
+unavailability as a mass-deletion event. Every tick begins with a
+**presence probe** per library: the library is considered online iff
+its `root_path` exists, is readable, and a top-level scan returns at
+least one expected entry (or matches a recent file-count high-water
+mark within a tolerance). The probe result gates which actions are safe
+to run on that library this tick:
+
+| Action | Requires online? |
+|---|---|
+| Quick / full scan ingest of new files | yes |
+| EXIF / face / insight backlog drains | yes — but the work runs against any online library |
+| Move-handoff detection (lib1 disappearance ↔ lib2 appearance match) | **both** libraries online |
+| `(library_id, rel_path)` re-keying on detected move | **both** libraries online |
+| Orphan GC of hash-keyed derived data | all libraries that have *ever* held the hash must be online and confirmed-clean for two consecutive ticks |
+| Reads / serving | always allowed; falls back to whichever library is online |
+
+A library that fails the probe enters a "stale" state: writes scoped to
+it are paused, its rows are flagged stale (not deleted) in
+`/libraries` status, and the watcher logs at `warn` once per
+state-transition (not per tick). A library that recovers re-enters the
+online set automatically; no operator action required for transient
+outages. The intent is that pulling a USB drive, rebooting a NAS, or
+losing a VPN never triggers a destructive code path — the worst case is
+that derived-data work pauses until the share returns.
+
+The same rule constrains the move-handoff matcher: a disappearance
+under lib1 only counts as a "move" if there is a matching appearance
+under another **online** library within the window. A bare
+disappearance with no matching appearance is treated as
+"unavailable-or-deleted, defer judgment" — it does not re-key any rows
+and does not enqueue GC.
+
+**Maintenance pipeline (`src/library_maintenance.rs`).** The watcher
+runs three maintenance passes per tick that together implement the
+move/handoff and orphan rules:
+
+1. **Missing-file scan** — per online library, paginated. A page of
+   `image_exif` rows is loaded (`IMAGE_EXIF_MISSING_SCAN_PAGE_SIZE`,
+   default 500), each row's `(root_path/rel_path)` is `stat()`-ed,
+   and confirmed-not-found rows are deleted from `image_exif`
+   (capped at `IMAGE_EXIF_MISSING_DELETE_CAP_PER_TICK`, default 200).
+   Permission/IO errors are skipped, never deleted — only `NotFound`
+   triggers a deletion. The cursor wraps every time a partial page
+   comes back, so the whole library is swept across consecutive ticks.
+   Skipped wholesale for Stale libraries via the per-library probe
+   gate at the top of the loop iteration.
+
+2. **Back-ref refresh** — DB-only. For `face_detections`,
+   `tagged_photo`, and `photo_insights`: any hash-keyed row whose
+   `(library_id, rel_path)` no longer matches an `image_exif` row
+   *but whose `content_hash` does* is repointed at the surviving
+   `image_exif` location. Idempotent SQL; no health gate needed.
+   This is what makes the recent → archive handoff invisible to
+   read paths: when the missing-file scan retires the lib-A row,
+   tags/faces/insights pivot to lib-B's path before any user
+   notices.
+
+3. **Orphan GC** — destructive. Hash-keyed derived rows whose
+   `content_hash` no longer has any `image_exif` row are eligible.
+   Two-tick consensus: a hash must be observed orphaned on two
+   consecutive ticks AND every library must be online for both. A
+   single Stale tick within the window cancels all pending deletes.
+   The pending set is held in memory (`OrphanGcState`) — restart
+   resets it, which only delays a delete, never causes one. Tags,
+   faces, and insights for orphaned hashes are deleted in one batch
+   per tick.
+
+A backup library that briefly disappears, then returns within two
+ticks, never loses any derived data. A move from lib-A to lib-B
+without disappearance flips through pass 1 (lib-A row retired) and
+pass 2 (back-refs follow), with pass 3 noting nothing because the
+hash is still present in `image_exif` (lib-B's row).
+
+**Known gap: in-place content changes (future Branch D).** The
+maintenance pipeline assumes a `(library_id, rel_path)`'s bytes are
+stable for as long as the file exists at that path. If a user edits
+a file in place (crop, re-export) without renaming, the watcher's
+quick scan walks the file (mtime is recent) but `process_new_files`
+short-circuits because `(library_id, rel_path)` already has an
+`image_exif` row — no re-hash, no re-EXIF, no face redetection. The
+row's `content_hash` keeps pointing at the original bytes. Tags /
+faces / insights stay attached to the original hash and continue to
+display because the rel_path back-ref still resolves; new faces
+introduced by the edit are never detected.
+
+The right place to fix this is a **stale-content detection pass**
+that compares `image_exif.last_modified` / `size_bytes` to
+`fs::metadata` for rows the quick scan would otherwise skip. On
+mismatch, recompute the hash, update `image_exif`, and apply the
+"content branched" semantics:
+- **Faces** re-run (faces are fully derived from bytes).
+- **Tags** migrate to the new hash (user intent — "this photo is
+  vacation" survives a crop).
+- **Insights** migrate forward as a starting point and are flagged
+  for re-generation.
+- **Favorites** (when migrated to hash-keyed) follow the path / user intent.
+
+The interesting case is the operator who keeps an unedited copy in
+the archive library and edits the local copy: post-detection, the
+archive copy stays on the original hash, the local copy branches to
+the new hash, and the two histories cleanly split. Apollo's
+`derived.db` cache will need an invalidation hook for the changed
+hash — design it alongside Branch D.
+
 ### File Processing Pipeline

 **Thumbnail Generation:**
@@ -114,6 +350,15 @@ All database access goes through trait-based DAOs (e.g., `ExifDao`, `SqliteExifD
 2. Creates 200x200 thumbnails in THUMBNAILS directory (mirrors source structure)
 3. Videos: extracts frame at 3-second mark via ffmpeg
 4. Images: uses `image` crate for JPEG/PNG processing
+5. RAW formats (NEF/CR2/ARW/DNG/etc.): the `image` crate can't decode RAW
+   pixel data, so the pipeline pulls an embedded JPEG preview instead. Fast
+   path is `exif::read_jpeg_at_ifd` against IFD0 (PRIMARY) and IFD1
+   (THUMBNAIL) — covers most older bodies and DNGs. Slow-path fallback shells
+   out to **`exiftool`** for `PreviewImage` / `JpgFromRaw` / `OtherImage`,
+   which reaches MakerNote / SubIFD-hosted previews kamadak-exif can't see
+   (e.g. Nikon's `PreviewIFD`, where modern Nikon bodies store the full-res
+   review JPEG). All candidates are pooled and the largest valid JPEG wins.
+   See `src/exif.rs::extract_embedded_jpeg_preview`.

 **File Watching:**
 Runs in background thread with two-tier strategy:
@@ -122,6 +367,60 @@ Runs in background thread with two-tier strategy:
 - Batch queries EXIF DB to detect new files
 - Configurable via `WATCH_QUICK_INTERVAL_SECONDS` and `WATCH_FULL_INTERVAL_SECONDS`

+**Canonical date_taken pipeline (`src/date_resolver.rs`).** Every row's
+`image_exif.date_taken` is populated at ingest by a four-step waterfall;
+which step won is recorded in `image_exif.date_taken_source` so weak
+entries can be re-resolved out-of-band when better tools become
+available, and so the UI/debug surface can answer "why did this photo
+land on this date?". Order:
+
+1. **`exif`** — kamadak-exif `DateTime` / `DateTimeOriginal`. Fast,
+   in-process, image-only.
+2. **`exiftool`** — shell-out fallback for tags kamadak can't reach:
+   QuickTime/MP4 (`MediaCreateDate`, `TrackCreateDate`, `CreateDate`),
+   Apple's `ContentCreateDate`, MakerNote sub-IFDs. Required for
+   videos to land a real date. Single-file at ingest; the per-tick
+   drain feeds the whole batch through one `exiftool -@ -` subprocess.
+   Degrades silently when `exiftool` isn't on PATH (resolver caches the
+   "available" check via `OnceLock`).
+3. **`filename`** — `extract_date_from_filename` in `memories.rs`
+   matches screenshot, chat-export, and timestamp-named patterns.
+4. **`fs_time`** — `earliest_fs_time(metadata)` (earlier of created /
+   modified). Last resort.
+
+Notable behavior change vs. the pre-2026-05 request-time logic:
+**EXIF beats filename when both are present.** A photo named
+`Screenshot_2014-06-01.png` whose EXIF `DateTime` is 2021 now appears
+under 2021, not 2014 — on the theory that EXIF is more reliable than
+import-named filenames. The reverse case (no EXIF, filename has a
+date) is unchanged.
+
+The `backfill_missing_date_taken` drain (`src/backfill.rs`) runs every
+watcher tick alongside `backfill_unhashed_backlog` (also `src/backfill.rs`). It loads up to
+`DATE_BACKFILL_MAX_PER_TICK` rows (default 500) where
+`date_taken IS NULL` (backed by the `idx_image_exif_date_backfill`
+partial index), runs the waterfall batch via `resolve_dates_batch`,
+and writes results via the `backfill_date_taken` DAO method (touches
+only `date_taken` + `date_taken_source` so EXIF / hash / perceptual
+columns are preserved). Resolved rows — including the ones the
+waterfall could only resolve via `fs_time` — are not re-eligible:
+the resolver is deterministic on file bytes + filename + fs metadata,
+so re-running on the same inputs lands on the same source every time.
+An earlier version included `date_taken_source = 'fs_time'` in the
+eligibility predicate, but with `ORDER BY id ASC LIMIT 500` it spun on
+the same lowest-id rows in perpetuity and held the SQLite write lock
+long enough to starve face-PATCH writers (5s busy_timeout → 500). If
+a stronger tool comes online (exiftool install, new filename regex),
+re-resolve out-of-band rather than re-introducing the steady-state
+eligibility.
+
+`/memories` is a single SQL query against this column
+(`get_memories_in_window` in `src/database/mod.rs`), using
+`strftime('%m-%d' | '%W' | '%m', date_taken, 'unixepoch', tz)` for
+calendar matching with the client's timezone offset. The pre-rewrite
+version stat'd every row and walked the entire library tree — at
+~14k photos this took 10–15 s; the rewrite is single-digit ms.
+
 **EXIF Extraction:**
 - Uses `kamadak-exif` crate
 - Supports: JPEG, TIFF, RAW (NEF, CR2, CR3), HEIF/HEIC, PNG, WebP
@@ -169,6 +468,20 @@ POST   /image/tags/batch (bulk tag updates)

 // Memories (week-based grouping)
 GET /memories?path=...&recursive=true
+
+// AI Insights
+POST /insights/generate              (non-agentic single-shot)
+POST /insights/generate/agentic      (tool-calling loop; body: { file_path, backend?, model?, ... })
+GET  /insights?path=...&library=...
+GET  /insights/models                (local Ollama models + capabilities)
+GET  /insights/openrouter/models     (curated OpenRouter allowlist)
+POST /insights/rate                  (thumbs up/down for training data)
+
+// Insight Chat Continuation
+POST /insights/chat                  (single-turn reply, non-streaming)
+POST /insights/chat/stream           (SSE: text / tool_call / tool_result / truncated / done)
+GET  /insights/chat/history?path=... (rendered transcript with tool invocations)
+POST /insights/chat/rewind           (truncate transcript at a rendered index)
 ```

 **Request Types:**
@@ -190,7 +503,38 @@ Centralized in `file_types.rs` with constants `IMAGE_EXTENSIONS` and `VIDEO_EXTE
 All database operations and HTTP handlers wrapped in spans. In release builds, exports to OTLP endpoint via `OTLP_OTLS_ENDPOINT`. Debug builds use basic logger.

 **Memory Exclusion:**
-`PathExcluder` in `memories.rs` filters out directories from memories API via `EXCLUDED_DIRS` environment variable (comma-separated paths or substring patterns).
+`PathExcluder` in `memories.rs` filters out directories from the memories API via the `EXCLUDED_DIRS` environment variable (comma-separated entries: `/`-prefixed paths under the library root, or bare component names). The same excluder is applied to face-detection candidates (`face_watch::filter_excluded`) so junk directories like `@eaDir` / `.thumbnails` don't burn detect calls on Apollo.
+
+### Face detection system
+
+ImageApi owns the face data; Apollo (sibling repo) hosts the insightface inference service. Inference is triggered automatically by the file watcher and persisted into two tables:
+
+- `persons(id, name UNIQUE COLLATE NOCASE, cover_face_id, entity_id, created_from_tag, notes, ...)` — operator-managed, name is the user-visible identity.
+- `face_detections(id, library_id, content_hash, rel_path, bbox_*, embedding BLOB, confidence, source, person_id, status, model_version, ...)` — keyed on `content_hash` so a photo duplicated across libraries is detected once. Marker rows for `status IN ('no_faces','failed')` carry NULL bbox/embedding (CHECK constraint enforces this).
+
+**Why content_hash and not (library_id, rel_path):** ties face data to the bytes, not the path. A backup mount that copies files from the primary library naturally inherits the existing detections without re-running inference. This is the reference implementation of the multi-library data model — see "Multi-library data model" above.
+
+**File-watch hook** (`src/watcher.rs::process_new_files`): for each photo with a populated `content_hash`, check `FaceDao::already_scanned(hash)`; if not, send bytes (or embedded JPEG preview for RAW via `exif::extract_embedded_jpeg_preview`) to Apollo's `/api/internal/faces/detect`. K=`FACE_DETECT_CONCURRENCY` (default 8) parallel calls per scan tick; Apollo serializes them via its single-worker GPU pool. `face_watch.rs` is the Tokio orchestration layer.
+
+**Per-tick backlog drain** (`src/backfill.rs`): two passes that run on every watcher tick regardless of quick-vs-full scan:
+- `backfill_unhashed_backlog` — populates `image_exif.content_hash` for photos that arrived before the hash field was retroactive. Capped by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000); errors don't burn the cap.
+- `process_face_backlog` — runs detection on photos that have a hash but no `face_detections` row. Capped by `FACE_BACKLOG_MAX_PER_TICK` (default 64). Selected via a SQL anti-join (`FaceDao::list_unscanned_candidates`); videos and EXCLUDED_DIRS paths filtered out client-side via `face_watch::filter_excluded` so they never reach Apollo.
+
+**Auto-bind on detection:** when a photo carries a tag whose name matches a `persons.name` (case-insensitive), the new face binds automatically iff cosine similarity to the person's existing-face mean is ≥ `FACE_AUTOBIND_MIN_COS` (default 0.4). Persons with no existing faces bind unconditionally and the new face becomes the cover.
+
+**Manual face create** (`POST /image/faces`): crops the image to the user-supplied bbox, applies EXIF orientation via `exif::apply_orientation` (the `image` crate hands raw pre-rotation pixels — without this, manually-drawn bboxes never resolved a face on re-detection), pads to ~50% of bbox dims (RetinaFace anchor scales need ~50% face-fill at det_size=640), then calls Apollo's embed endpoint. A `force` flag lets the operator save a face the detector couldn't see (e.g. profile shots, occluded faces) — the row gets a zero-vector embedding so it's manually-bound only and won't participate in clustering.
+
+**Rerun preserves manual rows** (`POST /image/faces/{id}/rerun`): only `source='auto'` rows are deleted before re-running detection. `already_scanned` returns true on ANY row, so a photo whose only faces are manually drawn never auto-redetects.
+
+**Stats domain — content_hash, not file rows** (`FaceDao::stats` in `src/faces.rs`): `total_photos` counts `DISTINCT content_hash` over `image_exif` (filtered to image extensions, `content_hash IS NOT NULL`), and so do `scanned` / `with_faces` / `no_faces` / `failed` over `face_detections`. Numerator and denominator must live in the same domain — `face_detections` is keyed on content_hash, so the same JPEG present at two rel_paths or in two libraries scans once. Counting `image_exif` rows in the denominator inflated total by one per duplicate file and produced a permanent gap (e.g. 1101/1103 with nothing actually pending). Hash-less rows are excluded from total_photos while they sit in the `backfill_unhashed_backlog` queue; otherwise the bar pins below 100% for the duration of that backfill even though those rows aren't pending detection yet — they're pending hashing.
+
+Module map:
+- `src/faces.rs` — `FaceDao` trait + `SqliteFaceDao` impl, route handlers for `/faces/*`, `/image/faces/*`, `/persons/*`. Mirror of `tags.rs` layout.
+- `src/face_watch.rs` — Tokio orchestration for the file-watch detect pass; `filter_excluded` (PathExcluder + image-extension filter), `read_image_bytes_for_detect` (RAW preview fallback).
+- `src/backfill.rs` — per-tick drains (unhashed-hash, date_taken, face-backlog, etc.) called from `watcher::watch_files` and `watcher::process_new_files`.
+- `src/watcher.rs` — the watcher loop itself and `process_new_files` (file walk → EXIF write → face-candidate build).
+- `src/ai/face_client.rs` — HTTP client for Apollo's inference. Configured by `APOLLO_FACE_API_BASE_URL`, falls back to `APOLLO_API_BASE_URL`. Both unset → feature disabled, file-watch hook is a no-op.
+- `migrations/2026-04-29-000000_add_faces/` — schema.

 ### Startup Sequence

@@ -249,6 +593,7 @@ Optional:
 ```bash
 WATCH_QUICK_INTERVAL_SECONDS=60        # Quick scan interval
 WATCH_FULL_INTERVAL_SECONDS=3600       # Full scan interval
+DATE_BACKFILL_MAX_PER_TICK=500         # Cap on canonical-date drain per watcher tick
 OTLP_OTLS_ENDPOINT=http://...          # OpenTelemetry collector (release builds)

 # AI Insights Configuration
@@ -256,8 +601,38 @@ OLLAMA_PRIMARY_URL=http://desktop:11434        # Primary Ollama server (e.g., de
 OLLAMA_FALLBACK_URL=http://server:11434        # Fallback Ollama server (optional, always-on)
 OLLAMA_PRIMARY_MODEL=nemotron-3-nano:30b       # Model for primary server (default: nemotron-3-nano:30b)
 OLLAMA_FALLBACK_MODEL=llama3.2:3b              # Model for fallback server (optional, uses primary if not set)
+OLLAMA_REQUEST_TIMEOUT_SECONDS=120             # Per-request generation timeout (default 120). Increase for slow CPU-offloaded models.
 SMS_API_URL=http://localhost:8000              # SMS message API endpoint (default: localhost:8000)
 SMS_API_TOKEN=your-api-token                   # SMS API authentication token (optional)
+
+# Apollo Places integration (optional). When set, photo-insight enrichment
+# folds the user's personal place name (Home, Work, Cabin, ...) into the
+# location string fed to the LLM, and the agentic loop gains a
+# `get_personal_place_at` tool. Unset = legacy Nominatim-only path.
+APOLLO_API_BASE_URL=http://apollo.lan:8000     # Base URL of the sibling Apollo backend
+
+# Face inference (optional). Apollo also hosts the insightface inference
+# service; ImageApi calls it from the file-watch hook (Phase 3) and from
+# the manual face-create endpoint. Falls back to APOLLO_API_BASE_URL when
+# unset (typical single-Apollo deploy). Both unset = feature disabled.
+APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000 # Override if face service runs separately
+FACE_AUTOBIND_MIN_COS=0.4                       # Phase 3: cosine-sim floor for tag-name auto-bind
+FACE_DETECT_CONCURRENCY=8                       # Phase 3: per-scan-tick parallel detect calls
+FACE_DETECT_TIMEOUT_SEC=60                      # reqwest client timeout (CPU inference can be slow)
+
+# OpenRouter (Hybrid Backend) - keeps embeddings + vision local, routes chat to OpenRouter
+OPENROUTER_API_KEY=sk-or-...                   # Required to enable hybrid backend
+OPENROUTER_DEFAULT_MODEL=anthropic/claude-sonnet-4   # Used when client doesn't pick a model
+OPENROUTER_ALLOWED_MODELS=openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash
+                                                # Curated allowlist exposed to clients via
+                                                # GET /insights/openrouter/models. Empty = no picker.
+OPENROUTER_BASE_URL=https://openrouter.ai/api/v1     # Override base URL (optional)
+OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small  # Optional, embeddings stay local today
+OPENROUTER_HTTP_REFERER=https://your-site.example    # Optional attribution header
+OPENROUTER_APP_TITLE=ImageApi                  # Optional attribution header
+
+# Insight Chat Continuation
+AGENTIC_CHAT_MAX_ITERATIONS=6                  # Cap on tool-calling iterations per chat turn (default 6)
 ```

 **AI Insights Fallback Behavior:**
@@ -275,8 +650,101 @@ The `OllamaClient` provides methods to query available models:

 This allows runtime verification of model availability before generating insights.

+**Hybrid Backend (OpenRouter):**
+- Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`.
+- Local Ollama still describes the image (vision); the description is inlined
+  into the chat prompt and the agentic loop runs on OpenRouter.
+- `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that
+  call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`.
+- No live capability precheck — the operator-curated allowlist is trusted.
+  A bad model id surfaces as a chat-call error.
+- `GET /insights/openrouter/models` returns `{ models, default_model, configured }`
+  for client picker UIs.
+
+**Insight Chat Continuation:**
+
+After an agentic insight is generated, the full `Vec<ChatMessage>` transcript is
+stored in `photo_insights.training_messages` and can be continued via the
+chat endpoints. The `PhotoInsightResponse.has_training_messages` flag tells
+clients whether chat is available for a given insight.
+
+- `POST /insights/chat` runs one turn of the agentic loop against the replayed
+  history. Body: `{ file_path, library?, user_message, model?, backend?, num_ctx?,
+  temperature?, top_p?, top_k?, min_p?, max_iterations?, system_prompt?, amend? }`.
+  `system_prompt` is a per-turn override: in append mode (default) it's applied
+  ephemerally — the original system message is restored before persistence so
+  the stored transcript keeps its baked persona. In amend mode the override
+  stays in place and becomes the new insight row's system message. Mirrors the
+  internal `annotate_system_with_budget` swap-and-restore pattern.
+- `POST /insights/chat/stream` is the SSE variant — same request body, response
+  is `text/event-stream` with events: `iteration_start`, `text` (delta), `tool_call`,
+  `tool_result`, `truncated`, `done`, plus a server-emitted `error_message` on
+  failure. Preferred by the mobile client for live tool-chip updates.
+- `GET /insights/chat/history?path=...&library=...` returns the rendered
+  transcript. Each assistant message carries a `tools: [{name, arguments, result,
+  result_truncated?}]` array with the tool invocations that led up to it. Tool
+  results over 2000 chars are truncated with `result_truncated: true`.
+- `POST /insights/chat/rewind` truncates the transcript at a given rendered
+  index (drops that message + any tool-call scaffolding that preceded it + all
+  later turns). Index 0 is protected. Used for "try again from here" flows.
+
+Backend routing rules (matches agentic-insight generation):
+- Stored `backend` on the insight row is authoritative by default.
+- `request.backend` may override per-turn. `local -> hybrid` is rejected in
+  v1 (would require on-the-fly visual-description rewrite); `hybrid -> local`
+  replays verbatim since the description is already inlined as text.
+- `request.model` overrides the chat model (an Ollama id in local mode, an
+  OpenRouter id in hybrid mode).
+
+Persistence:
+- Append mode (default): re-serialize the full history and `UPDATE` the same
+  row's `training_messages`.
+- Amend mode (`amend: true`): regenerate the title, insert a new insight row
+  via `store_insight` (auto-flips prior rows' `is_current=false`). Response
+  surfaces the new row's id as `amended_insight_id`.
+
+Per-`(library_id, file_path)` async mutex (`AppState.insight_chat.chat_locks`)
+serialises concurrent turns on the same insight so the JSON blob doesn't race.
+
+Context management is a soft bound: if the serialized history exceeds
+`num_ctx - 2048` tokens (cheap 4-byte/token heuristic), the oldest
+assistant-tool_call + tool_result pairs are dropped until under budget. The
+initial user message (with any images) and system prompt are always preserved.
+The `truncated` event / flag is surfaced to the client when a drop occurred.
+
+Configurable env:
+- `AGENTIC_CHAT_MAX_ITERATIONS` — cap on tool-calling iterations per turn
+  (default 6). Per-request `max_iterations` is clamped to this cap.
+
+**Apollo Places integration (optional):**
+
+The sibling Apollo project (personal location-history viewer) owns
+user-defined Places: `name + lat/lon + radius_m + description (+ optional
+category)`. When `APOLLO_API_BASE_URL` is set, ImageApi queries
+`/api/places/contains?lat=&lon=` to enrich the LLM prompt's location
+string. See `src/ai/apollo_client.rs` and `src/ai/insight_generator.rs`:
+
+- **Auto-enrichment** (always on when configured): the per-photo location
+  resolver folds the most-specific containing Place ("Home — near
+  Cambridge, MA" or "Home (My house in Cambridge) — near Cambridge, MA"
+  when a description is set) into the location field of `combine_contexts`.
+  Smallest-radius wins — Apollo sorts server-side, this code takes `[0]`.
+- **Agentic tool** `get_personal_place_at(latitude, longitude)`: registered
+  alongside `reverse_geocode` only when `apollo_enabled()` returns true.
+  Returns "- Name [category]: description (radius N m)" lines, smallest
+  radius first. The tool is **deliberately narrow** — no enumerate-all
+  variant; auto-enrichment covers the photo-context path and the agentic
+  tool covers ad-hoc lat/lon questions in chat continuation.
+
+Failure modes degrade silently to the legacy Nominatim path: 5 s timeout,
+errors logged at `warn`, empty results returned. Apollo's routes are
+unauthenticated (single-user, LAN-trust); add JWT auth here + on Apollo's
+side if exposing beyond a trusted network.
+
 ## Dependencies of Note

+### Rust crates
+
 - **actix-web**: HTTP framework
 - **diesel**: ORM for SQLite
 - **jsonwebtoken**: JWT implementation
@@ -287,3 +755,18 @@ This allows runtime verification of model availability before generating insight
 - **opentelemetry**: Distributed tracing
 - **bcrypt**: Password hashing
 - **infer**: Magic number file type detection
+
+### External binaries (must be on `PATH`)
+
+- **`ffmpeg`** — video thumbnail extraction (`StreamActor`, HLS pipeline) and
+  the HEIF/HEIC/NEF/ARW thumbnail fallback in `generate_image_thumbnail_ffmpeg`.
+  Required for any deploy that holds video or HEIF files.
+- **`exiftool`** — optional but strongly recommended for RAW-heavy libraries.
+  The thumbnail pipeline shells out to it as the slow-path fallback for
+  embedded preview extraction (Nikon MakerNote `PreviewIFD`, Canon SubIFDs,
+  etc. — anything kamadak-exif's IFD0/IFD1 readers can't reach). Without
+  exiftool installed, RAWs whose preview lives outside IFD0/IFD1 will fall
+  through to ffmpeg, which often produces black thumbnails. Install via
+  package manager: `apt install libimage-exiftool-perl`,
+  `brew install exiftool`, `winget install OliverBetz.ExifTool`, or
+  `choco install exiftool`.
@@ -1,6 +1,6 @@
 [package]
 name = "image-api"
-version = "0.5.2"
+version = "1.1.0"
 authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
 edition = "2024"

@@ -9,6 +9,9 @@ edition = "2024"
 [profile.release]
 lto = "thin"

+[profile.dev]
+debug = "line-tables-only"
+
 [dependencies]
 actix = "0.13.1"
 actix-web = "4"
@@ -23,13 +26,13 @@ jsonwebtoken = "9.3.0"
 serde = "1"
 serde_json = "1"
 diesel = { version = "2.2.10", features = ["sqlite"] }
-libsqlite3-sys = { version = "0.35", features = ["bundled"] }
+libsqlite3-sys = "0.35"
 diesel_migrations = "2.2.0"
 chrono = "0.4"
 clap = { version = "4.5", features = ["derive"] }
 dotenv = "0.15"
 bcrypt = "0.17.1"
-image = { version = "0.25.5", default-features = false, features = ["jpeg", "png", "rayon"] }
+image = { version = "0.25.5", default-features = false, features = ["jpeg", "png", "rayon", "webp", "tiff", "avif"] }
 infer = "0.16"
 walkdir = "2.4.0"
 rayon = "1.5"
@@ -49,9 +52,22 @@ opentelemetry-appender-log = "0.31.0"
 tempfile = "3.20.0"
 regex = "1.11.1"
 exif = { package = "kamadak-exif", version = "0.6.1" }
-reqwest = { version = "0.12", features = ["json"] }
+reqwest = { version = "0.12", features = ["json", "stream", "multipart"] }
+async-stream = "0.3"
+tokio-util = { version = "0.7", features = ["io"] }
+bytes = "1"
 urlencoding = "2.1"
 zerocopy = "0.8"
 ical = "0.11"
 scraper = "0.20"
 base64 = "0.22"
+blake3 = "1.5"
+image_hasher = "3.0"
+bk-tree = "0.5"
+async-trait = "0.1"
+indicatif = "0.17"
+
+# Windows lacks system sqlite3, so re-enable the bundled C build there.
+# Linux/macOS use the system library (faster builds, smaller binary).
+[target.'cfg(windows)'.dependencies]
+libsqlite3-sys = { version = "0.35", features = ["bundled"] }
@@ -14,14 +14,60 @@ Upon first run it will generate thumbnails for all images and videos at `BASE_PA
 - **RAG-based Context Retrieval** - Semantic search over daily conversation summaries
 - **Automatic Daily Summaries** - LLM-generated summaries of daily conversations with embeddings

+## External Dependencies
+
+### ffmpeg (required)
+`ffmpeg` must be on `PATH`. It is used for:
+- **HLS video streaming** — transcoding/segmenting source videos into `.m3u8` + `.ts` playlists
+- **Video thumbnails** — extracting a frame at the 3-second mark
+- **Video preview clips** — short looping previews for the Video Wall
+- **HEIC / HEIF thumbnails** — decoding Apple's HEIC format (your ffmpeg build must include
+  `libheif`; most modern builds do)
+
+Builds used in development: the `gyan.dev` full build on Windows, and distro `ffmpeg`
+packages on Linux work fine. If HEIC thumbnails silently fail, check
+`ffmpeg -formats | grep heif` to confirm HEIF support.
+
+### RAW photo thumbnails
+RAW formats (ARW, NEF, CR2, CR3, DNG, RAF, ORF, RW2, PEF, SRW, TIFF) are thumbnailed
+by reading an embedded JPEG preview out of the TIFF container — no external RAW
+decoder (libraw / dcraw) is involved. The pipeline tries two layers in order and
+keeps the largest valid JPEG:
+
+1. **Fast path (no extra dependency)** — `kamadak-exif` reads
+   `JPEGInterchangeFormat` from IFD0 / IFD1 directly. Covers older bodies and
+   most DNGs.
+2. **`exiftool` fallback (recommended for RAW-heavy libraries)** — shells out
+   to extract `PreviewImage` / `JpgFromRaw` / `OtherImage`, which reaches
+   MakerNote and SubIFD-hosted previews kamadak-exif can't see (e.g. Nikon's
+   `PreviewIFD`, where modern Nikon bodies stash the full-res review JPEG).
+   If `exiftool` isn't on `PATH` this layer is skipped silently and only the
+   fast-path result is used.
+
+Install `exiftool` via your package manager:
+- macOS: `brew install exiftool`
+- Linux (Debian/Ubuntu): `apt install libimage-exiftool-perl`
+- Windows: `winget install OliverBetz.ExifTool` or `choco install exiftool`
+
+Files where neither layer produces a valid preview fall back to ffmpeg. Anything
+that still can't be decoded is marked with a `<thumb>.unsupported` sentinel in
+the thumbnail directory so we don't retry it every scan. Delete those sentinels
+(and any cached black thumbnails) to force retries after a tooling upgrade.
+
 ## Environment
 There are a handful of required environment variables to have the API run.
 They should be defined where the binary is located or above it in an `.env` file.
-You must have `ffmpeg` installed for streaming video and generating video thumbnails.

 - `DATABASE_URL` is a path or url to a database (currently only SQLite is tested)
 - `BASE_PATH` is the root from which you want to serve images and videos
- `THUMBNAILS` is a path where generated thumbnails should be stored
+- `THUMBNAILS` is a path where generated thumbnails should be stored. Thumbnails
+  mirror the source tree under `BASE_PATH` and keep the source's original
+  extension (e.g. `foo.arw` or `bar.mp4`), though the file contents are always
+  JPEG bytes — browsers content-sniff. Files that can't be thumbnailed by the
+  `image` crate, ffmpeg, or an embedded RAW preview get a zero-byte
+  `<thumb_path>.unsupported` sentinel in this directory so subsequent scans
+  skip them. Delete the `*.unsupported` files to force retries (for example
+  after upgrading ffmpeg or adding libheif).
 - `VIDEO_PATH` is a path where HLS playlists and video parts should be stored
 - `GIFS_DIRECTORY` is a path where generated video GIF thumbnails should be stored
 - `BIND_URL` is the url and port to bind to (typically your own IP address)
@@ -50,6 +96,29 @@ The following environment variables configure AI-powered photo insights and dail
 - `OLLAMA_URL` - Used if `OLLAMA_PRIMARY_URL` not set
 - `OLLAMA_MODEL` - Used if `OLLAMA_PRIMARY_MODEL` not set

+#### OpenRouter Configuration (Hybrid Backend)
+The hybrid agentic backend keeps embeddings + vision local (Ollama) while routing
+chat + tool-calling to OpenRouter. Enabled per-request when the client sends
+`backend=hybrid`.
+
+- `OPENROUTER_API_KEY` - OpenRouter API key. Required to enable the hybrid backend.
+- `OPENROUTER_DEFAULT_MODEL` - Model id used when the client doesn't specify one
+  [default: `anthropic/claude-sonnet-4`]
+  - Example: `openai/gpt-4o-mini`, `google/gemini-2.5-flash`
+- `OPENROUTER_ALLOWED_MODELS` - Comma-separated curated allowlist exposed to
+  clients via `GET /insights/openrouter/models`. The mobile picker shows only
+  these. Empty/unset = no picker, server default is used.
+  - Example: `openai/gpt-4o-mini,anthropic/claude-haiku-4-5,google/gemini-2.5-flash`
+- `OPENROUTER_BASE_URL` - Override base URL [default: `https://openrouter.ai/api/v1`]
+- `OPENROUTER_EMBEDDING_MODEL` - Embedding model for OpenRouter
+  [default: `openai/text-embedding-3-small`]. Only used if/when embeddings are
+  routed through OpenRouter (currently embeddings stay local).
+- `OPENROUTER_HTTP_REFERER` - Optional `HTTP-Referer` for OpenRouter attribution
+- `OPENROUTER_APP_TITLE` - Optional `X-Title` for OpenRouter attribution
+
+Capability checks are skipped for the curated allowlist — bad model ids surface
+as a 4xx from the chat call. Pick tool-capable models.
+
 #### SMS API Configuration
 - `SMS_API_URL` - URL to SMS message API [default: `http://localhost:8000`]
  - Used to fetch conversation data for context in insights
@@ -60,6 +129,24 @@ The following environment variables configure AI-powered photo insights and dail
  - Controls how many times the model can invoke tools before being forced to produce a final answer
  - Increase for more thorough context gathering; decrease to limit response time

+#### Insight Chat Continuation
+After an agentic insight is generated, the conversation can be continued. Endpoints:
+- `POST /insights/chat` — single-turn reply (non-streaming)
+- `POST /insights/chat/stream` — SSE variant with live `text` deltas and
+  `tool_call` / `tool_result` events. Mobile client uses this.
+- `GET /insights/chat/history?path=...&library=...` — rendered transcript;
+  each assistant message carries a `tools: [{name, arguments, result}]` array
+- `POST /insights/chat/rewind` — truncate transcript at a rendered index
+  (drops that message + any preceding tool scaffolding + later turns). Used
+  for "try again from here" flows. The initial user message is protected.
+
+Amend mode (`amend: true` in the chat request body) regenerates the insight's
+title and inserts a new row instead of appending to the existing transcript,
+so you can rewrite the saved summary from within chat.
+
+- `AGENTIC_CHAT_MAX_ITERATIONS` - Cap on tool-calling iterations per chat turn [default: `6`]
+  - Per-request `max_iterations` (when sent by the client) is clamped to this cap
+
 #### Fallback Behavior
 - Primary server is tried first with 5-second connection timeout
 - On failure, automatically falls back to secondary server (if configured)
@@ -72,3 +159,34 @@ Daily conversation summaries are generated automatically on server startup. Conf
 - Contacts to process
 - Model version used for embeddings: `nomic-embed-text:v1.5`

+### Apollo + Face Recognition (Optional)
+
+Apollo (sibling project) hosts both the Places API and the local insightface
+inference service. Both integrations are optional and degrade gracefully when
+unset.
+
+- `APOLLO_API_BASE_URL` - Base URL of the sibling Apollo backend.
+  - When set, photo-insight enrichment folds the user's personal place name
+    (Home, Work, Cabin, ...) into the location string, and the agentic loop
+    gains a `get_personal_place_at` tool. Unset = legacy Nominatim-only path.
+- `APOLLO_FACE_API_BASE_URL` - Base URL for the face-detection service.
+  - Falls back to `APOLLO_API_BASE_URL` when unset (typical single-Apollo
+    deploy). Both unset = face feature disabled (file-watch hook and
+    manual-face endpoints short-circuit silently).
+- `FACE_AUTOBIND_MIN_COS` (Phase 3) - Cosine-sim floor for auto-binding a
+  detected face to an existing same-named person via people-tag bootstrap
+  [default: `0.4`].
+- `FACE_DETECT_CONCURRENCY` (Phase 3) - Per-scan-tick concurrent detect
+  calls fired by the file watcher [default: `8`]. Apollo serializes them
+  via its single-worker GPU pool.
+- `FACE_DETECT_TIMEOUT_SEC` - reqwest client timeout per detect call
+  [default: `60`]. CPU inference on a backlog can take many seconds.
+- `FACE_BACKLOG_MAX_PER_TICK` - Cap on the per-tick backlog drain (photos
+  with a content_hash but no face_detections row) [default: `64`]. Runs
+  every watcher tick regardless of quick-vs-full scan, so the unscanned
+  set drains independently of the file walk.
+- `FACE_HASH_BACKFILL_MAX_PER_TICK` - Cap on the per-tick content_hash
+  backfill (photos registered before `content_hash` was being recorded,
+  which are hashed retroactively) [default: `2000`]. Errors don't burn
+  the cap; only successful hashes count.
+
@@ -0,0 +1,155 @@
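+-- Revert multi-library support.
+-- Drops library_id/content_hash/size_bytes, renames rel_path back to the
+-- original column names, and drops the libraries table. Rows originally
+-- from non-primary libraries (id > 1) would be orphaned, so for every
+-- table that carries a library_id the rollback keeps only rows from
+-- library_id=1. favorites and tagged_photo have no library_id; they are
+-- renamed in place and keep all their rows (tagged_photo duplicates
+-- removed by the up migration are not restored).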
+
+PRAGMA foreign_keys=OFF;
+
+-- tagged_photo: rel_path → photo_name.
+DROP INDEX IF EXISTS idx_tagged_photo_relpath_tag;
+DROP INDEX IF EXISTS idx_tagged_photo_rel_path;
+ALTER TABLE tagged_photo RENAME COLUMN rel_path TO photo_name;
+CREATE INDEX IF NOT EXISTS idx_tagged_photo_photo_name ON tagged_photo(photo_name);
+CREATE INDEX IF NOT EXISTS idx_tagged_photo_count ON tagged_photo(photo_name, tag_id);
+
+-- favorites: rel_path → path.
+DROP INDEX IF EXISTS idx_favorites_unique;
+DROP INDEX IF EXISTS idx_favorites_rel_path;
+ALTER TABLE favorites RENAME COLUMN rel_path TO path;
+CREATE INDEX IF NOT EXISTS idx_favorites_path ON favorites(path);
+CREATE UNIQUE INDEX IF NOT EXISTS idx_favorites_unique ON favorites(userid, path);
+
+-- video_preview_clips: drop library_id, rel_path → file_path.
+CREATE TABLE video_preview_clips_old (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    file_path TEXT NOT NULL UNIQUE,
+    status TEXT NOT NULL DEFAULT 'pending',
+    duration_seconds REAL,
+    file_size_bytes INTEGER,
+    error_message TEXT,
+    created_at TEXT NOT NULL,
+    updated_at TEXT NOT NULL
+);
+
+INSERT INTO video_preview_clips_old (
+    id, file_path, status, duration_seconds, file_size_bytes,
+    error_message, created_at, updated_at
+)
+SELECT
+    id, rel_path, status, duration_seconds, file_size_bytes,
+    error_message, created_at, updated_at
+FROM video_preview_clips
+WHERE library_id = 1;
+
+DROP TABLE video_preview_clips;
+ALTER TABLE video_preview_clips_old RENAME TO video_preview_clips;
+
+CREATE INDEX idx_preview_clips_file_path ON video_preview_clips(file_path);
+CREATE INDEX idx_preview_clips_status    ON video_preview_clips(status);
+
+-- entity_photo_links: drop library_id, rel_path → file_path.
+CREATE TABLE entity_photo_links_old (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    entity_id INTEGER NOT NULL,
+    file_path TEXT NOT NULL,
+    role TEXT NOT NULL,
+    CONSTRAINT fk_epl_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE,
+    UNIQUE(entity_id, file_path, role)
+);
+
+INSERT INTO entity_photo_links_old (id, entity_id, file_path, role)
+SELECT id, entity_id, rel_path, role
+FROM entity_photo_links
+WHERE library_id = 1;
+
+DROP TABLE entity_photo_links;
+ALTER TABLE entity_photo_links_old RENAME TO entity_photo_links;
+
+CREATE INDEX idx_entity_photo_links_entity ON entity_photo_links(entity_id);
+CREATE INDEX idx_entity_photo_links_photo  ON entity_photo_links(file_path);
+
+-- photo_insights: drop library_id, rel_path → file_path.
+CREATE TABLE photo_insights_old (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    file_path TEXT NOT NULL,
+    title TEXT NOT NULL,
+    summary TEXT NOT NULL,
+    generated_at BIGINT NOT NULL,
+    model_version TEXT NOT NULL,
+    is_current BOOLEAN NOT NULL DEFAULT 0,
+    training_messages TEXT,
+    approved BOOLEAN
+);
+
+INSERT INTO photo_insights_old (
+    id, file_path, title, summary, generated_at, model_version, is_current,
+    training_messages, approved
+)
+SELECT
+    id, rel_path, title, summary, generated_at, model_version, is_current,
+    training_messages, approved
+FROM photo_insights
+WHERE library_id = 1;
+
+DROP TABLE photo_insights;
+ALTER TABLE photo_insights_old RENAME TO photo_insights;
+
+CREATE INDEX idx_photo_insights_file_path ON photo_insights(file_path);
+CREATE INDEX idx_photo_insights_current   ON photo_insights(file_path, is_current);
+
+-- image_exif: drop library_id/content_hash/size_bytes, rel_path → file_path.
+CREATE TABLE image_exif_old (
+    id INTEGER PRIMARY KEY NOT NULL,
+    file_path TEXT NOT NULL UNIQUE,
+    camera_make TEXT,
+    camera_model TEXT,
+    lens_model TEXT,
+    width INTEGER,
+    height INTEGER,
+    orientation INTEGER,
+    gps_latitude REAL,
+    gps_longitude REAL,
+    gps_altitude REAL,
+    focal_length REAL,
+    aperture REAL,
+    shutter_speed TEXT,
+    iso INTEGER,
+    date_taken BIGINT,
+    created_time BIGINT NOT NULL,
+    last_modified BIGINT NOT NULL
+);
+
+INSERT INTO image_exif_old (
+    id, file_path,
+    camera_make, camera_model, lens_model,
+    width, height, orientation,
+    gps_latitude, gps_longitude, gps_altitude,
+    focal_length, aperture, shutter_speed, iso, date_taken,
+    created_time, last_modified
+)
+SELECT
+    id, rel_path,
+    camera_make, camera_model, lens_model,
+    width, height, orientation,
+    gps_latitude, gps_longitude, gps_altitude,
+    focal_length, aperture, shutter_speed, iso, date_taken,
+    created_time, last_modified
+FROM image_exif
+WHERE library_id = 1;
+
+DROP TABLE image_exif;
+ALTER TABLE image_exif_old RENAME TO image_exif;
+
+CREATE INDEX idx_image_exif_file_path  ON image_exif(file_path);
+CREATE INDEX idx_image_exif_camera     ON image_exif(camera_make, camera_model);
+CREATE INDEX idx_image_exif_gps        ON image_exif(gps_latitude, gps_longitude);
+CREATE INDEX idx_image_exif_date_taken ON image_exif(date_taken);
+CREATE INDEX idx_image_exif_date_path  ON image_exif(date_taken DESC, file_path);
+
+-- Finally, drop the libraries registry.
+DROP TABLE libraries;
+
+PRAGMA foreign_keys=ON;
+
+ANALYZE;
@@ -0,0 +1,216 @@
+-- Multi-library support.
+-- Adds `libraries` registry table and a `library_id` column on per-instance
+-- metadata tables. Renames `file_path` / `photo_name` to `rel_path` for
+-- semantic clarity (values already stored relative to BASE_PATH).
+-- Adds `content_hash` + `size_bytes` to `image_exif` to support
+-- content-based dedup of thumbnails and HLS output across libraries.
+--
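+-- SQLite cannot alter column constraints in place, so per-instance tables
+-- are recreated following the idiom established in
+-- 2026-04-02-000000_photo_insights_history/up.sql. Existing row `id`s are
+-- preserved so foreign keys (entity_facts.source_insight_id, etc.) remain
+-- valid after migration.
+--
+-- NOTE(review): SQLite ignores `PRAGMA foreign_keys` inside an open
+-- transaction. If the migration runner wraps this file in one, the
+-- OFF/ON pragmas below do nothing and the DROP TABLE statements run with
+-- whatever enforcement the connection already had — confirm how this
+-- migration is executed.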
+
+PRAGMA foreign_keys=OFF;
+
+-- ---------------------------------------------------------------------------
+-- 1. Libraries registry.
+-- Seeded with a placeholder for the primary library; AppState patches
+-- `root_path` from the BASE_PATH env var on first boot. Subsequent
+-- prod-to-dev DB syncs update this row via a single SQL UPDATE.
+-- ---------------------------------------------------------------------------
+CREATE TABLE libraries (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    name TEXT NOT NULL UNIQUE,
+    root_path TEXT NOT NULL,
+    created_at BIGINT NOT NULL
+);
+
+INSERT INTO libraries (id, name, root_path, created_at)
+VALUES (1, 'main', 'BASE_PATH_PLACEHOLDER', strftime('%s','now'));
+
+-- ---------------------------------------------------------------------------
+-- 2. image_exif: + library_id, file_path → rel_path, + content_hash/size_bytes.
+-- ---------------------------------------------------------------------------
+CREATE TABLE image_exif_new (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    library_id INTEGER NOT NULL REFERENCES libraries(id),
+    rel_path TEXT NOT NULL,
+
+    -- Camera information
+    camera_make TEXT,
+    camera_model TEXT,
+    lens_model TEXT,
+
+    -- Image properties
+    width INTEGER,
+    height INTEGER,
+    orientation INTEGER,
+
+    -- GPS
+    gps_latitude REAL,
+    gps_longitude REAL,
+    gps_altitude REAL,
+
+    -- Capture settings
+    focal_length REAL,
+    aperture REAL,
+    shutter_speed TEXT,
+    iso INTEGER,
+    date_taken BIGINT,
+
+    -- Housekeeping
+    created_time BIGINT NOT NULL,
+    last_modified BIGINT NOT NULL,
+
+    -- Content identity (backfilled by the `backfill_hashes` binary and by the watcher for new files)
+    content_hash TEXT,
+    size_bytes BIGINT,
+
+    UNIQUE(library_id, rel_path)
+);
+
+INSERT INTO image_exif_new (
+    id, library_id, rel_path,
+    camera_make, camera_model, lens_model,
+    width, height, orientation,
+    gps_latitude, gps_longitude, gps_altitude,
+    focal_length, aperture, shutter_speed, iso, date_taken,
+    created_time, last_modified
+)
+SELECT
+    id, 1, file_path,
+    camera_make, camera_model, lens_model,
+    width, height, orientation,
+    gps_latitude, gps_longitude, gps_altitude,
+    focal_length, aperture, shutter_speed, iso, date_taken,
+    created_time, last_modified
+FROM image_exif;
+
+DROP TABLE image_exif;
+ALTER TABLE image_exif_new RENAME TO image_exif;
+
+CREATE INDEX idx_image_exif_rel_path      ON image_exif(rel_path);
+CREATE INDEX idx_image_exif_camera        ON image_exif(camera_make, camera_model);
+CREATE INDEX idx_image_exif_gps           ON image_exif(gps_latitude, gps_longitude);
+CREATE INDEX idx_image_exif_date_taken    ON image_exif(date_taken);
+CREATE INDEX idx_image_exif_date_path     ON image_exif(date_taken DESC, rel_path);
+CREATE INDEX idx_image_exif_lib_date      ON image_exif(library_id, date_taken);
+CREATE INDEX idx_image_exif_content_hash  ON image_exif(content_hash);
+
+-- ---------------------------------------------------------------------------
+-- 3. photo_insights: + library_id, file_path → rel_path.
+-- Preserve `id` so entity_facts.source_insight_id FKs remain valid.
+-- ---------------------------------------------------------------------------
+CREATE TABLE photo_insights_new (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    library_id INTEGER NOT NULL REFERENCES libraries(id),
+    rel_path TEXT NOT NULL,
+    title TEXT NOT NULL,
+    summary TEXT NOT NULL,
+    generated_at BIGINT NOT NULL,
+    model_version TEXT NOT NULL,
+    is_current BOOLEAN NOT NULL DEFAULT 0,
+    training_messages TEXT,
+    approved BOOLEAN
+);
+
+INSERT INTO photo_insights_new (
+    id, library_id, rel_path, title, summary, generated_at, model_version,
+    is_current, training_messages, approved
+)
+SELECT
+    id, 1, file_path, title, summary, generated_at, model_version,
+    is_current, training_messages, approved
+FROM photo_insights;
+
+DROP TABLE photo_insights;
+ALTER TABLE photo_insights_new RENAME TO photo_insights;
+
+CREATE INDEX idx_photo_insights_rel_path ON photo_insights(rel_path);
+CREATE INDEX idx_photo_insights_current  ON photo_insights(library_id, rel_path, is_current);
+
+-- ---------------------------------------------------------------------------
+-- 4. entity_photo_links: + library_id, file_path → rel_path.
+-- Preserves entity FK; UNIQUE now includes library_id to allow the same
+-- rel_path to link entities in multiple libraries independently.
+-- ---------------------------------------------------------------------------
+CREATE TABLE entity_photo_links_new (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    entity_id INTEGER NOT NULL,
+    library_id INTEGER NOT NULL REFERENCES libraries(id),
+    rel_path TEXT NOT NULL,
+    role TEXT NOT NULL,
+    CONSTRAINT fk_epl_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE,
+    UNIQUE(entity_id, library_id, rel_path, role)
+);
+
+INSERT INTO entity_photo_links_new (id, entity_id, library_id, rel_path, role)
+SELECT id, entity_id, 1, file_path, role FROM entity_photo_links;
+
+DROP TABLE entity_photo_links;
+ALTER TABLE entity_photo_links_new RENAME TO entity_photo_links;
+
+CREATE INDEX idx_entity_photo_links_entity ON entity_photo_links(entity_id);
+CREATE INDEX idx_entity_photo_links_photo  ON entity_photo_links(library_id, rel_path);
+
+-- ---------------------------------------------------------------------------
+-- 5. video_preview_clips: + library_id, file_path → rel_path.
+-- ---------------------------------------------------------------------------
+CREATE TABLE video_preview_clips_new (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    library_id INTEGER NOT NULL REFERENCES libraries(id),
+    rel_path TEXT NOT NULL,
+    status TEXT NOT NULL DEFAULT 'pending',
+    duration_seconds REAL,
+    file_size_bytes INTEGER,
+    error_message TEXT,
+    created_at TEXT NOT NULL,
+    updated_at TEXT NOT NULL,
+    UNIQUE(library_id, rel_path)
+);
+
+INSERT INTO video_preview_clips_new (
+    id, library_id, rel_path, status, duration_seconds, file_size_bytes,
+    error_message, created_at, updated_at
+)
+SELECT
+    id, 1, file_path, status, duration_seconds, file_size_bytes,
+    error_message, created_at, updated_at
+FROM video_preview_clips;
+
+DROP TABLE video_preview_clips;
+ALTER TABLE video_preview_clips_new RENAME TO video_preview_clips;
+
+CREATE INDEX idx_preview_clips_rel_path ON video_preview_clips(rel_path);
+CREATE INDEX idx_preview_clips_status   ON video_preview_clips(status);
+
+-- ---------------------------------------------------------------------------
+-- 6. favorites: path → rel_path. Library-agnostic (cross-library sharing).
+-- ---------------------------------------------------------------------------
+ALTER TABLE favorites RENAME COLUMN path TO rel_path;
+
+DROP INDEX IF EXISTS idx_favorites_path;
+DROP INDEX IF EXISTS idx_favorites_unique;
+CREATE INDEX idx_favorites_rel_path ON favorites(rel_path);
+CREATE UNIQUE INDEX idx_favorites_unique ON favorites(userid, rel_path);
+
+-- ---------------------------------------------------------------------------
+-- 7. tagged_photo: photo_name → rel_path. Library-agnostic.
+-- Dedup first so the (rel_path, tag_id) unique index can be created safely.
+-- ---------------------------------------------------------------------------
+ALTER TABLE tagged_photo RENAME COLUMN photo_name TO rel_path;
+
+DELETE FROM tagged_photo
+WHERE id NOT IN (
+    SELECT MIN(id) FROM tagged_photo GROUP BY rel_path, tag_id
+);
+
+DROP INDEX IF EXISTS idx_tagged_photo_photo_name;
+DROP INDEX IF EXISTS idx_tagged_photo_count;
+CREATE INDEX idx_tagged_photo_rel_path ON tagged_photo(rel_path);
+CREATE UNIQUE INDEX idx_tagged_photo_relpath_tag ON tagged_photo(rel_path, tag_id);
+
+PRAGMA foreign_keys=ON;
+
+ANALYZE;
@@ -0,0 +1,4 @@
+-- No-op: there's no sensible way to recover which rows originally used
+-- backslashes, and there's no reason to want backslashes back. The
+-- deleted duplicates are also gone.
+SELECT 1; -- deliberate no-op statement; nothing is reverted
@@ -0,0 +1,128 @@
+-- Normalize `rel_path` columns to forward slashes. Windows ingest
+-- historically produced a mix of `\` and `/`, which broke lookups and
+-- caused spurious UNIQUE-constraint violations on re-registration.
+--
+-- SQLite enforces UNIQUE per-row during UPDATE, so we have to drop
+-- losing duplicates BEFORE normalizing. For each table that has a
+-- UNIQUE on rel_path this takes two DELETE passes:
+--   1. Drop backslash rows whose normalized form already exists in
+--      canonical (forward-slash) form — the existing forward-slash
+--      row is the survivor.
+--   2. Drop backslash rows that collide with EACH OTHER once
+--      normalized (e.g. `a\b/c` and `a/b\c` with no `a/b/c` row),
+--      keeping the lowest id per unique key. Without this pass the
+--      UPDATE would abort with a UNIQUE violation on the second one.
+-- Then a flat UPDATE finishes the job for remaining backslash rows.
+--
+-- `LIKE '%\%'` matches any value containing a backslash: SQLite's LIKE
+-- has no escape character unless an ESCAPE clause is supplied.
+
+-- image_exif: UNIQUE(library_id, rel_path)
+DELETE FROM image_exif
+ WHERE rel_path LIKE '%\%'
+   AND EXISTS (
+       SELECT 1 FROM image_exif AS other
+        WHERE other.library_id = image_exif.library_id
+          AND other.rel_path = REPLACE(image_exif.rel_path, '\', '/')
+          AND other.id != image_exif.id
+   );
+DELETE FROM image_exif
+ WHERE rel_path LIKE '%\%'
+   AND id NOT IN (
+       SELECT MIN(id) FROM image_exif
+        WHERE rel_path LIKE '%\%'
+        GROUP BY library_id, REPLACE(rel_path, '\', '/')
+   );
+UPDATE image_exif
+   SET rel_path = REPLACE(rel_path, '\', '/')
+ WHERE rel_path LIKE '%\%';
+
+-- favorites: UNIQUE(userid, rel_path)
+DELETE FROM favorites
+ WHERE rel_path LIKE '%\%'
+   AND EXISTS (
+       SELECT 1 FROM favorites AS other
+        WHERE other.userid = favorites.userid
+          AND other.rel_path = REPLACE(favorites.rel_path, '\', '/')
+          AND other.id != favorites.id
+   );
+DELETE FROM favorites
+ WHERE rel_path LIKE '%\%'
+   AND id NOT IN (
+       SELECT MIN(id) FROM favorites
+        WHERE rel_path LIKE '%\%'
+        GROUP BY userid, REPLACE(rel_path, '\', '/')
+   );
+UPDATE favorites
+   SET rel_path = REPLACE(rel_path, '\', '/')
+ WHERE rel_path LIKE '%\%';
+
+-- tagged_photo: UNIQUE(rel_path, tag_id)
+DELETE FROM tagged_photo
+ WHERE rel_path LIKE '%\%'
+   AND EXISTS (
+       SELECT 1 FROM tagged_photo AS other
+        WHERE other.tag_id = tagged_photo.tag_id
+          AND other.rel_path = REPLACE(tagged_photo.rel_path, '\', '/')
+          AND other.id != tagged_photo.id
+   );
+DELETE FROM tagged_photo
+ WHERE rel_path LIKE '%\%'
+   AND id NOT IN (
+       SELECT MIN(id) FROM tagged_photo
+        WHERE rel_path LIKE '%\%'
+        GROUP BY tag_id, REPLACE(rel_path, '\', '/')
+   );
+UPDATE tagged_photo
+   SET rel_path = REPLACE(rel_path, '\', '/')
+ WHERE rel_path LIKE '%\%';
+
+-- entity_photo_links: UNIQUE(entity_id, library_id, rel_path, role)
+DELETE FROM entity_photo_links
+ WHERE rel_path LIKE '%\%'
+   AND EXISTS (
+       SELECT 1 FROM entity_photo_links AS other
+        WHERE other.entity_id = entity_photo_links.entity_id
+          AND other.library_id = entity_photo_links.library_id
+          AND other.role = entity_photo_links.role
+          AND other.rel_path = REPLACE(entity_photo_links.rel_path, '\', '/')
+          AND other.id != entity_photo_links.id
+   );
+DELETE FROM entity_photo_links
+ WHERE rel_path LIKE '%\%'
+   AND id NOT IN (
+       SELECT MIN(id) FROM entity_photo_links
+        WHERE rel_path LIKE '%\%'
+        GROUP BY entity_id, library_id, role, REPLACE(rel_path, '\', '/')
+   );
+UPDATE entity_photo_links
+   SET rel_path = REPLACE(rel_path, '\', '/')
+ WHERE rel_path LIKE '%\%';
+
+-- video_preview_clips: UNIQUE(library_id, rel_path)
+DELETE FROM video_preview_clips
+ WHERE rel_path LIKE '%\%'
+   AND EXISTS (
+       SELECT 1 FROM video_preview_clips AS other
+        WHERE other.library_id = video_preview_clips.library_id
+          AND other.rel_path = REPLACE(video_preview_clips.rel_path, '\', '/')
+          AND other.id != video_preview_clips.id
+   );
+DELETE FROM video_preview_clips
+ WHERE rel_path LIKE '%\%'
+   AND id NOT IN (
+       SELECT MIN(id) FROM video_preview_clips
+        WHERE rel_path LIKE '%\%'
+        GROUP BY library_id, REPLACE(rel_path, '\', '/')
+   );
+UPDATE video_preview_clips
+   SET rel_path = REPLACE(rel_path, '\', '/')
+ WHERE rel_path LIKE '%\%';
+
+-- photo_insights has no UNIQUE on rel_path (history table), so a plain
+-- normalize is safe.
+UPDATE photo_insights
+   SET rel_path = REPLACE(rel_path, '\', '/')
+ WHERE rel_path LIKE '%\%';
+
+ANALYZE;
@@ -0,0 +1,23 @@
+-- SQLite can't DROP COLUMN cleanly on older versions; rebuild the table.
+CREATE TABLE photo_insights_backup AS -- snapshot; columns not listed here are lost by the rebuild
+    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
+           is_current, training_messages, approved
+    FROM photo_insights;
+DROP TABLE photo_insights; -- NOTE(review): also drops any indexes on the table; none are recreated below — confirm none exist at this point
+CREATE TABLE photo_insights (
+    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+    library_id INTEGER NOT NULL REFERENCES libraries(id),
+    rel_path TEXT NOT NULL,
+    title TEXT NOT NULL,
+    summary TEXT NOT NULL,
+    generated_at BIGINT NOT NULL,
+    model_version TEXT NOT NULL,
+    is_current BOOLEAN NOT NULL DEFAULT TRUE,
+    training_messages TEXT,
+    approved BOOLEAN
+);
+INSERT INTO photo_insights -- positional copy: SELECT order matches the column order declared above
+    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
+           is_current, training_messages, approved
+    FROM photo_insights_backup;
+DROP TABLE photo_insights_backup; -- scratch table no longer needed
@@ -0,0 +1 @@
+ALTER TABLE photo_insights ADD COLUMN backend TEXT NOT NULL DEFAULT 'local'; -- pre-existing rows read back as 'local'
@@ -0,0 +1,24 @@
+-- SQLite can't DROP COLUMN cleanly on older versions; rebuild the table.
+CREATE TABLE photo_insights_backup AS -- snapshot; columns not listed here are lost by the rebuild
+    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
+           is_current, training_messages, approved, backend
+    FROM photo_insights;
+DROP TABLE photo_insights; -- NOTE(review): also drops any indexes on the table; none are recreated below — confirm none exist at this point
+CREATE TABLE photo_insights (
+    id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
+    library_id INTEGER NOT NULL REFERENCES libraries(id),
+    rel_path TEXT NOT NULL,
+    title TEXT NOT NULL,
+    summary TEXT NOT NULL,
+    generated_at BIGINT NOT NULL,
+    model_version TEXT NOT NULL,
+    is_current BOOLEAN NOT NULL DEFAULT TRUE,
+    training_messages TEXT,
+    approved BOOLEAN,
+    backend TEXT NOT NULL DEFAULT 'local'
+);
+INSERT INTO photo_insights -- positional copy: SELECT order matches the column order declared above
+    SELECT id, library_id, rel_path, title, summary, generated_at, model_version,
+           is_current, training_messages, approved, backend
+    FROM photo_insights_backup;
+DROP TABLE photo_insights_backup; -- scratch table no longer needed
@@ -0,0 +1 @@
+ALTER TABLE photo_insights ADD COLUMN fewshot_source_ids TEXT; -- nullable, no default: existing rows get NULL
@@ -0,0 +1,2 @@
+DROP TABLE IF EXISTS face_detections; -- dropped first: it holds the FK to persons(id)
+DROP TABLE IF EXISTS persons; -- its idx_persons_entity index goes with it
@@ -0,0 +1,67 @@
+-- Local face recognition tables.
+--
+-- `persons` are visual identities (the "who" of a face). The optional
+-- `entity_id` bridges to the existing knowledge graph `entities` table —
+-- when set, this person is the visual side of an LLM-extracted entity.
+-- Don't auto-create entities from persons; the entity table represents
+-- LLM-extracted knowledge with its own confidence semantics, and silently
+-- filling it from face detections muddies the provenance.
+--
+-- `face_detections` carries one row per detected face on a content_hash,
+-- plus marker rows with `status='no_faces'` or `status='failed'` so the
+-- file watcher knows not to re-scan a hash. Keying on `content_hash`
+-- (cross-library dedup) rather than `(library_id, rel_path)` means the
+-- same JPEG in two libraries is scanned once. The denormalized `rel_path`
+-- carries the most-recently-seen path — useful for cluster-thumb URL
+-- generation; canonical path lookup goes through image_exif.
+
+CREATE TABLE persons (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    name TEXT NOT NULL,
+    cover_face_id INTEGER,                  -- backfilled when the first face binds; no FK is declared for it
+    entity_id INTEGER,                      -- optional bridge to entities(id)
+    created_from_tag BOOLEAN NOT NULL DEFAULT 0,
+    notes TEXT,
+    created_at BIGINT NOT NULL,
+    updated_at BIGINT NOT NULL,
+    CONSTRAINT fk_persons_entity FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE SET NULL,
+    UNIQUE(name COLLATE NOCASE)             -- names are unique ignoring ASCII case
+);
+
+CREATE INDEX idx_persons_entity ON persons(entity_id); -- supports lookups by the bridged entity
+
+CREATE TABLE face_detections (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    library_id INTEGER NOT NULL,
+    content_hash TEXT NOT NULL,             -- canonical key (cross-library dedup)
+    rel_path TEXT NOT NULL,                 -- denormalized; most recently seen
+    bbox_x REAL,                            -- normalized 0..1; NULL on marker rows
+    bbox_y REAL,
+    bbox_w REAL,
+    bbox_h REAL,
+    embedding BLOB,                         -- 512×f32 = 2048 bytes; NULL on marker rows
+    confidence REAL,                        -- detector score
+    source TEXT NOT NULL,                   -- 'auto' | 'manual'
+    person_id INTEGER,
+    status TEXT NOT NULL DEFAULT 'detected', -- 'detected' | 'no_faces' | 'failed'
+    model_version TEXT NOT NULL,            -- e.g. 'buffalo_l'; embedding lineage
+    created_at BIGINT NOT NULL,
+    CONSTRAINT fk_fd_library FOREIGN KEY (library_id) REFERENCES libraries(id),
+    CONSTRAINT fk_fd_person FOREIGN KEY (person_id) REFERENCES persons(id) ON DELETE SET NULL,
+    -- Detected rows carry geometry + embedding; marker rows ('no_faces',
+    -- 'failed') carry neither. CHECK enforces the invariant so manual
+    -- inserts can't slip through with half a row.
+    CONSTRAINT chk_marker CHECK (           -- NOTE: tests only bbox_x and embedding; bbox_y/w/h are not covered
+        (status = 'detected' AND bbox_x IS NOT NULL AND embedding IS NOT NULL)
+        OR (status IN ('no_faces','failed') AND bbox_x IS NULL AND embedding IS NULL)
+    )
+);
+
+CREATE INDEX idx_face_detections_hash       ON face_detections(content_hash);
+CREATE INDEX idx_face_detections_lib_path   ON face_detections(library_id, rel_path);
+CREATE INDEX idx_face_detections_person     ON face_detections(person_id);
+CREATE INDEX idx_face_detections_status     ON face_detections(status);
+-- One marker row per (content_hash, status='no_faces') so the file watcher
+-- doesn't double-mark when a hash is seen on multiple full-scan passes.
+CREATE UNIQUE INDEX idx_face_detections_no_faces_unique
+    ON face_detections(content_hash) WHERE status = 'no_faces'; -- 'failed' markers are not constrained by this index
@@ -0,0 +1,2 @@
+DROP INDEX IF EXISTS idx_persons_is_ignored; -- index first: SQLite refuses to drop a column that is still indexed
+ALTER TABLE persons DROP COLUMN is_ignored;
@@ -0,0 +1,20 @@
+-- IGNORE / junk bucket for the face recognition feature.
+--
+-- An "Ignored" person is the destination for strangers, faces the user
+-- doesn't want tagged, and false detections. It looks like any other
+-- person row (so face_detections.person_id stays a clean foreign key)
+-- but `is_ignored=1` flags it for special UI treatment:
+--   - hidden from the persons list by default
+--   - excluded from `find_persons_by_names_ci` so a tag-name match
+--     can never auto-bind a real face to the ignore bucket
+--   - cluster-suggest already filters by `person_id IS NULL`, so faces
+--     bound to an ignored person are naturally excluded from future
+--     re-clustering
+--
+-- Partial index because the set of matching rows is small (typically
+-- 1 row), and we only ever query for `is_ignored = 1` to find the bucket.
+
+ALTER TABLE persons ADD COLUMN is_ignored BOOLEAN NOT NULL DEFAULT 0; -- existing persons start as not ignored
+
+CREATE INDEX idx_persons_is_ignored
+    ON persons(is_ignored) WHERE is_ignored = 1; -- only flagged rows are stored in the index
@@ -0,0 +1 @@
+DROP INDEX IF EXISTS idx_tags_name_nocase; -- removes only the constraint; tags merged by up.sql stay merged
@@ -0,0 +1,43 @@
+-- Tags only enforced uniqueness in application code (the add_tag handler
+-- looks up by name before inserting). The schema itself accepted dupes,
+-- so a divergent code path could land two tags with the same name. Now
+-- that we expose a rename endpoint we want a hard guarantee: case-
+-- insensitive UNIQUE on tags.name.
+
+-- Pre-flight: collapse exact-name duplicates (case-insensitive) onto the
+-- lowest-id row before adding the constraint, otherwise the index
+-- creation fails on any DB that ever produced dupes. On a clean DB this
+-- is a no-op.
+--
+-- tagged_photo has UNIQUE(rel_path, tag_id), so a photo tagged with two
+-- spellings of one tag ("Beach" and "beach") cannot have both rows
+-- repointed at the survivor. OR IGNORE leaves the colliding row
+-- untouched instead of aborting the migration; the DELETE that follows
+-- removes it, since the photo already carries the surviving tag.
+UPDATE OR IGNORE tagged_photo
+SET tag_id = (
+    SELECT MIN(t2.id) FROM tags t2
+    WHERE LOWER(t2.name) = LOWER((SELECT name FROM tags WHERE id = tagged_photo.tag_id))
+)
+WHERE tag_id IN (
+    SELECT t.id FROM tags t
+    WHERE t.id <> (
+        SELECT MIN(t2.id) FROM tags t2 WHERE LOWER(t2.name) = LOWER(t.name)
+    )
+);
+
+-- Rows still pointing at a doomed tag are the redundant ones skipped above.
+DELETE FROM tagged_photo
+WHERE tag_id IN (
+    SELECT t.id FROM tags t
+    WHERE t.id <> (
+        SELECT MIN(t2.id) FROM tags t2 WHERE LOWER(t2.name) = LOWER(t.name)
+    )
+);
+
+DELETE FROM tags
+WHERE id <> (
+    SELECT MIN(t2.id) FROM tags t2 WHERE LOWER(t2.name) = LOWER(tags.name)
+);
+
+CREATE UNIQUE INDEX idx_tags_name_nocase ON tags (name COLLATE NOCASE);
@@ -0,0 +1,5 @@
+DROP INDEX IF EXISTS idx_photo_insights_content_hash; -- index first: an indexed column cannot be dropped
+ALTER TABLE photo_insights DROP COLUMN content_hash;
+
+DROP INDEX IF EXISTS idx_tagged_photo_content_hash; -- same ordering requirement as above
+ALTER TABLE tagged_photo DROP COLUMN content_hash;
@@ -0,0 +1,64 @@
+-- Phase B of the multi-library data-model rollout: add a nullable
+-- `content_hash` column to derived/user-intent tables that should follow
+-- the bytes rather than the path. Reads will prefer hash-key joins and
+-- fall back to rel_path while the column is null. A separate
+-- reconciliation pass collapses duplicates as the column populates.
+--
+-- See CLAUDE.md → "Multi-library data model" for the policy. The
+-- reference implementation is `face_detections`, which has been
+-- hash-keyed since it was introduced.
+--
+-- Tables in this migration:
+--   * tagged_photo   — user-intent (tags follow the bytes)
+--   * photo_insights — intrinsic to bytes (LLM-generated description)
+--
+-- favorites is the natural third candidate but its DAO is barely used in
+-- v1 and the row count is tiny; deferring lets this migration stay
+-- focused on the high-volume tables that drive cross-library overhead.
+
+-- ---------------------------------------------------------------------------
+-- tagged_photo
+-- ---------------------------------------------------------------------------
+ALTER TABLE tagged_photo ADD COLUMN content_hash TEXT; -- nullable: stays NULL until the backfill below resolves it
+
+-- Backfill: for each tagged_photo row, find the content_hash for its
+-- rel_path. tagged_photo doesn't carry a library_id, so a rel_path that
+-- exists under multiple libraries with different content is genuinely
+-- ambiguous — we take the match from the lowest library_id (ORDER BY
+-- keeps the pick deterministic across runs). The reconciliation pass at
+-- runtime cleans up rows that resolve differently once hashed per library.
+UPDATE tagged_photo
+SET content_hash = (
+    SELECT content_hash FROM image_exif
+    WHERE image_exif.rel_path = tagged_photo.rel_path
+      AND image_exif.content_hash IS NOT NULL
+    ORDER BY image_exif.library_id, image_exif.id LIMIT 1
+)
+WHERE content_hash IS NULL;
+
+-- Hash-key index. Partial (only non-null rows) to keep the index small
+-- during the transitional window where most rows are still null.
+CREATE INDEX idx_tagged_photo_content_hash
+    ON tagged_photo (content_hash)
+    WHERE content_hash IS NOT NULL; -- still usable for `content_hash = ?` lookups, which imply IS NOT NULL
+
+-- ---------------------------------------------------------------------------
+-- photo_insights
+-- ---------------------------------------------------------------------------
+ALTER TABLE photo_insights ADD COLUMN content_hash TEXT; -- nullable: stays NULL when no image_exif row supplies a hash
+
+-- Backfill keyed on (library_id, rel_path) — photo_insights already
+-- carries library_id, so the resolution is unambiguous.
+UPDATE photo_insights
+SET content_hash = (
+    SELECT content_hash FROM image_exif
+    WHERE image_exif.library_id = photo_insights.library_id
+      AND image_exif.rel_path = photo_insights.rel_path
+      AND image_exif.content_hash IS NOT NULL
+    LIMIT 1 -- at most one row can match while image_exif keeps UNIQUE(library_id, rel_path)
+)
+WHERE content_hash IS NULL;
+
+CREATE INDEX idx_photo_insights_content_hash
+    ON photo_insights (content_hash)
+    WHERE content_hash IS NOT NULL; -- partial: NULL rows are left out of the index
@@ -0,0 +1,2 @@
+-- Requires SQLite 3.35+ for ALTER TABLE DROP COLUMN.
+ALTER TABLE libraries DROP COLUMN enabled; -- any library set to enabled = 0 loses that setting
@@ -0,0 +1,14 @@
+-- Operator-controlled kill switch for a library. When `enabled = 0` the
+-- watcher tick skips that library entirely — before the availability
+-- probe, before ingest, before any maintenance pass — and the orphan-GC
+-- all-online check treats it as out-of-scope rather than as a blocker.
+--
+-- The intended workflow is staging a new mount: insert with enabled=0,
+-- verify the row appears in /libraries with enabled=false, then UPDATE
+-- to 1 to start ingest. Same toggle works as a maintenance kill switch
+-- after the fact ("don't keep probing this NAS while I'm rebooting it").
+--
+-- Default 1 so every existing library stays running on upgrade — no
+-- behavior change without an explicit flip.
+
+ALTER TABLE libraries ADD COLUMN enabled BOOLEAN NOT NULL DEFAULT 1; -- the default also applies to rows that already exist
@@ -0,0 +1,2 @@
+-- Requires SQLite 3.35+ for ALTER TABLE DROP COLUMN.
+ALTER TABLE libraries DROP COLUMN excluded_dirs; -- per-library exclude lists are discarded
@@ -0,0 +1,14 @@
+-- Per-library excluded directories.
+--
+-- The global EXCLUDED_DIRS env var is the right knob for excludes that
+-- every library shares (Synology @eaDir, .thumbnails, etc.). It's a
+-- poor fit for "exclude this subtree from THIS library only". The
+-- natural use case for that is mounting a parent directory while
+-- another library already covers a child subtree underneath.
+--
+-- This column is parsed comma-separated, same shape as the env var,
+-- and the watcher / memories / thumbnail walks each apply
+-- (env_globals ∪ library.excluded_dirs) when scanning the library.
+-- NULL = no extra excludes; the global env var still applies.
+
+ALTER TABLE libraries ADD COLUMN excluded_dirs TEXT; -- nullable, no default: existing libraries get NULL
@@ -0,0 +1,8 @@
+DROP INDEX IF EXISTS idx_image_exif_duplicate_of_hash; -- indexes first: indexed columns cannot be dropped
+DROP INDEX IF EXISTS idx_image_exif_dhash;
+DROP INDEX IF EXISTS idx_image_exif_phash;
+
+ALTER TABLE image_exif DROP COLUMN duplicate_decided_at; -- columns go in reverse order of their addition in up.sql
+ALTER TABLE image_exif DROP COLUMN duplicate_of_hash;
+ALTER TABLE image_exif DROP COLUMN dhash_64;
+ALTER TABLE image_exif DROP COLUMN phash_64;
@@ -0,0 +1,41 @@
+-- Adds perceptual-hash signals + soft-mark resolution state to image_exif so
+-- the duplicates surface in Apollo can group near-duplicates (re-encoded,
+-- resized, format-converted copies) and let the user demote losers without
+-- touching the file on disk. Image-only for v1: phash_64/dhash_64 are NULL
+-- on videos and on images that fail to decode. See Apollo CLAUDE.md →
+-- Duplicate detection / Caching layer for the policy.
+--
+-- Soft-mark columns are media-type-agnostic — when video perceptual hashing
+-- arrives, it lives in a separate hash-keyed companion table and reuses the
+-- same duplicate_of_hash / duplicate_decided_at machinery.
+
+-- pHash (DCT, 64-bit) packed as i64 for fast XOR + popcount Hamming.
+ALTER TABLE image_exif ADD COLUMN phash_64 BIGINT; -- nullable: existing rows start as NULL
+
+-- dHash (gradient, 64-bit). Cheap, robust to compression/resize. Stored
+-- alongside pHash so the query layer can fall back if either is null.
+ALTER TABLE image_exif ADD COLUMN dhash_64 BIGINT; -- nullable: existing rows start as NULL
+
+-- When non-null, this row is a soft-marked duplicate of the row whose
+-- content_hash matches. The duplicate file stays on disk; the default
+-- /photos listing filters it out. /photos?include_duplicates=true opts
+-- back in (the Apollo duplicates modal uses this).
+ALTER TABLE image_exif ADD COLUMN duplicate_of_hash TEXT; -- no FK declared; the target hash is not validated by the schema
+
+-- Unix seconds of the resolve. Distinguishes "never reviewed" from
+-- "reviewed and resolved" for the Apollo include_resolved toggle.
+ALTER TABLE image_exif ADD COLUMN duplicate_decided_at BIGINT; -- NULL = never reviewed
+
+-- Partial indexes — the columns are NULL for the vast majority of rows
+-- during the transitional window and forever for videos / decode failures.
+CREATE INDEX idx_image_exif_phash
+    ON image_exif (phash_64)
+    WHERE phash_64 IS NOT NULL;
+
+CREATE INDEX idx_image_exif_dhash
+    ON image_exif (dhash_64)
+    WHERE dhash_64 IS NOT NULL;
+
+CREATE INDEX idx_image_exif_duplicate_of_hash
+    ON image_exif (duplicate_of_hash)
+    WHERE duplicate_of_hash IS NOT NULL; -- covers only rows already soft-marked as duplicates
@@ -0,0 +1,2 @@
+DROP INDEX IF EXISTS idx_image_exif_date_backfill; -- its WHERE clause names date_taken_source, so it must go first
+ALTER TABLE image_exif DROP COLUMN date_taken_source;
@@ -0,0 +1,24 @@
+-- Tracks where a row's `date_taken` was sourced so the canonical-date
+-- waterfall (kamadak-exif → exiftool → filename → earliest_fs_time) is
+-- visible to debugging and to the per-tick backfill drain that re-runs
+-- weak sources once stronger ones become available (e.g. exiftool gets
+-- installed on a deploy that didn't have it). See CLAUDE.md → Memories
+-- canonical-date pipeline.
+--
+-- Values:
+--   'exif'     — kamadak-exif read DateTime/DateTimeOriginal directly
+--   'exiftool' — exiftool fallback caught a video / MakerNote / QuickTime tag
+--   'filename' — extract_date_from_filename matched a known pattern
+--   'fs_time'  — fell through to earliest_fs_time(metadata)
+--
+-- NULL when `date_taken` itself is NULL (no source resolved the date).
+ALTER TABLE image_exif ADD COLUMN date_taken_source TEXT; -- existing rows start as NULL, including rows that already have a date
+
+-- Partial index for the per-tick backfill drain: targets rows that need
+-- re-resolution (no date yet, or only the weakest source resolved it).
+-- Filename-sourced rows are intentionally excluded — the regex is
+-- authoritative when it matches and re-running exiftool wouldn't change
+-- the answer.
+CREATE INDEX idx_image_exif_date_backfill
+    ON image_exif (library_id, id)
+    WHERE date_taken IS NULL OR date_taken_source = 'fs_time'; -- a dated row with a NULL source matches neither branch
@@ -0,0 +1,9 @@
+-- Reverting this migration is a no-op: the labels we wrote in `up.sql`
+-- are correct under any state of the schema (every dated row was indeed
+-- exif-sourced before the resolver landed), and there's no signal that
+-- distinguishes "labelled by this migration" from "labelled by the
+-- ingest path post-resolver". Clearing them would break the drain's
+-- eligibility filter again.
+--
+-- The companion migration `2026-05-06-000000_add_date_taken_source` is
+-- the one to revert if you need to remove the column entirely.
@@ -0,0 +1,20 @@
+-- Backfill `date_taken_source` for rows that pre-date the canonical-date
+-- pipeline. Before the resolver landed, `image_exif.date_taken` could
+-- only be populated via `exif::extract_exif_from_path` (kamadak-exif)
+-- on the file-watcher, upload, or GPS-write paths. The resolver column
+-- migration added `date_taken_source` defaulting to NULL, so every
+-- historical row with a date is currently unlabelled — and the
+-- per-tick drain skips them because its eligibility predicate is
+-- `date_taken IS NULL OR date_taken_source = 'fs_time'`.
+--
+-- Label them `'exif'` once and let the drain take over from here. Safe
+-- because every code path that wrote `date_taken` prior to the
+-- resolver was a kamadak-exif read — there was no other source.
+--
+-- Idempotent: re-running this migration on a DB that has already been
+-- backfilled is a no-op (the WHERE clause matches nothing the second
+-- time around).
+UPDATE image_exif
+SET date_taken_source = 'exif'
+WHERE date_taken IS NOT NULL -- undated rows stay NULL
+  AND date_taken_source IS NULL; -- never overwrites a label that is already set
@@ -0,0 +1,2 @@
+ALTER TABLE image_exif DROP COLUMN original_date_taken_source; -- reverse order of addition; stored snapshots are lost
+ALTER TABLE image_exif DROP COLUMN original_date_taken;
@@ -0,0 +1,15 @@
+-- Manual date_taken override: when an operator overrides a row's date via
+-- POST /image/exif/date, the prior `(date_taken, date_taken_source)` is
+-- snapshotted into these columns and the live columns hold the new value
+-- with `date_taken_source = 'manual'`. POST /image/exif/date/clear restores
+-- the pair and nulls the originals.
+--
+-- The waterfall source-name set is now:
+--   'exif' | 'exiftool' | 'filename' | 'fs_time' | 'manual'
+--
+-- The `idx_image_exif_date_backfill` partial index already filters to
+-- `date_taken IS NULL OR date_taken_source = 'fs_time'`, so 'manual' rows
+-- are naturally excluded from the per-tick backfill drain — no index
+-- change needed.
+ALTER TABLE image_exif ADD COLUMN original_date_taken BIGINT; -- nullable: existing rows start as NULL
+ALTER TABLE image_exif ADD COLUMN original_date_taken_source TEXT; -- snapshot of date_taken_source, paired with the column above
@@ -0,0 +1,43 @@
+-- Drop the persona-scoping column on entity_facts via the table-rebuild
+-- dance for SQLite-version portability (matches the pattern in
+-- 2026-04-20-000000_add_backend_to_insights/down.sql).
+DROP INDEX IF EXISTS idx_entity_facts_persona;
+
+CREATE TABLE entity_facts_backup AS -- persona_id is not selected, so the rebuild drops it
+    SELECT id, subject_entity_id, predicate, object_entity_id, object_value,
+           source_photo, source_insight_id, confidence, status, created_at
+    FROM entity_facts;
+
+DROP TABLE entity_facts; -- takes the table's remaining indexes with it; they are recreated below
+
+CREATE TABLE entity_facts (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    subject_entity_id INTEGER NOT NULL,
+    predicate TEXT NOT NULL,
+    object_entity_id INTEGER,
+    object_value TEXT,
+    source_photo TEXT,
+    source_insight_id INTEGER,
+    confidence REAL NOT NULL DEFAULT 0.6,
+    status TEXT NOT NULL DEFAULT 'active',
+    created_at BIGINT NOT NULL,
+    CONSTRAINT fk_ef_subject FOREIGN KEY (subject_entity_id) REFERENCES entities(id) ON DELETE CASCADE,
+    CONSTRAINT fk_ef_object  FOREIGN KEY (object_entity_id)  REFERENCES entities(id) ON DELETE SET NULL,
+    CONSTRAINT fk_ef_insight FOREIGN KEY (source_insight_id) REFERENCES photo_insights(id) ON DELETE SET NULL,
+    CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL) -- every fact needs an entity object or a literal value
+);
+
+INSERT INTO entity_facts -- positional copy: SELECT order matches the column order declared above
+    SELECT id, subject_entity_id, predicate, object_entity_id, object_value,
+           source_photo, source_insight_id, confidence, status, created_at
+    FROM entity_facts_backup;
+
+DROP TABLE entity_facts_backup;
+
+CREATE INDEX idx_entity_facts_subject ON entity_facts(subject_entity_id);
+CREATE INDEX idx_entity_facts_predicate ON entity_facts(predicate);
+CREATE INDEX idx_entity_facts_status ON entity_facts(status);
+CREATE INDEX idx_entity_facts_source_photo ON entity_facts(source_photo);
+
+DROP INDEX IF EXISTS idx_personas_user;
+DROP TABLE IF EXISTS personas; -- removes every persona row, seeded built-ins and custom ones alike
@@ -0,0 +1,64 @@
+-- Personas live server-side now (mobile previously stored them in
+-- AsyncStorage only). Each user gets the three built-ins seeded; custom
+-- personas land here too via POST /personas or POST /personas/migrate.
+--
+-- `entity_facts` gains a persona_id so each persona accumulates its own
+-- voice over a shared entity graph (entities themselves stay unscoped).
+-- Existing rows backfill to 'default' via the column DEFAULT — that
+-- becomes the historical baseline. The `include_all_memories` flag on
+-- personas lets any persona opt back into reading the full pool.
+
+CREATE TABLE personas (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    user_id INTEGER NOT NULL,
+    persona_id TEXT NOT NULL, -- string key; the seeded built-ins use 'default', 'journal', 'factual'
+    name TEXT NOT NULL,
+    system_prompt TEXT NOT NULL,
+    is_built_in BOOLEAN NOT NULL DEFAULT FALSE,
+    include_all_memories BOOLEAN NOT NULL DEFAULT FALSE,
+    created_at BIGINT NOT NULL, -- NOTE(review): the seed rows write epoch milliseconds — confirm runtime writers use the same unit
+    updated_at BIGINT NOT NULL,
+    UNIQUE(user_id, persona_id), -- one row per persona key per user
+    CONSTRAINT fk_personas_user FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
+);
+
+CREATE INDEX idx_personas_user ON personas(user_id);
+
+-- Seed built-ins for every existing user. System prompts copied verbatim
+-- from FileViewer-React/hooks/usePersonas.tsx so server and client agree
+-- on the canonical voice for each built-in.
+INSERT INTO personas (user_id, persona_id, name, system_prompt, is_built_in, created_at, updated_at) -- include_all_memories is omitted and takes its DEFAULT FALSE
+SELECT
+    u.id,
+    'default',
+    'Default Assistant',
+    'You are my long-term memory assistant. Use only the information provided. Do not invent details. Respond in 3–6 sentences in third person, leading with the most concrete moment from the photo and the surrounding context. Plain prose, no headings.',
+    TRUE,
+    strftime('%s', 'now') * 1000, -- epoch seconds × 1000 = milliseconds
+    strftime('%s', 'now') * 1000
+FROM users u
+UNION ALL
+SELECT
+    u.id,
+    'journal',
+    'Personal Journal',
+    'You are a personal journal writer. Write in first person, present tense, with warmth and reflection — focusing on emotions and meaningful moments. Use only the information provided; do not invent details. Aim for 4–8 sentences in a single flowing paragraph, no headings.',
+    TRUE,
+    strftime('%s', 'now') * 1000,
+    strftime('%s', 'now') * 1000
+FROM users u
+UNION ALL
+SELECT
+    u.id,
+    'factual',
+    'Factual Reporter',
+    'You are a factual memory recorder. Be precise, objective, and concise. Lead with the date and place, then list what / when / who in 2–4 short sentences. Use only the information provided; if a detail is unknown, say so rather than guessing.',
+    TRUE,
+    strftime('%s', 'now') * 1000,
+    strftime('%s', 'now') * 1000
+FROM users u; -- inserts nothing when the users table is empty
+
+-- Persona scoping on facts only. Entities and entity_photo_links stay
+-- shared (real-world referents and shared photo ↔ entity associations).
+ALTER TABLE entity_facts ADD COLUMN persona_id TEXT NOT NULL DEFAULT 'default'; -- plain text here; no FK to personas is declared by this statement
+CREATE INDEX idx_entity_facts_persona ON entity_facts(persona_id);
@@ -0,0 +1,47 @@
+-- Reverse 2026-05-10-000000_entity_facts_persona_fk: drop the
+-- composite FK and the user_id column via the same rebuild pattern.
+
+DROP INDEX IF EXISTS idx_entity_facts_user_persona; -- drop the index names up front so they can be reused on the rebuilt table
+DROP INDEX IF EXISTS idx_entity_facts_persona;
+DROP INDEX IF EXISTS idx_entity_facts_source_photo;
+DROP INDEX IF EXISTS idx_entity_facts_status;
+DROP INDEX IF EXISTS idx_entity_facts_predicate;
+DROP INDEX IF EXISTS idx_entity_facts_subject;
+
+ALTER TABLE entity_facts RENAME TO entity_facts_old; -- keep the data under a scratch name while the table is rebuilt
+
+CREATE TABLE entity_facts (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    subject_entity_id INTEGER NOT NULL,
+    predicate TEXT NOT NULL,
+    object_entity_id INTEGER,
+    object_value TEXT,
+    source_photo TEXT,
+    source_insight_id INTEGER,
+    confidence REAL NOT NULL DEFAULT 0.6,
+    status TEXT NOT NULL DEFAULT 'active',
+    created_at BIGINT NOT NULL,
+    persona_id TEXT NOT NULL DEFAULT 'default', -- kept, but no longer backed by a foreign key
+    CONSTRAINT fk_ef_subject FOREIGN KEY (subject_entity_id) REFERENCES entities(id) ON DELETE CASCADE,
+    CONSTRAINT fk_ef_object  FOREIGN KEY (object_entity_id)  REFERENCES entities(id) ON DELETE SET NULL,
+    CONSTRAINT fk_ef_insight FOREIGN KEY (source_insight_id) REFERENCES photo_insights(id) ON DELETE SET NULL,
+    CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL)
+);
+
+INSERT INTO entity_facts
+    (id, subject_entity_id, predicate, object_entity_id, object_value,
+     source_photo, source_insight_id, confidence, status, created_at,
+     persona_id)
+SELECT
+    id, subject_entity_id, predicate, object_entity_id, object_value,
+    source_photo, source_insight_id, confidence, status, created_at,
+    persona_id
+FROM entity_facts_old; -- user_id is not copied, so ownership information is lost
+
+DROP TABLE entity_facts_old;
+
+CREATE INDEX idx_entity_facts_subject ON entity_facts(subject_entity_id);
+CREATE INDEX idx_entity_facts_predicate ON entity_facts(predicate);
+CREATE INDEX idx_entity_facts_status ON entity_facts(status);
+CREATE INDEX idx_entity_facts_source_photo ON entity_facts(source_photo);
+CREATE INDEX idx_entity_facts_persona ON entity_facts(persona_id);
@@ -0,0 +1,82 @@
+-- Add a real foreign key from entity_facts to personas. Until now,
+-- entity_facts.persona_id was a free-form string with no integrity
+-- guarantee — deleting a persona orphaned its facts, which then sat
+-- forever in the readable-only-via-PersonaFilter::All hive-mind view.
+--
+-- personas is keyed (user_id, persona_id) so the FK has to be
+-- composite. That requires entity_facts to carry user_id too, which
+-- has the side benefit of fixing multi-user fact leakage on the read
+-- path (without it, two users with the same 'default' persona would
+-- see each other's default-scoped facts).
+--
+-- SQLite can't ALTER TABLE to add an FK; the table-rebuild dance is
+-- the only way. Pattern matches 2026-05-09's down.sql and the older
+-- 2026-04-20-000000 migration.
+
+DROP INDEX IF EXISTS idx_entity_facts_subject; -- drop the index names up front so they can be reused on the rebuilt table
+DROP INDEX IF EXISTS idx_entity_facts_predicate;
+DROP INDEX IF EXISTS idx_entity_facts_status;
+DROP INDEX IF EXISTS idx_entity_facts_source_photo;
+DROP INDEX IF EXISTS idx_entity_facts_persona;
+
+ALTER TABLE entity_facts RENAME TO entity_facts_old; -- keep the data under a scratch name while the table is rebuilt
+
+CREATE TABLE entity_facts (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    subject_entity_id INTEGER NOT NULL,
+    predicate TEXT NOT NULL,
+    object_entity_id INTEGER,
+    object_value TEXT,
+    source_photo TEXT,
+    source_insight_id INTEGER,
+    confidence REAL NOT NULL DEFAULT 0.6,
+    status TEXT NOT NULL DEFAULT 'active',
+    created_at BIGINT NOT NULL,
+    persona_id TEXT NOT NULL DEFAULT 'default',
+    user_id INTEGER NOT NULL DEFAULT 1,
+    CONSTRAINT fk_ef_subject FOREIGN KEY (subject_entity_id) REFERENCES entities(id) ON DELETE CASCADE,
+    CONSTRAINT fk_ef_object  FOREIGN KEY (object_entity_id)  REFERENCES entities(id) ON DELETE SET NULL,
+    CONSTRAINT fk_ef_insight FOREIGN KEY (source_insight_id) REFERENCES photo_insights(id) ON DELETE SET NULL,
+    CONSTRAINT fk_ef_persona FOREIGN KEY (user_id, persona_id) REFERENCES personas(user_id, persona_id) ON DELETE CASCADE, -- deleting a persona deletes its facts
+    CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL)
+);
+
+-- Backfill: assign each legacy fact to the user that owns the matching
+-- persona. Built-ins are seeded per-user with the same persona_id
+-- string for everyone, so MIN(user_id) deterministically picks the
+-- earliest registered user (typically user 1, the operator). Custom
+-- persona_ids exist for at most one user, so MIN is also unique.
+-- Falls back to user_id=1 when no matching persona row exists; in that
+-- case the FK below would still fail, but legacy rows shouldn't be in
+-- that state because 2026-05-09 ADD COLUMN defaulted persona_id to
+-- 'default', which is seeded for every user.
+INSERT INTO entity_facts
+    (id, subject_entity_id, predicate, object_entity_id, object_value,
+     source_photo, source_insight_id, confidence, status, created_at,
+     persona_id, user_id)
+SELECT
+    old.id,
+    old.subject_entity_id,
+    old.predicate,
+    old.object_entity_id,
+    old.object_value,
+    old.source_photo,
+    old.source_insight_id,
+    old.confidence,
+    old.status,
+    old.created_at,
+    old.persona_id,
+    COALESCE(
+        (SELECT MIN(p.user_id) FROM personas p WHERE p.persona_id = old.persona_id), -- NULL when no user owns this persona_id
+        1
+    )
+FROM entity_facts_old old;
+
+DROP TABLE entity_facts_old;
+
+CREATE INDEX idx_entity_facts_subject ON entity_facts(subject_entity_id);
+CREATE INDEX idx_entity_facts_predicate ON entity_facts(predicate);
+CREATE INDEX idx_entity_facts_status ON entity_facts(status);
+CREATE INDEX idx_entity_facts_source_photo ON entity_facts(source_photo);
+CREATE INDEX idx_entity_facts_persona ON entity_facts(persona_id);
+CREATE INDEX idx_entity_facts_user_persona ON entity_facts(user_id, persona_id); -- same column pair as the composite FK
@@ -0,0 +1,5 @@
+-- SQLite can drop columns since 3.35 (March 2021); embedded
+-- libsqlite3-sys is well past that. Drop in reverse insert order so
+-- a partial down still leaves the schema valid.
+ALTER TABLE entity_facts DROP COLUMN valid_until;
+ALTER TABLE entity_facts DROP COLUMN valid_from;
@@ -0,0 +1,25 @@
+-- Add valid-time columns to entity_facts.
+--
+-- entity_facts already has created_at — *transaction time*, the
+-- moment WE recorded the fact. That's not the same as the real-world
+-- period the fact was true. "Cameron is_in_relationship_with X" was
+-- only true during a window; recording it in 2026 doesn't make it
+-- true today. Without the distinction, every former relationship,
+-- former job, former address reads as currently-true.
+--
+-- Adding two BIGINT NULL columns: valid_from / valid_until (unix
+-- seconds). NULL means "unbounded on that side" — `valid_from IS
+-- NULL` reads as "always-true-back-to-the-beginning",
+-- `valid_until IS NULL` as "still-true-now-or-unknown". Both NULL =
+-- temporal validity unknown (current state of all legacy rows).
+--
+-- Conflict detection refines accordingly: same-predicate facts with
+-- different objects stop flagging when their intervals are disjoint
+-- ("lives_in NYC 2018-2020" and "lives_in SF 2020-present" are both
+-- valid, just at different times).
+
+ALTER TABLE entity_facts ADD COLUMN valid_from BIGINT;
+ALTER TABLE entity_facts ADD COLUMN valid_until BIGINT;
+
+-- Optional partial index for time-bounded scans. Skipped for now —
+-- conflict detection runs per-entity (small N) and doesn't need it.
@@ -0,0 +1,2 @@
+DROP INDEX IF EXISTS idx_entity_facts_superseded_by;
+ALTER TABLE entity_facts DROP COLUMN superseded_by;
@@ -0,0 +1,31 @@
+-- Add a supersession pointer to entity_facts.
+--
+-- Status alone is a one-way trapdoor: 'rejected' loses the link
+-- between the rejected fact and the one that replaced it. For
+-- evolving facts (Cameron's relationship, employer, address) the
+-- curator wants to *replace* a stale fact with a new one and keep
+-- the history readable: "from 2018 until 2022 this was true, then
+-- it became this other thing".
+--
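+-- A nullable INTEGER column pointing at another entity_facts.id.
+-- No FK constraint is declared here (SQLite's ALTER TABLE ADD COLUMN
+-- only accepts a REFERENCES clause under restrictions — with foreign
+-- keys enabled the new column must default to NULL); instead the
+-- DAO's delete_fact clears dangling pointers in the same transaction
+-- as the parent delete to keep the column honest.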
+--
+-- A status of 'superseded' on the old fact (alongside the existing
+-- active / reviewed / rejected) signals "replaced by a newer
+-- claim". Read paths already filter 'rejected' out of the active
+-- view; the curation UI will treat 'superseded' the same way for
+-- conflict detection so they don't keep flagging.
+--
+-- Pairs with the valid-time columns from 2026-05-10-000100: the
+-- supersede action auto-stamps the old fact's `valid_until` from
+-- the new fact's `valid_from`, closing the interval cleanly.
+
+ALTER TABLE entity_facts ADD COLUMN superseded_by INTEGER;
+
+-- Helpful index for "show me what superseded this fact" walks
+-- (rare today; cheap to add now while the table is small).
+CREATE INDEX idx_entity_facts_superseded_by
+    ON entity_facts(superseded_by)
+    WHERE superseded_by IS NOT NULL;
@@ -0,0 +1,4 @@
+DROP INDEX IF EXISTS idx_entity_facts_created_by_backend;
+DROP INDEX IF EXISTS idx_entity_facts_created_by_model;
+ALTER TABLE entity_facts DROP COLUMN created_by_backend;
+ALTER TABLE entity_facts DROP COLUMN created_by_model;
@@ -0,0 +1,30 @@
+-- Track which model + backend generated each fact so the curator
+-- can audit which configurations produce trustworthy knowledge.
+--
+-- photo_insights already carries `model_version` + `backend`, and
+-- entity_facts.source_insight_id links to it — but:
+--   1. source_insight_id is only set after an insight is stored
+--      (post-loop), so chat-continuation facts and facts whose insight
+--      was regenerated lose the link.
+--   2. JOINing for every read is more friction than just embedding the
+--      provenance on the fact row itself.
+--   3. Manual facts (POST /knowledge/facts) have no insight at all and
+--      need to record "manual" as their provenance.
+--
+-- Two nullable TEXT columns are enough for the audit use case: model
+-- (e.g. "qwen2.5:7b", "anthropic/claude-sonnet-4") and backend
+-- ("local", "hybrid", "manual"). Pre-existing rows leave both NULL —
+-- legacy facts predate this tracking and can't be back-filled
+-- reliably from training_messages without burning compute.
+
+ALTER TABLE entity_facts ADD COLUMN created_by_model TEXT;
+ALTER TABLE entity_facts ADD COLUMN created_by_backend TEXT;
+
+-- Indexes are cheap and useful for "show me all facts from model X"
+-- audit queries — partial so the legacy NULL rows don't bloat them.
+CREATE INDEX idx_entity_facts_created_by_model
+    ON entity_facts(created_by_model)
+    WHERE created_by_model IS NOT NULL;
+CREATE INDEX idx_entity_facts_created_by_backend
+    ON entity_facts(created_by_backend)
+    WHERE created_by_backend IS NOT NULL;
@@ -0,0 +1 @@
+ALTER TABLE personas DROP COLUMN reviewed_only_facts;
@@ -0,0 +1,16 @@
+-- Per-persona toggle: when true, agent reads only see facts whose
+-- status is exactly 'reviewed' (human-verified). When false (the
+-- default), agent reads see 'active' OR 'reviewed' — everything not
+-- rejected or superseded.
+--
+-- The mobile app surfaces this as "Strict mode" on the persona
+-- editor: useful when you want a persona's chat to be grounded
+-- exclusively on the curated subset, e.g. for tasks where
+-- hallucinated agent claims are particularly costly.
+--
+-- Note: this is separate from `include_all_memories` (which unions
+-- across personas for hive-mind reads). Reviewed-only operates on
+-- the status axis; include_all_memories operates on the persona-
+-- scope axis. They compose freely.
+
+ALTER TABLE personas ADD COLUMN reviewed_only_facts BOOLEAN NOT NULL DEFAULT 0;
@@ -0,0 +1,5 @@
+ALTER TABLE personas DROP COLUMN allow_agent_corrections;
+DROP INDEX IF EXISTS idx_entity_facts_last_modified_at;
+ALTER TABLE entity_facts DROP COLUMN last_modified_at;
+ALTER TABLE entity_facts DROP COLUMN last_modified_by_backend;
+ALTER TABLE entity_facts DROP COLUMN last_modified_by_model;
@@ -0,0 +1,30 @@
+-- Three coupled changes for agent self-correction safety:
+--
+-- 1. `entity_facts.last_modified_by_*` + `last_modified_at` track who
+--    most recently mutated each fact. `created_by_*` from migration
+--    2026-05-10-000300 records who first wrote the row; this records
+--    who last *changed* it. Separate columns so the create vs update
+--    audit is independently grep-able ("show me every fact gpt-5
+--    altered last week" stays a single index scan).
+--
+-- 2. `personas.allow_agent_corrections` is the gate for the new
+--    agent-side `update_fact` / `supersede_fact` tools. Default OFF —
+--    a fresh persona's agent can create but can't alter or replace.
+--    Operator opts in per-persona after the model has earned trust,
+--    typically via the strict-mode flow (curate, then ratchet up
+--    agent autonomy as confidence rises). Parallel in shape to
+--    `reviewed_only_facts` from 2026-05-10-000400; they compose.
+--
+-- 3. Index on `last_modified_at` (partial, NOT NULL) for the
+--    audit-feed reads in the curation UI ("show recent agent edits
+--    sorted newest first").
+
+ALTER TABLE entity_facts ADD COLUMN last_modified_by_model TEXT;
+ALTER TABLE entity_facts ADD COLUMN last_modified_by_backend TEXT;
+ALTER TABLE entity_facts ADD COLUMN last_modified_at BIGINT;
+
+CREATE INDEX idx_entity_facts_last_modified_at
+    ON entity_facts(last_modified_at)
+    WHERE last_modified_at IS NOT NULL;
+
+ALTER TABLE personas ADD COLUMN allow_agent_corrections BOOLEAN NOT NULL DEFAULT 0;
@@ -0,0 +1,6 @@
+-- Irreversible: we collapsed multiple raw entity_type strings to
+-- canonical forms and don't have a per-row record of the original.
+-- The down migration is intentionally a no-op (the rewritten values
+-- are still semantically correct), and the up migration is safe to
+-- re-run because every UPDATE is conditional on `!= canonical`.
+SELECT 1;
@@ -0,0 +1,43 @@
+-- Canonicalize `entities.entity_type` so legacy rows from before
+-- `normalize_entity_type` landed in upsert_entity stop polluting
+-- client-side filters. Mirrors the synonym map in
+-- `src/database/knowledge_dao.rs::normalize_entity_type`:
+--   person  ← person | people | human | individual | contact
+--   place   ← place  | location | venue | site | area | landmark
+--   event   ← event  | occasion | activity | celebration
+--   thing   ← thing  | object | item | product
+-- Types outside the synonym set (e.g. "friend", "family") are not
+-- recognized as canonical and get a lowercase+trim pass instead, so
+-- at minimum case variants collapse.
+--
+-- `UPDATE OR IGNORE` skips rows that would violate UNIQUE(name,
+-- entity_type) after the rewrite. Two rows like ("Sarah", "person")
+-- + ("Sarah", "Person") would otherwise collide — the duplicate
+-- survives unchanged so the curator can merge it via the curation
+-- UI rather than have the migration silently delete data.
+
+UPDATE OR IGNORE entities
+SET entity_type = 'person'
+WHERE LOWER(TRIM(entity_type)) IN ('person', 'people', 'human', 'individual', 'contact')
+  AND entity_type != 'person';
+
+UPDATE OR IGNORE entities
+SET entity_type = 'place'
+WHERE LOWER(TRIM(entity_type)) IN ('place', 'location', 'venue', 'site', 'area', 'landmark')
+  AND entity_type != 'place';
+
+UPDATE OR IGNORE entities
+SET entity_type = 'event'
+WHERE LOWER(TRIM(entity_type)) IN ('event', 'occasion', 'activity', 'celebration')
+  AND entity_type != 'event';
+
+UPDATE OR IGNORE entities
+SET entity_type = 'thing'
+WHERE LOWER(TRIM(entity_type)) IN ('thing', 'object', 'item', 'product')
+  AND entity_type != 'thing';
+
+-- Anything left ("Friend" vs "friend") gets a lowercase+trim sweep
+-- so at least case variants of the same custom type collapse.
+UPDATE OR IGNORE entities
+SET entity_type = LOWER(TRIM(entity_type))
+WHERE entity_type != LOWER(TRIM(entity_type));
@@ -0,0 +1,5 @@
+DROP INDEX IF EXISTS idx_image_exif_date_backfill;
+
+CREATE INDEX idx_image_exif_date_backfill
+    ON image_exif (library_id, id)
+    WHERE date_taken IS NULL OR date_taken_source = 'fs_time';
@@ -0,0 +1,18 @@
+-- Narrow the date-backfill partial index to NULL-only rows.
+--
+-- The original index (2026-05-06-000000_add_date_taken_source) also matched
+-- `date_taken_source = 'fs_time'` so the drain could "re-resolve weak
+-- entries when better tools become available." In practice the resolver
+-- is deterministic on file bytes + filename + fs metadata: a row that
+-- landed on fs_time once will land on fs_time again on every subsequent
+-- tick. With `ORDER BY id ASC LIMIT 500`, the drain spun on the same
+-- lowest-id fs_time rows in perpetuity, never advancing, while hammering
+-- the SQLite write lock once per row and starving other writers (face
+-- PATCHes were hitting busy_timeout and returning 500). Drop fs_time
+-- from the eligibility set; if exiftool / a new filename pattern ever
+-- comes online, a one-shot operator command can re-resolve.
+DROP INDEX IF EXISTS idx_image_exif_date_backfill;
+
+CREATE INDEX idx_image_exif_date_backfill
+    ON image_exif (library_id, id)
+    WHERE date_taken IS NULL;
@@ -0,0 +1,392 @@
+# Insight Chat improvements — design
+
+**Date:** 2026-05-07
+**Branch:** `feature/insight-chat-improvements` (in both `ImageApi/` and `FileViewer-React/`)
+**Scope:** ImageApi photo-anchored insight + chat surface, plus the
+FileViewer-React client. Apollo's free/visit chat is **not** in this cycle.
+
+## Problem
+
+Three concrete gaps in today's insight + chat surface:
+
+1. **Tool drift.** ImageApi exposes 13 tools to the LLM. Some are gated on
+   `apollo_enabled` / `has_vision`, but several optional ones
+   (`search_rag`, `get_calendar_events`, `get_location_history`) are
+   registered unconditionally even when their backing tables are empty.
+   Descriptions vary in quality and a couple have outright bugs.
+2. **Inconsistent / incomplete tool descriptions.** Tools like
+   `search_messages` describe their selection rules but omit useful
+   examples; `store_fact` doesn't show the `object_entity_id` vs
+   `object_value` choice; `get_sms_messages` accepts a `days_radius`
+   parameter that the backing client silently ignores. The LLM is being
+   instructed against a slightly wrong reality.
+3. **System prompt fights the persona.** Today's generation prompt
+   prepends the user's `custom_system_prompt` and then immediately asserts
+   `"You are a personal photo memory assistant..."`. The user message
+   demands `"a detailed insight with a title and summary"`. Both
+   contradict whatever voice / shape / POV the persona just established.
+   On chat continuation the persona is baked into the stored transcript at
+   generation time and can't be changed without regenerating.
+
+## Goals
+
+- Tool catalog is **representative** — every tool registered for a turn is
+  backed by data the user actually has.
+- Tool descriptions are **concise but complete**, with examples for any
+  tool whose param choice has multiple modes or non-obvious interactions.
+- Persona / system prompt is **authoritative** for voice, length, and
+  shape — both at generation and during chat continuation.
+- Per-turn system prompt overrides on chat work without surprising
+  side-effects on the stored transcript outside `amend` mode.
+
+## Non-goals
+
+- Apollo backend / frontend changes. Separate cycle.
+- Refactoring the `generate_photo_title` post-hoc title flow. Already
+  takes `custom_system_prompt`.
+- Tool consolidation (e.g. merging `search_messages` + `get_sms_messages`).
+  Considered and deferred — keeps blast radius small.
+- Removing knowledge-memory tools (`recall_*` / `store_*`). Audit
+  confirmed they have a live read path via `knowledge.rs` HTTP routes.
+- Persisting persona changes to the stored transcript outside `amend`
+  mode. Deliberate — re-opens use the persona currently active in the
+  client, not a sticky historical setting.
+
+---
+
+## Design
+
+### A. System prompt — generation
+
+Today (`insight_generator.rs:3305–3326`):
+
+```
+[custom_system_prompt if any] +
+"You are a personal photo memory assistant helping to reconstruct..." +
+{owner_id_note} +
+{fewshot_block} +
+"IMPORTANT INSTRUCTIONS:
+1. You MUST call multiple tools...
+2. When calling get_sms_messages and search_rag...
+3. Use recall_facts_for_photo...
+...
+8. You have a hard budget of {max_iterations} iterations..."
+```
+
+The first concatenation is the bug: `custom` claims one identity, the
+next line asserts another.
+
+**New structure** — two named blocks, in order:
+
+```
+[Identity / voice / format block]    ← persona-controlled (or neutral default)
+[Procedural block]                   ← always identity-free
+```
+
+**Identity block:**
+- When `custom_system_prompt` is supplied: use that string verbatim, no
+  pre/append.
+- When not: a neutral default that doesn't fight a future persona.
+  Working text: `"You are reconstructing a memory from a photo. Use the
+  gathered context to write a thoughtful summary; you decide voice,
+  length, and shape."`
+
+**Procedural block** — identity-free, always emitted:
+
+```
+Tool-use guidance:
+- You have a budget of {max_iterations} tool-calling iterations.
+- Call tools to gather context BEFORE writing your final answer; don't
+  answer after one or two calls.
+- When calling get_sms_messages or search_rag, make at least one call
+  WITHOUT a contact filter — surrounding events matter even when a
+  contact is known.
+- Use recall_facts_for_photo + recall_entities to load any prior
+  knowledge about subjects in the photo.
+- When you identify people / places / events / things, use store_entity
+  + store_fact to grow the persistent memory.
+- A tool returning no results is informative; continue with the others.
+
+{owner_id_note if applicable}
+{fewshot_block if applicable}
+```
+
+Differences from today's "IMPORTANT INSTRUCTIONS" block: removed the
+"you are a personal photo memory assistant" framing and the explicit
+"at least 5 tool calls" floor (replaced with the softer "don't answer
+after one or two"). Few-shot stays — it's pattern-of-tool-use, not
+identity.
+
+### B. User message — generation
+
+Today (line 3357):
+
+```
+{visual_block}Please analyze this photo and gather any relevant context
+from the surrounding weeks.
+
+Photo file path: {file_path}
+Date taken: {date}
+{contact_info}
+{gps_info}
+{tags_info}
+
+Use the available tools to gather more context about this moment
+(messages, calendar events, location history, etc.), then write a
+detailed insight with a title and summary.
+```
+
+Problems: the trailing line bakes in output shape ("title and
+summary"), and the title from the resulting response is **discarded
+anyway** — `generate_photo_title` (line 3494) regenerates the title
+post-hoc from the summary. So the prompt is constraining voice for no
+data-model benefit.
+
+**New payload** — context-only, no output prescription:
+
+```
+{visual_block}Photo file path: {file_path}
+Date taken: {date}
+{contact_info}
+{gps_info}
+{tags_info}
+
+Gather context with the available tools, then respond.
+```
+
+The persona owns shape. If a user wants "title-then-paragraph" output,
+their persona prompt says so.
+
+### C. System prompt — chat continuation
+
+Add `system_prompt: Option<String>` to `ChatTurnRequest` (and to its
+HTTP wrapper `ChatTurnHttpRequest`). It carries through both the
+non-streaming `chat_turn` and the streaming `chat_turn_stream`.
+
+**Append mode (default, `amend=false`)** — ephemeral
+swap-and-restore, mirroring the existing `annotate_system_with_budget`
+pattern:
+
+1. Load stored transcript.
+2. If `system_prompt` is `Some(s)`:
+   - If first message is a `system` role: stash original content,
+     replace with `s`.
+   - Else: prepend a synthetic ephemeral system message with `s` (note
+     it's synthetic so the restore step pops it rather than rewriting).
+3. Run `annotate_system_with_budget` on top (existing per-turn budget
+   note appends to whatever's there now).
+4. Run the agentic loop.
+5. **Before persistence**, unwind in reverse order of application: run
+   `restore_system_content` for the budget annotation as today, then
+   restore the original system content (or pop the synthetic one).
+6. Save.
+
+Result: the model sees the override; the stored transcript is
+unchanged outside the model's actual reply.
+
+**Amend mode (`amend=true`)**:
+
+- If `system_prompt` is supplied: the override stays in place during
+  the serialization for the new insight row. The new row's
+  `training_messages` system message is the override. `is_current=false`
+  flips on prior rows as today.
+- If not supplied: behaves as today (stored transcript's system message
+  carries forward unchanged).
+
+### D. FileViewer-React — client wiring
+
+`hooks/useInsightChat.tsx`:
+- `SendTurnOptions` gains `systemPromptOverride?: string | null`.
+- Inside `sendTurn`, before issuing the streaming POST:
+  1. Read the active persona's `systemPrompt` from AsyncStorage
+     (already loaded for generation flows — reuse the same accessor).
+  2. If a one-shot `systemPromptOverride` is set, append as a suffix
+     (`${persona}\n\n${override}`) so persona voice survives + override
+     tweaks the turn.
+  3. Include the resulting string as `system_prompt` on the request body.
+- No history-load change. The history endpoint still returns the stored
+  transcript.
+
+`components/InsightChatModal.tsx`:
+- Add a small "Style note" composer affordance — a one-shot text input
+  that, when filled, becomes the `systemPromptOverride` for the next
+  send. Cleared after send.
+- The existing persona chip continues to open `PersonaManagerModal`.
+
+`hooks/usePersonas.tsx` and the bundled defaults:
+- Built-in `assistant` and `journal` prompts get audited and rewritten
+  to **explicitly state voice / shape / length** — since the framework
+  no longer guarantees a default shape, the persona must.
+
+### E. Tool catalog — gating
+
+Widen `build_tool_definitions` from `(has_vision: bool, apollo_enabled:
+bool)` to a single `ToolGateOpts` struct:
+
+```rust
+pub struct ToolGateOpts {
+    pub has_vision: bool,
+    pub apollo_enabled: bool,
+    pub daily_summaries_present: bool,
+    pub calendar_present: bool,
+    pub location_history_present: bool,
+}
+```
+
+The chat / generation services compute the three new fields lazily per
+turn via `SELECT 1 FROM <table> LIMIT 1` (cheap; cached for the turn's
+duration). Lazy because operators import data after launch and we don't
+want to require a restart for the LLM to discover its new capabilities.
+
+Per-tool gating:
+
+| Tool | Existing gate | New gate |
+|---|---|---|
+| `describe_photo` | `has_vision` | unchanged |
+| `get_personal_place_at` | `apollo_enabled` | unchanged |
+| `get_calendar_events` | none | `calendar_present` |
+| `get_location_history` | none | `location_history_present` |
+| `search_rag` | none | `daily_summaries_present` |
+
+All other tools always-on. (`get_sms_messages` and `search_messages`
+fail informatively if SMS-API is unreachable; not worth a startup probe
+since intermittent failures are the same shape.)
+
+### F. Tool descriptions — convention
+
+Every description follows:
+
+1. One sentence: **what** + **when to call**.
+2. Param semantics worth knowing (units, ranges, mode behavior,
+   precedence).
+3. **Example invocation** for tools with multiple modes, optional bands,
+   or non-obvious parameter interactions.
+4. Cross-references when relevant: `prefer X when both apply`.
+
+Banned: all-caps section headers inside descriptions
+(`"CONTENT search"`, `"TIME-BASED fetch"`); persona-prescriptive language
+(`"you are a..."`); behavioral references to other tools by description
+rather than name.
+
+Tools getting examples: `search_messages`, `search_rag`, `store_fact`,
+`get_sms_messages`. Trivial tools (`get_current_datetime`,
+`reverse_geocode`, `get_file_tags`) skip the example.
+
+Sample (`search_messages`):
+
+> Search SMS/MMS message bodies. Modes: `fts5` (keyword + phrase + prefix
+> + AND/OR/NOT + NEAR proximity), `semantic` (embedding similarity,
+> requires generated embeddings), `hybrid` (RRF merge, recommended;
+> degrades to `fts5` when embeddings absent). Optional `start_ts` /
+> `end_ts` (real-UTC unix seconds) and `contact_id` filters. For pure
+> date / contact browsing without keywords, prefer `get_sms_messages`.
+>
+> Examples:
+> - `{query: "trader joe's"}` — phrase across all time.
+> - `{query: "dinner", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}`
+>   — keyword within a contact and a week.
+> - `{query: "NEAR(meeting work, 5)"}` — proximity search.
+
+### G. SMS tool fixes
+
+#### `get_sms_messages` — honor `days_radius`
+
+Today: `sms_client::fetch_messages_for_contact(contact, center_ts)`
+hardcodes `Duration::days(4)` (lines 31–37). The tool accepts
+`days_radius` and silently ignores it.
+
+**Fix:** widen the signature to
+`fetch_messages_for_contact(contact, center_ts, days_radius)`. Tool
+plumbs through. Default 4 retained for back-compat.
+
+#### `search_messages` — add date and contact_id filters
+
+Today: ImageApi's `search_messages` only forwards `query`, `mode`,
+`limit` to SMS-API.
+
+**Fix:** add `start_ts`, `end_ts`, `contact_id` parameters.
+- `contact_id` forwards directly to SMS-API
+  (`/api/messages/search/?contact_id=`).
+- `start_ts` / `end_ts` are not natively accepted by SMS-API's search
+  endpoint. Apply client-side post-filter on the response (Apollo's
+  pattern: `chat_tools.py:670–680`). Bump the SMS-API `limit` to a
+  larger fetch pool when a date filter is supplied so in-window matches
+  aren't lost to out-of-window FTS rank.
+
+---
+
+## Implementation sequencing
+
+Each step is independently mergeable.
+
+### ImageApi PRs
+
+1. **Split system-prompt assembly + neutralize user message.** Two
+   named blocks; user message context-only. Default identity string
+   added. Tests: golden snapshots of the resulting `system_content`
+   with and without `custom_system_prompt`.
+2. **`system_prompt` field on chat request + swap/restore + amend
+   persistence.** Mirrors `annotate_system_with_budget` pattern. Tests:
+   round-trip system content unchanged in append mode; persisted in
+   amend mode.
+3. **`fetch_messages_for_contact` honors `days_radius`.** Tool wires
+   the param through. Tests: window math at the client level.
+4. **`ToolGateOpts` + per-tool description rewrites.** Description
+   text changes are the bulk of the diff but no behavior change beyond
+   gating.
+
+### FileViewer-React PR
+
+5. **Chat hook sends `system_prompt`; modal gets style-note input;
+   built-in personas updated to specify shape.** The
+   `useInsightChat.sendTurn` call site picks up the persona and
+   includes it on every chat turn body. Style-note input is a one-shot
+   suffix.
+
+## Testing & verification
+
+**Automated:**
+- Unit (Rust): swap-and-restore round-trip preserves stored transcript.
+- Unit (Rust): amend mode persists override into new insight row.
+- Unit (Rust): `fetch_messages_for_contact(days_radius=N)` produces a
+  window of `2N` days centered on `center_ts`.
+- Unit (Rust): `build_tool_definitions(opts)` excludes gated tools when
+  the corresponding flag is false.
+
+**Manual:**
+- Run a chat turn against an existing insight without `system_prompt` →
+  output unchanged from baseline.
+- Same insight, with override → output reflects new voice.
+- Re-open chat → stored transcript still carries the original baked
+  persona (override was ephemeral).
+- Regenerate an insight with the journal persona → model's voice
+  matches journal style; no "memory assistant" framing leaks through.
+- Toggle data presence (delete a row from `calendar_events`) → tool
+  drops from the catalog on the next turn.
+
+## Risks
+
+- **Default identity wording matters.** A too-neutral default ("Use the
+  gathered context to write a summary") might produce flatter output
+  than today's "personal photo memory assistant" framing for users
+  who never set a persona. Mitigation: tune the default with a small
+  set of test photos before merging.
+- **Persona-suffix style notes can contradict persona voice.** A user
+  who picks `journal` (first person, warm) and adds the style note
+  "respond in bullet points" will get a tonal collision. Acceptable —
+  the user expressed a per-turn intent and we honor it. Document the
+  composition rule in the persona-manager UI.
+- **Lazy data-presence probes add per-turn `SELECT 1` queries** (one
+  per gated table: daily summaries, calendar, location history).
+  Negligible on SQLite (sub-millisecond each) but they add up across
+  many turns. Cache the results for the turn's duration; don't
+  re-probe per-tool.
+
+## Open questions
+
+None blocking. Items deferred to a possible follow-up cycle:
+
+- Apollo parity for the same per-turn override pattern (already
+  present; just needs RN client wiring on the photo path, which is
+  already proxied).
+- Tool consolidation (`search_messages` + `get_sms_messages` →
+  single `search_messages` with optional date filter, Apollo-style).
+  Considered and deferred — separate spec.
@@ -0,0 +1,110 @@
+//! Thin async HTTP client for Apollo's `/api/places/*` endpoints.
+//!
+//! Apollo (the personal location-history viewer at the sibling repo) owns
+//! user-defined Places: `name + lat/lon + radius_m + description (+ optional
+//! category)`. We consume them in two places:
+//!
+//! 1. Automatic enrichment in [`crate::ai::insight_generator`] — the always-on
+//!    path that folds the most-specific containing Place into the location
+//!    string fed to the LLM.
+//! 2. The agentic `get_personal_place_at` tool — lets the LLM ask "what
+//!    user-defined place contains this lat/lon" during chat continuation.
+//!
+//! Apollo does the haversine. This client is plumbing only — no geometry,
+//! no caching at the moment. If insight throughput ever makes per-photo
+//! HTTP latency a problem, swap to a small `Mutex<HashMap>` TTL cache here.
+//!
+//! Configured via `APOLLO_API_BASE_URL`. When unset, the client is built
+//! as a no-op shell: lookups return empty results and `is_enabled()` is
+//! `false`, the enrichment path silently falls through to the legacy
+//! Nominatim-only output, and the tool registration in
+//! `insight_generator` reports "integration disabled."
+
+use anyhow::Result;
+use reqwest::Client;
+use serde::Deserialize;
+use std::time::Duration;
+
+// Public fields — `id`, `lat`, `lon` aren't read from the current tool
+// output but are part of the wire model and useful for future tool
+// extensions / debugging.
+/// One user-defined Place as deserialized from Apollo's `/api/places/*`
+/// responses.
+///
+/// `description` and `category` are optional on the wire: a missing
+/// `description` deserializes to an empty string and a missing
+/// `category` to `None`. All other fields are required, so a payload
+/// lacking any of them fails to deserialize.
+#[allow(dead_code)]
+#[derive(Debug, Clone, Deserialize)]
+pub struct ApolloPlace {
+    pub id: i32,
+    pub name: String,
+    #[serde(default)]
+    pub description: String,
+    pub lat: f64,
+    pub lon: f64,
+    // NOTE(review): presumably the containment radius in metres, per the
+    // `_m` suffix — confirm against Apollo's API.
+    pub radius_m: i32,
+    #[serde(default)]
+    pub category: Option<String>,
+}
+
+/// JSON envelope returned by the places endpoint: an object whose
+/// `places` key holds the list of matching [`ApolloPlace`] entries.
+#[derive(Deserialize)]
+struct PlacesResponse {
+    places: Vec<ApolloPlace>,
+}
+
+/// Handle for talking to Apollo's Places API.
+///
+/// Pairs a `reqwest` client with an optional base URL; the base URL
+/// doubles as the enabled/disabled switch for the whole integration.
+#[derive(Clone)]
+pub struct ApolloClient {
+    client: Client,
+    /// `None` means the integration is disabled — every method returns
+    /// empty so the rest of insight generation runs unchanged.
+    base_url: Option<String>,
+}
+
+impl ApolloClient {
+    /// Builds a client for the Apollo instance at `base_url`.
+    ///
+    /// Passing `None` yields a disabled client: [`Self::is_enabled`]
+    /// returns `false` and [`Self::places_containing`] returns an empty
+    /// list without issuing a request.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the underlying `reqwest` client cannot be built.
+    pub fn new(base_url: Option<String>) -> Self {
+        // 5 s timeout: Apollo runs on the LAN. If it doesn't answer in
+        // five seconds, treat the call as failed and fall back to the
+        // legacy Nominatim path rather than block the whole insight.
+        let client = Client::builder()
+            .timeout(Duration::from_secs(5))
+            .build()
+            .expect("reqwest client build");
+        Self { client, base_url }
+    }
+
+    /// Convenience for callers that need to know whether to register the
+    /// `get_personal_place_at` tool (or to short-circuit enrichment).
+    ///
+    /// Returns `true` exactly when a base URL was supplied to
+    /// [`Self::new`].
+    pub fn is_enabled(&self) -> bool {
+        self.base_url.is_some()
+    }
+
+    /// Server-side haversine: returns places whose radius contains
+    /// (lat, lon), already sorted smallest-radius-first by Apollo. The
+    /// caller can take `[0]` for the most-specific match (matches
+    /// Apollo's `primaryPlaceFor` rule on the frontend, so the carousel
+    /// badge and the LLM prompt always agree).
+    ///
+    /// This method never surfaces an error: when the integration is
+    /// disabled it returns an empty `Vec` immediately, and when the
+    /// request or response decoding fails it logs a warning and
+    /// returns an empty `Vec`.
+    pub async fn places_containing(&self, lat: f64, lon: f64) -> Vec<ApolloPlace> {
+        // Disabled integration: skip the network call entirely.
+        let Some(base) = self.base_url.as_deref() else {
+            return Vec::new();
+        };
+        match self.fetch_places_containing(base, lat, lon).await {
+            Ok(places) => places,
+            Err(err) => {
+                // Best-effort by design: a failed lookup degrades to
+                // "no places" instead of failing the caller.
+                log::warn!("apollo_client: places_containing({lat:.4}, {lon:.4}) failed: {err}");
+                Vec::new()
+            }
+        }
+    }
+
+    /// Issues `GET {base}/api/places/contains` with `lat` / `lon` query
+    /// parameters and returns the decoded `places` array.
+    ///
+    /// Trailing `/` characters on `base` are stripped before the path
+    /// is appended, so both `http://host` and `http://host/` work.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the request cannot be sent or times out, if
+    /// the server answers with an HTTP error status, or if the body
+    /// does not deserialize into the expected JSON shape.
+    async fn fetch_places_containing(
+        &self,
+        base: &str,
+        lat: f64,
+        lon: f64,
+    ) -> Result<Vec<ApolloPlace>> {
+        let url = format!("{}/api/places/contains", base.trim_end_matches('/'));
+        let resp = self
+            .client
+            .get(&url)
+            .query(&[("lat", lat), ("lon", lon)])
+            .send()
+            .await?
+            .error_for_status()?;
+        let body: PlacesResponse = resp.json().await?;
+        Ok(body.places)
+    }
+}
@@ -6,12 +6,83 @@ use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 use tokio::time::sleep;

-use crate::ai::{OllamaClient, SmsApiClient, SmsMessage};
+use crate::ai::{EMBEDDING_MODEL, OllamaClient, SmsApiClient, SmsMessage, user_display_name};
 use crate::database::{DailySummaryDao, InsertDailySummary};
 use crate::otel::global_tracer;

 /// Strip boilerplate prefixes and common phrases from summaries before embedding.
 /// This improves embedding diversity by removing structural similarity.
+/// Maximum number of messages passed to the summarizer for a single day.
+/// Tuned to avoid token overflow on typical chat models; shared between
+/// the production job and the test binary so they can't drift.
+pub const DAILY_SUMMARY_MESSAGE_LIMIT: usize = 300;
+
+/// System prompt used when generating daily conversation summaries.
+pub const DAILY_SUMMARY_SYSTEM_PROMPT: &str = "You are a conversation summarizer. Create clear, factual summaries with \
+     precise subject attribution AND extract distinctive keywords. Focus on \
+     specific, unique terms that differentiate this conversation from others.";
+
+/// Build the prompt for a single day's conversation summary. Shared by the
+/// production job and the test binary so prompt tweaks land in both places.
+/// Returns `(prompt, system_prompt)`.
+pub fn build_daily_summary_prompt(
+    contact: &str,
+    date: &NaiveDate,
+    messages: &[SmsMessage],
+) -> (String, &'static str) {
+    let user_name = user_display_name();
+    let messages_text: String = messages
+        .iter()
+        .take(DAILY_SUMMARY_MESSAGE_LIMIT)
+        .map(|m| {
+            if m.is_sent {
+                format!("{}: {}", user_name, m.body)
+            } else {
+                format!("{}: {}", m.contact, m.body)
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    let prompt = format!(
+        r#"Summarize this day's conversation between {user_name} and {contact}.
+
+CRITICAL FORMAT RULES:
+- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
+- Do NOT repeat the date at the beginning
+- Start DIRECTLY with the content - begin with a person's name or action
+- Write in past tense, as if recording what happened
+
+NARRATIVE (4-8 sentences):
+- What specific topics, activities, or events were discussed?
+- What places, people, or organizations were mentioned?
+- What plans were made or decisions discussed?
+- Clearly distinguish between what {user_name} did versus what {contact} did
+
+KEYWORDS (comma-separated):
+5-10 specific keywords that capture this conversation's unique content:
+- Proper nouns (people, places, brands)
+- Specific activities ("drum corps audition" not just "music")
+- Distinctive terms that make this day unique
+
+Date: {month_day_year} ({weekday})
+Messages:
+{messages_text}
+
+YOUR RESPONSE (follow this format EXACTLY):
+Summary: [Start directly with content, NO preamble]
+
+Keywords: [specific, unique terms]"#,
+        user_name = user_name,
+        contact = contact,
+        month_day_year = date.format("%B %d, %Y"),
+        weekday = date.format("%A"),
+        messages_text = messages_text,
+    );
+
+    (prompt, DAILY_SUMMARY_SYSTEM_PROMPT)
+}
+
 pub fn strip_summary_boilerplate(summary: &str) -> String {
    let mut text = summary.trim().to_string();

@@ -290,65 +361,10 @@ async fn generate_and_store_daily_summary(
    span.set_attribute(KeyValue::new("contact", contact.to_string()));
    span.set_attribute(KeyValue::new("message_count", messages.len() as i64));

-    // Format messages for LLM
-    let messages_text: String = messages
-        .iter()
-        .take(200) // Limit to 200 messages per day to avoid token overflow
-        .map(|m| {
-            if m.is_sent {
-                format!("Me: {}", m.body)
-            } else {
-                format!("{}: {}", m.contact, m.body)
-            }
-        })
-        .collect::<Vec<_>>()
-        .join("\n");
-
-    let weekday = date.format("%A");
-
-    let prompt = format!(
-        r#"Summarize this day's conversation between me and {}.
-
-CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened
-
-NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did
-
-KEYWORDS (comma-separated):
-5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique
-
-Date: {} ({})
-Messages:
-{}
-
-YOUR RESPONSE (follow this format EXACTLY):
-Summary: [Start directly with content, NO preamble]
-
-Keywords: [specific, unique terms]"#,
-        contact,
-        contact,
-        date.format("%B %d, %Y"),
-        weekday,
-        messages_text
-    );
+    let (prompt, system_prompt) = build_daily_summary_prompt(contact, date, messages);

    // Generate summary with LLM
-    let summary = ollama
-        .generate(
-            &prompt,
-            Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
-        )
-        .await?;
+    let summary = ollama.generate(&prompt, Some(system_prompt)).await?;

    log::debug!(
        "Generated summary for {}: {}",
@@ -381,8 +397,7 @@ Keywords: [specific, unique terms]"#,
        message_count: messages.len() as i32,
        embedding,
        created_at: Utc::now().timestamp(),
-        // model_version: "nomic-embed-text:v1.5".to_string(),
-        model_version: "mxbai-embed-large:335m".to_string(),
+        model_version: EMBEDDING_MODEL.to_string(),
    };

    // Create context from current span for DB operation
@@ -0,0 +1,400 @@
+//! Thin async HTTP client for Apollo's `/api/internal/faces/*` endpoints.
+//!
+//! Apollo (the personal location-history viewer at the sibling repo) hosts the
+//! insightface inference service. This client is the ImageApi side of the
+//! contract — it shoves image bytes through `/detect` and returns boxes +
+//! 512-d ArcFace embeddings, plus a single-embedding `/embed` for the manual
+//! face-create flow.
+//!
+//! Mirrors `apollo_client.rs` shape: optional base URL (None = disabled, the
+//! file watcher and manual-create handlers no-op), reqwest client with a
+//! generous timeout because CPU inference on a backlog can take many seconds
+//! per photo.
+//!
+//! Configured via `APOLLO_FACE_API_BASE_URL`, falling back to
+//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo
+//! deploys are the common case). Both unset → `is_enabled()` returns false.
+//!
+//! Wire format: multipart/form-data with `file=<bytes>` and `meta=<json>`.
+//! `meta` carries `{content_hash, library_id, rel_path, orientation?,
+//! model_version?}` — useful for Apollo-side logging and idempotency, ignored
+//! by Apollo today but part of the stable wire contract so future versions
+//! can act on it without a client change.
+//!
+//! Error mapping (reflected in [`FaceDetectError`]):
+//! - 422 `decode_failed` → permanent: ImageApi marks `status='failed'` and
+//!   doesn't retry until manual rerun.
+//! - 200 with `faces:[]` → `status='no_faces'` marker row.
+//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker
+//!   written.
+//! - Any other 5xx / network error → defer.
+
+use anyhow::{Context, Result};
+use base64::Engine;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+/// Per-image metadata serialized to JSON and sent as the `meta` field of
+/// the multipart request, alongside the image bytes.
+#[derive(Debug, Clone, Serialize)]
+pub struct DetectMeta {
+    /// Content hash of the image (the hash algorithm is not visible here).
+    pub content_hash: String,
+    /// Id of the library the image belongs to.
+    pub library_id: i32,
+    /// Relative path of the image; also used as the multipart file name.
+    pub rel_path: String,
+    /// EXIF orientation int (1..8). Apollo applies `exif_transpose` on the
+    /// bytes before inference, so this is informational only — supply when
+    /// the bytes were extracted from a RAW preview that lost the tag.
+    /// Omitted from the JSON when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub orientation: Option<i32>,
+    /// Echoed back in the response. ImageApi stores it in
+    /// `face_detections.model_version`. Omitted from the JSON when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model_version: Option<String>,
+}
+
+// Wire shape for the bbox sub-object Apollo returns. Read by Phase 3's
+// file-watch hook; silence the dead-code lint until then.
+//
+// NOTE(review): the coordinate convention is not visible in this file —
+// presumably (x, y) is the top-left corner and (w, h) the size, but whether
+// values are pixels or normalized 0..1 must be confirmed against Apollo.
+#[allow(dead_code)]
+#[derive(Debug, Clone, Deserialize)]
+pub struct DetectedBbox {
+    pub x: f32,
+    pub y: f32,
+    pub w: f32,
+    pub h: f32,
+}
+
+/// One face entry from Apollo's detect/embed response.
+#[allow(dead_code)] // bbox consumed by Phase 3 file-watch hook
+#[derive(Debug, Clone, Deserialize)]
+pub struct DetectedFace {
+    /// Location of the face within the submitted image.
+    pub bbox: DetectedBbox,
+    /// Detector score for this face (presumably in 0..1 — confirm against
+    /// Apollo's response schema).
+    pub confidence: f32,
+    /// base64 of 2048 bytes (512×f32 LE). ImageApi stores the raw bytes
+    /// verbatim as a BLOB — see `decode_embedding` for the unpack.
+    pub embedding: String,
+}
+
+impl DetectedFace {
+    /// Unpack the base64 `embedding` into the raw bytes that get stored.
+    ///
+    /// # Errors
+    /// Fails when the string is not valid standard base64, or when the
+    /// decoded buffer is not exactly 2048 bytes (512 little-endian f32s).
+    pub fn decode_embedding(&self) -> Result<Vec<u8>> {
+        // 512 dimensions × 4 bytes per f32.
+        const EXPECTED_LEN: usize = 2048;
+
+        let raw = base64::engine::general_purpose::STANDARD
+            .decode(self.embedding.as_bytes())
+            .context("face embedding base64 decode")?;
+        anyhow::ensure!(
+            raw.len() == EXPECTED_LEN,
+            "face embedding wrong size: got {} bytes, expected 2048",
+            raw.len()
+        );
+        Ok(raw)
+    }
+}
+
+/// Successful (2xx) response body shared by the `/detect` and `/embed`
+/// endpoints.
+#[allow(dead_code)] // duration_ms logged by Phase 3 file-watch hook
+#[derive(Debug, Clone, Deserialize)]
+pub struct DetectResponse {
+    /// Model identifier reported by Apollo.
+    pub model_version: String,
+    /// Duration in milliseconds as reported by Apollo (presumably the
+    /// server-side inference time — confirm).
+    pub duration_ms: i64,
+    /// Detected faces; an empty list is the no-faces signal.
+    pub faces: Vec<DetectedFace>,
+}
+
+/// Body of Apollo's `/api/internal/faces/health` endpoint.
+#[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)] // Reported by Apollo; useful for future health-driven backoff
+pub struct FaceHealth {
+    /// Whether Apollo reports its engine as loaded.
+    pub loaded: bool,
+    /// Provider names reported by Apollo (presumably the inference
+    /// runtime's execution providers — confirm).
+    pub providers: Vec<String>,
+    /// Model identifier reported by Apollo.
+    pub model_version: String,
+    /// Detector input size reported by Apollo (units not visible here).
+    pub det_size: i32,
+    /// Load failure message, if any; `None` when the field is absent.
+    #[serde(default)]
+    pub load_error: Option<String>,
+}
+
+/// Distinguishes permanent failures (don't retry) from transient ones
+/// (defer and retry on next scan tick). The file-watch hook keys its
+/// marker-row decision on this — a `Permanent` outcome writes
+/// `status='failed'`, a `Transient` outcome writes nothing so the next
+/// pass tries again.
+#[derive(Debug)]
+pub enum FaceDetectError {
+    /// Apollo refused the bytes for a reason that won't change on retry
+    /// (decode failure, zero-dim image). Mark `status='failed'`.
+    Permanent(anyhow::Error),
+    /// Apollo couldn't process this turn but might next time (CUDA OOM,
+    /// engine not loaded yet, network hiccup). Don't mark anything.
+    Transient(anyhow::Error),
+    /// Feature is disabled: the client was built without a base URL
+    /// (neither `APOLLO_FACE_API_BASE_URL` nor the `APOLLO_API_BASE_URL`
+    /// fallback is set). Caller should silently no-op — same shape as
+    /// `apollo_client::is_enabled()` false.
+    Disabled,
+}
+
+impl std::fmt::Display for FaceDetectError {
+    /// Writes a short class prefix (`permanent` / `transient`) followed by
+    /// the wrapped error; `Disabled` has a fixed message.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let (class, cause) = match self {
+            Self::Disabled => return f.write_str("face client disabled"),
+            Self::Permanent(e) => ("permanent", e),
+            Self::Transient(e) => ("transient", e),
+        };
+        write!(f, "{class}: {cause}")
+    }
+}
+
+// No `source()` override: the wrapped error is already rendered by `Display`.
+impl std::error::Error for FaceDetectError {}
+
+/// Async HTTP client for Apollo's face endpoints (`detect`, `embed`,
+/// `health`). Constructed disabled when no base URL is supplied.
+#[derive(Clone)]
+pub struct FaceClient {
+    /// Underlying reqwest client; its request timeout is set in `new`.
+    client: Client,
+    /// `None` → disabled. Trailing slashes are trimmed at construction so
+    /// url building doesn't double up.
+    base_url: Option<String>,
+}
+
+impl FaceClient {
+    /// Build a client for the Apollo face service at `base_url`.
+    ///
+    /// `None` leaves the client disabled. A trailing `/` is trimmed so
+    /// endpoint paths can be appended directly. The per-request timeout
+    /// defaults to 60 s and can be overridden with `FACE_DETECT_TIMEOUT_SEC`
+    /// (whole seconds); unparseable or zero values fall back to the default.
+    ///
+    /// # Panics
+    /// Panics if the underlying reqwest client cannot be constructed.
+    pub fn new(base_url: Option<String>) -> Self {
+        // 60 s default: inference on a backlog can take many seconds per
+        // photo, especially the first call into a cold engine. Apollo's
+        // bounded threadpool (1 worker on CUDA) means concurrent calls
+        // queue server-side; 60 s is enough headroom for a few items in
+        // the queue without surfacing a false transient.
+        const DEFAULT_TIMEOUT_SECS: u64 = 60;
+        let timeout_secs = std::env::var("FACE_DETECT_TIMEOUT_SEC")
+            .ok()
+            .and_then(|s| s.trim().parse::<u64>().ok())
+            // A zero timeout would fail every request immediately and turn
+            // a config typo into an ever-growing retry backlog.
+            .filter(|&secs| secs > 0)
+            .unwrap_or(DEFAULT_TIMEOUT_SECS);
+        let client = Client::builder()
+            .timeout(Duration::from_secs(timeout_secs))
+            .build()
+            .expect("reqwest client build");
+        Self {
+            client,
+            base_url: base_url.map(|u| u.trim_end_matches('/').to_string()),
+        }
+    }
+
+    /// Returns `true` when a base URL is configured.
+    pub fn is_enabled(&self) -> bool {
+        self.base_url.is_some()
+    }
+
+    /// Detect every face in `bytes`. ImageApi calls this from the file-watch
+    /// hook (Phase 3) and from the manual rerun handler. Empty `faces[]` in
+    /// the response is the no-faces signal — caller writes a marker row.
+    ///
+    /// Returns `FaceDetectError::Disabled` when no base URL is configured.
+    #[allow(dead_code)] // Phase 3 file-watch hook + rerun handler
+    pub async fn detect(
+        &self,
+        bytes: Vec<u8>,
+        meta: DetectMeta,
+    ) -> std::result::Result<DetectResponse, FaceDetectError> {
+        let Some(base) = self.base_url.as_deref() else {
+            return Err(FaceDetectError::Disabled);
+        };
+        let url = format!("{}/api/internal/faces/detect", base);
+        self.post_multipart(&url, bytes, &meta).await
+    }
+
+    /// Single-embedding endpoint for the manual face-create flow. Caller
+    /// crops the image to the user-drawn bbox and passes those bytes; we
+    /// run detection inside the crop and return the highest-confidence
+    /// face's embedding. Apollo returns 422 `no_face_in_crop` when the
+    /// box missed — surfaced here as `Permanent`.
+    ///
+    /// Returns `FaceDetectError::Disabled` when no base URL is configured.
+    pub async fn embed(
+        &self,
+        bytes: Vec<u8>,
+        meta: DetectMeta,
+    ) -> std::result::Result<DetectResponse, FaceDetectError> {
+        let Some(base) = self.base_url.as_deref() else {
+            return Err(FaceDetectError::Disabled);
+        };
+        let url = format!("{}/api/internal/faces/embed", base);
+        self.post_multipart(&url, bytes, &meta).await
+    }
+
+    /// Engine reachability + provider/model report. Used by ImageApi for a
+    /// startup sanity check; not on the hot path.
+    ///
+    /// # Errors
+    /// Fails when the client is disabled, the request fails, Apollo answers
+    /// with a non-success status, or the body is not a valid `FaceHealth`.
+    #[allow(dead_code)] // Phase 3 startup probe
+    pub async fn health(&self) -> Result<FaceHealth> {
+        let base = self.base_url.as_deref().context("face client disabled")?;
+        let url = format!("{}/api/internal/faces/health", base);
+        let resp = self.client.get(&url).send().await?.error_for_status()?;
+        let body: FaceHealth = resp.json().await?;
+        Ok(body)
+    }
+
+    /// POST `bytes` plus the JSON-encoded `meta` to `url` as
+    /// multipart/form-data (`meta` text field, then `file` part) and map
+    /// the outcome onto `FaceDetectError`:
+    /// - request could not be sent → `Transient`
+    /// - 2xx with an undecodable body → `Transient`
+    /// - non-2xx → whatever `classify_error_response` decides
+    async fn post_multipart(
+        &self,
+        url: &str,
+        bytes: Vec<u8>,
+        meta: &DetectMeta,
+    ) -> std::result::Result<DetectResponse, FaceDetectError> {
+        let meta_json = serde_json::to_string(meta)
+            .map_err(|e| FaceDetectError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?;
+        // Never substitute an empty part when the MIME type is rejected:
+        // uploading zero bytes would make Apollo judge the wrong payload.
+        // The failure says nothing about the image itself, so report it as
+        // Transient and leave no `failed` marker behind.
+        let file_part = reqwest::multipart::Part::bytes(bytes)
+            .file_name(meta.rel_path.clone())
+            .mime_str("application/octet-stream")
+            .map_err(|e| {
+                FaceDetectError::Transient(anyhow::anyhow!("face client multipart mime: {e}"))
+            })?;
+        let form = reqwest::multipart::Form::new()
+            .text("meta", meta_json)
+            .part("file", file_part);
+
+        let resp = match self.client.post(url).multipart(form).send().await {
+            Ok(r) => r,
+            Err(e) if e.is_timeout() || e.is_connect() => {
+                return Err(FaceDetectError::Transient(anyhow::anyhow!(
+                    "face client network: {e}"
+                )));
+            }
+            Err(e) => {
+                return Err(FaceDetectError::Transient(anyhow::anyhow!(
+                    "face client request: {e}"
+                )));
+            }
+        };
+
+        let status = resp.status();
+        if status.is_success() {
+            let body: DetectResponse = resp.json().await.map_err(|e| {
+                FaceDetectError::Transient(anyhow::anyhow!("face response decode: {e}"))
+            })?;
+            return Ok(body);
+        }
+
+        // Best effort: an unreadable error body still gets classified by
+        // status code alone.
+        let body_text = resp.text().await.unwrap_or_default();
+        Err(classify_error_response(status.as_u16(), &body_text))
+    }
+}
+
+/// Map an Apollo HTTP error response to a FaceDetectError. Pulled out as a
+/// pure function so the marker-row contract (422 → Permanent, 503 →
+/// Transient) is unit-testable without spinning up an HTTP server.
+fn classify_error_response(status: u16, body_text: &str) -> FaceDetectError {
+    // Apollo encodes its error class in the JSON body's `detail`. Try to
+    // parse it; fall back to status-only classification.
+    let detail_code = serde_json::from_str::<serde_json::Value>(body_text)
+        .ok()
+        .and_then(|v| {
+            // detail can be a string ("decode_failed") or an object
+            // ({"code": "cuda_oom", ...}) depending on the endpoint and
+            // Apollo's response shape — handle both.
+            v.get("detail")
+                .and_then(|d| d.as_str().map(str::to_string))
+                .or_else(|| {
+                    v.get("detail")
+                        .and_then(|d| d.get("code"))
+                        .and_then(|c| c.as_str())
+                        .map(str::to_string)
+                })
+        })
+        .unwrap_or_default();
+
+    if status == 422 {
+        return FaceDetectError::Permanent(anyhow::anyhow!(
+            "face detect 422 {}: {}",
+            detail_code,
+            body_text
+        ));
+    }
+    if status == 503 {
+        return FaceDetectError::Transient(anyhow::anyhow!(
+            "face detect 503 {}: {}",
+            detail_code,
+            body_text
+        ));
+    }
+    // Infra-level 4xx that an operator can fix without re-encoding the
+    // bytes: 408 (proxy timeout), 413 (request too large — reverse-proxy
+    // body cap), 429 (rate limit). Treating these as Permanent poisons
+    // every photo that hit the misconfig with `status='failed'` and
+    // requires a manual DELETE to recover. Defer instead so the next
+    // scan tick retries naturally once the proxy is fixed.
+    if matches!(status, 408 | 413 | 429) {
+        return FaceDetectError::Transient(anyhow::anyhow!(
+            "face detect {} {}: {}",
+            status,
+            detail_code,
+            body_text
+        ));
+    }
+    // Any other 4xx: be conservative and treat as Permanent so we don't
+    // loop forever on a stable rejection. Any other 5xx: Transient —
+    // likely intermittent.
+    if (400..500).contains(&status) {
+        FaceDetectError::Permanent(anyhow::anyhow!(
+            "face detect {} {}: {}",
+            status,
+            detail_code,
+            body_text
+        ))
+    } else {
+        FaceDetectError::Transient(anyhow::anyhow!(
+            "face detect {} {}: {}",
+            status,
+            detail_code,
+            body_text
+        ))
+    }
+}
+
+// Unit tests for `classify_error_response`: they pin which HTTP statuses
+// map to `Permanent` versus `Transient` without needing an HTTP server.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Variant predicates so assertions read as intent rather than patterns.
+    fn is_permanent(e: &FaceDetectError) -> bool {
+        matches!(e, FaceDetectError::Permanent(_))
+    }
+    fn is_transient(e: &FaceDetectError) -> bool {
+        matches!(e, FaceDetectError::Transient(_))
+    }
+
+    #[test]
+    fn classify_422_decode_failed_is_permanent() {
+        // Permanent → ImageApi marks status='failed' and stops retrying.
+        let e = classify_error_response(422, r#"{"detail":"decode_failed: bad bytes"}"#);
+        assert!(is_permanent(&e), "422 decode_failed must be Permanent");
+        assert!(format!("{e}").contains("decode_failed"));
+    }
+
+    #[test]
+    fn classify_503_cuda_oom_is_transient() {
+        // Transient → ImageApi must NOT write a marker so the next scan
+        // retries. The detail.code is nested in an object rather than a
+        // bare string; the parser handles both.
+        let e = classify_error_response(
+            503,
+            r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#,
+        );
+        assert!(is_transient(&e), "503 cuda_oom must be Transient");
+        assert!(format!("{e}").contains("cuda_oom"));
+    }
+
+    #[test]
+    fn classify_500_is_transient_other_4xx_is_permanent() {
+        // Conservative split: 5xx defers (intermittent), other 4xx
+        // is treated as a stable rejection so we don't loop forever.
+        assert!(is_transient(&classify_error_response(500, "")));
+        assert!(is_transient(&classify_error_response(502, "{}")));
+        assert!(is_permanent(&classify_error_response(400, "{}")));
+        assert!(is_permanent(&classify_error_response(404, "{}")));
+    }
+
+    #[test]
+    fn classify_infra_4xx_is_transient() {
+        // 408 / 413 / 429 are operator-fixable proxy/infra errors.
+        // Marking them Permanent poisons every affected photo with
+        // status='failed' and requires manual SQL to recover. The
+        // 413 path specifically bit us when nginx defaulted to a 1 MB
+        // body cap and rejected normal-size photos before they reached
+        // the backend.
+        assert!(is_transient(&classify_error_response(408, "")));
+        assert!(is_transient(&classify_error_response(
+            413,
+            "<html>nginx</html>"
+        )));
+        assert!(is_transient(&classify_error_response(429, "{}")));
+    }
+
+    #[test]
+    fn classify_handles_unparseable_body() {
+        // Apollo can return non-JSON on misroute / proxy errors; the
+        // classifier must still produce a useful variant.
+        let e = classify_error_response(503, "<html>nginx</html>");
+        assert!(is_transient(&e));
+    }
+}
@@ -3,12 +3,24 @@ use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, Tracer};
 use serde::{Deserialize, Serialize};

+use crate::ai::insight_chat::{ChatStreamEvent, ChatTurnRequest};
+use crate::ai::ollama::ChatMessage;
 use crate::ai::{InsightGenerator, ModelCapabilities, OllamaClient};
 use crate::data::Claims;
-use crate::database::InsightDao;
+use crate::database::{ExifDao, InsightDao};
+use crate::libraries;
 use crate::otel::{extract_context_from_request, global_tracer};
+use crate::state::AppState;
 use crate::utils::normalize_path;

+/// Hardcoded few-shot exemplars for the agentic endpoint. Populate with the
+/// ids of approved insights whose `training_messages` should be compressed
+/// into trajectory form and injected into the system prompt (for example
+/// `&[2918, 2908]`). Empty = no change in behavior. Request-level
+/// `fewshot_insight_ids` overrides this when non-empty. Only the first two
+/// ids are used by `generate_agentic_insight_handler`.
+const DEFAULT_FEWSHOT_INSIGHT_IDS: &[i32] = &[];
+
 #[derive(Debug, Deserialize)]
 pub struct GeneratePhotoInsightRequest {
    pub file_path: String,
@@ -26,11 +38,30 @@ pub struct GeneratePhotoInsightRequest {
    pub top_k: Option<i32>,
    #[serde(default)]
    pub min_p: Option<f32>,
+    /// `"local"` (default, Ollama with images) | `"hybrid"` (local vision +
+    /// OpenRouter chat). Only respected by the agentic endpoint.
+    #[serde(default)]
+    pub backend: Option<String>,
+    /// Insight ids whose stored `training_messages` should be compressed
+    /// into few-shot trajectories and injected into the system prompt.
+    /// Silently truncated to the first 2. When absent/empty, the handler
+    /// falls back to `DEFAULT_FEWSHOT_INSIGHT_IDS`.
+    #[serde(default)]
+    pub fewshot_insight_ids: Option<Vec<i32>>,
+    /// Active persona id for this generation. New facts are tagged with
+    /// it (`entity_facts.persona_id`); recall during the agentic loop is
+    /// scoped to it. Defaults to `"default"` when absent.
+    #[serde(default)]
+    pub persona_id: Option<String>,
 }

+/// Query-string parameters accepted by `get_insight_handler`.
 #[derive(Debug, Deserialize)]
 pub struct GetPhotoInsightQuery {
+    /// Path of the photo whose insight is requested; passed through
+    /// `normalize_path` before lookup.
    pub path: String,
+    /// Library context for this lookup. Used to pick the right content
+    /// hash when the same rel_path exists under multiple roots. The
+    /// handler falls back to the primary library when this is absent or
+    /// cannot be resolved.
+    #[serde(default)]
+    pub library: Option<String>,
 }

 #[derive(Debug, Deserialize)]
@@ -59,6 +90,10 @@ pub struct PhotoInsightResponse {
    pub eval_count: Option<i32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub approved: Option<bool>,
+    pub backend: String,
+    /// True when the insight was generated agentically and a chat
+    /// continuation can be started against it. Drives the mobile chat button.
+    pub has_training_messages: bool,
 }

 #[derive(Debug, Serialize)]
@@ -146,15 +181,30 @@ pub async fn generate_insight_handler(
 pub async fn get_insight_handler(
    _claims: Claims,
    query: web::Query<GetPhotoInsightQuery>,
+    app_state: web::Data<AppState>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
+    exif_dao: web::Data<std::sync::Mutex<Box<dyn ExifDao>>>,
 ) -> impl Responder {
    let normalized_path = normalize_path(&query.path);
    log::debug!("Fetching insight for {}", normalized_path);

    let otel_context = opentelemetry::Context::new();
+
+    // Expand to rel_paths sharing content so an insight generated under
+    // library 1 still shows when the same photo is viewed from library 2.
+    let library = libraries::resolve_library_param(&app_state, query.library.as_deref())
+        .ok()
+        .flatten()
+        .unwrap_or_else(|| app_state.primary_library());
+    let sibling_paths = {
+        let mut exif = exif_dao.lock().expect("Unable to lock ExifDao");
+        exif.get_rel_paths_sharing_content(&otel_context, library.id, &normalized_path)
+            .unwrap_or_else(|_| vec![normalized_path.clone()])
+    };
+
    let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");

-    match dao.get_insight(&otel_context, &normalized_path) {
+    match dao.get_insight_for_paths(&otel_context, &sibling_paths) {
        Ok(Some(insight)) => {
            let response = PhotoInsightResponse {
                id: insight.id,
@@ -166,6 +216,8 @@ pub async fn get_insight_handler(
                prompt_eval_count: None,
                eval_count: None,
                approved: insight.approved,
+                has_training_messages: insight.training_messages.is_some(),
+                backend: insight.backend,
            };
            HttpResponse::Ok().json(response)
        }
@@ -233,6 +285,8 @@ pub async fn get_all_insights_handler(
                    prompt_eval_count: None,
                    eval_count: None,
                    approved: insight.approved,
+                    has_training_messages: insight.training_messages.is_some(),
+                    backend: insight.backend,
                })
                .collect();

@@ -251,11 +305,14 @@ pub async fn get_all_insights_handler(
 #[post("/insights/generate/agentic")]
 pub async fn generate_agentic_insight_handler(
    http_request: HttpRequest,
-    _claims: Claims,
+    claims: Claims,
    request: web::Json<GeneratePhotoInsightRequest>,
    insight_generator: web::Data<InsightGenerator>,
    insight_dao: web::Data<std::sync::Mutex<Box<dyn InsightDao>>>,
 ) -> impl Responder {
+    // Service tokens (sub: "service:apollo") fall through to user_id=1
+    // — the operator convention. Mobile/web clients have a numeric sub.
+    let user_id = claims.sub.parse::<i32>().unwrap_or(1);
    let parent_context = extract_context_from_request(&http_request);
    let tracer = global_tracer();
    let mut span = tracer.start_with_context("http.insights.generate_agentic", &parent_context);
@@ -288,6 +345,52 @@ pub async fn generate_agentic_insight_handler(
        max_iterations
    );

+    if let Some(ref b) = request.backend {
+        span.set_attribute(KeyValue::new("backend", b.clone()));
+    }
+
+    // Resolve few-shot ids: request-provided ids take precedence when
+    // non-empty; otherwise fall back to the hardcoded defaults.
+    let fewshot_ids: Vec<i32> = match request.fewshot_insight_ids.as_deref() {
+        Some(ids) if !ids.is_empty() => ids.iter().take(2).copied().collect(),
+        _ => DEFAULT_FEWSHOT_INSIGHT_IDS
+            .iter()
+            .take(2)
+            .copied()
+            .collect(),
+    };
+    span.set_attribute(KeyValue::new("fewshot_count", fewshot_ids.len() as i64));
+
+    let fewshot_examples: Vec<Vec<ChatMessage>> = {
+        let otel_context = opentelemetry::Context::new();
+        let mut dao = insight_dao.lock().expect("Unable to lock InsightDao");
+        fewshot_ids
+            .iter()
+            .filter_map(|id| {
+                let insight = dao.get_insight_by_id(&otel_context, *id).ok().flatten()?;
+                let json = insight.training_messages?;
+                match serde_json::from_str::<Vec<ChatMessage>>(&json) {
+                    Ok(msgs) => Some(msgs),
+                    Err(e) => {
+                        log::warn!(
+                            "Few-shot insight {} has malformed training_messages: {}",
+                            id,
+                            e
+                        );
+                        None
+                    }
+                }
+            })
+            .collect()
+    };
+
+    let persona_id = request
+        .persona_id
+        .clone()
+        .filter(|s| !s.trim().is_empty())
+        .unwrap_or_else(|| "default".to_string());
+    span.set_attribute(KeyValue::new("persona_id", persona_id.clone()));
+
    let result = insight_generator
        .generate_agentic_insight_for_photo(
            &normalized_path,
@@ -299,6 +402,11 @@ pub async fn generate_agentic_insight_handler(
            request.top_k,
            request.min_p,
            max_iterations,
+            request.backend.clone(),
+            fewshot_examples,
+            fewshot_ids,
+            user_id,
+            persona_id,
        )
        .await;

@@ -320,6 +428,8 @@ pub async fn generate_agentic_insight_handler(
                        prompt_eval_count,
                        eval_count,
                        approved: insight.approved,
+                        has_training_messages: insight.training_messages.is_some(),
+                        backend: insight.backend,
                    };
                    HttpResponse::Ok().json(response)
                }
@@ -411,6 +521,34 @@ pub async fn get_available_models_handler(
    HttpResponse::Ok().json(response)
 }

+/// Response body for `GET /insights/openrouter/models`.
+#[derive(Debug, Serialize)]
+pub struct OpenRouterModelsResponse {
+    /// Copy of `AppState::openrouter_allowed_models`.
+    pub models: Vec<String>,
+    /// `primary_model` of the configured OpenRouter client; `None` when no
+    /// client is configured.
+    pub default_model: Option<String>,
+    /// Whether `AppState::openrouter` is set.
+    pub configured: bool,
+}
+
+/// GET /insights/openrouter/models - Lists the curated OpenRouter model ids
+/// clients may pick for the hybrid backend. The list is returned verbatim
+/// from `OPENROUTER_ALLOWED_MODELS` as held in app state; no live call to
+/// OpenRouter is made. Also reports the configured client's primary model
+/// and whether a client is configured at all.
+#[get("/insights/openrouter/models")]
+pub async fn get_openrouter_models_handler(
+    _claims: Claims,
+    app_state: web::Data<crate::state::AppState>,
+) -> impl Responder {
+    let client = app_state.openrouter.as_ref();
+    HttpResponse::Ok().json(OpenRouterModelsResponse {
+        models: app_state.openrouter_allowed_models.clone(),
+        default_model: client.map(|c| c.primary_model.clone()),
+        configured: client.is_some(),
+    })
+}
+
 /// POST /insights/rate - Rate an insight (thumbs up/down for training data)
 #[post("/insights/rate")]
 pub async fn rate_insight_handler(
@@ -482,7 +620,10 @@ pub async fn export_training_data_handler(

            HttpResponse::Ok()
                .content_type("application/jsonl")
-                .insert_header(("Content-Disposition", "attachment; filename=\"training_data.jsonl\""))
+                .insert_header((
+                    "Content-Disposition",
+                    "attachment; filename=\"training_data.jsonl\"",
+                ))
                .body(jsonl)
        }
        Err(e) => {
@@ -493,3 +634,411 @@ pub async fn export_training_data_handler(
        }
    }
 }
+
+/// JSON body accepted by `POST /insights/chat` and
+/// `POST /insights/chat/stream`. Every field is forwarded to
+/// `ChatTurnRequest` except `library`, which the handlers resolve to a
+/// library id first.
+#[derive(Debug, Deserialize)]
+pub struct ChatTurnHttpRequest {
+    /// Path of the photo this turn is about.
+    pub file_path: String,
+    /// Library selector handed to `libraries::resolve_library_param`; the
+    /// handlers use the primary library when it resolves to nothing.
+    #[serde(default)]
+    pub library: Option<String>,
+    /// The user's message for this turn.
+    pub user_message: String,
+    // The optional knobs from `model` through `max_iterations` are passed
+    // through unchanged to `ChatTurnRequest`.
+    #[serde(default)]
+    pub model: Option<String>,
+    #[serde(default)]
+    pub backend: Option<String>,
+    #[serde(default)]
+    pub num_ctx: Option<i32>,
+    #[serde(default)]
+    pub temperature: Option<f32>,
+    #[serde(default)]
+    pub top_p: Option<f32>,
+    #[serde(default)]
+    pub top_k: Option<i32>,
+    #[serde(default)]
+    pub min_p: Option<f32>,
+    #[serde(default)]
+    pub max_iterations: Option<usize>,
+    /// Per-turn system-prompt override. Ephemeral in append mode,
+    /// persisted in amend / regenerate mode. See ChatTurnRequest for
+    /// semantics. Also seeds the bootstrap path when no insight exists.
+    #[serde(default)]
+    pub system_prompt: Option<String>,
+    /// Active persona id for this turn. New facts/recalls scope to it.
+    /// Defaults to `"default"` when missing.
+    #[serde(default)]
+    pub persona_id: Option<String>,
+    /// Selects amend mode (see `ChatTurnRequest` for semantics). Defaults
+    /// to `false`; `regenerate` takes precedence when both are set.
+    #[serde(default)]
+    pub amend: bool,
+    /// When true, force the bootstrap path even if an insight already
+    /// exists: flip the existing row(s) to `is_current=false` and create
+    /// a new insight row from this turn. Takes precedence over `amend`.
+    /// Collapses to a normal bootstrap when no insight exists.
+    #[serde(default)]
+    pub regenerate: bool,
+}
+
+/// JSON body returned by `POST /insights/chat`, copied field-for-field
+/// from the chat service's turn result. The three `Option` fields are
+/// omitted from the JSON when `None`.
+#[derive(Debug, Serialize)]
+pub struct ChatTurnHttpResponse {
+    /// The assistant's reply text for this turn.
+    pub assistant_message: String,
+    pub tool_calls_made: usize,
+    pub iterations_used: usize,
+    pub truncated: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_eval_count: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub eval_count: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub amended_insight_id: Option<i32>,
+    /// Taken from the result's `backend_used`.
+    pub backend: String,
+    /// Taken from the result's `model_used`.
+    pub model: String,
+}
+
+/// POST /insights/chat — submit a follow-up turn against an existing insight.
+///
+/// Resolves the target library, hands the turn to the chat service, and
+/// translates recognised failure messages into 4xx responses; anything
+/// unrecognised becomes a 500.
+#[post("/insights/chat")]
+pub async fn chat_turn_handler(
+    http_request: HttpRequest,
+    claims: Claims,
+    request: web::Json<ChatTurnHttpRequest>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    // Own the payload so its fields can be moved into the service request.
+    let body = request.into_inner();
+
+    let parent_context = extract_context_from_request(&http_request);
+    let tracer = global_tracer();
+    let mut span = tracer.start_with_context("http.insights.chat", &parent_context);
+    span.set_attribute(KeyValue::new("file_path", body.file_path.clone()));
+
+    let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) {
+        Err(e) => {
+            return HttpResponse::BadRequest().json(serde_json::json!({
+                "error": format!("invalid library: {}", e)
+            }));
+        }
+        Ok(resolved) => resolved.unwrap_or_else(|| app_state.primary_library()),
+    };
+
+    // Service-token claims (sub: "service:apollo") are not numeric and
+    // therefore land on user_id=1 — the operator convention. Mobile/web
+    // clients carry a numeric sub. Required for the entity_facts
+    // composite FK.
+    let user_id = claims.sub.parse::<i32>().unwrap_or(1);
+
+    let chat_req = ChatTurnRequest {
+        library_id: library.id,
+        user_id,
+        file_path: body.file_path,
+        user_message: body.user_message,
+        model: body.model,
+        backend: body.backend,
+        num_ctx: body.num_ctx,
+        temperature: body.temperature,
+        top_p: body.top_p,
+        top_k: body.top_k,
+        min_p: body.min_p,
+        max_iterations: body.max_iterations,
+        system_prompt: body.system_prompt,
+        persona_id: body.persona_id,
+        amend: body.amend,
+        regenerate: body.regenerate,
+    };
+
+    let result = match app_state.insight_chat.chat_turn(chat_req).await {
+        Ok(result) => result,
+        Err(e) => {
+            let msg = e.to_string();
+            log::error!("Chat turn failed: {}", msg);
+            span.set_status(Status::error(msg.clone()));
+
+            // Map well-known errors to client-facing 4xx codes. The checks
+            // run in order: 404, then 409, then 400, else 500.
+            const BAD_REQUEST_MARKERS: [&str; 4] = [
+                "user_message",
+                "unknown backend",
+                "switching from local to hybrid",
+                "hybrid backend unavailable",
+            ];
+            let mut reply = if msg.contains("no insight found") {
+                HttpResponse::NotFound()
+            } else if msg.contains("no chat history") {
+                HttpResponse::Conflict()
+            } else if BAD_REQUEST_MARKERS
+                .iter()
+                .any(|marker| msg.contains(*marker))
+            {
+                HttpResponse::BadRequest()
+            } else {
+                HttpResponse::InternalServerError()
+            };
+            return reply.json(serde_json::json!({ "error": msg }));
+        }
+    };
+
+    span.set_status(Status::Ok);
+    HttpResponse::Ok().json(ChatTurnHttpResponse {
+        assistant_message: result.assistant_message,
+        tool_calls_made: result.tool_calls_made,
+        iterations_used: result.iterations_used,
+        truncated: result.truncated,
+        prompt_eval_count: result.prompt_eval_count,
+        eval_count: result.eval_count,
+        amended_insight_id: result.amended_insight_id,
+        backend: result.backend_used,
+        model: result.model_used,
+    })
+}
+
+/// Query-string parameters for `GET /insights/chat/history`.
+#[derive(Debug, Deserialize)]
+pub struct ChatHistoryQuery {
+    /// Photo path whose chat history is loaded.
+    pub path: String,
+    /// Library selector handed to `libraries::resolve_library_param`;
+    /// optional.
+    #[serde(default)]
+    pub library: Option<String>,
+}
+
+/// JSON body returned by `GET /insights/chat/history`, built from the
+/// view that the chat service's `load_history` returns.
+#[derive(Debug, Serialize)]
+pub struct ChatHistoryHttpResponse {
+    /// Rendered transcript messages, in the order the view supplies them.
+    pub messages: Vec<RenderedHistoryMessage>,
+    pub turn_count: usize,
+    pub model_version: String,
+    pub backend: String,
+}
+
+/// One message of the rendered transcript returned by the history endpoint.
+#[derive(Debug, Serialize)]
+pub struct RenderedHistoryMessage {
+    pub role: String,
+    pub content: String,
+    pub is_initial: bool,
+    /// Tool invocations attached to this message; the key is omitted from
+    /// the JSON when the list is empty.
+    #[serde(skip_serializing_if = "Vec::is_empty")]
+    pub tools: Vec<HistoryToolInvocation>,
+}
+
+/// A single tool invocation attached to a `RenderedHistoryMessage`.
+#[derive(Debug, Serialize)]
+pub struct HistoryToolInvocation {
+    pub name: String,
+    pub arguments: serde_json::Value,
+    pub result: String,
+    /// Serialized only when `true`: `Not::not` returns `true` for a `false`
+    /// flag, which tells serde to skip the field.
+    #[serde(skip_serializing_if = "std::ops::Not::not")]
+    pub result_truncated: bool,
+}
+
+/// JSON body for `POST /insights/chat/rewind`.
+#[derive(Debug, Deserialize)]
+pub struct ChatRewindHttpRequest {
+    /// Path of the photo whose stored conversation is truncated.
+    pub file_path: String,
+    /// Library selector handed to `libraries::resolve_library_param`;
+    /// optional.
+    #[serde(default)]
+    pub library: Option<String>,
+    /// 0-based index into the rendered transcript. The message at this
+    /// index, and everything after it, is discarded. Must be > 0 — the
+    /// initial user message is protected.
+    pub discard_from_rendered_index: usize,
+}
+
+/// POST /insights/chat/rewind — drop the rendered message at
+/// `discard_from_rendered_index`, together with everything after it, from
+/// the stored conversation. Lets a user retry a turn with a different
+/// prompt without the earlier replies poisoning context.
+#[post("/insights/chat/rewind")]
+pub async fn chat_rewind_handler(
+    _claims: Claims,
+    request: web::Json<ChatRewindHttpRequest>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    let resolved = libraries::resolve_library_param(&app_state, request.library.as_deref());
+    let library = match resolved {
+        Ok(found) => found.unwrap_or_else(|| app_state.primary_library()),
+        Err(e) => {
+            return HttpResponse::BadRequest().json(serde_json::json!({
+                "error": format!("invalid library: {}", e)
+            }));
+        }
+    };
+
+    let outcome = app_state
+        .insight_chat
+        .rewind_history(
+            library.id,
+            &request.file_path,
+            request.discard_from_rendered_index,
+        )
+        .await;
+
+    let failure = match outcome {
+        Ok(()) => return HttpResponse::Ok().json(serde_json::json!({ "success": true })),
+        Err(e) => e,
+    };
+
+    let msg = failure.to_string();
+    log::error!("Chat rewind failed: {}", msg);
+
+    // Translate recognised failure messages into 4xx codes, in order.
+    let mut reply = if msg.contains("no insight found") {
+        HttpResponse::NotFound()
+    } else if msg.contains("no chat history") {
+        HttpResponse::Conflict()
+    } else if msg.contains("cannot discard the initial") || msg.contains("out of range") {
+        HttpResponse::BadRequest()
+    } else {
+        HttpResponse::InternalServerError()
+    };
+    reply.json(serde_json::json!({ "error": msg }))
+}
+
+/// GET /insights/chat/history — return the rendered transcript for a photo.
+///
+/// An unresolvable `library` query parameter is not fatal on this read
+/// path: the lookup falls back to the primary library, and the rejection
+/// is logged so a misconfigured client is visible in the server logs.
+/// Failure messages containing "no insight found" map to 404 and "no chat
+/// history" to 409; anything else is logged and returned as a 500.
+#[get("/insights/chat/history")]
+pub async fn chat_history_handler(
+    _claims: Claims,
+    query: web::Query<ChatHistoryQuery>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    // library_id scopes the lookup so a regenerate on this library
+    // isn't shadowed by an untouched is_current=true row in another
+    // library for the same rel_path. load_history falls back to the
+    // cross-library lookup when the scoped one misses, so a photo
+    // with no insight in this library but one in another still
+    // surfaces (the "show this photo's primary insight" merge case).
+    let library = match libraries::resolve_library_param(&app_state, query.library.as_deref()) {
+        Ok(Some(lib)) => lib,
+        Ok(None) => app_state.primary_library(),
+        Err(e) => {
+            // Keep the lenient fallback, but stop discarding the reason.
+            log::warn!(
+                "Chat history: invalid library {:?} ({}); falling back to primary library",
+                query.library,
+                e
+            );
+            app_state.primary_library()
+        }
+    };
+
+    match app_state.insight_chat.load_history(library.id, &query.path) {
+        Ok(view) => HttpResponse::Ok().json(ChatHistoryHttpResponse {
+            messages: view
+                .messages
+                .into_iter()
+                .map(|m| RenderedHistoryMessage {
+                    role: m.role,
+                    content: m.content,
+                    is_initial: m.is_initial,
+                    tools: m
+                        .tools
+                        .into_iter()
+                        .map(|t| HistoryToolInvocation {
+                            name: t.name,
+                            arguments: t.arguments,
+                            result: t.result,
+                            result_truncated: t.result_truncated,
+                        })
+                        .collect(),
+                })
+                .collect(),
+            turn_count: view.turn_count,
+            model_version: view.model_version,
+            backend: view.backend,
+        }),
+        Err(e) => {
+            let msg = e.to_string();
+            if msg.contains("no insight found") {
+                HttpResponse::NotFound().json(serde_json::json!({ "error": msg }))
+            } else if msg.contains("no chat history") {
+                HttpResponse::Conflict().json(serde_json::json!({ "error": msg }))
+            } else {
+                // Only unexpected failures are logged; 404/409 are routine
+                // outcomes for photos without an insight or transcript.
+                log::error!("Chat history load failed: {}", msg);
+                HttpResponse::InternalServerError().json(serde_json::json!({ "error": msg }))
+            }
+        }
+    }
+}
+
+/// POST /insights/chat/stream — streaming variant of /insights/chat.
+/// Responds with `text/event-stream`, emitting one SSE frame per chat
+/// stream event.
+#[post("/insights/chat/stream")]
+pub async fn chat_stream_handler(
+    claims: Claims,
+    request: web::Json<ChatTurnHttpRequest>,
+    app_state: web::Data<AppState>,
+) -> HttpResponse {
+    // Own the payload so its fields can be moved into the service request.
+    let body = request.into_inner();
+
+    let resolved = libraries::resolve_library_param(&app_state, body.library.as_deref());
+    let library = match resolved {
+        Ok(found) => found.unwrap_or_else(|| app_state.primary_library()),
+        Err(e) => {
+            return HttpResponse::BadRequest().json(serde_json::json!({
+                "error": format!("invalid library: {}", e)
+            }));
+        }
+    };
+
+    // Service-token sub falls through to user_id=1 (see chat_turn_handler).
+    let user_id = claims.sub.parse::<i32>().unwrap_or(1);
+
+    let chat_req = ChatTurnRequest {
+        library_id: library.id,
+        user_id,
+        file_path: body.file_path,
+        user_message: body.user_message,
+        model: body.model,
+        backend: body.backend,
+        num_ctx: body.num_ctx,
+        temperature: body.temperature,
+        top_p: body.top_p,
+        top_k: body.top_k,
+        min_p: body.min_p,
+        max_iterations: body.max_iterations,
+        system_prompt: body.system_prompt,
+        persona_id: body.persona_id,
+        amend: body.amend,
+        regenerate: body.regenerate,
+    };
+
+    let chat_service = app_state.insight_chat.clone();
+    let events = chat_service.chat_turn_stream(chat_req);
+
+    // Map ChatStreamEvent → SSE frame bytes.
+    let sse_stream = futures::stream::StreamExt::map(events, |event| {
+        let bytes = actix_web::web::Bytes::from(render_sse_frame(&event));
+        Ok::<_, actix_web::Error>(bytes)
+    });
+
+    let mut response = HttpResponse::Ok();
+    response
+        .content_type("text/event-stream")
+        .insert_header(("Cache-Control", "no-cache"))
+        .insert_header(("X-Accel-Buffering", "no")); // nginx: disable response buffering
+    response.streaming(sse_stream)
+}
+
+/// Renders one chat stream event as a Server-Sent Events frame of the form
+/// `event: <name>\ndata: <compact JSON>\n\n`.
+fn render_sse_frame(ev: &ChatStreamEvent) -> String {
+    // Shared tail: serialize the payload and wrap it in the SSE envelope.
+    let to_frame = |event_name: &str, payload: serde_json::Value| -> String {
+        let data = serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string());
+        format!("event: {}\ndata: {}\n\n", event_name, data)
+    };
+
+    match ev {
+        ChatStreamEvent::IterationStart { n, max } => {
+            to_frame("iteration_start", serde_json::json!({ "n": n, "max": max }))
+        }
+        ChatStreamEvent::Truncated => to_frame("truncated", serde_json::json!({})),
+        ChatStreamEvent::TextDelta(delta) => {
+            to_frame("text", serde_json::json!({ "delta": delta }))
+        }
+        ChatStreamEvent::ToolCall {
+            index,
+            name,
+            arguments,
+        } => to_frame(
+            "tool_call",
+            serde_json::json!({ "index": index, "name": name, "arguments": arguments }),
+        ),
+        ChatStreamEvent::ToolResult {
+            index,
+            name,
+            result,
+            result_truncated,
+        } => to_frame(
+            "tool_result",
+            serde_json::json!({
+                "index": index,
+                "name": name,
+                "result": result,
+                "result_truncated": result_truncated,
+            }),
+        ),
+        ChatStreamEvent::Done {
+            tool_calls_made,
+            iterations_used,
+            truncated,
+            prompt_tokens,
+            eval_tokens,
+            num_ctx,
+            amended_insight_id,
+            backend_used,
+            model_used,
+        } => to_frame(
+            "done",
+            serde_json::json!({
+                "tool_calls_made": tool_calls_made,
+                "iterations_used": iterations_used,
+                "truncated": truncated,
+                "prompt_tokens": prompt_tokens,
+                "eval_tokens": eval_tokens,
+                "num_ctx": num_ctx,
+                "amended_insight_id": amended_insight_id,
+                "backend": backend_used,
+                "model": model_used,
+            }),
+        ),
+        // Apollo's frontend SSE consumer (and its free-chat backend, which
+        // is the de-facto convention) listens for `error_message`. Emitting
+        // `error` here meant any failure on the photo-chat path (e.g.
+        // "no insight found for path") was silently dropped, leaving an
+        // empty assistant bubble with no clue why the turn died.
+        ChatStreamEvent::Error(msg) => {
+            to_frame("error_message", serde_json::json!({ "message": msg }))
+        }
+    }
+}
@@ -0,0 +1,172 @@
+use anyhow::Result;
+use async_trait::async_trait;
+use futures::stream::BoxStream;
+use serde::{Deserialize, Serialize};
+
+/// Provider-agnostic surface for LLM backends (Ollama, OpenRouter, …).
+///
+/// Impls translate these canonical shapes at the wire boundary: tool-call
+/// arguments stay as `serde_json::Value` in memory and are stringified only
+/// when a provider requires it (OpenAI-compatible APIs do), and `images`
+/// stays as base64 strings here and is rewritten into content-parts where
+/// needed.
+///
+/// Implementors must be `Send + Sync`.
+// First consumer lands in a later PR (OpenRouter impl + hybrid mode routing).
+#[allow(dead_code)]
+#[async_trait]
+pub trait LlmClient: Send + Sync {
+    /// Single-shot text generation. Optional system prompt and optional
+    /// base64 images (ignored by providers without vision support).
+    async fn generate(
+        &self,
+        prompt: &str,
+        system: Option<&str>,
+        images: Option<Vec<String>>,
+    ) -> Result<String>;
+
+    /// Multi-turn chat with tool definitions. Returns the assistant message
+    /// (which may contain tool_calls) plus optional prompt/eval token counts.
+    ///
+    /// NOTE(review): the tuple is presumably `(message, prompt_eval_count,
+    /// eval_count)`, matching the field order of `LlmStreamEvent::Done` —
+    /// confirm against the implementations.
+    async fn chat_with_tools(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)>;
+
+    /// Streaming variant of `chat_with_tools`. The returned stream yields
+    /// `TextDelta` items as content is produced, then a single terminal
+    /// `Done` carrying the complete assembled message (with tool_calls, if
+    /// any) plus token usage counts. Implementations that can't stream may
+    /// fall back to calling `chat_with_tools` and emitting the full reply
+    /// as one `Done` event.
+    async fn chat_with_tools_stream(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>>;
+
+    /// Batch embedding generation. Dimensionality is provider/model specific.
+    ///
+    /// NOTE(review): presumably one vector per entry of `texts`, in input
+    /// order — confirm against the implementations.
+    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;
+
+    /// One-shot vision description of an image. Used to convert images into
+    /// plain text for the hybrid-mode conversation flow.
+    async fn describe_image(&self, image_base64: &str) -> Result<String>;
+
+    /// Enumerate available models with their capabilities.
+    async fn list_models(&self) -> Result<Vec<ModelCapabilities>>;
+
+    /// Look up capabilities for a single model.
+    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities>;
+
+    /// Primary model identifier this client was constructed with.
+    fn primary_model(&self) -> &str;
+}
+
+/// Events emitted by streaming `chat_with_tools_stream`. A stream is a
+/// sequence of zero or more `TextDelta` events followed by exactly one
+/// `Done`. Callers should treat `Done` as terminal — further items (if any
+/// slip through due to upstream misbehavior) are safe to ignore.
+#[derive(Debug, Clone)]
+pub enum LlmStreamEvent {
+    /// Incremental content token(s) from the model. Concatenate in order to
+    /// reconstruct the assistant's final text.
+    TextDelta(String),
+    /// Terminal event with the full assembled message (content + any
+    /// tool_calls). `message.content` equals the concatenation of every
+    /// preceding `TextDelta.0`.
+    Done {
+        /// The complete assistant message.
+        message: ChatMessage,
+        /// Prompt-side token usage count, when the provider reports one.
+        prompt_eval_count: Option<i32>,
+        /// Generation-side token usage count, when the provider reports one.
+        eval_count: Option<i32>,
+    },
+}
+
+/// Tool definition sent to the model (OpenAI-compatible function schema).
+#[derive(Serialize, Clone, Debug)]
+pub struct Tool {
+    /// Serialized under the key `type` (a Rust keyword, hence the rename).
+    #[serde(rename = "type")]
+    pub tool_type: String, // always "function"
+    /// The function this tool exposes to the model.
+    pub function: ToolFunction,
+}
+
+/// The `function` payload of a `Tool`.
+#[derive(Serialize, Clone, Debug)]
+pub struct ToolFunction {
+    /// Name of the function.
+    pub name: String,
+    /// Description of the function, sent to the model with the definition.
+    pub description: String,
+    /// NOTE(review): presumably a JSON Schema object describing the
+    /// function's arguments — confirm against the tool definitions.
+    pub parameters: serde_json::Value,
+}
+
+impl Tool {
+    /// Builds a tool definition of type `"function"` from a name, a
+    /// description, and a parameters value.
+    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
+        let function = ToolFunction {
+            name: String::from(name),
+            description: String::from(description),
+            parameters,
+        };
+        Self {
+            tool_type: String::from("function"),
+            function,
+        }
+    }
+}
+
+/// A message in the chat conversation history.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct ChatMessage {
+    pub role: String, // "system" | "user" | "assistant" | "tool"
+    /// Empty string (not null) when tool_calls is present — Ollama quirk.
+    /// A missing `content` key deserializes to the empty string.
+    #[serde(default)]
+    pub content: String,
+    /// Tool calls carried by the message; the key is omitted from
+    /// serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_calls: Option<Vec<ToolCall>>,
+    /// Base64 images — only on user messages to vision-capable models.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub images: Option<Vec<String>>,
+}
+
+impl ChatMessage {
+    /// Shared constructor: a text-only message for `role`, carrying no
+    /// tool calls and no images.
+    fn plain(role: &str, content: impl Into<String>) -> Self {
+        Self {
+            role: role.to_string(),
+            content: content.into(),
+            tool_calls: None,
+            images: None,
+        }
+    }
+
+    /// Builds a message with role `"system"`.
+    pub fn system(content: impl Into<String>) -> Self {
+        Self::plain("system", content)
+    }
+
+    /// Builds a message with role `"user"`.
+    pub fn user(content: impl Into<String>) -> Self {
+        Self::plain("user", content)
+    }
+
+    /// Builds a message with role `"tool"`.
+    pub fn tool_result(content: impl Into<String>) -> Self {
+        Self::plain("tool", content)
+    }
+}
+
+/// Tool call returned by the model in an assistant message.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct ToolCall {
+    /// Which function to call, and with what arguments.
+    pub function: ToolCallFunction,
+    /// Omitted from serialized output when `None`.
+    /// NOTE(review): presumably the provider-assigned call id — confirm.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+}
+
+/// The function name and arguments carried by a `ToolCall`.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct ToolCallFunction {
+    /// Name of the function the model wants to call.
+    pub name: String,
+    /// Canonical shape: native JSON. Providers that use JSON-encoded-string
+    /// arguments (OpenAI-compatible) translate at their wire boundary.
+    pub arguments: serde_json::Value,
+}
+
+/// Capability flags for a single model, as returned by
+/// `LlmClient::list_models` and `LlmClient::model_capabilities`.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct ModelCapabilities {
+    /// Model identifier.
+    pub name: String,
+    /// Whether the model has vision support.
+    pub has_vision: bool,
+    /// Whether the model supports tool calling.
+    pub has_tool_calling: bool,
+}
@@ -1,17 +1,40 @@
+pub mod apollo_client;
 pub mod daily_summary_job;
+pub mod face_client;
 pub mod handlers;
+pub mod insight_chat;
 pub mod insight_generator;
+pub mod llm_client;
 pub mod ollama;
+pub mod openrouter;
 pub mod sms_client;
+pub mod tag_client;

 // strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
 #[allow(unused_imports)]
-pub use daily_summary_job::{generate_daily_summaries, strip_summary_boilerplate};
+pub use daily_summary_job::{
+    DAILY_SUMMARY_MESSAGE_LIMIT, DAILY_SUMMARY_SYSTEM_PROMPT, build_daily_summary_prompt,
+    generate_daily_summaries, strip_summary_boilerplate,
+};
 pub use handlers::{
+    chat_history_handler, chat_rewind_handler, chat_stream_handler, chat_turn_handler,
    delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler,
    generate_insight_handler, get_all_insights_handler, get_available_models_handler,
-    get_insight_handler, rate_insight_handler,
+    get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
 };
 pub use insight_generator::InsightGenerator;
-pub use ollama::{ModelCapabilities, OllamaClient};
+#[allow(unused_imports)]
+pub use llm_client::{
+    ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
+};
+pub use ollama::{EMBEDDING_MODEL, OllamaClient};
 pub use sms_client::{SmsApiClient, SmsMessage};
+
+/// Display name used for the user in message transcripts and first-person
+/// prompt text. Reads the `USER_NAME` env var; defaults to `"Me"`. Models
+/// often confuse `"Me:"` in a transcript with their own role — setting
+/// `USER_NAME=Cameron` (or similar) in the environment eliminates that
+/// ambiguity across daily summaries, insight generation, and chat.
+///
+/// The value is trimmed, and an unset, empty, or whitespace-only
+/// `USER_NAME` (e.g. a bare `USER_NAME=` line in `.env`) falls back to
+/// `"Me"`, so transcripts never carry a blank speaker label.
+pub fn user_display_name() -> String {
+    std::env::var("USER_NAME")
+        .ok()
+        .map(|name| name.trim().to_string())
+        .filter(|name| !name.is_empty())
+        .unwrap_or_else(|| "Me".to_string())
+}
@@ -1,14 +1,43 @@
 use anyhow::{Context, Result};
+use async_trait::async_trait;
 use chrono::NaiveDate;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
+use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};

+use crate::ai::llm_client::{LlmClient, LlmStreamEvent};
+use futures::stream::{BoxStream, StreamExt};
+
+// Re-export shared types so existing `crate::ai::ollama::{...}` imports
+// continue to resolve.
+pub use crate::ai::llm_client::{ChatMessage, ModelCapabilities, Tool};
+#[allow(unused_imports)]
+pub use crate::ai::llm_client::{ToolCall, ToolCallFunction, ToolFunction};
+
 // Cache duration: 15 minutes
 const CACHE_DURATION_SECS: u64 = 15 * 60;

+/// Fallback total request timeout for generation calls, in seconds. Set
+/// the `OLLAMA_REQUEST_TIMEOUT_SECONDS` env var to override it for slow
+/// CPU-offloaded models where inference can take several minutes.
+const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;
+
+/// Resolves the request timeout: the env override when it parses as a
+/// positive integer, otherwise `DEFAULT_REQUEST_TIMEOUT_SECS`.
+fn configured_request_timeout_secs() -> u64 {
+    match std::env::var("OLLAMA_REQUEST_TIMEOUT_SECONDS") {
+        Ok(raw) => match raw.parse::<u64>() {
+            Ok(secs) if secs > 0 => secs,
+            // Zero or unparsable: ignore the override.
+            _ => DEFAULT_REQUEST_TIMEOUT_SECS,
+        },
+        Err(_) => DEFAULT_REQUEST_TIMEOUT_SECS,
+    }
+}
+
+/// Embedding model used across the app. Callers that persist a
+/// `model_version` alongside an embedding should read this constant so the
+/// stored label always matches what `generate_embeddings` actually ran.
+pub const EMBEDDING_MODEL: &str = "nomic-embed-text:v1.5";
+
 // Cached entry with timestamp
 #[derive(Clone)]
 struct CachedEntry<T> {
@@ -50,6 +79,12 @@ pub struct OllamaClient {
    top_p: Option<f32>,
    top_k: Option<i32>,
    min_p: Option<f32>,
+    /// Sticky preference shared across clones: when the fallback server
+    /// succeeded most recently, try it first on the next call. Avoids
+    /// re-probing the primary with a model it doesn't have loaded across
+    /// every iteration of the agent loop. `Arc<AtomicBool>` so cloning
+    /// `OllamaClient` shares the flag rather than resetting it.
+    prefer_fallback: Arc<AtomicBool>,
 }

 impl OllamaClient {
@@ -62,7 +97,7 @@ impl OllamaClient {
        Self {
            client: Client::builder()
                .connect_timeout(Duration::from_secs(5)) // Quick connection timeout
-                .timeout(Duration::from_secs(120)) // Total request timeout for generation
+                .timeout(Duration::from_secs(configured_request_timeout_secs()))
                .build()
                .unwrap_or_else(|_| Client::new()),
            primary_url,
@@ -74,9 +109,44 @@ impl OllamaClient {
            top_p: None,
            top_k: None,
            min_p: None,
+            prefer_fallback: Arc::new(AtomicBool::new(false)),
        }
    }

+    /// Lists the servers to try, in order, as `(label, url, model)` tuples.
+    /// The primary comes first unless a fallback is configured and the
+    /// sticky `prefer_fallback` flag is set, in which case the fallback
+    /// leads. A fallback with no model of its own reuses the primary model.
+    fn attempt_order(&self) -> Vec<(&'static str, String, String)> {
+        let primary = (
+            "primary",
+            self.primary_url.clone(),
+            self.primary_model.clone(),
+        );
+        let fallback = self.fallback_url.as_ref().map(|url| {
+            let model = self
+                .fallback_model
+                .as_ref()
+                .unwrap_or(&self.primary_model)
+                .clone();
+            ("fallback", url.clone(), model)
+        });
+
+        match fallback {
+            // The flag is only consulted when a fallback actually exists.
+            Some(fb) if self.prefer_fallback.load(Ordering::Relaxed) => vec![fb, primary],
+            Some(fb) => vec![primary, fb],
+            None => vec![primary],
+        }
+    }
+
    pub fn set_num_ctx(&mut self, num_ctx: Option<i32>) {
        self.num_ctx = num_ctx;
    }
@@ -120,6 +190,7 @@ impl OllamaClient {

    /// Replace the HTTP client with one using a custom request timeout.
    /// Useful for slow models where the default 120s may be insufficient.
+    #[allow(dead_code)]
    pub fn with_request_timeout(mut self, secs: u64) -> Self {
        self.client = Client::builder()
            .connect_timeout(Duration::from_secs(5))
@@ -174,6 +245,7 @@ impl OllamaClient {
    }

    /// Clear the model list cache for a specific URL or all URLs
+    #[allow(dead_code)]
    pub fn clear_model_cache(url: Option<&str>) {
        let mut cache = MODEL_LIST_CACHE.lock().unwrap();
        if let Some(url) = url {
@@ -186,6 +258,7 @@ impl OllamaClient {
    }

    /// Clear the model capabilities cache for a specific URL or all URLs
+    #[allow(dead_code)]
    pub fn clear_capabilities_cache(url: Option<&str>) {
        let mut cache = MODEL_CAPABILITIES_CACHE.lock().unwrap();
        if let Some(url) = url {
@@ -308,6 +381,7 @@ impl OllamaClient {
        prompt: &str,
        system: Option<&str>,
        images: Option<Vec<String>>,
+        think: Option<bool>,
    ) -> Result<String> {
        let request = OllamaRequest {
            model: model.to_string(),
@@ -316,6 +390,7 @@ impl OllamaClient {
            system: system.map(|s| s.to_string()),
            options: self.build_options(),
            images,
+            think,
        };

        let response = self
@@ -336,6 +411,12 @@ impl OllamaClient {
        }

        let result: OllamaResponse = response.json().await?;
+        log_chat_metrics(
+            result.prompt_eval_count,
+            result.prompt_eval_duration,
+            result.eval_count,
+            result.eval_duration,
+        );
        Ok(result.response)
    }

@@ -343,11 +424,31 @@ impl OllamaClient {
        self.generate_with_images(prompt, system, None).await
    }

+    /// `generate` with Ollama's top-level `think` option forced to `false`.
+    /// Meant for latency-sensitive callers such as the rerank pass, where
+    /// there is nothing to reason about and chain-of-thought tokens only
+    /// cost wall time. Server-side no-op on non-reasoning models.
+    pub async fn generate_no_think(&self, prompt: &str, system: Option<&str>) -> Result<String> {
+        let images: Option<Vec<String>> = None;
+        let think = Some(false);
+        self.generate_with_options(prompt, system, images, think)
+            .await
+    }
+
     pub async fn generate_with_images(
         &self,
         prompt: &str,
         system: Option<&str>,
         images: Option<Vec<String>>,
+    ) -> Result<String> {
+        // Delegates to `generate_with_options`, leaving `think` unset (`None`).
+        self.generate_with_options(prompt, system, images, None)
+            .await
+    }
+
+    async fn generate_with_options(
+        &self,
+        prompt: &str,
+        system: Option<&str>,
+        images: Option<Vec<String>>,
+        think: Option<bool>,
    ) -> Result<String> {
        log::debug!("=== Ollama Request ===");
        log::debug!("Primary model: {}", self.primary_model);
@@ -373,6 +474,7 @@ impl OllamaClient {
                prompt,
                system,
                images.clone(),
+                think,
            )
            .await;

@@ -396,7 +498,14 @@ impl OllamaClient {
                        fallback_model
                    );
                    match self
-                        .try_generate(fallback_url, fallback_model, prompt, system, images.clone())
+                        .try_generate(
+                            fallback_url,
+                            fallback_model,
+                            prompt,
+                            system,
+                            images.clone(),
+                            think,
+                        )
                        .await
                    {
                        Ok(response) => {
@@ -468,6 +577,7 @@ Capture the key moment or theme. Return ONLY the title, nothing else."#,
    ) -> Result<String> {
        let location_str = location.unwrap_or("Unknown");
        let sms_str = sms_summary.unwrap_or("No messages");
+        let user_name = crate::ai::user_display_name();

        let prompt = if image_base64.is_some() {
            if let Some(contact_name) = contact {
@@ -479,13 +589,14 @@ Location: {}
 Person/Contact: {}
 Messages: {}

-Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
+Analyze the image and use specific details from both the visual content and the context above. The photo is from a folder for {}, so they are likely in or related to this photo. Mention people's names (especially {}), places, or activities if they appear in either the image or the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
                    date.format("%B %d, %Y"),
                    location_str,
                    contact_name,
                    sms_str,
                    contact_name,
-                    contact_name
+                    contact_name,
+                    user_name
                )
            } else {
                format!(
@@ -495,10 +606,11 @@ Date: {}
 Location: {}
 Messages: {}

-Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
+Analyze the image and use specific details from both the visual content and the context above. Mention people's names, places, or activities if they appear in either the image or the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual based on what you see and know. If the location is unknown omit it"#,
                    date.format("%B %d, %Y"),
                    location_str,
-                    sms_str
+                    sms_str,
+                    user_name
                )
            }
        } else if let Some(contact_name) = contact {
@@ -510,13 +622,14 @@ Analyze the image and use specific details from both the visual content and the
        Person/Contact: {}
        Messages: {}

-        Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
+        Use only the specific details provided above. The photo is from a folder for {}, so they are likely related to this moment. Mention people's names (especially {}), places, or activities if they appear in the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
                date.format("%B %d, %Y"),
                location_str,
                contact_name,
                sms_str,
                contact_name,
-                contact_name
+                contact_name,
+                user_name
            )
        } else {
            format!(
@@ -526,10 +639,11 @@ Analyze the image and use specific details from both the visual content and the
        Location: {}
        Messages: {}

-        Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as Cameron with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
+        Use only the specific details provided above. Mention people's names, places, or activities if they appear in the context. Write in first person as {} with the tone of a journal entry. If limited information is available, keep it simple and factual. If the location is unknown omit it"#,
                date.format("%B %d, %Y"),
                location_str,
-                sms_str
+                sms_str,
+                user_name
            )
        };

@@ -558,68 +672,229 @@ Analyze the image and use specific details from both the visual content and the

    /// Send a chat request with tool definitions to /api/chat.
    /// Returns the assistant's response message (may contain tool_calls or final content).
-    /// Uses primary/fallback URL routing same as other generation methods.
+    /// Tries servers in preference order — most recently successful first —
+    /// so a fallback-only model doesn't re-404 against the primary on every
+    /// iteration of the agent loop.
    pub async fn chat_with_tools(
        &self,
        messages: Vec<ChatMessage>,
        tools: Vec<Tool>,
    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
-        // Try primary server first
-        log::info!(
-            "Attempting chat_with_tools with primary server: {} (model: {})",
-            self.primary_url,
-            self.primary_model
-        );
-        let primary_result = self
-            .try_chat_with_tools(&self.primary_url, messages.clone(), tools.clone())
-            .await;
-
-        match primary_result {
-            Ok(result) => {
-                log::info!("Successfully got chat_with_tools response from primary server");
-                Ok(result)
-            }
-            Err(e) => {
-                log::warn!("Primary server chat_with_tools failed: {}", e);
-
-                // Try fallback server if available
-                if let Some(fallback_url) = &self.fallback_url {
-                    let fallback_model =
-                        self.fallback_model.as_ref().unwrap_or(&self.primary_model);
+        let order = self.attempt_order();
+        let mut errors: Vec<String> = Vec::new();

+        for (label, url, model) in &order {
+            log::info!(
+                "Attempting chat_with_tools with {} server: {} (model: {})",
+                label,
+                url,
+                model
+            );
+            match self
+                .try_chat_with_tools(url, messages.clone(), tools.clone())
+                .await
+            {
+                Ok(result) => {
                    log::info!(
-                        "Attempting chat_with_tools with fallback server: {} (model: {})",
-                        fallback_url,
-                        fallback_model
+                        "Successfully got chat_with_tools response from {} server",
+                        label
                    );
-                    match self
-                        .try_chat_with_tools(fallback_url, messages, tools)
-                        .await
-                    {
-                        Ok(result) => {
-                            log::info!(
-                                "Successfully got chat_with_tools response from fallback server"
-                            );
-                            Ok(result)
-                        }
-                        Err(fallback_e) => {
-                            log::error!(
-                                "Fallback server chat_with_tools also failed: {}",
-                                fallback_e
-                            );
-                            Err(anyhow::anyhow!(
-                                "Both primary and fallback servers failed. Primary: {}, Fallback: {}",
-                                e,
-                                fallback_e
-                            ))
-                        }
-                    }
-                } else {
-                    log::error!("No fallback server configured");
-                    Err(e)
+                    self.prefer_fallback
+                        .store(*label == "fallback", Ordering::Relaxed);
+                    return Ok(result);
+                }
+                Err(e) => {
+                    log::warn!("{} server chat_with_tools failed: {}", label, e);
+                    errors.push(format!("{}: {}", label, e));
                }
            }
        }
+
+        if order.len() <= 1 {
+            log::error!("No fallback server configured; chat_with_tools exhausted");
+        } else {
+            log::error!(
+                "All {} servers failed for chat_with_tools ({})",
+                order.len(),
+                errors.join(" / ")
+            );
+        }
+        Err(anyhow::anyhow!(
+            "chat_with_tools failed on all servers: {}",
+            errors.join(" / ")
+        ))
+    }
+
+    /// Streaming variant of `chat_with_tools`. Tries each server in the
+    /// order returned by `attempt_order()` and moves on to the next one
+    /// only when opening the stream fails; once the stream has begun
+    /// emitting, mid-stream errors propagate to the caller. Emits
+    /// `TextDelta` events as content tokens arrive and a single terminal
+    /// `Done` event carrying the assembled message (tool_calls, if
+    /// any, live on the final message).
+    ///
+    /// On success, records in `prefer_fallback` whether the server that
+    /// answered was the one labelled `"fallback"`.
+    ///
+    /// # Errors
+    /// Returns the error from the last server tried, or
+    /// "No Ollama server configured" when `attempt_order()` is empty.
+    pub async fn chat_with_tools_stream(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>> {
+        // Same preference logic as `chat_with_tools`. Only the initial
+        // connection is retried across servers — once the stream begins,
+        // mid-stream errors propagate to the caller.
+        let order = self.attempt_order();
+        let mut last_err: Option<anyhow::Error> = None;
+
+        // The model from `attempt_order()` is unused here;
+        // `try_chat_with_tools_stream` re-derives it from the URL.
+        for (label, url, _model) in &order {
+            match self
+                .try_chat_with_tools_stream(url, messages.clone(), tools.clone())
+                .await
+            {
+                Ok(s) => {
+                    self.prefer_fallback
+                        .store(*label == "fallback", Ordering::Relaxed);
+                    return Ok(s);
+                }
+                Err(e) => {
+                    // Earlier errors are only logged; just the most recent
+                    // one is kept for the final `Err`.
+                    log::warn!("Streaming chat on {} server failed: {}", label, e);
+                    last_err = Some(e);
+                }
+            }
+        }
+
+        Err(last_err.unwrap_or_else(|| anyhow::anyhow!("No Ollama server configured")))
+    }
+
+    /// Opens one streaming `/api/chat` request against `base_url` and
+    /// adapts the NDJSON response into a stream of `LlmStreamEvent`s.
+    ///
+    /// The model is `primary_model` when `base_url` equals `primary_url`,
+    /// otherwise `fallback_model` (or `primary_model` when no fallback
+    /// model is set).
+    ///
+    /// # Errors
+    /// Returns `Err` when the request cannot be sent or the response
+    /// status is not a success. After that, failures surface inside the
+    /// stream: a byte-stream read error is yielded as `Err` and ends the
+    /// stream without a `Done` event.
+    async fn try_chat_with_tools_stream(
+        &self,
+        base_url: &str,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>> {
+        let url = format!("{}/api/chat", base_url);
+        // NOTE(review): the model is chosen by URL equality, so a fallback
+        // whose URL equals the primary URL would get the primary model —
+        // confirm that configuration cannot occur.
+        let model = if base_url == self.primary_url {
+            &self.primary_model
+        } else {
+            self.fallback_model
+                .as_deref()
+                .unwrap_or(&self.primary_model)
+        };
+        let options = self.build_options();
+
+        let request_body = OllamaChatRequest {
+            model,
+            messages: &messages,
+            stream: true,
+            tools,
+            options,
+        };
+
+        let response = self
+            .client
+            .post(&url)
+            .json(&request_body)
+            .send()
+            .await
+            .with_context(|| format!("Failed to connect to Ollama at {}", url))?;
+
+        // A non-success status is reported as an error (with the response
+        // body, when readable) so the caller can try the next server.
+        if !response.status().is_success() {
+            let status = response.status();
+            let body = response.text().await.unwrap_or_default();
+            anyhow::bail!(
+                "Ollama stream request failed with status {}: {}",
+                status,
+                body
+            );
+        }
+
+        // Ollama streams NDJSON: each line is a full `OllamaStreamChunk`.
+        // We buffer partial lines across chunks from the byte stream.
+        let byte_stream = response.bytes_stream();
+        let stream = async_stream::stream! {
+            // State assembled across chunks and emitted in the final `Done`.
+            let mut buf: Vec<u8> = Vec::new();
+            let mut accumulated = String::new();
+            let mut tool_calls: Option<Vec<crate::ai::llm_client::ToolCall>> = None;
+            let mut role = "assistant".to_string();
+            let mut prompt_eval_count: Option<i32> = None;
+            let mut eval_count: Option<i32> = None;
+            let mut prompt_eval_duration: Option<u64> = None;
+            let mut eval_duration: Option<u64> = None;
+            let mut done_seen = false;
+
+            let mut byte_stream = byte_stream;
+            while let Some(chunk) = byte_stream.next().await {
+                let chunk = match chunk {
+                    Ok(b) => b,
+                    Err(e) => {
+                        // Transport error: surface it and end the stream
+                        // without a terminal `Done`.
+                        yield Err(anyhow::anyhow!("stream read failed: {}", e));
+                        return;
+                    }
+                };
+                buf.extend_from_slice(&chunk);
+
+                // Drain complete lines; hold any trailing partial.
+                while let Some(nl) = buf.iter().position(|b| *b == b'\n') {
+                    let line = buf.drain(..=nl).collect::<Vec<_>>();
+                    // NOTE(review): a line that is not valid UTF-8 is
+                    // dropped silently (no log), unlike malformed JSON below.
+                    let line_str = match std::str::from_utf8(&line) {
+                        Ok(s) => s.trim(),
+                        Err(_) => continue,
+                    };
+                    if line_str.is_empty() {
+                        continue;
+                    }
+                    // `chunk` below shadows the byte chunk with the parsed
+                    // `OllamaStreamChunk`.
+                    match serde_json::from_str::<OllamaStreamChunk>(line_str) {
+                        Ok(chunk) => {
+                            // Accumulate content delta.
+                            if !chunk.message.content.is_empty() {
+                                accumulated.push_str(&chunk.message.content);
+                                yield Ok(LlmStreamEvent::TextDelta(chunk.message.content));
+                            }
+                            if !chunk.message.role.is_empty() {
+                                role = chunk.message.role;
+                            }
+                            // Ollama only attaches tool_calls on the final chunk.
+                            // A later non-empty list replaces an earlier one.
+                            if let Some(tcs) = chunk.message.tool_calls
+                                && !tcs.is_empty()
+                            {
+                                tool_calls = Some(tcs);
+                            }
+                            if chunk.done {
+                                prompt_eval_count = chunk.prompt_eval_count;
+                                eval_count = chunk.eval_count;
+                                prompt_eval_duration = chunk.prompt_eval_duration;
+                                eval_duration = chunk.eval_duration;
+                                done_seen = true;
+                                // Leaves the line loop; `done_seen` then
+                                // leaves the outer read loop as well.
+                                break;
+                            }
+                        }
+                        Err(e) => {
+                            // Malformed lines are logged and skipped; the
+                            // stream keeps going.
+                            log::warn!("malformed Ollama stream line: {} ({})", line_str, e);
+                        }
+                    }
+                }
+                if done_seen {
+                    break;
+                }
+            }
+
+            // NOTE(review): this point is also reached when the byte stream
+            // ends without a `done: true` chunk. `Done` is still emitted with
+            // whatever was accumulated, and any bytes left in `buf` (a final
+            // line with no trailing newline) are discarded unparsed.
+            // Emit the terminal Done event with the assembled message.
+            log_chat_metrics(
+                prompt_eval_count,
+                prompt_eval_duration,
+                eval_count,
+                eval_duration,
+            );
+            let message = ChatMessage {
+                role,
+                content: accumulated,
+                tool_calls,
+                images: None,
+            };
+            yield Ok(LlmStreamEvent::Done {
+                message,
+                prompt_eval_count,
+                eval_count,
+            });
+        };
+
+        Ok(Box::pin(stream))
    }

    async fn try_chat_with_tools(
@@ -662,8 +937,12 @@ Analyze the image and use specific details from both the visual content and the
        if !response.status().is_success() {
            let status = response.status();
            let body = response.text().await.unwrap_or_default();
-            log::error!(
-                "chat_with_tools request body that caused {}: {}",
+            // warn, not error — the outer `chat_with_tools` may recover via
+            // the fallback server. When both fail, the outer layer emits the
+            // actual error log.
+            log::warn!(
+                "chat_with_tools request to {} got {}: {}",
+                base_url,
                status,
                request_json
            );
@@ -679,6 +958,17 @@ Analyze the image and use specific details from both the visual content and the
            .await
            .with_context(|| "Failed to parse Ollama chat response")?;

+        // Log performance counters returned by Ollama. Durations are
+        // reported in nanoseconds; we render ms + tokens/sec for skim-ability
+        // in the server log. Missing fields are left off the line rather
+        // than printed as `None`.
+        log_chat_metrics(
+            chat_response.prompt_eval_count,
+            chat_response.prompt_eval_duration,
+            chat_response.eval_count,
+            chat_response.eval_duration,
+        );
+
        Ok((
            chat_response.message,
            chat_response.prompt_eval_count,
@@ -700,7 +990,7 @@ Analyze the image and use specific details from both the visual content and the
    /// Returns a vector of 768-dimensional vectors
    /// This is much more efficient than calling generate_embedding multiple times
    pub async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
-        let embedding_model = "nomic-embed-text:v1.5";
+        let embedding_model = EMBEDDING_MODEL;

        log::debug!("=== Ollama Batch Embedding Request ===");
        log::debug!("Model: {}", embedding_model);
@@ -815,6 +1105,54 @@ Analyze the image and use specific details from both the visual content and the
    }
 }

+// Adapts `OllamaClient`'s inherent methods to the `LlmClient` trait. Every
+// method is a direct delegation; no extra logic lives here.
+#[async_trait]
+impl LlmClient for OllamaClient {
+    /// Delegates to `generate_with_images`.
+    async fn generate(
+        &self,
+        prompt: &str,
+        system: Option<&str>,
+        images: Option<Vec<String>>,
+    ) -> Result<String> {
+        self.generate_with_images(prompt, system, images).await
+    }
+
+    /// Delegates to the inherent `OllamaClient::chat_with_tools`. The
+    /// explicit path form makes clear that this is the inherent method
+    /// of the same name, not a recursive call into this trait method.
+    async fn chat_with_tools(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
+        OllamaClient::chat_with_tools(self, messages, tools).await
+    }
+
+    /// Delegates to the inherent `OllamaClient::chat_with_tools_stream`.
+    async fn chat_with_tools_stream(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>> {
+        OllamaClient::chat_with_tools_stream(self, messages, tools).await
+    }
+
+    /// Delegates to the inherent `OllamaClient::generate_embeddings`.
+    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
+        OllamaClient::generate_embeddings(self, texts).await
+    }
+
+    /// Delegates to `generate_photo_description`.
+    async fn describe_image(&self, image_base64: &str) -> Result<String> {
+        self.generate_photo_description(image_base64).await
+    }
+
+    /// Lists models using `primary_url` only; the fallback server is not
+    /// consulted here.
+    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
+        Self::list_models_with_capabilities(&self.primary_url).await
+    }
+
+    /// Checks `model` against `primary_url` only; the fallback server is
+    /// not consulted here.
+    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
+        Self::check_model_capabilities(&self.primary_url, model).await
+    }
+
+    /// Returns the configured primary model name.
+    fn primary_model(&self) -> &str {
+        &self.primary_model
+    }
+}
+
 #[derive(Serialize)]
 struct OllamaRequest {
    model: String,
@@ -826,6 +1164,12 @@ struct OllamaRequest {
    options: Option<OllamaOptions>,
    #[serde(skip_serializing_if = "Option::is_none")]
    images: Option<Vec<String>>,
+    /// Ollama's top-level reasoning-mode toggle (~0.4+). `Some(false)`
+    /// asks the server to skip thinking on models that expose a toggle
+    /// (Qwen3, Ollama-integrated DeepSeek-R1 distills, GPT-OSS, etc).
+    /// Ignored by non-reasoning models. None = use the model's default.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    think: Option<bool>,
 }

 #[derive(Serialize)]
@@ -842,90 +1186,6 @@ struct OllamaOptions {
    min_p: Option<f32>,
 }

-/// Tool definition sent in /api/chat requests (OpenAI-compatible format)
-#[derive(Serialize, Clone, Debug)]
-pub struct Tool {
-    #[serde(rename = "type")]
-    pub tool_type: String, // always "function"
-    pub function: ToolFunction,
-}
-
-#[derive(Serialize, Clone, Debug)]
-pub struct ToolFunction {
-    pub name: String,
-    pub description: String,
-    pub parameters: serde_json::Value,
-}
-
-impl Tool {
-    pub fn function(name: &str, description: &str, parameters: serde_json::Value) -> Self {
-        Self {
-            tool_type: "function".to_string(),
-            function: ToolFunction {
-                name: name.to_string(),
-                description: description.to_string(),
-                parameters,
-            },
-        }
-    }
-}
-
-/// A message in the chat conversation history
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct ChatMessage {
-    pub role: String, // "system" | "user" | "assistant" | "tool"
-    /// Empty string (not null) when tool_calls is present — Ollama quirk
-    #[serde(default)]
-    pub content: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_calls: Option<Vec<ToolCall>>,
-    /// Base64 images — only on user messages to vision-capable models
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub images: Option<Vec<String>>,
-}
-
-impl ChatMessage {
-    pub fn system(content: impl Into<String>) -> Self {
-        Self {
-            role: "system".to_string(),
-            content: content.into(),
-            tool_calls: None,
-            images: None,
-        }
-    }
-    pub fn user(content: impl Into<String>) -> Self {
-        Self {
-            role: "user".to_string(),
-            content: content.into(),
-            tool_calls: None,
-            images: None,
-        }
-    }
-    pub fn tool_result(content: impl Into<String>) -> Self {
-        Self {
-            role: "tool".to_string(),
-            content: content.into(),
-            tool_calls: None,
-            images: None,
-        }
-    }
-}
-
-/// Tool call returned by the model in an assistant message
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct ToolCall {
-    pub function: ToolCallFunction,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub id: Option<String>,
-}
-
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct ToolCallFunction {
-    pub name: String,
-    /// Native JSON object (NOT a JSON-encoded string like OpenAI)
-    pub arguments: serde_json::Value,
-}
-
 #[derive(Serialize)]
 struct OllamaChatRequest<'a> {
    model: &'a str,
@@ -947,13 +1207,102 @@ struct OllamaChatResponse {
    done_reason: String,
    #[serde(default)]
    prompt_eval_count: Option<i32>,
+    /// Nanoseconds spent evaluating the prompt (context ingestion).
+    #[serde(default)]
+    prompt_eval_duration: Option<u64>,
    #[serde(default)]
    eval_count: Option<i32>,
+    /// Nanoseconds spent generating the response tokens.
+    #[serde(default)]
+    eval_duration: Option<u64>,
+}
+
+/// One chunk in the NDJSON stream from `/api/chat` with `stream: true`.
+/// Early chunks carry content deltas in `message.content`; the final chunk
+/// has `done: true`, optional `tool_calls`, and usage counters.
+///
+/// Every field is `#[serde(default)]`, so a line that omits any of them
+/// still deserializes.
+#[derive(Deserialize, Debug)]
+struct OllamaStreamChunk {
+    /// Message fragment for this chunk; all-default when absent.
+    #[serde(default)]
+    message: OllamaStreamMessage,
+    /// `true` on the terminal chunk; `false` when the field is absent.
+    #[serde(default)]
+    done: bool,
+    /// Number of prompt tokens evaluated, when reported.
+    #[serde(default)]
+    prompt_eval_count: Option<i32>,
+    /// Prompt evaluation time in nanoseconds, when reported.
+    #[serde(default)]
+    prompt_eval_duration: Option<u64>,
+    /// Number of generated tokens, when reported.
+    #[serde(default)]
+    eval_count: Option<i32>,
+    /// Generation time in nanoseconds, when reported.
+    #[serde(default)]
+    eval_duration: Option<u64>,
+}
+
+/// The `message` object inside an `OllamaStreamChunk`. Derives `Default`
+/// so the enclosing chunk can deserialize when `message` is missing.
+#[derive(Deserialize, Debug, Default)]
+struct OllamaStreamMessage {
+    /// Speaker role; empty string when the field is absent.
+    #[serde(default)]
+    role: String,
+    /// Content delta for this chunk; empty string when absent.
+    #[serde(default)]
+    content: String,
+    /// Tool calls requested by the model, if any.
+    #[serde(default)]
+    tool_calls: Option<Vec<crate::ai::llm_client::ToolCall>>,
 }

+/// Deserialized response carrying the generated text plus optional
+/// usage counters (each defaults to `None` when absent).
 #[derive(Deserialize)]
 struct OllamaResponse {
    response: String,
+    /// Number of prompt tokens evaluated, when reported.
+    #[serde(default)]
+    prompt_eval_count: Option<i32>,
+    /// Prompt evaluation time in nanoseconds, when reported.
+    #[serde(default)]
+    prompt_eval_duration: Option<u64>,
+    /// Number of generated tokens, when reported.
+    #[serde(default)]
+    eval_count: Option<i32>,
+    /// Generation time in nanoseconds, when reported.
+    #[serde(default)]
+    eval_duration: Option<u64>,
+}
+
+/// Logs one `info` line summarising prompt and generation token usage.
+///
+/// Durations are taken in nanoseconds and rendered as whole milliseconds
+/// plus tokens/second, e.g.
+/// `Ollama chat metrics — prompt=100 tok (250 ms, 400.0 tok/s), gen=...`.
+/// A section is emitted only when its count is `Some`; the parenthesised
+/// timing only when the duration is `Some`; the rate only when both count
+/// and duration are greater than zero. Nothing is logged when both counts
+/// are `None`.
+fn log_chat_metrics(
+    prompt_eval_count: Option<i32>,
+    prompt_eval_duration_ns: Option<u64>,
+    eval_count: Option<i32>,
+    eval_duration_ns: Option<u64>,
+) {
+    // Compute tokens/sec when both count and duration are present.
+    // The `> 0` guards also rule out division by zero.
+    fn tokens_per_sec(count: Option<i32>, duration_ns: Option<u64>) -> Option<f64> {
+        match (count, duration_ns) {
+            (Some(c), Some(d)) if c > 0 && d > 0 => Some((c as f64) * 1_000_000_000.0 / (d as f64)),
+            _ => None,
+        }
+    }
+    // Nanoseconds -> milliseconds for display.
+    let prompt_ms = prompt_eval_duration_ns.map(|ns| ns as f64 / 1_000_000.0);
+    let eval_ms = eval_duration_ns.map(|ns| ns as f64 / 1_000_000.0);
+    let prompt_tps = tokens_per_sec(prompt_eval_count, prompt_eval_duration_ns);
+    let eval_tps = tokens_per_sec(eval_count, eval_duration_ns);
+
+    // Build each section independently so missing fields are left out
+    // rather than printed as `None`.
+    let mut parts: Vec<String> = Vec::new();
+    if let Some(c) = prompt_eval_count {
+        let mut s = format!("prompt={} tok", c);
+        if let Some(ms) = prompt_ms {
+            s.push_str(&format!(" ({:.0} ms", ms));
+            if let Some(tps) = prompt_tps {
+                s.push_str(&format!(", {:.1} tok/s", tps));
+            }
+            s.push(')');
+        }
+        parts.push(s);
+    }
+    if let Some(c) = eval_count {
+        let mut s = format!("gen={} tok", c);
+        if let Some(ms) = eval_ms {
+            s.push_str(&format!(" ({:.0} ms", ms));
+            if let Some(tps) = eval_tps {
+                s.push_str(&format!(", {:.1} tok/s", tps));
+            }
+            s.push(')');
+        }
+        parts.push(s);
+    }
+    if !parts.is_empty() {
+        log::info!("Ollama chat metrics — {}", parts.join(", "));
+    }
 }

 #[derive(Deserialize)]
@@ -972,13 +1321,6 @@ struct OllamaShowResponse {
    capabilities: Vec<String>,
 }

-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct ModelCapabilities {
-    pub name: String,
-    pub has_vision: bool,
-    pub has_tool_calling: bool,
-}
-
 #[derive(Serialize)]
 struct OllamaBatchEmbedRequest {
    model: String,
@@ -992,7 +1334,6 @@ struct OllamaEmbedResponse {

 #[cfg(test)]
 mod tests {
-    use super::*;

    #[test]
    fn generate_photo_description_prompt_is_concise() {
@@ -0,0 +1,998 @@
+// First consumer lands in a later PR (hybrid backend routing). Tests exercise
+// the translation helpers directly.
+#![allow(dead_code)]
+
+use anyhow::{Context, Result, anyhow, bail};
+use async_trait::async_trait;
+use reqwest::Client;
+use serde::Deserialize;
+use serde_json::{Value, json};
+use std::collections::HashMap;
+use std::sync::{Arc, Mutex};
+use std::time::{Duration, Instant};
+
+use crate::ai::llm_client::{
+    ChatMessage, LlmClient, LlmStreamEvent, ModelCapabilities, Tool, ToolCall, ToolCallFunction,
+};
+use futures::stream::{BoxStream, StreamExt};
+
+const DEFAULT_BASE_URL: &str = "https://openrouter.ai/api/v1";
+const DEFAULT_EMBEDDING_MODEL: &str = "openai/text-embedding-3-small";
+const CACHE_DURATION_SECS: u64 = 15 * 60;
+
+/// A cached value paired with the instant at which it was stored.
+#[derive(Clone)]
+struct CachedEntry<T> {
+    data: T,
+    cached_at: Instant,
+}
+
+impl<T> CachedEntry<T> {
+    /// Wraps `data`, stamping it with the current monotonic time.
+    fn new(data: T) -> Self {
+        let cached_at = Instant::now();
+        Self { data, cached_at }
+    }
+
+    /// Returns `true` once more than `CACHE_DURATION_SECS` whole seconds have
+    /// elapsed since the entry was created (sub-second remainders are
+    /// discarded by `as_secs`).
+    fn is_expired(&self) -> bool {
+        let age_secs = self.cached_at.elapsed().as_secs();
+        age_secs > CACHE_DURATION_SECS
+    }
+}
+
+// Process-wide cache of model listings, keyed by the client's `base_url`.
+// `list_models` consults it before hitting the network and overwrites the
+// entry after a successful fetch; expired entries are ignored on read.
+lazy_static::lazy_static! {
+    static ref MODEL_CAPABILITIES_CACHE: Arc<Mutex<HashMap<String, CachedEntry<Vec<ModelCapabilities>>>>> =
+        Arc::new(Mutex::new(HashMap::new()));
+}
+
+/// OpenAI-compatible client for OpenRouter (https://openrouter.ai).
+///
+/// Translates canonical `ChatMessage` / `Tool` shapes to OpenAI wire format:
+/// - Tool-call `arguments` serialized as JSON-encoded strings (vs Ollama's
+///   native JSON).
+/// - Image content rewritten into content-parts array with `image_url` entries.
+/// - `role=tool` messages attach a `tool_call_id` inferred from the preceding
+///   assistant turn's tool call.
+#[derive(Clone)]
+pub struct OpenRouterClient {
+    /// HTTP client used for every request; configured in `new`.
+    client: Client,
+    /// API key, sent as a bearer token by `authed`.
+    pub api_key: String,
+    /// Endpoint prefix; `/chat/completions`, `/embeddings` and `/models` are
+    /// appended to it.
+    pub base_url: String,
+    /// Model id placed in the `model` field of chat requests.
+    pub primary_model: String,
+    /// Model id placed in the `model` field of embedding requests.
+    pub embedding_model: String,
+    /// Stored by `set_num_ctx` but not forwarded: `build_options` skips it.
+    num_ctx: Option<i32>,
+    /// Sampling parameters; each is added to the request body only when set.
+    temperature: Option<f32>,
+    top_p: Option<f32>,
+    top_k: Option<i32>,
+    min_p: Option<f32>,
+    /// Optional `HTTP-Referer` header OpenRouter uses for attribution.
+    pub referer: Option<String>,
+    /// Optional `X-Title` header OpenRouter uses for attribution.
+    pub app_title: Option<String>,
+}
+
+impl OpenRouterClient {
+    /// Builds a client for `primary_model` that authenticates with `api_key`.
+    ///
+    /// `base_url` falls back to `DEFAULT_BASE_URL` when `None`, and the
+    /// embedding model starts as `DEFAULT_EMBEDDING_MODEL`. Sampling
+    /// parameters, `num_ctx` and the attribution headers all start unset.
+    pub fn new(api_key: String, base_url: Option<String>, primary_model: String) -> Self {
+        // 10 s to connect, 180 s for the whole request. If the builder fails
+        // we fall back to reqwest's default client instead of erroring.
+        let built = Client::builder()
+            .connect_timeout(Duration::from_secs(10))
+            .timeout(Duration::from_secs(180))
+            .build();
+        let client = match built {
+            Ok(configured) => configured,
+            Err(_) => Client::new(),
+        };
+        let base_url = match base_url {
+            Some(url) => url,
+            None => DEFAULT_BASE_URL.to_string(),
+        };
+        let embedding_model = DEFAULT_EMBEDDING_MODEL.to_string();
+
+        Self {
+            client,
+            api_key,
+            base_url,
+            primary_model,
+            embedding_model,
+            num_ctx: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            min_p: None,
+            referer: None,
+            app_title: None,
+        }
+    }
+
+    /// Replaces the model id used by `generate_embeddings`.
+    pub fn set_embedding_model(&mut self, model: String) {
+        self.embedding_model = model;
+    }
+
+    /// Stores the requested context size. The value is kept on the client but
+    /// is not sent to OpenRouter: `build_options` deliberately skips it.
+    #[allow(dead_code)]
+    pub fn set_num_ctx(&mut self, num_ctx: Option<i32>) {
+        self.num_ctx = num_ctx;
+    }
+
+    /// Replaces all four sampling knobs at once. A `None` clears the knob so
+    /// it is left out of subsequent request bodies.
+    #[allow(dead_code)]
+    pub fn set_sampling_params(
+        &mut self,
+        temperature: Option<f32>,
+        top_p: Option<f32>,
+        top_k: Option<i32>,
+        min_p: Option<f32>,
+    ) {
+        (self.temperature, self.top_p, self.top_k, self.min_p) =
+            (temperature, top_p, top_k, min_p);
+    }
+
+    /// Sets (or clears, with `None`) the `HTTP-Referer` and `X-Title`
+    /// attribution headers that `authed` attaches to requests.
+    pub fn set_attribution(&mut self, referer: Option<String>, app_title: Option<String>) {
+        (self.referer, self.app_title) = (referer, app_title);
+    }
+
+    /// Adds the bearer token and, when configured, the `HTTP-Referer` and
+    /// `X-Title` attribution headers to `builder`.
+    fn authed(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
+        let attribution = [
+            ("HTTP-Referer", self.referer.as_deref()),
+            ("X-Title", self.app_title.as_deref()),
+        ];
+        attribution.into_iter().fold(
+            builder.bearer_auth(&self.api_key),
+            |request, (header, value)| match value {
+                Some(v) => request.header(header, v),
+                None => request,
+            },
+        )
+    }
+
+    /// Converts canonical chat messages into OpenAI-style JSON message objects.
+    ///
+    /// Messages are processed in order so that each `role=tool` message can be
+    /// given the `tool_call_id` of the matching call from the most recent
+    /// assistant turn: the first tool message after that turn gets the first
+    /// id, the second gets the second, and so on. A tool message with no
+    /// matching id falls back to `"call_0"`.
+    fn messages_to_openai(messages: &[ChatMessage]) -> Vec<Value> {
+        let mut wire = Vec::with_capacity(messages.len());
+        // Ids announced by the latest assistant turn, and how many of them
+        // have already been handed out to tool-result messages.
+        let mut pending_ids: Vec<String> = Vec::new();
+        let mut consumed: usize = 0;
+
+        for message in messages {
+            let mut entry = serde_json::Map::new();
+            entry.insert("role".into(), Value::String(message.role.clone()));
+
+            // Plain string content, unless images are attached; then a
+            // content-parts array (optional text part, then one `image_url`
+            // part per image).
+            let content = match message.images.as_ref().filter(|imgs| !imgs.is_empty()) {
+                Some(imgs) => {
+                    let mut parts: Vec<Value> = Vec::new();
+                    if !message.content.is_empty() {
+                        parts.push(json!({"type": "text", "text": message.content}));
+                    }
+                    parts.extend(imgs.iter().map(|img| {
+                        let url = image_to_data_url(img);
+                        json!({
+                            "type": "image_url",
+                            "image_url": { "url": url }
+                        })
+                    }));
+                    Value::Array(parts)
+                }
+                None => Value::String(message.content.clone()),
+            };
+            entry.insert("content".into(), content);
+
+            // Assistant turn carrying tool calls: arguments go out as a JSON
+            // string, and the ids are remembered for the tool results that
+            // follow. Calls without an id get a positional `call_<n>` id.
+            if let (Some(calls), "assistant") = (&message.tool_calls, message.role.as_str()) {
+                let mut ids = Vec::with_capacity(calls.len());
+                let mut encoded = Vec::with_capacity(calls.len());
+                for (position, call) in calls.iter().enumerate() {
+                    let id = match &call.id {
+                        Some(existing) => existing.clone(),
+                        None => format!("call_{}", position),
+                    };
+                    let args_str = serde_json::to_string(&call.function.arguments)
+                        .unwrap_or_else(|_| "{}".to_string());
+                    encoded.push(json!({
+                        "id": id,
+                        "type": "function",
+                        "function": {
+                            "name": call.function.name,
+                            "arguments": args_str,
+                        }
+                    }));
+                    ids.push(id);
+                }
+                pending_ids = ids;
+                consumed = 0;
+                entry.insert("tool_calls".into(), Value::Array(encoded));
+            }
+
+            // Tool result: pair it with the next unclaimed id.
+            if message.role == "tool" {
+                let id = match pending_ids.get(consumed) {
+                    Some(known) => known.clone(),
+                    None => "call_0".to_string(),
+                };
+                entry.insert("tool_call_id".into(), Value::String(id));
+                consumed += 1;
+            }
+
+            wire.push(Value::Object(entry));
+        }
+
+        wire
+    }
+
+    /// Converts an OpenAI-style assistant message object into a `ChatMessage`.
+    ///
+    /// A missing or non-string `role` defaults to `"assistant"` and a missing
+    /// or non-string `content` to the empty string. Tool-call `arguments` may
+    /// be a JSON-encoded string or a JSON object; anything else, including a
+    /// string that fails to parse, becomes `{}`. The returned message never
+    /// carries images.
+    ///
+    /// # Errors
+    /// Fails when `msg` is not a JSON object or when a tool call has no
+    /// `function` field.
+    fn openai_message_to_chat(msg: &Value) -> Result<ChatMessage> {
+        let Some(fields) = msg.as_object() else {
+            bail!("response message is not an object");
+        };
+        let text_or = |key: &str, fallback: &str| -> String {
+            fields
+                .get(key)
+                .and_then(Value::as_str)
+                .unwrap_or(fallback)
+                .to_string()
+        };
+        let role = text_or("role", "assistant");
+        let content = text_or("content", "");
+
+        let tool_calls = fields
+            .get("tool_calls")
+            .and_then(Value::as_array)
+            .map(|raw_calls| {
+                raw_calls
+                    .iter()
+                    .map(|raw| -> Result<ToolCall> {
+                        let function = raw
+                            .get("function")
+                            .ok_or_else(|| anyhow!("tool_call missing function field"))?;
+                        let arguments = match function.get("arguments") {
+                            // OpenAI-compat: stringified JSON.
+                            Some(Value::String(encoded)) => {
+                                serde_json::from_str::<Value>(encoded).unwrap_or_else(|_| json!({}))
+                            }
+                            // Some providers send the object directly.
+                            Some(object @ Value::Object(_)) => object.clone(),
+                            _ => json!({}),
+                        };
+                        Ok(ToolCall {
+                            id: raw.get("id").and_then(Value::as_str).map(String::from),
+                            function: ToolCallFunction {
+                                name: function
+                                    .get("name")
+                                    .and_then(Value::as_str)
+                                    .unwrap_or_default()
+                                    .to_string(),
+                                arguments,
+                            },
+                        })
+                    })
+                    .collect::<Result<Vec<_>>>()
+            })
+            .transpose()?;
+
+        Ok(ChatMessage {
+            role,
+            content,
+            tool_calls,
+            images: None,
+        })
+    }
+
+    /// Collects the sampling parameters that are currently set as
+    /// `(request key, JSON value)` pairs, in the order `temperature`, `top_p`,
+    /// `top_k`, `min_p`. Unset parameters are omitted.
+    fn build_options(&self) -> Vec<(&'static str, Value)> {
+        // OpenAI uses max_tokens for the generation bound; num_ctx isn't
+        // directly transferable, so it is read and dropped rather than
+        // silently mis-mapped.
+        let _ = self.num_ctx;
+
+        let candidates = [
+            ("temperature", self.temperature.map(|t| json!(t))),
+            ("top_p", self.top_p.map(|p| json!(p))),
+            ("top_k", self.top_k.map(|k| json!(k))),
+            ("min_p", self.min_p.map(|m| json!(m))),
+        ];
+        candidates
+            .into_iter()
+            .filter_map(|(key, value)| value.map(|set| (key, set)))
+            .collect()
+    }
+}
+
+#[async_trait]
+impl LlmClient for OpenRouterClient {
+    /// One-shot completion: sends an optional system message followed by a
+    /// user message (with `images` attached) through `chat_with_tools` with no
+    /// tools, and returns the reply's text content.
+    async fn generate(
+        &self,
+        prompt: &str,
+        system: Option<&str>,
+        images: Option<Vec<String>>,
+    ) -> Result<String> {
+        let user_turn = ChatMessage {
+            images,
+            ..ChatMessage::user(prompt)
+        };
+        let conversation: Vec<ChatMessage> = system
+            .map(|text| ChatMessage::system(text))
+            .into_iter()
+            .chain(std::iter::once(user_turn))
+            .collect();
+
+        let (reply, _prompt_tokens, _completion_tokens) =
+            self.chat_with_tools(conversation, Vec::new()).await?;
+        Ok(reply.content)
+    }
+
+    /// Sends a non-streaming chat completion request.
+    ///
+    /// Returns the assistant message together with the `prompt_tokens` and
+    /// `completion_tokens` reported in the response's `usage` object (each
+    /// `None` when absent).
+    ///
+    /// # Errors
+    /// Fails on transport errors, a non-success HTTP status, an unparsable
+    /// body, or a body without `choices[0].message`; in the last case the
+    /// error text includes whatever `extract_openrouter_error_detail` finds.
+    async fn chat_with_tools(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<(ChatMessage, Option<i32>, Option<i32>)> {
+        let url = format!("{}/chat/completions", self.base_url);
+
+        let mut payload = serde_json::Map::new();
+        payload.insert("model".into(), Value::String(self.primary_model.clone()));
+        payload.insert(
+            "messages".into(),
+            Value::Array(Self::messages_to_openai(&messages)),
+        );
+        payload.insert("stream".into(), Value::Bool(false));
+        if !tools.is_empty() {
+            let encoded_tools = serde_json::to_value(&tools).context("serializing tools")?;
+            payload.insert("tools".into(), encoded_tools);
+        }
+        payload.extend(
+            self.build_options()
+                .into_iter()
+                .map(|(key, value)| (key.to_string(), value)),
+        );
+
+        log::info!(
+            "OpenRouter chat_with_tools: model={} messages={} tools={}",
+            self.primary_model,
+            messages.len(),
+            tools.len()
+        );
+
+        let response = self
+            .authed(self.client.post(&url))
+            .json(&Value::Object(payload))
+            .send()
+            .await
+            .with_context(|| format!("POST {} failed", url))?;
+
+        let status = response.status();
+        if !status.is_success() {
+            let detail = response.text().await.unwrap_or_default();
+            bail!("OpenRouter chat request failed: {} — {}", status, detail);
+        }
+
+        let parsed: Value = response.json().await.context("parsing chat response")?;
+        let Some(choice) = parsed
+            .get("choices")
+            .and_then(Value::as_array)
+            .and_then(|list| list.first())
+        else {
+            bail!(
+                "response missing choices[0]: {}",
+                extract_openrouter_error_detail(&parsed)
+            );
+        };
+        let Some(raw_message) = choice.get("message") else {
+            bail!(
+                "choices[0] missing message: {}",
+                extract_openrouter_error_detail(&parsed)
+            );
+        };
+        let reply = Self::openai_message_to_chat(raw_message)?;
+
+        // Token counts live under `usage`; either may be missing.
+        let usage_count = |field: &str| -> Option<i32> {
+            parsed
+                .get("usage")
+                .and_then(|usage| usage.get(field))
+                .and_then(Value::as_i64)
+                .map(|n| n as i32)
+        };
+
+        Ok((
+            reply,
+            usage_count("prompt_tokens"),
+            usage_count("completion_tokens"),
+        ))
+    }
+
+    /// Streaming variant of `chat_with_tools`.
+    ///
+    /// Sends the request with `stream: true` and returns a stream that yields
+    /// one `LlmStreamEvent::TextDelta` per non-empty content delta, followed
+    /// by a single `LlmStreamEvent::Done` carrying the assembled message
+    /// (accumulated text plus any tool calls) and the token counts from the
+    /// `usage` frame, when one was sent. `Done` is emitted both after
+    /// `data: [DONE]` and when the byte stream simply ends.
+    ///
+    /// # Errors
+    /// The outer `Result` fails on transport errors or a non-success HTTP
+    /// status. Inside the stream, an `Err` item is yielded (and the stream
+    /// ends without `Done`) when reading the body fails or when a frame
+    /// carries a top-level `error` object, which is how a failure that occurs
+    /// after streaming has started shows up in the payload.
+    async fn chat_with_tools_stream(
+        &self,
+        messages: Vec<ChatMessage>,
+        tools: Vec<Tool>,
+    ) -> Result<BoxStream<'static, Result<LlmStreamEvent>>> {
+        let url = format!("{}/chat/completions", self.base_url);
+        let mut body = serde_json::Map::new();
+        body.insert("model".into(), Value::String(self.primary_model.clone()));
+        body.insert(
+            "messages".into(),
+            Value::Array(Self::messages_to_openai(&messages)),
+        );
+        body.insert("stream".into(), Value::Bool(true));
+        // Ask for usage data in the final chunk (OpenAI + OpenRouter
+        // both honor this options bag).
+        body.insert(
+            "stream_options".into(),
+            serde_json::json!({ "include_usage": true }),
+        );
+        if !tools.is_empty() {
+            body.insert(
+                "tools".into(),
+                serde_json::to_value(&tools).context("serializing tools")?,
+            );
+        }
+        for (k, v) in self.build_options() {
+            body.insert(k.into(), v);
+        }
+
+        let resp = self
+            .authed(self.client.post(&url))
+            .json(&Value::Object(body))
+            .send()
+            .await
+            .with_context(|| format!("POST {} failed", url))?;
+
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            bail!("OpenRouter stream request failed: {} — {}", status, body);
+        }
+
+        // OpenAI-compat SSE stream. Each event is `data: <json>\n\n`, with
+        // `data: [DONE]` signalling completion. Tool calls arrive as
+        // `delta.tool_calls[i]` chunks that must be concatenated by index.
+        let byte_stream = resp.bytes_stream();
+        let stream = async_stream::stream! {
+            let mut byte_stream = byte_stream;
+            let mut buf: Vec<u8> = Vec::new();
+            let mut accumulated_content = String::new();
+            // tool call state: index -> (id, name, args_string)
+            let mut tool_state: std::collections::BTreeMap<
+                usize,
+                (Option<String>, Option<String>, String),
+            > = std::collections::BTreeMap::new();
+            let mut role = "assistant".to_string();
+            let mut prompt_tokens: Option<i32> = None;
+            let mut completion_tokens: Option<i32> = None;
+            let mut done_seen = false;
+
+            while let Some(chunk) = byte_stream.next().await {
+                let chunk = match chunk {
+                    Ok(b) => b,
+                    Err(e) => {
+                        yield Err(anyhow!("stream read failed: {}", e));
+                        return;
+                    }
+                };
+                buf.extend_from_slice(&chunk);
+
+                // SSE frames are delimited by a blank line. Walk the buffer
+                // for "\n\n" markers; anything before them is a complete
+                // frame (possibly multi-line).
+                while let Some(sep) = find_double_newline(&buf) {
+                    let frame = buf.drain(..sep + 2).collect::<Vec<_>>();
+                    let frame_str = match std::str::from_utf8(&frame) {
+                        Ok(s) => s,
+                        Err(_) => continue,
+                    };
+                    // A frame is one or more lines; the payload is on data:
+                    // lines. Ignore comments and other fields.
+                    for line in frame_str.lines() {
+                        let line = line.trim_end_matches('\r');
+                        // The SSE format makes the single space after the
+                        // colon optional, so accept both `data: x` and
+                        // `data:x`.
+                        let payload = match line.strip_prefix("data:") {
+                            Some(p) => p.strip_prefix(' ').unwrap_or(p),
+                            None => continue,
+                        };
+                        if payload == "[DONE]" {
+                            done_seen = true;
+                            break;
+                        }
+                        let v: Value = match serde_json::from_str(payload) {
+                            Ok(v) => v,
+                            Err(e) => {
+                                log::warn!(
+                                    "malformed OpenRouter SSE frame: {} ({})",
+                                    payload,
+                                    e
+                                );
+                                continue;
+                            }
+                        };
+
+                        // A top-level `error` object means the request
+                        // failed after streaming began. Surface it instead
+                        // of finishing with an empty or truncated `Done`,
+                        // matching what the non-streaming path reports.
+                        if v.get("error").is_some_and(|e| !e.is_null()) {
+                            yield Err(anyhow!(
+                                "OpenRouter stream error: {}",
+                                extract_openrouter_error_detail(&v)
+                            ));
+                            return;
+                        }
+
+                        // Usage can arrive in a dedicated final frame with
+                        // empty choices.
+                        if let Some(usage) = v.get("usage") {
+                            prompt_tokens = usage
+                                .get("prompt_tokens")
+                                .and_then(|n| n.as_i64())
+                                .map(|n| n as i32);
+                            completion_tokens = usage
+                                .get("completion_tokens")
+                                .and_then(|n| n.as_i64())
+                                .map(|n| n as i32);
+                        }
+
+                        let Some(choices) = v.get("choices").and_then(|c| c.as_array())
+                        else {
+                            continue;
+                        };
+                        let Some(choice) = choices.first() else { continue };
+                        let delta = match choice.get("delta") {
+                            Some(d) => d,
+                            None => continue,
+                        };
+                        if let Some(r) = delta.get("role").and_then(|v| v.as_str()) {
+                            role = r.to_string();
+                        }
+                        if let Some(content) =
+                            delta.get("content").and_then(|v| v.as_str())
+                            && !content.is_empty()
+                        {
+                            accumulated_content.push_str(content);
+                            yield Ok(LlmStreamEvent::TextDelta(content.to_string()));
+                        }
+                        if let Some(tcs) = delta.get("tool_calls").and_then(|v| v.as_array()) {
+                            for tc_delta in tcs {
+                                // Fragments of one call share an `index`;
+                                // a missing index is treated as 0.
+                                let idx = tc_delta
+                                    .get("index")
+                                    .and_then(|n| n.as_u64())
+                                    .unwrap_or(0) as usize;
+                                let entry = tool_state
+                                    .entry(idx)
+                                    .or_insert((None, None, String::new()));
+                                if let Some(id) =
+                                    tc_delta.get("id").and_then(|v| v.as_str())
+                                {
+                                    entry.0 = Some(id.to_string());
+                                }
+                                if let Some(func) = tc_delta.get("function") {
+                                    if let Some(name) =
+                                        func.get("name").and_then(|v| v.as_str())
+                                    {
+                                        entry.1 = Some(name.to_string());
+                                    }
+                                    if let Some(args) =
+                                        func.get("arguments").and_then(|v| v.as_str())
+                                    {
+                                        entry.2.push_str(args);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    if done_seen {
+                        break;
+                    }
+                }
+                if done_seen {
+                    break;
+                }
+            }
+
+            // Finalize tool calls: parse accumulated argument strings.
+            // Empty or unparsable argument text becomes `{}`.
+            let tool_calls: Option<Vec<ToolCall>> = if tool_state.is_empty() {
+                None
+            } else {
+                let mut v = Vec::with_capacity(tool_state.len());
+                for (_idx, (id, name, args)) in tool_state {
+                    let arguments: Value = if args.trim().is_empty() {
+                        Value::Object(Default::default())
+                    } else {
+                        serde_json::from_str(&args).unwrap_or_else(|_| {
+                            Value::Object(Default::default())
+                        })
+                    };
+                    v.push(ToolCall {
+                        id,
+                        function: ToolCallFunction {
+                            name: name.unwrap_or_default(),
+                            arguments,
+                        },
+                    });
+                }
+                Some(v)
+            };
+
+            let message = ChatMessage {
+                role,
+                content: accumulated_content,
+                tool_calls,
+                images: None,
+            };
+            yield Ok(LlmStreamEvent::Done {
+                message,
+                prompt_eval_count: prompt_tokens,
+                eval_count: completion_tokens,
+            });
+        };
+
+        Ok(Box::pin(stream))
+    }
+
+    /// Requests embeddings for `texts` from the `/embeddings` endpoint using
+    /// `embedding_model`, returning one vector per item of the response's
+    /// `data` array, in response order.
+    ///
+    /// # Errors
+    /// Fails on transport errors, a non-success HTTP status, or a body that
+    /// does not deserialize into `{ data: [{ embedding: [...] }] }`.
+    async fn generate_embeddings(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
+        #[derive(Deserialize)]
+        struct EmbeddingRow {
+            embedding: Vec<f32>,
+        }
+        #[derive(Deserialize)]
+        struct EmbeddingList {
+            data: Vec<EmbeddingRow>,
+        }
+
+        let url = format!("{}/embeddings", self.base_url);
+        let request = json!({
+            "model": self.embedding_model,
+            "input": texts,
+        });
+
+        let response = self
+            .authed(self.client.post(&url))
+            .json(&request)
+            .send()
+            .await
+            .with_context(|| format!("POST {} failed", url))?;
+
+        let status = response.status();
+        if !status.is_success() {
+            let detail = response.text().await.unwrap_or_default();
+            bail!("OpenRouter embedding request failed: {} — {}", status, detail);
+        }
+
+        let listing = response
+            .json::<EmbeddingList>()
+            .await
+            .context("parsing embed response")?;
+        let mut vectors = Vec::with_capacity(listing.data.len());
+        for row in listing.data {
+            vectors.push(row.embedding);
+        }
+        Ok(vectors)
+    }
+
+    /// Asks the primary model for a 1-2 sentence description of a single
+    /// image by delegating to `generate` with a fixed prompt and system
+    /// message.
+    async fn describe_image(&self, image_base64: &str) -> Result<String> {
+        const SYSTEM: &str = "You are a scene description assistant. Be concise and factual.";
+        const PROMPT: &str = "Briefly describe what you see in this image in 1-2 sentences. \
+                              Focus on the people, location, and activity.";
+
+        let attachments = vec![image_base64.to_string()];
+        self.generate(PROMPT, Some(SYSTEM), Some(attachments)).await
+    }
+
+    /// Lists the models advertised by `{base_url}/models` with their
+    /// capabilities.
+    ///
+    /// A non-expired entry in `MODEL_CAPABILITIES_CACHE` for this `base_url`
+    /// is returned without a network call; otherwise the listing is fetched
+    /// and the cache entry is replaced.
+    ///
+    /// # Errors
+    /// Fails on transport errors, a non-success HTTP status, an unparsable
+    /// body, or a body without a `data` array.
+    async fn list_models(&self) -> Result<Vec<ModelCapabilities>> {
+        // Scoped so the mutex guard is gone before the first `.await`.
+        let cached = {
+            let guard = MODEL_CAPABILITIES_CACHE.lock().unwrap();
+            let hit = guard
+                .get(&self.base_url)
+                .filter(|entry| !entry.is_expired())
+                .map(|entry| entry.data.clone());
+            hit
+        };
+        if let Some(models) = cached {
+            return Ok(models);
+        }
+
+        let url = format!("{}/models", self.base_url);
+        let response = self
+            .authed(self.client.get(&url))
+            .send()
+            .await
+            .with_context(|| format!("GET {} failed", url))?;
+
+        let status = response.status();
+        if !status.is_success() {
+            let detail = response.text().await.unwrap_or_default();
+            bail!("OpenRouter list_models failed: {} — {}", status, detail);
+        }
+
+        let parsed: Value = response.json().await.context("parsing models response")?;
+        let Some(entries) = parsed.get("data").and_then(Value::as_array) else {
+            bail!("models response missing data[]");
+        };
+
+        let mut models: Vec<ModelCapabilities> = Vec::with_capacity(entries.len());
+        for raw in entries {
+            models.push(parse_model_capabilities(raw));
+        }
+
+        MODEL_CAPABILITIES_CACHE
+            .lock()
+            .unwrap()
+            .insert(self.base_url.clone(), CachedEntry::new(models.clone()));
+
+        Ok(models)
+    }
+
+    /// Looks up `model` by exact name in the `list_models` result.
+    ///
+    /// # Errors
+    /// Propagates `list_models` failures and fails when no listed model has
+    /// that name.
+    async fn model_capabilities(&self, model: &str) -> Result<ModelCapabilities> {
+        let listing = self.list_models().await?;
+        for candidate in listing {
+            if candidate.name == model {
+                return Ok(candidate);
+            }
+        }
+        bail!("model '{}' not found on OpenRouter", model)
+    }
+
+    /// Returns the model id this client sends with chat requests.
+    fn primary_model(&self) -> &str {
+        &self.primary_model
+    }
+}
+
+/// Produces a short diagnostic string for a response body that lacks the
+/// expected `{choices: [...]}` shape.
+///
+/// OpenRouter sometimes answers 200 OK with
+/// `{"error": {"message": "...", "code": ...}}` when the upstream provider
+/// failed (rate limits, moderation, overload, timeout). When an `error`
+/// field is present the result is `error code=<code> message="<message>"`,
+/// with the message capped at 240 characters, `?` for a missing code and
+/// `(no message)` for a missing or non-string message. Otherwise the result
+/// is the first 300 characters of the raw JSON, so the log line stays
+/// actionable.
+fn extract_openrouter_error_detail(parsed: &Value) -> String {
+    let Some(err) = parsed.get("error") else {
+        return parsed.to_string().chars().take(300).collect();
+    };
+
+    // String codes are shown bare; anything else uses its JSON rendering.
+    let code = match err.get("code") {
+        Some(Value::String(text)) => text.clone(),
+        Some(other) => other.to_string(),
+        None => "?".to_string(),
+    };
+    let short_message: String = err
+        .get("message")
+        .and_then(Value::as_str)
+        .unwrap_or("(no message)")
+        .chars()
+        .take(240)
+        .collect();
+
+    format!("error code={} message=\"{}\"", code, short_message)
+}
+
+/// Locates the end of the first complete SSE frame in `buf`.
+///
+/// Returns an offset `idx` such that `buf[..idx + 2]` is the frame including
+/// its whole blank-line terminator, which is what the caller's
+/// `drain(..idx + 2)` relies on:
+/// - for `\n\n`, `idx` is the position of the first `\n`;
+/// - for `\r\n\r\n`, `idx` is the position of the second `\r`, so all four
+///   terminator bytes are consumed and no stray `\n` is left at the front of
+///   the buffer for the next frame.
+///
+/// Returns `None` when no terminator is present yet.
+fn find_double_newline(buf: &[u8]) -> Option<usize> {
+    for i in 0..buf.len().saturating_sub(1) {
+        if buf[i] == b'\n' && buf[i + 1] == b'\n' {
+            return Some(i);
+        }
+        if buf[i..].starts_with(b"\r\n\r\n") {
+            return Some(i + 2);
+        }
+    }
+    None
+}
+
+/// Returns `img` unchanged when it already is a `data:` URL; otherwise treats
+/// it as raw base64 and wraps it as `data:image/jpeg;base64,<img>`.
+fn image_to_data_url(img: &str) -> String {
+    match img.strip_prefix("data:") {
+        Some(_) => img.to_owned(),
+        None => ["data:image/jpeg;base64,", img].concat(),
+    }
+}
+
+/// Reads one entry of the `/models` listing into `ModelCapabilities`.
+///
+/// - `name` is the entry's `id` (empty when missing or not a string).
+/// - `has_tool_calling` is true when `supported_parameters` contains `"tools"`.
+/// - `has_vision` is true when `architecture.input_modalities` contains
+///   `"image"`.
+///
+/// Missing or mistyped fields count as "not supported".
+fn parse_model_capabilities(m: &Value) -> ModelCapabilities {
+    // True when `list` is a JSON array that contains the string `wanted`.
+    fn lists(list: Option<&Value>, wanted: &str) -> bool {
+        list.and_then(Value::as_array)
+            .is_some_and(|items| items.iter().any(|item| item.as_str() == Some(wanted)))
+    }
+
+    let modalities = m
+        .get("architecture")
+        .and_then(|arch| arch.get("input_modalities"));
+
+    ModelCapabilities {
+        name: m
+            .get("id")
+            .and_then(Value::as_str)
+            .unwrap_or_default()
+            .to_string(),
+        has_vision: lists(modalities, "image"),
+        has_tool_calling: lists(m.get("supported_parameters"), "tools"),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Canonical tool-call arguments are native JSON; on the wire they must
+    /// be a JSON-encoded string that parses back to the same value.
+    #[test]
+    fn tool_call_arguments_stringified_on_send() {
+        let msg = ChatMessage {
+            role: "assistant".into(),
+            content: String::new(),
+            tool_calls: Some(vec![ToolCall {
+                id: Some("call_abc".into()),
+                function: ToolCallFunction {
+                    name: "search_sms".into(),
+                    arguments: json!({"query": "hello", "limit": 5}),
+                },
+            }]),
+            images: None,
+        };
+
+        let wire = OpenRouterClient::messages_to_openai(&[msg]);
+        let tcs = wire[0]
+            .get("tool_calls")
+            .and_then(|v| v.as_array())
+            .expect("tool_calls present");
+        let args = tcs[0]
+            .get("function")
+            .and_then(|f| f.get("arguments"))
+            .and_then(|a| a.as_str())
+            .expect("arguments stringified");
+        let parsed: Value = serde_json::from_str(args).unwrap();
+        assert_eq!(parsed["query"], "hello");
+        assert_eq!(parsed["limit"], 5);
+    }
+
+    #[test]
+    fn tool_call_arguments_parsed_on_receive() {
+        let response_msg = json!({
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [{
+                "id": "call_xyz",
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "arguments": "{\"city\":\"Boston\",\"units\":\"celsius\"}"
+                }
+            }]
+        });
+
+        let parsed = OpenRouterClient::openai_message_to_chat(&response_msg).unwrap();
+        let tcs = parsed.tool_calls.unwrap();
+        assert_eq!(tcs.len(), 1);
+        assert_eq!(tcs[0].function.name, "get_weather");
+        assert_eq!(tcs[0].function.arguments["city"], "Boston");
+        assert_eq!(tcs[0].function.arguments["units"], "celsius");
+        assert_eq!(tcs[0].id.as_deref(), Some("call_xyz"));
+    }
+
+    #[test]
+    fn tool_call_arguments_accept_native_json_on_receive() {
+        // Providers are not uniform: `arguments` may already be a JSON object
+        // rather than a stringified one, and both shapes must be accepted.
+        let wire_reply = json!({
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [{
+                "id": "call_1",
+                "type": "function",
+                "function": {
+                    "name": "foo",
+                    "arguments": {"nested": {"k": 1}}
+                }
+            }]
+        });
+
+        let chat = OpenRouterClient::openai_message_to_chat(&wire_reply).unwrap();
+        let calls = chat.tool_calls.unwrap();
+        let first_call = &calls[0];
+        assert_eq!(first_call.function.arguments["nested"]["k"], 1);
+    }
+
+    #[test]
+    fn images_become_content_parts() {
+        // A user message with text plus a bare base64 image must serialize as
+        // a two-element content array: the text part, then the image part.
+        let prompt = "What is in this photo?";
+        let mut user_msg = ChatMessage::user(prompt);
+        user_msg.images = Some(vec!["BASE64DATA".into()]);
+
+        let wire = OpenRouterClient::messages_to_openai(&[user_msg]);
+        let parts = wire[0]
+            .get("content")
+            .and_then(|v| v.as_array())
+            .unwrap();
+        assert_eq!(parts.len(), 2);
+
+        let (text_part, image_part) = (&parts[0], &parts[1]);
+        assert_eq!(text_part["type"], "text");
+        assert_eq!(text_part["text"], prompt);
+        assert_eq!(image_part["type"], "image_url");
+        assert_eq!(
+            image_part["image_url"]["url"],
+            "data:image/jpeg;base64,BASE64DATA"
+        );
+    }
+
+    #[test]
+    fn data_url_images_pass_through_unchanged() {
+        // An image that is already a `data:` URL must be forwarded verbatim.
+        let data_url = "data:image/png;base64,ABCDEF";
+        let mut user_msg = ChatMessage::user("");
+        user_msg.images = Some(vec![data_url.into()]);
+
+        let wire = OpenRouterClient::messages_to_openai(&[user_msg]);
+        let parts = wire[0]
+            .get("content")
+            .and_then(|v| v.as_array())
+            .unwrap();
+
+        // Empty message text contributes no text part, so only the image remains.
+        assert_eq!(parts.len(), 1);
+        assert_eq!(parts[0]["image_url"]["url"], data_url);
+    }
+
+    #[test]
+    fn text_only_message_stays_string() {
+        // Without images the content must stay a plain JSON string, not an
+        // array of content parts.
+        let wire = OpenRouterClient::messages_to_openai(&[ChatMessage::user("hello")]);
+        let content = &wire[0]["content"];
+        assert_eq!(*content, "hello");
+        assert!(content.as_str().is_some());
+    }
+
+    #[test]
+    fn tool_result_inherits_tool_call_id_from_prior_assistant() {
+        // The tool-result message is built from its content only; the wire
+        // form is expected to pick up the id of the preceding assistant call.
+        let lookup_call = ToolCall {
+            id: Some("call_42".into()),
+            function: ToolCallFunction {
+                name: "lookup".into(),
+                arguments: json!({}),
+            },
+        };
+        let conversation = [
+            ChatMessage {
+                role: "assistant".into(),
+                content: String::new(),
+                tool_calls: Some(vec![lookup_call]),
+                images: None,
+            },
+            ChatMessage::tool_result("found it"),
+        ];
+
+        let wire = OpenRouterClient::messages_to_openai(&conversation);
+        let tool_msg = &wire[1];
+        assert_eq!(tool_msg["role"], "tool");
+        assert_eq!(tool_msg["tool_call_id"], "call_42");
+    }
+
+    #[test]
+    fn multiple_tool_results_map_to_sequential_call_ids() {
+        // Two tool calls followed by two results: each result must be paired
+        // with the call id at the same position.
+        let make_call = |id: &str, name: &str| ToolCall {
+            id: Some(id.into()),
+            function: ToolCallFunction {
+                name: name.into(),
+                arguments: json!({}),
+            },
+        };
+        let assistant = ChatMessage {
+            role: "assistant".into(),
+            content: String::new(),
+            tool_calls: Some(vec![make_call("call_A", "a"), make_call("call_B", "b")]),
+            images: None,
+        };
+        let first_result = ChatMessage::tool_result("a result");
+        let second_result = ChatMessage::tool_result("b result");
+
+        let wire = OpenRouterClient::messages_to_openai(&[assistant, first_result, second_result]);
+        for (idx, expected_id) in [(1, "call_A"), (2, "call_B")] {
+            assert_eq!(wire[idx]["tool_call_id"], expected_id);
+        }
+    }
+
+    #[test]
+    fn missing_tool_call_id_gets_synthetic_fallback() {
+        // A tool call with `id: None` must still serialize with an id; this
+        // pins the synthesized value for the first call.
+        let id_less_call = ToolCall {
+            id: None,
+            function: ToolCallFunction {
+                name: "noid".into(),
+                arguments: json!({}),
+            },
+        };
+        let assistant = ChatMessage {
+            role: "assistant".into(),
+            content: String::new(),
+            tool_calls: Some(vec![id_less_call]),
+            images: None,
+        };
+
+        let wire = OpenRouterClient::messages_to_openai(&[assistant]);
+        let calls = wire[0]
+            .get("tool_calls")
+            .and_then(|v| v.as_array())
+            .unwrap();
+        assert_eq!(calls[0]["id"], "call_0");
+    }
+
+    #[test]
+    fn parse_model_capabilities_extracts_tools_and_vision() {
+        // Model entry that lists `tools` among its supported parameters and
+        // `image` among its input modalities.
+        let model_entry = json!({
+            "id": "anthropic/claude-sonnet-4",
+            "supported_parameters": ["temperature", "top_p", "tools", "max_tokens"],
+            "architecture": {
+                "input_modalities": ["text", "image"]
+            }
+        });
+
+        let capabilities = parse_model_capabilities(&model_entry);
+        assert_eq!(capabilities.name, "anthropic/claude-sonnet-4");
+        assert!(capabilities.has_tool_calling);
+        assert!(capabilities.has_vision);
+    }
+
+    #[test]
+    fn parse_model_capabilities_handles_missing_fields() {
+        // Only `id` is present: both capability flags must come back false
+        // rather than the parse failing.
+        let model_entry = json!({ "id": "some/text-only-model" });
+
+        let capabilities = parse_model_capabilities(&model_entry);
+        assert_eq!(capabilities.name, "some/text-only-model");
+        assert!(!capabilities.has_tool_calling);
+        assert!(!capabilities.has_vision);
+    }
+}
@@ -20,31 +20,36 @@ impl SmsApiClient {
        }
    }

-    /// Fetch messages for a specific contact within ±4 days of the given timestamp
-    /// Falls back to all contacts if no messages found for the specific contact
-    /// Messages are sorted by proximity to the center timestamp
+    /// Compute a `[start, end]` unix-second window of `2 * radius_days`
+    /// centered on `center_ts`. `radius_days < 1` is clamped to 1 to avoid
+    /// degenerate zero-width windows.
+    ///
+    /// All arithmetic saturates at the `i64` bounds, so an absurdly large
+    /// radius (or an extreme `center_ts`) yields a clamped window instead of
+    /// panicking in debug builds or wrapping in release builds.
+    pub(crate) fn window_for_radius(center_ts: i64, radius_days: i64) -> (i64, i64) {
+        const SECS_PER_DAY: i64 = 86400;
+        let span = radius_days.max(1).saturating_mul(SECS_PER_DAY);
+        (
+            center_ts.saturating_sub(span),
+            center_ts.saturating_add(span),
+        )
+    }
+
+    /// Fetch messages for a specific contact within ±`radius_days` of the
+    /// given timestamp. Falls back to all contacts when no messages found
+    /// for the named contact. Sorted by proximity to the center timestamp.
    pub async fn fetch_messages_for_contact(
        &self,
        contact: Option<&str>,
        center_timestamp: i64,
+        radius_days: i64,
    ) -> Result<Vec<SmsMessage>> {
-        use chrono::Duration;
+        let effective_radius = radius_days.max(1);
+        let (start_ts, end_ts) = Self::window_for_radius(center_timestamp, radius_days);

-        // Calculate ±4 days range around the center timestamp
        let center_dt = chrono::DateTime::from_timestamp(center_timestamp, 0)
            .ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?;

-        let start_dt = center_dt - Duration::days(4);
-        let end_dt = center_dt + Duration::days(4);
-
-        let start_ts = start_dt.timestamp();
-        let end_ts = end_dt.timestamp();
-
        // If contact specified, try fetching for that contact first
        if let Some(contact_name) = contact {
            log::info!(
-                "Fetching SMS for contact: {} (±4 days from {})",
+                "Fetching SMS for contact: {} (±{} days from {})",
                contact_name,
+                effective_radius,
                center_dt.format("%Y-%m-%d %H:%M:%S")
            );
            let messages = self
@@ -68,7 +73,8 @@ impl SmsApiClient {

        // Fallback to all contacts
        log::info!(
-            "Fetching all SMS messages (±4 days from {})",
+            "Fetching all SMS messages (±{} days from {})",
+            effective_radius,
            center_dt.format("%Y-%m-%d %H:%M:%S")
        );
        self.fetch_messages(start_ts, end_ts, None, Some(center_timestamp))
@@ -250,6 +256,67 @@ impl SmsApiClient {
            .collect())
    }

+    /// Query the SMS-API message search endpoint
+    /// (`/api/messages/search/`) and return its `results` list.
+    ///
+    /// `params.mode` selects the ranking strategy:
+    ///   - "fts5"     keyword-only, supports phrase / prefix / boolean / NEAR
+    ///   - "semantic" embedding similarity
+    ///   - "hybrid"   both merged via reciprocal rank fusion (recommended)
+    ///
+    /// `query` and `params.mode` are percent-encoded; `limit` is always sent.
+    /// `contact_id`, `offset`, `date_from` / `date_to` (unix seconds),
+    /// `is_mms` and `has_media` are appended only when set, so filtering and
+    /// paging happen server-side instead of via a client-side over-fetch.
+    ///
+    /// # Errors
+    /// Fails on a transport error, on a non-success HTTP status (the status
+    /// and response body are included in the message), or when the body
+    /// cannot be decoded.
+    pub async fn search_messages(
+        &self,
+        query: &str,
+        params: &SmsSearchParams<'_>,
+    ) -> Result<Vec<SmsSearchHit>> {
+        let mut url = format!(
+            "{}/api/messages/search/?q={}&mode={}&limit={}",
+            self.base_url,
+            urlencoding::encode(query),
+            urlencoding::encode(params.mode),
+            params.limit,
+        );
+
+        // Optional filters, in the order they are appended to the URL.
+        let optional_params: [(&str, Option<String>); 6] = [
+            ("contact_id", params.contact_id.map(|v| v.to_string())),
+            ("offset", params.offset.map(|v| v.to_string())),
+            ("date_from", params.date_from.map(|v| v.to_string())),
+            ("date_to", params.date_to.map(|v| v.to_string())),
+            ("is_mms", params.is_mms.map(|v| v.to_string())),
+            ("has_media", params.has_media.map(|v| v.to_string())),
+        ];
+        for (key, value) in optional_params.iter() {
+            if let Some(value) = value {
+                url.push_str(&format!("&{}={}", key, value));
+            }
+        }
+
+        let request = self.client.get(&url);
+        let request = match &self.token {
+            Some(token) => request.header("Authorization", format!("Bearer {}", token)),
+            None => request,
+        };
+
+        let response = request.send().await?;
+        let status = response.status();
+        if !status.is_success() {
+            let body = response.text().await.unwrap_or_default();
+            return Err(anyhow::anyhow!(
+                "SMS search request failed: {} - {}",
+                status,
+                body
+            ));
+        }
+
+        let SmsSearchResponse { results, .. } = response.json().await?;
+        Ok(results)
+    }
+
    pub async fn summarize_context(
        &self,
        messages: &[SmsMessage],
@@ -260,12 +327,13 @@ impl SmsApiClient {
        }

        // Create prompt for Ollama with sender/receiver distinction
+        let user_name = crate::ai::user_display_name();
        let messages_text: String = messages
            .iter()
            .take(60) // Limit to avoid token overflow
            .map(|m| {
                if m.is_sent {
-                    format!("Me: {}", m.body)
+                    format!("{}: {}", user_name, m.body)
                } else {
                    format!("{}: {}", m.contact, m.body)
                }
@@ -314,3 +382,78 @@ struct SmsApiMessage {
    #[serde(rename = "type")]
    type_: i32,
 }
+
+/// One entry of the `results` array returned by the SMS-API search endpoint
+/// (see `SmsSearchResponse`).
+#[derive(Debug, Clone, Deserialize)]
+pub struct SmsSearchHit {
+    // NOTE(review): `dead_code` is presumably allowed because the field is
+    // deserialized but not read anywhere yet — confirm.
+    #[allow(dead_code)]
+    pub message_id: i64,
+    pub contact_name: String,
+    // NOTE(review): same as `message_id` — presumably kept for completeness.
+    #[allow(dead_code)]
+    pub contact_address: String,
+    pub body: String,
+    // NOTE(review): presumably unix seconds, matching the `date_from` /
+    // `date_to` bounds on `SmsSearchParams` — confirm against SMS-API.
+    pub date: i64,
+    /// Message direction code: 1 = received, 2 = sent.
+    #[serde(rename = "type")]
+    pub type_: i32,
+    /// Present for semantic / hybrid modes; absent for fts5.
+    #[serde(default)]
+    pub similarity_score: Option<f32>,
+    /// SMS-API-generated excerpt around the match, wrapped in `<mark>` tags.
+    /// For MMS messages that only matched via attachment text / filename
+    /// (empty `body`), the snippet is the only meaningful preview.
+    #[serde(default)]
+    pub snippet: Option<String>,
+}
+
+/// Optional filter / paging knobs for [`SmsApiClient::search_messages`].
+/// All fields except `mode` and `limit` map 1:1 to the same-named SMS-API
+/// query params (added in the 2026-05 search-enhancements release).
+#[derive(Debug, Clone)]
+pub struct SmsSearchParams<'a> {
+    /// Ranking strategy, sent percent-encoded as the `mode` query param.
+    /// `search_messages` documents "fts5", "semantic" and "hybrid".
+    pub mode: &'a str,
+    /// Always sent as the `limit` query param.
+    pub limit: usize,
+    /// When set, sent as the `contact_id` query param.
+    pub contact_id: Option<i64>,
+    /// Unix-seconds inclusive lower bound on `date`.
+    pub date_from: Option<i64>,
+    /// Unix-seconds inclusive upper bound on `date`.
+    pub date_to: Option<i64>,
+    /// `Some(true)` = MMS only, `Some(false)` = SMS only, `None` = both.
+    pub is_mms: Option<bool>,
+    /// `Some(true)` = only messages with image/video/audio attachments.
+    pub has_media: Option<bool>,
+    /// When set, sent as the `offset` query param.
+    pub offset: Option<usize>,
+}
+
+/// Envelope of the search endpoint's JSON response. Only `results` is handed
+/// back to callers of `search_messages`.
+#[derive(Deserialize)]
+struct SmsSearchResponse {
+    results: Vec<SmsSearchHit>,
+    // Deserialized but not read; defaults to an empty string when the field
+    // is missing from the response.
+    #[allow(dead_code)]
+    #[serde(default)]
+    search_method: String,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Seconds in one day, the unit `window_for_radius` scales by.
+    const DAY_SECS: i64 = 86400;
+
+    #[test]
+    fn window_for_radius_produces_2n_day_span() {
+        // A 7-day radius gives a 14-day window with the center in the middle.
+        let center: i64 = 1_700_000_000;
+        let (start, end) = SmsApiClient::window_for_radius(center, 7);
+        assert_eq!(end - start, 14 * DAY_SECS);
+        assert_eq!(center - start, 7 * DAY_SECS);
+        assert_eq!(end - center, 7 * DAY_SECS);
+    }
+
+    #[test]
+    fn window_for_radius_clamps_zero_to_one() {
+        // Radius 0 is treated as 1 day on each side.
+        let (start, end) = SmsApiClient::window_for_radius(100_000, 0);
+        assert_eq!(end - start, 2 * DAY_SECS);
+    }
+
+    #[test]
+    fn window_for_radius_clamps_negative_to_one() {
+        // Negative radii are clamped the same way as zero.
+        let (start, end) = SmsApiClient::window_for_radius(100_000, -7);
+        assert_eq!(end - start, 2 * DAY_SECS);
+    }
+}
@@ -0,0 +1,319 @@
+//! Thin async HTTP client for Apollo's `/api/internal/tags/*` endpoints.
+//!
+//! Apollo hosts the RAM++ auto-tag inference service alongside insightface.
+//! This client is the ImageApi side — shove image bytes through `/auto` and
+//! get back a list of `(name, confidence)` predictions over RAM++'s
+//! ~4585-tag vocabulary.
+//!
+//! Mirrors `face_client.rs` shape: optional base URL (None = disabled), one
+//! reqwest client with a generous timeout because GPU inference under a
+//! backlog can queue server-side (Apollo's threadpool is bounded to 1
+//! worker on CUDA).
+//!
+//! Configured via `APOLLO_TAG_API_BASE_URL`, falling back to
+//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo
+//! deploys are the common case). Both unset → `is_enabled()` returns false
+//! and the probe binary / future backlog drain no-op.
+//!
+//! Wire format: multipart/form-data with `file=<bytes>` and `meta=<json>`.
+//! `meta` carries `{content_hash, library_id, rel_path, threshold?}` —
+//! Apollo logs the path/lib for traceability and reads `threshold` to
+//! override the engine default for that call (the probe binary uses this
+//! to sweep without restarting Apollo).
+//!
+//! Error mapping (reflected in [`TagDetectError`]):
+//! - 422 `decode_failed` → permanent: ImageApi marks `status='failed'` and
+//!   doesn't retry until a manual rerun.
+//! - 200 with `tags:[]` → `status='no_tags'` marker (success-with-zero).
+//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker
+//!   written.
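+//! - 408 / 413 / 429 → defer (treated as operator-fixable infra issues).
+//! - Any other 4xx → permanent.
+//! - Any other 5xx / network error → defer.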
+
+use anyhow::{Context, Result};
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+/// Per-image metadata serialized to JSON and sent as the `meta` text field
+/// of the multipart request, next to the `file` part holding the bytes.
+#[derive(Debug, Clone, Serialize)]
+pub struct TagMeta {
+    pub content_hash: String,
+    pub library_id: i32,
+    /// Library-relative path; also used as the multipart part's file name.
+    pub rel_path: String,
+    /// Per-call threshold override. Apollo's engine default (0.68 for
+    /// ram_plus_swin_large_14m) is used when unset. The probe binary
+    /// uses this to sweep without restarting Apollo.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub threshold: Option<f32>,
+}
+
+/// A single `(name, confidence)` prediction from the `tags` array of a
+/// [`TagResponse`].
+#[derive(Debug, Clone, Deserialize)]
+pub struct TagPrediction {
+    pub name: String,
+    pub confidence: f32,
+}
+
+/// Successful (2xx) response body of the auto-tag call.
+#[derive(Debug, Clone, Deserialize)]
+pub struct TagResponse {
+    pub model_version: String,
+    pub duration_ms: i64,
+    // NOTE(review): presumably the threshold Apollo actually applied for
+    // this call (override or engine default) — confirm against Apollo.
+    pub threshold: f32,
+    /// Predictions for the image; an empty list is the no-tags signal.
+    pub tags: Vec<TagPrediction>,
+}
+
+/// Response body of `GET /api/internal/tags/health`, as decoded by
+/// `TagClient::health`.
+#[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)] // Reported by Apollo; load_error consumed by future health probe
+pub struct TagHealth {
+    pub loaded: bool,
+    pub device: String,
+    pub model_version: String,
+    pub image_size: i32,
+    pub threshold: f32,
+    /// `None` when the field is absent from the response.
+    #[serde(default)]
+    pub load_error: Option<String>,
+}
+
+/// Distinguishes permanent failures (don't retry) from transient ones
+/// (defer and retry on next scan tick). Mirrors `FaceDetectError` so the
+/// future backlog drain can use the same marker-row decision tree.
+///
+/// Returned by `TagClient::auto_tag`; non-2xx HTTP statuses are mapped to a
+/// variant by `classify_error_response`.
+#[derive(Debug)]
+pub enum TagDetectError {
+    /// Apollo refused the bytes for a reason that won't change on retry
+    /// (decode failure, zero-dim image). Mark `status='failed'`.
+    Permanent(anyhow::Error),
+    /// Apollo couldn't process this turn but might next time (CUDA OOM,
+    /// engine not loaded yet, network hiccup). Don't mark anything.
+    Transient(anyhow::Error),
+    /// Feature is disabled (no APOLLO_TAG_API_BASE_URL / APOLLO_API_BASE_URL).
+    /// Caller should silently no-op.
+    Disabled,
+}
+
+impl std::fmt::Display for TagDetectError {
+    /// Renders `"permanent: <cause>"` / `"transient: <cause>"` for the
+    /// wrapping variants and a fixed message for `Disabled`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let (class, cause) = match self {
+            Self::Permanent(cause) => ("permanent", cause),
+            Self::Transient(cause) => ("transient", cause),
+            Self::Disabled => return f.write_str("tag client disabled"),
+        };
+        write!(f, "{class}: {cause}")
+    }
+}
+
+// Marker impl so `TagDetectError` can be used wherever `std::error::Error`
+// is required; no `source()` override is provided.
+impl std::error::Error for TagDetectError {}
+
+/// HTTP client for Apollo's `/api/internal/tags/*` endpoints. A client with
+/// no base URL is "disabled": `is_enabled()` is false and `auto_tag` returns
+/// `TagDetectError::Disabled`.
+#[derive(Clone)]
+pub struct TagClient {
+    /// reqwest client; its request timeout is set in `TagClient::new`.
+    client: Client,
+    /// `None` → disabled. Trailing slash trimmed at construction so url
+    /// building doesn't double up.
+    base_url: Option<String>,
+}
+
+impl TagClient {
+    /// Build a client for the given Apollo base URL.
+    ///
+    /// `base_url` is normalized: surrounding whitespace and trailing slashes
+    /// are stripped, and a value that is blank after trimming is treated as
+    /// `None` (disabled), so a stray `VAR=" "` or padded URL never produces
+    /// an "enabled" client with an unusable base.
+    ///
+    /// The request timeout is read from `TAG_DETECT_TIMEOUT_SEC` (whole
+    /// seconds). Unset, unparsable, or zero values fall back to 60 s.
+    ///
+    /// # Panics
+    /// Panics if the underlying reqwest client cannot be built.
+    pub fn new(base_url: Option<String>) -> Self {
+        // 60 s timeout: GPU inference is fast (~50–150 ms on RTX-class
+        // hardware) but Apollo's 1-worker threadpool means a backlog drain
+        // queues server-side. 60 s is enough headroom for a small queue
+        // depth without surfacing a false transient.
+        //
+        // Zero is rejected like the other per-tick env knobs reject
+        // non-positive values. NOTE(review): a zero-duration timeout would
+        // presumably fail every request immediately — confirm.
+        let timeout_secs = std::env::var("TAG_DETECT_TIMEOUT_SEC")
+            .ok()
+            .and_then(|s| s.trim().parse::<u64>().ok())
+            .filter(|secs| *secs > 0)
+            .unwrap_or(60);
+        let client = Client::builder()
+            .timeout(Duration::from_secs(timeout_secs))
+            .build()
+            .expect("reqwest client build");
+        let base_url = base_url
+            .map(|u| u.trim().trim_end_matches('/').to_string())
+            .filter(|u| !u.is_empty());
+        Self { client, base_url }
+    }
+
+    /// Construct a client from the standard env vars. APOLLO_TAG_API_BASE_URL
+    /// wins; falls back to APOLLO_API_BASE_URL. Both unset (or blank) →
+    /// disabled.
+    pub fn from_env() -> Self {
+        let non_blank_var = |key: &str| {
+            std::env::var(key)
+                .ok()
+                .filter(|value| !value.trim().is_empty())
+        };
+        let base = non_blank_var("APOLLO_TAG_API_BASE_URL")
+            .or_else(|| non_blank_var("APOLLO_API_BASE_URL"));
+        Self::new(base)
+    }
+
+    /// `true` when a base URL is configured.
+    pub fn is_enabled(&self) -> bool {
+        self.base_url.is_some()
+    }
+
+    /// Run RAM++ auto-tagging over `bytes`. Empty `tags[]` is the no-tags
+    /// signal — caller writes a marker row in the persistence phase.
+    ///
+    /// # Errors
+    /// `TagDetectError::Disabled` when no base URL is configured; otherwise
+    /// whatever `post_multipart` reports.
+    pub async fn auto_tag(
+        &self,
+        bytes: Vec<u8>,
+        meta: TagMeta,
+    ) -> std::result::Result<TagResponse, TagDetectError> {
+        let Some(base) = self.base_url.as_deref() else {
+            return Err(TagDetectError::Disabled);
+        };
+        let url = format!("{}/api/internal/tags/auto", base);
+        self.post_multipart(&url, bytes, &meta).await
+    }
+
+    /// Engine reachability + device/model report.
+    ///
+    /// # Errors
+    /// Fails when the client is disabled, the request fails, the status is
+    /// an error status, or the body cannot be decoded.
+    #[allow(dead_code)] // consumed by future startup probe
+    pub async fn health(&self) -> Result<TagHealth> {
+        let base = self.base_url.as_deref().context("tag client disabled")?;
+        let url = format!("{}/api/internal/tags/health", base);
+        let resp = self.client.get(&url).send().await?.error_for_status()?;
+        let body: TagHealth = resp.json().await?;
+        Ok(body)
+    }
+
+    /// POST `bytes` as the `file` part and `meta` (JSON) as the `meta` text
+    /// field, then map the outcome onto the permanent / transient split.
+    ///
+    /// - `meta` fails to serialize → `Permanent`.
+    /// - send fails (timeout, connect, anything else) → `Transient`.
+    /// - 2xx whose body does not decode as `TagResponse` → `Transient`.
+    /// - any other status → decided by `classify_error_response`.
+    async fn post_multipart(
+        &self,
+        url: &str,
+        bytes: Vec<u8>,
+        meta: &TagMeta,
+    ) -> std::result::Result<TagResponse, TagDetectError> {
+        let meta_json = serde_json::to_string(meta)
+            .map_err(|e| TagDetectError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?;
+
+        // `mime_str` consumes the part, so a fallback cannot recover the
+        // bytes. Surface the (static-string, should-never-happen) failure
+        // instead of silently uploading an empty file. Transient, because
+        // the fault would not be the image's and no marker row should be
+        // written for it.
+        let file_part = reqwest::multipart::Part::bytes(bytes)
+            .file_name(meta.rel_path.clone())
+            .mime_str("application/octet-stream")
+            .map_err(|e| TagDetectError::Transient(anyhow::anyhow!("multipart mime: {e}")))?;
+        let form = reqwest::multipart::Form::new()
+            .text("meta", meta_json)
+            .part("file", file_part);
+
+        let resp = self
+            .client
+            .post(url)
+            .multipart(form)
+            .send()
+            .await
+            .map_err(|e| {
+                // Both kinds are retried; the label only aids log triage.
+                let kind = if e.is_timeout() || e.is_connect() {
+                    "network"
+                } else {
+                    "request"
+                };
+                TagDetectError::Transient(anyhow::anyhow!("tag client {kind}: {e}"))
+            })?;
+
+        let status = resp.status();
+        if status.is_success() {
+            return resp.json::<TagResponse>().await.map_err(|e| {
+                TagDetectError::Transient(anyhow::anyhow!("tag response decode: {e}"))
+            });
+        }
+
+        let body_text = resp.text().await.unwrap_or_default();
+        Err(classify_error_response(status.as_u16(), &body_text))
+    }
+}
+
+/// Map a non-success HTTP response onto the permanent / transient split.
+///
+/// Kept as a pure function so the marker-row contract is unit-testable
+/// without spinning up an HTTP server. Behavior matches face_client::classify
+/// so the future backlog drain can share the same retry policy.
+///
+/// `body_text` is parsed as JSON on a best-effort basis to pull out a detail
+/// code: `detail` itself when it is a string, otherwise `detail.code`. The
+/// code (possibly empty) and the raw body are both included in the message.
+///
+/// Classification:
+/// - 408 / 413 / 429 → transient. These are operator-fixable infra issues,
+///   so defer and let the next pass retry once the proxy is fixed (see
+///   face_client::classify_error_response for the cautionary tale).
+/// - any other 4xx (including 422) → permanent.
+/// - everything else (503, other 5xx, …) → transient.
+fn classify_error_response(status: u16, body_text: &str) -> TagDetectError {
+    let parsed_body = serde_json::from_str::<serde_json::Value>(body_text).ok();
+    let detail_code = parsed_body
+        .as_ref()
+        .and_then(|body| body.get("detail"))
+        .and_then(|detail| {
+            detail
+                .as_str()
+                .or_else(|| detail.get("code").and_then(|code| code.as_str()))
+        })
+        .map(str::to_string)
+        .unwrap_or_default();
+
+    // Every branch reports the same "tag detect <status> <code>: <body>" text.
+    let cause = anyhow::anyhow!("tag detect {} {}: {}", status, detail_code, body_text);
+
+    let retryable_client_error = matches!(status, 408 | 413 | 429);
+    let is_client_error = (400..500).contains(&status);
+    if is_client_error && !retryable_client_error {
+        TagDetectError::Permanent(cause)
+    } else {
+        TagDetectError::Transient(cause)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// True when `status` / `body` classify as a permanent failure.
+    fn permanent(status: u16, body: &str) -> bool {
+        matches!(
+            classify_error_response(status, body),
+            TagDetectError::Permanent(_)
+        )
+    }
+
+    /// True when `status` / `body` classify as a transient failure.
+    fn transient(status: u16, body: &str) -> bool {
+        matches!(
+            classify_error_response(status, body),
+            TagDetectError::Transient(_)
+        )
+    }
+
+    #[test]
+    fn classify_422_decode_failed_is_permanent() {
+        // `detail` given as a plain string.
+        let body = r#"{"detail":"decode_failed: bad bytes"}"#;
+        assert!(permanent(422, body));
+        let rendered = classify_error_response(422, body).to_string();
+        assert!(rendered.contains("decode_failed"));
+    }
+
+    #[test]
+    fn classify_503_cuda_oom_is_transient() {
+        // `detail` given as an object carrying a `code`.
+        let body = r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#;
+        assert!(transient(503, body));
+        let rendered = classify_error_response(503, body).to_string();
+        assert!(rendered.contains("cuda_oom"));
+    }
+
+    #[test]
+    fn classify_5xx_is_transient_other_4xx_is_permanent() {
+        assert!(transient(500, ""));
+        assert!(permanent(400, "{}"));
+        assert!(permanent(404, "{}"));
+    }
+
+    #[test]
+    fn classify_infra_4xx_is_transient() {
+        assert!(transient(408, ""));
+        assert!(transient(413, "<html>"));
+        assert!(transient(429, "{}"));
+    }
+}
@@ -0,0 +1,721 @@
+//! Per-tick drains the watcher runs alongside ingest.
+//!
+//! These passes were previously inlined in `main.rs`; they exist because
+//! a quick scan only walks recently-modified files, so any backlog of
+//! rows missing a `content_hash` / `date_taken` / face detection
+//! wouldn't otherwise drain except during the once-an-hour full scan.
+//! Each function is bounded per call by a `*_PER_TICK` env-var cap.
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::{Arc, Mutex};
+
+use log::{debug, info, warn};
+
+use crate::content_hash;
+use crate::database::ExifDao;
+use crate::date_resolver;
+use crate::face_watch;
+use crate::faces;
+use crate::file_types;
+use crate::libraries;
+use crate::tags;
+
+/// Compute and persist content_hash for image_exif rows where it's NULL.
+///
+/// Bounded per call by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000)
+/// so a watcher tick on a large legacy library doesn't block for hours
+/// blake3-ing every photo at once. Subsequent scans pick up the rest.
+/// For 50k+ libraries the dedicated `cargo run --bin backfill_hashes`
+/// is still faster (it doesn't fight a watcher loop for the DAO mutex).
+///
+/// Drains unhashed image_exif rows by querying them directly, independent
+/// of the filesystem walk. Quick scans only walk recently-modified files,
+/// so a backlog of pre-existing unhashed rows never enters
+/// `process_new_files`'s candidate set — left alone, it would only drain
+/// on full scans (default once an hour). Calling this every tick keeps
+/// the face-detection backlog moving regardless.
+///
+/// A failure of the backlog query itself is logged at `warn` and treated as
+/// an empty backlog for this tick, so a broken query is visible in the logs
+/// rather than indistinguishable from "nothing to do".
+///
+/// Returns the number of rows successfully backfilled this pass.
+///
+/// # Panics
+/// Panics if the `ExifDao` mutex is poisoned.
+pub fn backfill_unhashed_backlog(
+    context: &opentelemetry::Context,
+    library: &libraries::Library,
+    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+) -> usize {
+    let cap: i64 = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .filter(|n: &i64| *n > 0)
+        .unwrap_or(2000);
+
+    // Fetch up to cap+1 rows so we can tell "more remain" without a
+    // separate count query. Across libraries — there's no per-library
+    // filter on get_rows_missing_hash today — but we only ever update
+    // rows whose library_id matches the caller's library, so other
+    // libraries' rows just get skipped here and picked up on the next
+    // library's tick. Negligible cost given the cap.
+    // `saturating_add` keeps an `i64::MAX` cap from overflowing.
+    let rows: Vec<(i32, String)> = {
+        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+        match dao.get_rows_missing_hash(context, cap.saturating_add(1)) {
+            Ok(rows) => rows,
+            Err(e) => {
+                warn!(
+                    "face_watch: get_rows_missing_hash failed for library '{}': {:?}",
+                    library.name, e
+                );
+                Vec::new()
+            }
+        }
+    };
+    if rows.is_empty() {
+        return 0;
+    }
+
+    let more_than_cap = rows.len() as i64 > cap;
+    let base_path = std::path::Path::new(&library.root_path);
+
+    let mut backfilled = 0usize;
+    let mut errors = 0usize;
+    let mut skipped_other_lib = 0usize;
+    for (lib_id, rel_path) in rows.iter().take(cap as usize) {
+        if *lib_id != library.id {
+            skipped_other_lib += 1;
+            continue;
+        }
+        let abs = base_path.join(rel_path);
+        if !abs.exists() {
+            // File walked away — the watcher's reconciliation pass will
+            // remove the orphan exif row eventually.
+            continue;
+        }
+        match content_hash::compute(&abs) {
+            Ok(id) => {
+                // Lock is taken per row (not across hashing) so other DAO
+                // users are not blocked while a file is being hashed.
+                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+                if let Err(e) = dao.backfill_content_hash(
+                    context,
+                    library.id,
+                    rel_path,
+                    &id.content_hash,
+                    id.size_bytes,
+                ) {
+                    warn!(
+                        "face_watch: backfill_content_hash failed for {}: {:?}",
+                        rel_path, e
+                    );
+                    errors += 1;
+                } else {
+                    backfilled += 1;
+                }
+            }
+            Err(e) => {
+                debug!(
+                    "face_watch: hash compute failed for {} ({:?})",
+                    abs.display(),
+                    e
+                );
+                errors += 1;
+            }
+        }
+    }
+
+    if backfilled > 0 || errors > 0 || more_than_cap {
+        info!(
+            "face_watch: backfill pass for library '{}': hashed {} ({} error(s), {} skipped to other libraries; {} cap, more_remain={})",
+            library.name, backfilled, errors, skipped_other_lib, cap, more_than_cap
+        );
+    }
+    backfilled
+}
+
+/// Drain image_exif rows whose `date_taken` was never resolved or was
+/// resolved by the weakest fallback (`fs_time`). Runs the canonical-date
+/// waterfall — exiftool batch (one subprocess for the whole tick's
+/// rows) → filename regex → earliest_fs_time — and persists each
+/// resolution with its source tag. Capped per tick by
+/// `DATE_BACKFILL_MAX_PER_TICK` (default 500) so a 14k-row library
+/// drains over a few quick-scan ticks without blocking the watcher.
+///
+/// kamadak-exif is intentionally skipped here: the row already has a
+/// NULL date_taken because the ingest path's kamadak-exif call returned
+/// nothing, and re-running it would just produce the same answer.
+/// exiftool is the meaningful new attempt — it handles videos and
+/// MakerNote-hosted dates kamadak can't reach.
+///
+/// A failure of the backlog query itself is logged at `warn` and treated as
+/// an empty backlog for this tick, so a broken query is visible in the logs
+/// rather than indistinguishable from "nothing to do".
+///
+/// Returns the number of rows whose date was persisted this pass.
+///
+/// # Panics
+/// Panics if the `ExifDao` mutex is poisoned.
+pub fn backfill_missing_date_taken(
+    context: &opentelemetry::Context,
+    library: &libraries::Library,
+    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+) -> usize {
+    let cap: i64 = dotenv::var("DATE_BACKFILL_MAX_PER_TICK")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .filter(|n: &i64| *n > 0)
+        .unwrap_or(500);
+
+    // Ask for cap+1 rows so "more remain" is known without a count query;
+    // `saturating_add` keeps an `i64::MAX` cap from overflowing.
+    let rows: Vec<(i32, String)> = {
+        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+        match dao.get_rows_needing_date_backfill(context, library.id, cap.saturating_add(1)) {
+            Ok(rows) => rows,
+            Err(e) => {
+                warn!(
+                    "date_backfill: get_rows_needing_date_backfill failed for library '{}': {:?}",
+                    library.name, e
+                );
+                Vec::new()
+            }
+        }
+    };
+    if rows.is_empty() {
+        return 0;
+    }
+
+    let more_than_cap = rows.len() as i64 > cap;
+    let base_path = std::path::Path::new(&library.root_path);
+
+    // Build absolute paths and drop rows whose files no longer exist —
+    // the missing-file scan in library_maintenance retires deleted rows
+    // separately. Without this filter, NULL-date rows for missing files
+    // would loop through the drain forever (no source can resolve them).
+    let mut existing: Vec<(String, PathBuf)> = Vec::with_capacity(rows.len());
+    for (_, rel_path) in rows.iter().take(cap as usize) {
+        let abs = base_path.join(rel_path);
+        if abs.exists() {
+            existing.push((rel_path.clone(), abs));
+        }
+    }
+    if existing.is_empty() {
+        return 0;
+    }
+
+    // One exiftool subprocess for the whole batch; the resolver falls
+    // through to filename / fs_time per file when exiftool can't supply
+    // a date (or isn't installed at all).
+    let paths: Vec<PathBuf> = existing.iter().map(|(_, p)| p.clone()).collect();
+    let resolved = date_resolver::resolve_dates_batch(&paths, &HashMap::new());
+
+    let mut backfilled = 0usize;
+    let mut unresolved = 0usize;
+    let mut by_source: HashMap<&'static str, usize> = HashMap::new();
+    {
+        // One lock for the whole write phase; the slow resolution step above
+        // runs without holding it.
+        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+        for (rel_path, abs) in &existing {
+            let Some(rd) = resolved.get(abs).copied() else {
+                unresolved += 1;
+                continue;
+            };
+            match dao.backfill_date_taken(
+                context,
+                library.id,
+                rel_path,
+                rd.timestamp,
+                rd.source.as_str(),
+            ) {
+                Ok(()) => {
+                    backfilled += 1;
+                    *by_source.entry(rd.source.as_str()).or_insert(0) += 1;
+                }
+                Err(e) => {
+                    warn!(
+                        "date_backfill: update failed for lib {} {}: {:?}",
+                        library.id, rel_path, e
+                    );
+                }
+            }
+        }
+    }
+
+    if backfilled > 0 || unresolved > 0 || more_than_cap {
+        info!(
+            "date_backfill: library '{}': resolved {} ({:?}), {} unresolved, cap={}, more_remain={}",
+            library.name, backfilled, by_source, unresolved, cap, more_than_cap
+        );
+    }
+    backfilled
+}
+
+/// Face-detection backlog drain, run once per watcher tick.
+///
+/// Asks the `FaceDao` for up to `FACE_BACKLOG_MAX_PER_TICK` (default 64)
+/// unscanned candidates in this library and forwards them to
+/// `face_watch::run_face_detection_pass`. Because it runs on every tick
+/// rather than only on full scans, the backlog shrinks at quick-scan
+/// cadence. A DAO error is logged and the tick is skipped; an empty
+/// candidate list returns silently.
+///
+/// # Panics
+///
+/// Panics if the `FaceDao` mutex is poisoned.
+pub fn process_face_backlog(
+    context: &opentelemetry::Context,
+    library: &libraries::Library,
+    face_client: &crate::ai::face_client::FaceClient,
+    face_dao: &Arc<Mutex<Box<dyn faces::FaceDao>>>,
+    tag_dao: &Arc<Mutex<Box<dyn tags::TagDao>>>,
+    excluded_dirs: &[String],
+) {
+    // Non-positive or unparsable overrides fall back to the default.
+    let cap = dotenv::var("FACE_BACKLOG_MAX_PER_TICK")
+        .ok()
+        .and_then(|raw| raw.parse::<i64>().ok())
+        .filter(|limit| *limit > 0)
+        .unwrap_or(64);
+
+    // The guard is a temporary, so the lock is released as soon as the
+    // query returns.
+    let listed = face_dao
+        .lock()
+        .expect("face dao")
+        .list_unscanned_candidates(context, library.id, cap);
+    let pending: Vec<(String, String)> = match listed {
+        Ok(found) => found,
+        Err(err) => {
+            warn!(
+                "face_watch: list_unscanned_candidates failed for library '{}': {:?}",
+                library.name, err
+            );
+            return;
+        }
+    };
+    if pending.is_empty() {
+        return;
+    }
+
+    info!(
+        "face_watch: backlog drain — running detection on {} candidate(s) for library '{}' (cap={})",
+        pending.len(),
+        library.name,
+        cap
+    );
+
+    // Each row is a (rel_path, content_hash) pair; wrap it in the
+    // struct the detection pass expects.
+    let mut candidates: Vec<face_watch::FaceCandidate> = Vec::with_capacity(pending.len());
+    for (rel_path, content_hash) in pending {
+        candidates.push(face_watch::FaceCandidate {
+            rel_path,
+            content_hash,
+        });
+    }
+
+    face_watch::run_face_detection_pass(
+        library,
+        excluded_dirs,
+        face_client,
+        Arc::clone(face_dao),
+        Arc::clone(tag_dao),
+        candidates,
+    );
+}
+
+/// Compute content_hash for any image rows the walker just touched
+/// whose stored EXIF row is still hash-less. Called from
+/// `process_new_files` so freshly-ingested files don't have to wait for
+/// the next standalone `backfill_unhashed_backlog` tick before face
+/// detection can key on their bytes.
+///
+/// `files` holds `(absolute_path, rel_path)` pairs; video files are
+/// ignored. The per-call budget comes from
+/// `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000).
+///
+/// Cap is on **successes only**. An earlier version counted errors too,
+/// so a pocket of chronically-unhashable files at the front of the
+/// table (vanished mid-scan, permission denied, etc.) burned the budget
+/// every tick and the rest of the backlog never advanced.
+///
+/// # Panics
+///
+/// Panics if the `ExifDao` mutex is poisoned.
+pub fn backfill_missing_content_hashes(
+    context: &opentelemetry::Context,
+    files: &[(PathBuf, String)],
+    library: &libraries::Library,
+    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+) {
+    let image_paths: Vec<String> = files
+        .iter()
+        .filter(|(p, _)| !file_types::is_video_file(p))
+        .map(|(_, rel)| rel.clone())
+        .collect();
+    if image_paths.is_empty() {
+        return;
+    }
+
+    let exif_records = {
+        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+        match dao.get_exif_batch(context, Some(library.id), &image_paths) {
+            Ok(records) => records,
+            Err(e) => {
+                // Report the failure rather than silently treating it as
+                // "nothing to hash"; the next tick retries.
+                warn!(
+                    "face_watch: get_exif_batch failed for library '{}': {:?}",
+                    library.name, e
+                );
+                return;
+            }
+        }
+    };
+    // Cheap lookup back from rel_path → absolute file_path so
+    // content_hash::compute can read the bytes. Keys borrow from `files`
+    // so no rel_path is cloned.
+    let path_by_rel: HashMap<&str, &PathBuf> =
+        files.iter().map(|(p, rel)| (rel.as_str(), p)).collect();
+
+    let cap: usize = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .filter(|n: &usize| *n > 0)
+        .unwrap_or(2000);
+
+    // Count the unhashed backlog up front so we can surface "still needs
+    // backfill: N" in the log — without it, a face-scan that's stuck at
+    // 44% looks stalled when really it's chipping through hashes.
+    let unhashed_total = exif_records
+        .iter()
+        .filter(|r| r.content_hash.is_none())
+        .count();
+
+    let mut backfilled = 0usize;
+    let mut errors = 0usize;
+    for record in &exif_records {
+        if backfilled >= cap {
+            break;
+        }
+        if record.content_hash.is_some() {
+            continue;
+        }
+        let Some(file_path) = path_by_rel.get(record.file_path.as_str()) else {
+            // The DAO returned a file_path that isn't among the walked
+            // rel_paths, so there is no absolute path to read from.
+            // Skip it; a later tick can pick the row up again.
+            continue;
+        };
+        // Hash outside the DAO lock: reading the file is the slow part.
+        match content_hash::compute(file_path) {
+            Ok(id) => {
+                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+                if let Err(e) = dao.backfill_content_hash(
+                    context,
+                    library.id,
+                    &record.file_path,
+                    &id.content_hash,
+                    id.size_bytes,
+                ) {
+                    warn!(
+                        "face_watch: backfill_content_hash failed for {}: {:?}",
+                        record.file_path, e
+                    );
+                    errors += 1;
+                } else {
+                    backfilled += 1;
+                }
+            }
+            Err(e) => {
+                debug!(
+                    "face_watch: hash compute failed for {} ({:?})",
+                    file_path.display(),
+                    e
+                );
+                errors += 1;
+            }
+        }
+    }
+    // Always log when there's an unhashed backlog so an operator
+    // looking at "scan stuck at 44%" can see backfill is running and
+    // how much remains. Quiet only when there's nothing to do.
+    if unhashed_total > 0 || backfilled > 0 || errors > 0 {
+        let remaining = unhashed_total.saturating_sub(backfilled);
+        info!(
+            "face_watch: backfilled {}/{} content_hash for library '{}' ({} error(s); {} still need backfill; cap={})",
+            backfilled, unhashed_total, library.name, errors, remaining, cap
+        );
+    }
+}
+
+/// Build the face-detection candidate list for a scan tick.
+///
+/// Returns `(rel_path, content_hash)` for every image file that has a
+/// content_hash recorded in image_exif but no row in face_detections
+/// yet. Re-querying image_exif here picks up rows the EXIF write loop
+/// just inserted alongside any pre-existing rows the watcher walked
+/// over — covers both new uploads and the initial backlog scan.
+///
+/// `files` holds `(absolute_path, rel_path)` pairs; only the rel_path is
+/// used for lookups, the absolute path only for the video check. A
+/// failed image_exif query is logged and yields an empty list. A failed
+/// `already_scanned` check is logged and that file is left out.
+///
+/// # Panics
+///
+/// Panics if either DAO mutex is poisoned.
+pub fn build_face_candidates(
+    context: &opentelemetry::Context,
+    library: &libraries::Library,
+    files: &[(PathBuf, String)],
+    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+    face_dao: &Arc<Mutex<Box<dyn faces::FaceDao>>>,
+) -> Vec<face_watch::FaceCandidate> {
+    // Restrict to image files; videos aren't face-scanned in v1 (kamadak
+    // doesn't even register them in image_exif).
+    let image_paths: Vec<String> = files
+        .iter()
+        .filter(|(p, _)| !file_types::is_video_file(p))
+        .map(|(_, rel)| rel.clone())
+        .collect();
+    if image_paths.is_empty() {
+        return Vec::new();
+    }
+
+    let exif_records = {
+        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+        match dao.get_exif_batch(context, Some(library.id), &image_paths) {
+            Ok(records) => records,
+            Err(e) => {
+                // Report the failure rather than silently returning "no
+                // candidates"; the next tick retries.
+                warn!(
+                    "face_watch: get_exif_batch failed for library '{}': {:?}",
+                    library.name, e
+                );
+                return Vec::new();
+            }
+        }
+    };
+    // rel_path → content_hash (only rows with a hash; without one we have
+    // nothing to key face data against).
+    let mut hash_by_path: HashMap<String, String> = HashMap::with_capacity(exif_records.len());
+    for record in exif_records {
+        if let Some(h) = record.content_hash {
+            hash_by_path.insert(record.file_path, h);
+        }
+    }
+
+    let mut candidates = Vec::new();
+    // One lock for the whole loop; each check is a single DAO call.
+    let mut dao = face_dao.lock().expect("face dao");
+    for rel_path in image_paths {
+        let Some(hash) = hash_by_path.get(&rel_path) else {
+            continue;
+        };
+        match dao.already_scanned(context, hash) {
+            Ok(true) => continue,
+            Ok(false) => candidates.push(face_watch::FaceCandidate {
+                rel_path,
+                content_hash: hash.clone(),
+            }),
+            Err(e) => {
+                warn!("face_watch: already_scanned errored for {}: {:?}", hash, e);
+            }
+        }
+    }
+    candidates
+}
+
+#[cfg(test)]
+mod tests {
+    //! Tests for the hash-backfill and face-candidate helpers, run
+    //! against an in-memory SQLite database and a tempdir-backed library.
+
+    use super::*;
+
+    use std::fs;
+    use std::sync::{Arc, Mutex};
+
+    use diesel::prelude::*;
+    use tempfile::TempDir;
+
+    use crate::database::models::{InsertImageExif, InsertLibrary};
+    use crate::database::test::in_memory_db_connection;
+    use crate::database::{ExifDao, SqliteExifDao, schema};
+    use crate::faces::{FaceDao, SqliteFaceDao};
+    use crate::libraries::Library;
+
+    /// Fresh, empty telemetry context for DAO calls.
+    fn ctx() -> opentelemetry::Context {
+        opentelemetry::Context::new()
+    }
+
+    /// Removes the named environment variable when dropped, so a test
+    /// that overrides it cleans up even if an assertion panics midway.
+    struct EnvVarReset(&'static str);
+
+    impl Drop for EnvVarReset {
+        fn drop(&mut self) {
+            // SAFETY: same caveat as the `set_var` that pairs with this
+            // guard — mutating the process environment is only sound
+            // while no other thread touches it concurrently.
+            // NOTE(review): the test harness runs tests on multiple
+            // threads by default; confirm these are serialized if that
+            // matters on the target platform.
+            unsafe {
+                std::env::remove_var(self.0);
+            }
+        }
+    }
+
+    /// Build a tempdir-backed library + DAOs sharing a single in-memory
+    /// SQLite connection (so cross-table joins like
+    /// `list_unscanned_candidates` see consistent state).
+    fn setup() -> (
+        TempDir,
+        Library,
+        Arc<Mutex<diesel::SqliteConnection>>,
+        Arc<Mutex<Box<dyn ExifDao>>>,
+        Arc<Mutex<Box<dyn FaceDao>>>,
+    ) {
+        let tmp = TempDir::new().expect("tempdir");
+        let mut conn = in_memory_db_connection();
+        // Migration seeds library id=1 with a placeholder root; rewrite it
+        // to point at the tempdir so `<root>/<rel_path>` resolves to real
+        // files this test creates.
+        diesel::update(schema::libraries::table.filter(schema::libraries::id.eq(1)))
+            .set(schema::libraries::root_path.eq(tmp.path().to_string_lossy().to_string()))
+            .execute(&mut conn)
+            .expect("rewrite library 1 root");
+        // Add a second library so cross-library skip cases have somewhere
+        // to put their rows.
+        diesel::insert_into(schema::libraries::table)
+            .values(InsertLibrary {
+                name: "other",
+                root_path: "/tmp/other-test-lib",
+                created_at: 0,
+                enabled: true,
+                excluded_dirs: None,
+            })
+            .execute(&mut conn)
+            .expect("seed second library");
+
+        let library = Library {
+            id: 1,
+            name: "main".to_string(),
+            root_path: tmp.path().to_string_lossy().to_string(),
+            enabled: true,
+            excluded_dirs: Vec::new(),
+        };
+        let shared = Arc::new(Mutex::new(conn));
+        let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(
+            SqliteExifDao::from_shared(Arc::clone(&shared)),
+        )));
+        let face_dao: Arc<Mutex<Box<dyn FaceDao>>> = Arc::new(Mutex::new(Box::new(
+            SqliteFaceDao::from_connection(Arc::clone(&shared)),
+        )));
+        (tmp, library, shared, exif_dao, face_dao)
+    }
+
+    /// Insert a minimal image_exif row: every optional column is NULL
+    /// except `content_hash`, which the caller controls.
+    fn insert_exif(
+        exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+        lib_id: i32,
+        rel: &str,
+        content_hash: Option<&str>,
+    ) {
+        let mut dao = exif_dao.lock().unwrap();
+        dao.store_exif(
+            &ctx(),
+            InsertImageExif {
+                library_id: lib_id,
+                file_path: rel.to_string(),
+                camera_make: None,
+                camera_model: None,
+                lens_model: None,
+                width: None,
+                height: None,
+                orientation: None,
+                gps_latitude: None,
+                gps_longitude: None,
+                gps_altitude: None,
+                focal_length: None,
+                aperture: None,
+                shutter_speed: None,
+                iso: None,
+                date_taken: None,
+                created_time: 0,
+                last_modified: 0,
+                content_hash: content_hash.map(|s| s.to_string()),
+                size_bytes: None,
+                phash_64: None,
+                dhash_64: None,
+                date_taken_source: None,
+            },
+        )
+        .expect("insert");
+    }
+
+    /// Write `bytes` to `<root>/<rel>`, creating parent directories.
+    fn write_image(root: &std::path::Path, rel: &str, bytes: &[u8]) {
+        let abs = root.join(rel);
+        if let Some(parent) = abs.parent() {
+            fs::create_dir_all(parent).expect("mkdir");
+        }
+        fs::write(abs, bytes).expect("write file");
+    }
+
+    #[test]
+    fn backfill_unhashed_backlog_hashes_missing_rows_in_this_library() {
+        let (tmp, library, _conn, exif_dao, _face_dao) = setup();
+        write_image(tmp.path(), "a.jpg", b"alpha-bytes");
+        write_image(tmp.path(), "b.jpg", b"bravo-bytes");
+        insert_exif(&exif_dao, 1, "a.jpg", None);
+        insert_exif(&exif_dao, 1, "b.jpg", None);
+
+        let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
+        assert_eq!(backfilled, 2);
+
+        let mut dao = exif_dao.lock().unwrap();
+        let rows = dao
+            .get_exif_batch(&ctx(), Some(1), &["a.jpg".to_string(), "b.jpg".to_string()])
+            .unwrap();
+        assert_eq!(rows.len(), 2);
+        for r in rows {
+            assert!(
+                r.content_hash.is_some(),
+                "row {} should have a hash",
+                r.file_path
+            );
+        }
+    }
+
+    #[test]
+    fn backfill_unhashed_backlog_skips_other_libraries_and_missing_files() {
+        let (tmp, library, _conn, exif_dao, _face_dao) = setup();
+        write_image(tmp.path(), "exists.jpg", b"hello");
+        // Row for this library whose file is missing on disk:
+        insert_exif(&exif_dao, 1, "ghost.jpg", None);
+        insert_exif(&exif_dao, 1, "exists.jpg", None);
+        // Row in the other library — must be skipped (different lib_id).
+        insert_exif(&exif_dao, 2, "other.jpg", None);
+
+        let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
+        assert_eq!(backfilled, 1, "only the existing in-library file hashes");
+
+        let mut dao = exif_dao.lock().unwrap();
+        let other = dao
+            .get_exif_batch(&ctx(), Some(2), &["other.jpg".to_string()])
+            .unwrap();
+        assert_eq!(other.len(), 1);
+        assert!(
+            other[0].content_hash.is_none(),
+            "other-library row must remain unhashed"
+        );
+        let ghost = dao
+            .get_exif_batch(&ctx(), Some(1), &["ghost.jpg".to_string()])
+            .unwrap();
+        assert_eq!(ghost.len(), 1);
+        assert!(
+            ghost[0].content_hash.is_none(),
+            "missing-on-disk row stays unhashed (reconciliation removes it later)"
+        );
+    }
+
+    #[test]
+    fn backfill_unhashed_backlog_respects_per_tick_cap() {
+        // Env-var-driven cap; the function reads it on every call, so we
+        // can set it just for this test. Tests in the same binary share
+        // the process environment, but we only care that the cap shape
+        // (success count <= cap) holds.
+        // SAFETY: see the caveat on `EnvVarReset::drop`.
+        unsafe {
+            std::env::set_var("FACE_HASH_BACKFILL_MAX_PER_TICK", "2");
+        }
+        // Clear the override on every exit path, including a failing
+        // assertion, so it cannot leak into later tests.
+        let _reset = EnvVarReset("FACE_HASH_BACKFILL_MAX_PER_TICK");
+
+        let (tmp, library, _conn, exif_dao, _face_dao) = setup();
+        for i in 0..5 {
+            let rel = format!("img_{}.jpg", i);
+            write_image(tmp.path(), &rel, format!("bytes-{}", i).as_bytes());
+            insert_exif(&exif_dao, 1, &rel, None);
+        }
+
+        let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
+        assert_eq!(backfilled, 2, "cap=2 must bound the per-tick successes");
+    }
+
+    #[test]
+    fn backfill_missing_content_hashes_skips_videos_and_hashed_rows() {
+        let (tmp, library, _conn, exif_dao, _face_dao) = setup();
+        // Two image rows (one already hashed, one not), one video.
+        write_image(tmp.path(), "fresh.jpg", b"fresh-pixels");
+        write_image(tmp.path(), "already.jpg", b"already-pixels");
+        write_image(tmp.path(), "clip.mp4", b"video-bytes");
+        insert_exif(&exif_dao, 1, "fresh.jpg", None);
+        insert_exif(&exif_dao, 1, "already.jpg", Some("pre-existing-hash"));
+        insert_exif(&exif_dao, 1, "clip.mp4", None);
+
+        let files: Vec<(PathBuf, String)> = vec![
+            (tmp.path().join("fresh.jpg"), "fresh.jpg".to_string()),
+            (tmp.path().join("already.jpg"), "already.jpg".to_string()),
+            (tmp.path().join("clip.mp4"), "clip.mp4".to_string()),
+        ];
+        backfill_missing_content_hashes(&ctx(), &files, &library, &exif_dao);
+
+        let mut dao = exif_dao.lock().unwrap();
+        let rows = dao
+            .get_exif_batch(
+                &ctx(),
+                Some(1),
+                &[
+                    "fresh.jpg".to_string(),
+                    "already.jpg".to_string(),
+                    "clip.mp4".to_string(),
+                ],
+            )
+            .unwrap();
+        let by_path: HashMap<String, Option<String>> = rows
+            .into_iter()
+            .map(|r| (r.file_path, r.content_hash))
+            .collect();
+        assert!(
+            by_path["fresh.jpg"].is_some(),
+            "fresh image must get a hash"
+        );
+        assert_eq!(
+            by_path["already.jpg"].as_deref(),
+            Some("pre-existing-hash"),
+            "already-hashed image left untouched"
+        );
+        assert!(
+            by_path["clip.mp4"].is_none(),
+            "video skipped (not face-scanned, no hash needed via this path)"
+        );
+    }
+
+    #[test]
+    fn build_face_candidates_filters_videos_unhashed_and_already_scanned() {
+        let (tmp, library, _conn, exif_dao, face_dao) = setup();
+
+        // Seed image_exif with: hashed unscanned, hashed scanned, unhashed,
+        // and a video. Files don't need to exist on disk — the function
+        // doesn't read them, only the DB rows.
+        insert_exif(&exif_dao, 1, "fresh.jpg", Some("hash-fresh"));
+        insert_exif(&exif_dao, 1, "scanned.jpg", Some("hash-scanned"));
+        insert_exif(&exif_dao, 1, "unhashed.jpg", None);
+        insert_exif(&exif_dao, 1, "clip.mp4", Some("hash-video"));
+        // Mark `scanned.jpg`'s hash as already detected.
+        {
+            let mut dao = face_dao.lock().unwrap();
+            dao.mark_status(&ctx(), 1, "hash-scanned", "scanned.jpg", "no_faces", "test")
+                .expect("mark scanned");
+        }
+
+        let files: Vec<(PathBuf, String)> = vec![
+            (tmp.path().join("fresh.jpg"), "fresh.jpg".to_string()),
+            (tmp.path().join("scanned.jpg"), "scanned.jpg".to_string()),
+            (tmp.path().join("unhashed.jpg"), "unhashed.jpg".to_string()),
+            (tmp.path().join("clip.mp4"), "clip.mp4".to_string()),
+        ];
+        let candidates = build_face_candidates(&ctx(), &library, &files, &exif_dao, &face_dao);
+
+        assert_eq!(
+            candidates.len(),
+            1,
+            "exactly fresh.jpg should be a candidate"
+        );
+        assert_eq!(candidates[0].rel_path, "fresh.jpg");
+        assert_eq!(candidates[0].content_hash, "hash-fresh");
+    }
+}
@@ -0,0 +1,186 @@
+//! Backfill `image_exif.content_hash` + `size_bytes` for rows that were
+//! ingested before hash computation was wired into the watcher.
+//!
+//! The watcher computes hashes for new files as they're ingested, so this
+//! binary is a one-shot tool for the historical backlog. Safe to re-run;
+//! only rows with NULL content_hash are processed.
+
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+use std::time::Instant;
+
+use clap::Parser;
+use log::{error, warn};
+use rayon::prelude::*;
+
+use image_api::bin_progress;
+use image_api::content_hash;
+use image_api::database::{ExifDao, SqliteExifDao, connect};
+use image_api::libraries::{self, Library};
+
+// Command-line options for the one-shot content_hash backfill.
+// NOTE: clap's derive macro turns the `///` comments on the fields into
+// `--help` text, so rewording them changes user-visible output.
+#[derive(Parser, Debug)]
+#[command(name = "backfill_hashes")]
+#[command(about = "Compute content_hash for image_exif rows missing one")]
+struct Args {
+    // Passed as the row limit to `get_rows_missing_hash` on each loop
+    // iteration in `main`.
+    /// Max rows to hash per batch. The process loops until no rows remain.
+    #[arg(long, default_value_t = 500)]
+    batch_size: i64,
+
+    // `main` only builds a custom global rayon pool when this is > 0.
+    /// Rayon parallelism override. 0 uses the default thread pool size.
+    #[arg(long, default_value_t = 0)]
+    parallelism: usize,
+
+    // In dry-run mode `main` processes a single batch and then stops.
+    /// Dry-run: log what would be hashed without writing to the DB.
+    #[arg(long)]
+    dry_run: bool,
+}
+
+/// Entry point for the one-shot content_hash backfill.
+///
+/// Repeatedly fetches a batch of `image_exif` rows with no content_hash,
+/// hashes the files in parallel with rayon, and persists the results
+/// sequentially. It stops when no rows remain, after one batch in
+/// `--dry-run` mode, or when a whole batch yields no persisted hash
+/// (otherwise the same rows would be fetched again indefinitely).
+///
+/// # Errors
+///
+/// Returns an error when no libraries are configured or when the batch
+/// query fails.
+///
+/// # Panics
+///
+/// Panics if the rayon pool cannot be configured or the DAO mutex is
+/// poisoned.
+fn main() -> anyhow::Result<()> {
+    // Load `.env` before initialising the logger so a `RUST_LOG` defined
+    // only in `.env` is visible when env_logger reads the environment.
+    dotenv::dotenv().ok();
+    env_logger::init();
+
+    let args = Args::parse();
+    if args.parallelism > 0 {
+        rayon::ThreadPoolBuilder::new()
+            .num_threads(args.parallelism)
+            .build_global()
+            .expect("Unable to configure rayon thread pool");
+    }
+
+    // Resolve libraries (patch placeholder if still unset) so we can map
+    // library_id back to a root_path on disk.
+    let base_path = dotenv::var("BASE_PATH").ok();
+    let mut seed_conn = connect();
+    if let Some(base) = base_path.as_deref() {
+        libraries::seed_or_patch_from_env(&mut seed_conn, base);
+    }
+    let libs = libraries::load_all(&mut seed_conn);
+    drop(seed_conn);
+    if libs.is_empty() {
+        anyhow::bail!("No libraries configured; cannot backfill hashes");
+    }
+    let libs_by_id: std::collections::HashMap<i32, Library> =
+        libs.into_iter().map(|lib| (lib.id, lib)).collect();
+    println!(
+        "Configured libraries: {}",
+        libs_by_id
+            .values()
+            .map(|l| format!("{} -> {}", l.name, l.root_path))
+            .collect::<Vec<_>>()
+            .join(", ")
+    );
+
+    let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
+    let ctx = opentelemetry::Context::new();
+
+    let mut total_hashed = 0u64;
+    let mut total_missing = 0u64;
+    let mut total_errors = 0u64;
+    let start = Instant::now();
+
+    let pb = bin_progress::spinner("hashing");
+
+    loop {
+        let rows = {
+            let mut guard = dao.lock().expect("Unable to lock ExifDao");
+            guard
+                .get_rows_missing_hash(&ctx, args.batch_size)
+                .map_err(|e| anyhow::anyhow!("DB error: {:?}", e))?
+        };
+        if rows.is_empty() {
+            break;
+        }
+        let batch_size = rows.len();
+        pb.set_message(format!(
+            "batch of {} (hashed={} missing={} errors={})",
+            batch_size, total_hashed, total_missing, total_errors
+        ));
+
+        // Compute hashes in parallel (I/O-bound; rayon helps on local disks,
+        // throttled by network on SMB mounts — use --parallelism to tune).
+        // A `None` identity means the row could not be hashed: file missing,
+        // unknown library, or a read/hash error.
+        let results: Vec<(i32, String, Option<content_hash::FileIdentity>)> = rows
+            .into_par_iter()
+            .map(|(library_id, rel_path)| {
+                let abs = libs_by_id
+                    .get(&library_id)
+                    .map(|lib| Path::new(&lib.root_path).join(&rel_path));
+                match abs {
+                    Some(abs_path) if abs_path.exists() => match content_hash::compute(&abs_path) {
+                        Ok(id) => (library_id, rel_path, Some(id)),
+                        Err(e) => {
+                            error!("hash error for {}: {:?}", abs_path.display(), e);
+                            (library_id, rel_path, None)
+                        }
+                    },
+                    Some(_) => (library_id, rel_path, None), // file missing on disk
+                    None => {
+                        warn!("Row refers to unknown library_id {}", library_id);
+                        (library_id, rel_path, None)
+                    }
+                }
+            })
+            .collect();
+
+        // Persist sequentially — SQLite writes serialize anyway.
+        if !args.dry_run {
+            let mut batch_hashed = 0u64;
+            {
+                let mut guard = dao.lock().expect("Unable to lock ExifDao");
+                for (library_id, rel_path, ident) in &results {
+                    match ident {
+                        Some(id) => {
+                            match guard.backfill_content_hash(
+                                &ctx,
+                                *library_id,
+                                rel_path,
+                                &id.content_hash,
+                                id.size_bytes,
+                            ) {
+                                Ok(_) => {
+                                    batch_hashed += 1;
+                                    total_hashed += 1;
+                                    pb.inc(1);
+                                }
+                                Err(e) => {
+                                    pb.println(format!("persist error for {}: {:?}", rel_path, e));
+                                    total_errors += 1;
+                                }
+                            }
+                        }
+                        None => {
+                            total_missing += 1;
+                        }
+                    }
+                }
+            }
+            // Rows that could not be hashed keep their NULL content_hash,
+            // so a batch with zero successes would be fetched again
+            // unchanged and this loop would never terminate.
+            // NOTE(review): assumes `get_rows_missing_hash` returns the
+            // same rows when nothing was updated — confirm against the DAO.
+            if batch_hashed == 0 {
+                pb.println(format!(
+                    "no row in the last batch of {} could be hashed (missing files or errors); \
+                     stopping to avoid re-fetching the same rows. Re-run with a larger \
+                     --batch-size once the unhashable rows are cleaned up.",
+                    results.len()
+                ));
+                break;
+            }
+        } else {
+            for (_, rel_path, ident) in &results {
+                match ident {
+                    Some(id) => {
+                        pb.println(format!(
+                            "[dry-run] {} -> {} ({} bytes)",
+                            rel_path, id.content_hash, id.size_bytes
+                        ));
+                        total_hashed += 1;
+                        pb.inc(1);
+                    }
+                    None => {
+                        total_missing += 1;
+                    }
+                }
+            }
+            pb.println(format!(
+                "[dry-run] processed one batch of {}. Stopping — a real run would continue \
+                 until no NULL content_hash rows remain.",
+                results.len()
+            ));
+            break;
+        }
+    }
+
+    pb.finish_and_clear();
+    println!(
+        "Done. hashed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s",
+        total_hashed,
+        total_missing,
+        total_errors,
+        start.elapsed().as_secs_f64()
+    );
+    Ok(())
+}
@@ -0,0 +1,289 @@
+//! Backfill `image_exif.phash_64` + `dhash_64` for image rows that
+//! were ingested before perceptual hashing was wired into the watcher.
+//!
+//! The watcher computes perceptual hashes for new images as they're
+//! ingested, so this binary is a one-shot for the historical backlog.
+//! Idempotent — only rows with a non-null content_hash and a null
+//! phash are processed, so re-runs are safe and pick up where they
+//! left off (e.g. after a crash or interrupt).
+//!
+//! Image-only by design: `get_rows_missing_perceptual_hash` filters by
+//! file extension at the DB layer so videos and other non-decodable
+//! media are skipped without round-tripping `image_hasher`. Files that
+//! can't be opened (missing on disk, permission errors) are quietly
+//! left as null and counted as "missing"; on next run, if the file is
+//! restored, the row will surface again.
+//!
+//! Rows that stay null (missing files, unknown libraries, failed
+//! writes) are remembered for the rest of the run and filtered out of
+//! later batches, so a backlog of unreachable files cannot keep the
+//! batch loop re-fetching the same candidates forever.
+
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+use std::time::Instant;
+
+use clap::Parser;
+use log::{error, warn};
+use rayon::prelude::*;
+
+use image_api::bin_progress;
+use image_api::database::{ExifDao, SqliteExifDao, connect};
+use image_api::libraries::{self, Library};
+use image_api::perceptual_hash;
+
+// Command-line options. Kept as a plain `//` comment because clap's
+// derive turns `///` doc comments into user-visible help text.
+#[derive(Parser, Debug)]
+#[command(name = "backfill_perceptual_hash")]
+#[command(about = "Compute pHash + dHash for image_exif rows missing one")]
+struct Args {
+    /// Max rows to hash per batch (at least 1). The process loops until
+    /// no rows remain.
+    #[arg(long, default_value_t = 256, value_parser = clap::value_parser!(i64).range(1..))]
+    batch_size: i64,
+
+    /// Rayon parallelism override. 0 uses the default thread pool size.
+    #[arg(long, default_value_t = 0)]
+    parallelism: usize,
+
+    /// Dry-run: log what would be hashed without writing to the DB.
+    #[arg(long)]
+    dry_run: bool,
+}
+
+/// Hashes every pending `image_exif` row batch by batch: fetch
+/// candidates, compute pHash/dHash in parallel, then persist the
+/// results sequentially. With `--dry-run` a single batch is hashed and
+/// printed without touching the DB.
+///
+/// # Errors
+/// Fails when no libraries are configured or when the candidate query
+/// returns a DB error. Per-row persist failures are only counted and
+/// reported; they do not abort the run.
+fn main() -> anyhow::Result<()> {
+    // Load `.env` before initialising the logger so a RUST_LOG set
+    // there is honoured by env_logger.
+    dotenv::dotenv().ok();
+    env_logger::init();
+
+    let args = Args::parse();
+    if args.parallelism > 0 {
+        rayon::ThreadPoolBuilder::new()
+            .num_threads(args.parallelism)
+            .build_global()
+            .expect("Unable to configure rayon thread pool");
+    }
+
+    let base_path = dotenv::var("BASE_PATH").ok();
+    let mut seed_conn = connect();
+    if let Some(base) = base_path.as_deref() {
+        libraries::seed_or_patch_from_env(&mut seed_conn, base);
+    }
+    let libs = libraries::load_all(&mut seed_conn);
+    drop(seed_conn);
+    if libs.is_empty() {
+        anyhow::bail!("No libraries configured; cannot backfill perceptual hashes");
+    }
+    let libs_by_id: HashMap<i32, Library> = libs.into_iter().map(|lib| (lib.id, lib)).collect();
+    println!(
+        "Configured libraries: {}",
+        libs_by_id
+            .values()
+            .map(|l| format!("{} -> {}", l.name, l.root_path))
+            .collect::<Vec<_>>()
+            .join(", ")
+    );
+
+    let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
+    let ctx = opentelemetry::Context::new();
+
+    let mut total_hashed = 0u64;
+    let mut total_missing = 0u64;
+    let mut total_decode_failures = 0u64;
+    let mut total_errors = 0u64;
+    let start = Instant::now();
+
+    // Keys of rows this run examined but had to leave with a NULL
+    // phash (file missing, unknown library, persist error). They keep
+    // matching the candidate query, so they are filtered out of later
+    // batches to guarantee the loop terminates and counts stay exact.
+    let mut unresolved: HashSet<(i32, String)> = HashSet::new();
+
+    let pb = bin_progress::spinner("perceptual-hashing");
+
+    loop {
+        // Widen the limit by the number of unresolved rows so every
+        // fetch still has room for up to `batch_size` unseen rows.
+        let seen = i64::try_from(unresolved.len()).unwrap_or(i64::MAX);
+        let fetched = {
+            let mut guard = dao.lock().expect("Unable to lock ExifDao");
+            guard
+                .get_rows_missing_perceptual_hash(&ctx, args.batch_size.saturating_add(seen))
+                .map_err(|e| anyhow::anyhow!("DB error: {:?}", e))?
+        };
+        let rows: Vec<(i32, String)> = fetched
+            .into_iter()
+            .filter(|row| !unresolved.contains(row))
+            .collect();
+        // Empty after filtering means every remaining candidate was
+        // already given up on during this run: nothing left to do.
+        if rows.is_empty() {
+            break;
+        }
+        let batch_size = rows.len();
+        pb.set_message(format!(
+            "batch of {} (hashed={} decode_fail={} missing={} errors={})",
+            batch_size, total_hashed, total_decode_failures, total_missing, total_errors
+        ));
+
+        // Compute perceptual hashes in parallel — decoding and hashing
+        // are CPU-bound and every file is independent. Unless
+        // --parallelism overrides it, rayon's default thread pool
+        // matches the host's logical-core count, which is the right
+        // ceiling for image_hasher's DCT pass.
+        let results: Vec<(i32, String, FilePerceptualResult)> = rows
+            .into_par_iter()
+            .map(|(library_id, rel_path)| {
+                let abs = libs_by_id
+                    .get(&library_id)
+                    .map(|lib| Path::new(&lib.root_path).join(&rel_path));
+                match abs {
+                    Some(abs_path) if abs_path.exists() => {
+                        match perceptual_hash::compute(&abs_path) {
+                            Some(id) => (library_id, rel_path, FilePerceptualResult::Ok(id)),
+                            None => (library_id, rel_path, FilePerceptualResult::DecodeFailed),
+                        }
+                    }
+                    Some(_) => (library_id, rel_path, FilePerceptualResult::MissingOnDisk),
+                    None => {
+                        warn!("Row refers to unknown library_id {}", library_id);
+                        (library_id, rel_path, FilePerceptualResult::MissingOnDisk)
+                    }
+                }
+            })
+            .collect();
+
+        // Persist sequentially — SQLite writes serialize anyway.
+        if !args.dry_run {
+            let mut guard = dao.lock().expect("Unable to lock ExifDao");
+            for (library_id, rel_path, result) in &results {
+                match result {
+                    FilePerceptualResult::Ok(id) => {
+                        match guard.backfill_perceptual_hash(
+                            &ctx,
+                            *library_id,
+                            rel_path,
+                            Some(id.phash_64),
+                            Some(id.dhash_64),
+                        ) {
+                            Ok(_) => {
+                                total_hashed += 1;
+                                pb.inc(1);
+                            }
+                            Err(e) => {
+                                pb.println(format!("persist error for {}: {:?}", rel_path, e));
+                                total_errors += 1;
+                                // Still NULL in the DB; skip it for the rest of this run.
+                                unresolved.insert((*library_id, rel_path.clone()));
+                            }
+                        }
+                    }
+                    FilePerceptualResult::DecodeFailed => {
+                        // Persist phash_64=0/dhash_64=0 as a "tried,
+                        // unhashable" sentinel so this row leaves the
+                        // `phash_64 IS NULL` candidate set and the
+                        // backfill doesn't infinite-loop on a queue of
+                        // unbreakable formats (HEIC, RAW, CMYK JPEGs,
+                        // truncated bytes). The all-zero hash is
+                        // explicitly excluded from clustering by
+                        // is_informative_hash in duplicates.rs, so it
+                        // won't pollute group output — it just becomes
+                        // invisible to the duplicate finder.
+                        log::debug!(
+                            "perceptual decode failed for {} (lib {}); marking unhashable",
+                            rel_path,
+                            library_id
+                        );
+                        match guard.backfill_perceptual_hash(
+                            &ctx,
+                            *library_id,
+                            rel_path,
+                            Some(0),
+                            Some(0),
+                        ) {
+                            Ok(_) => {
+                                total_decode_failures += 1;
+                            }
+                            Err(e) => {
+                                pb.println(format!(
+                                    "persist error (decode-fail sentinel) for {}: {:?}",
+                                    rel_path, e
+                                ));
+                                total_errors += 1;
+                                // Sentinel was not written, so the row is still a candidate.
+                                unresolved.insert((*library_id, rel_path.clone()));
+                            }
+                        }
+                    }
+                    FilePerceptualResult::MissingOnDisk => {
+                        // Deliberately left NULL so a later run can pick the
+                        // row up once the file is back; just don't refetch it now.
+                        total_missing += 1;
+                        unresolved.insert((*library_id, rel_path.clone()));
+                    }
+                }
+            }
+        } else {
+            for (_, rel_path, result) in &results {
+                match result {
+                    FilePerceptualResult::Ok(id) => {
+                        pb.println(format!(
+                            "[dry-run] {} -> phash={:016x} dhash={:016x}",
+                            rel_path, id.phash_64, id.dhash_64
+                        ));
+                        total_hashed += 1;
+                        pb.inc(1);
+                    }
+                    FilePerceptualResult::DecodeFailed => {
+                        total_decode_failures += 1;
+                    }
+                    FilePerceptualResult::MissingOnDisk => {
+                        total_missing += 1;
+                    }
+                }
+            }
+            pb.println(format!(
+                "[dry-run] processed one batch of {}. Stopping — a real run would continue \
+                 until no NULL phash_64 image rows remain.",
+                results.len()
+            ));
+            break;
+        }
+    }
+
+    pb.finish_and_clear();
+    println!(
+        "Done. hashed={}, decode_failed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s",
+        total_hashed,
+        total_decode_failures,
+        total_missing,
+        total_errors,
+        start.elapsed().as_secs_f64()
+    );
+    if total_errors > 0 {
+        error!("Backfill completed with {} persist errors", total_errors);
+    }
+    Ok(())
+}
+
+/// Outcome of trying to hash one candidate row.
+enum FilePerceptualResult {
+    /// Both hashes were computed.
+    Ok(perceptual_hash::PerceptualIdentity),
+    /// The file exists but `perceptual_hash::compute` returned `None`.
+    DecodeFailed,
+    /// The file is absent, or the row's library id is not configured.
+    MissingOnDisk,
+}
@@ -2,10 +2,10 @@ use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Parser;
 use image_api::ai::ollama::OllamaClient;
+use image_api::bin_progress;
 use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao};
 use image_api::parsers::ical_parser::parse_ics_file;
 use log::{error, info};
-use std::sync::{Arc, Mutex};

 // Import the trait to use its methods
 use image_api::database::CalendarEventDao;
@@ -64,9 +64,11 @@ async fn main() -> Result<()> {
        None
    };

-    let inserted_count = Arc::new(Mutex::new(0));
-    let skipped_count = Arc::new(Mutex::new(0));
-    let error_count = Arc::new(Mutex::new(0));
+    let mut inserted_count = 0usize;
+    let mut skipped_count = 0usize;
+    let mut error_count = 0usize;
+
+    let pb = bin_progress::determinate(events.len() as u64, "importing");

    // Process events in batches
    // Can't use rayon with async, so process sequentially
@@ -82,7 +84,8 @@ async fn main() -> Result<()> {
            )
            && exists
        {
-            *skipped_count.lock().unwrap() += 1;
+            skipped_count += 1;
+            pb.inc(1);
            continue;
        }

@@ -101,10 +104,7 @@ async fn main() -> Result<()> {
            }) {
                Ok(emb) => Some(emb),
                Err(e) => {
-                    error!(
-                        "Failed to generate embedding for event '{}': {}",
-                        event.summary, e
-                    );
+                    pb.println(format!("embedding failed for '{}': {}", event.summary, e));
                    None
                }
            }
@@ -133,28 +133,26 @@ async fn main() -> Result<()> {
        };

        match dao_instance.store_event(&context, insert_event) {
-            Ok(_) => {
-                *inserted_count.lock().unwrap() += 1;
-                if *inserted_count.lock().unwrap() % 100 == 0 {
-                    info!("Imported {} events...", *inserted_count.lock().unwrap());
-                }
-            }
+            Ok(_) => inserted_count += 1,
            Err(e) => {
-                error!("Failed to store event '{}': {:?}", event.summary, e);
-                *error_count.lock().unwrap() += 1;
+                pb.println(format!("store failed for '{}': {:?}", event.summary, e));
+                error_count += 1;
            }
        }
+        pb.set_message(format!(
+            "inserted={} skipped={} errors={}",
+            inserted_count, skipped_count, error_count
+        ));
+        pb.inc(1);
    }

-    let final_inserted = *inserted_count.lock().unwrap();
-    let final_skipped = *skipped_count.lock().unwrap();
-    let final_errors = *error_count.lock().unwrap();
+    pb.finish_and_clear();

-    info!("\n=== Import Summary ===");
+    info!("=== Import Summary ===");
    info!("Total events found: {}", events.len());
-    info!("Successfully inserted: {}", final_inserted);
-    info!("Skipped (already exist): {}", final_skipped);
-    info!("Errors: {}", final_errors);
+    info!("Successfully inserted: {}", inserted_count);
+    info!("Skipped (already exist): {}", skipped_count);
+    info!("Errors: {}", error_count);

    if args.generate_embeddings {
        info!("Embeddings were generated for semantic search");
@@ -162,5 +160,12 @@ async fn main() -> Result<()> {
        info!("No embeddings generated (use --generate-embeddings to enable semantic search)");
    }

+    if error_count > 0 {
+        error!(
+            "Completed with {} errors — review log output above",
+            error_count
+        );
+    }
+
    Ok(())
 }
@@ -1,6 +1,7 @@
 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Parser;
+use image_api::bin_progress;
 use image_api::database::location_dao::{InsertLocationRecord, SqliteLocationHistoryDao};
 use image_api::parsers::location_json_parser::parse_location_json;
 use log::{error, info};
@@ -38,23 +39,20 @@ async fn main() -> Result<()> {

    let context = opentelemetry::Context::current();

-    let mut inserted_count = 0;
-    let mut skipped_count = 0;
-    let mut error_count = 0;
+    let mut inserted_count = 0usize;
+    let mut skipped_count = 0usize;
+    let mut error_count = 0usize;

    let mut dao_instance = SqliteLocationHistoryDao::new();
    let created_at = Utc::now().timestamp();

-    // Process in batches using batch insert for massive speedup
-    for (batch_idx, chunk) in locations.chunks(args.batch_size).enumerate() {
-        info!(
-            "Processing batch {} ({} records)...",
-            batch_idx + 1,
-            chunk.len()
-        );
+    let pb = bin_progress::determinate(locations.len() as u64, "importing");

+    // Process in batches using batch insert for massive speedup
+    for chunk in locations.chunks(args.batch_size) {
        // Convert to InsertLocationRecord
        let mut batch_inserts = Vec::with_capacity(chunk.len());
+        let mut chunk_skipped = 0usize;

        for location in chunk {
            // Skip existing check if requested (makes import much slower)
@@ -68,6 +66,7 @@ async fn main() -> Result<()> {
                && exists
            {
                skipped_count += 1;
+                chunk_skipped += 1;
                continue;
            }

@@ -89,26 +88,35 @@ async fn main() -> Result<()> {
        // Batch insert entire chunk in single transaction
        if !batch_inserts.is_empty() {
            match dao_instance.store_locations_batch(&context, batch_inserts) {
-                Ok(count) => {
-                    inserted_count += count;
-                    info!(
-                        "Imported {} locations (total: {})...",
-                        count, inserted_count
-                    );
-                }
+                Ok(count) => inserted_count += count,
                Err(e) => {
-                    error!("Failed to store batch: {:?}", e);
-                    error_count += chunk.len();
+                    pb.println(format!("batch insert failed: {:?}", e));
+                    error_count += chunk.len() - chunk_skipped;
                }
            }
        }
+
+        pb.set_message(format!(
+            "inserted={} skipped={} errors={}",
+            inserted_count, skipped_count, error_count
+        ));
+        pb.inc(chunk.len() as u64);
    }

-    info!("\n=== Import Summary ===");
+    pb.finish_and_clear();
+
+    info!("=== Import Summary ===");
    info!("Total locations found: {}", locations.len());
    info!("Successfully inserted: {}", inserted_count);
    info!("Skipped (already exist): {}", skipped_count);
    info!("Errors: {}", error_count);

+    if error_count > 0 {
+        error!(
+            "Completed with {} errors — review log output above",
+            error_count
+        );
+    }
+
    Ok(())
 }
@@ -2,9 +2,10 @@ use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Parser;
 use image_api::ai::ollama::OllamaClient;
+use image_api::bin_progress;
 use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
 use image_api::parsers::search_html_parser::parse_search_html;
-use log::{error, info, warn};
+use log::{error, info};

 // Import the trait to use its methods
 use image_api::database::SearchHistoryDao;
@@ -49,24 +50,22 @@ async fn main() -> Result<()> {
    let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
    let context = opentelemetry::Context::current();

-    let mut inserted_count = 0;
-    let mut skipped_count = 0;
-    let mut error_count = 0;
+    let mut inserted_count = 0usize;
+    let mut skipped_count = 0usize;
+    let mut error_count = 0usize;

    let mut dao_instance = SqliteSearchHistoryDao::new();
    let created_at = Utc::now().timestamp();

+    let pb = bin_progress::determinate(searches.len() as u64, "importing");
+    let total_batches = searches.len().div_ceil(args.batch_size);
+
    // Process searches in batches (embeddings are REQUIRED for searches)
    for (batch_idx, chunk) in searches.chunks(args.batch_size).enumerate() {
-        info!(
-            "Processing batch {} ({} searches)...",
-            batch_idx + 1,
-            chunk.len()
-        );
-
        // Generate embeddings for this batch
        let queries: Vec<String> = chunk.iter().map(|s| s.query.clone()).collect();

+        let pb_for_warn = pb.clone();
        let embeddings_result = tokio::task::spawn({
            let ollama_client = ollama.clone();
            async move {
@@ -76,7 +75,7 @@ async fn main() -> Result<()> {
                    match ollama_client.generate_embedding(query).await {
                        Ok(emb) => embeddings.push(Some(emb)),
                        Err(e) => {
-                            warn!("Failed to generate embedding for query '{}': {}", query, e);
+                            pb_for_warn.println(format!("embedding failed for '{}': {}", query, e));
                            embeddings.push(None);
                        }
                    }
@@ -112,10 +111,7 @@ async fn main() -> Result<()> {
                    source_file: Some(args.path.clone()),
                });
            } else {
-                error!(
-                    "Skipping search '{}' due to missing embedding",
-                    search.query
-                );
+                pb.println(format!("skipping '{}' — missing embedding", search.query));
                error_count += 1;
            }
        }
@@ -123,30 +119,41 @@ async fn main() -> Result<()> {
        // Batch insert entire chunk in single transaction
        if !batch_inserts.is_empty() {
            match dao_instance.store_searches_batch(&context, batch_inserts) {
-                Ok(count) => {
-                    inserted_count += count;
-                    info!("Imported {} searches (total: {})...", count, inserted_count);
-                }
+                Ok(count) => inserted_count += count,
                Err(e) => {
-                    error!("Failed to store batch: {:?}", e);
+                    pb.println(format!("batch insert failed: {:?}", e));
                    error_count += chunk.len();
                }
            }
        }

+        pb.set_message(format!(
+            "inserted={} skipped={} errors={}",
+            inserted_count, skipped_count, error_count
+        ));
+        pb.inc(chunk.len() as u64);
+
        // Rate limiting between batches
-        if batch_idx < searches.len() / args.batch_size {
-            info!("Waiting 500ms before next batch...");
+        if batch_idx + 1 < total_batches {
            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
        }
    }

-    info!("\n=== Import Summary ===");
+    pb.finish_and_clear();
+
+    info!("=== Import Summary ===");
    info!("Total searches found: {}", searches.len());
    info!("Successfully inserted: {}", inserted_count);
    info!("Skipped (already exist): {}", skipped_count);
    info!("Errors: {}", error_count);
    info!("All imported searches have embeddings for semantic search");

+    if error_count > 0 {
+        error!(
+            "Completed with {} errors — review log output above",
+            error_count
+        );
+    }
+
    Ok(())
 }
@@ -1,195 +0,0 @@
-use std::path::PathBuf;
-use std::sync::{Arc, Mutex};
-
-use chrono::Utc;
-use clap::Parser;
-use rayon::prelude::*;
-use walkdir::WalkDir;
-
-use image_api::database::models::InsertImageExif;
-use image_api::database::{ExifDao, SqliteExifDao};
-use image_api::exif;
-
-#[derive(Parser, Debug)]
-#[command(name = "migrate_exif")]
-#[command(about = "Extract and store EXIF data from images", long_about = None)]
-struct Args {
-    #[arg(long, help = "Skip files that already have EXIF data in database")]
-    skip_existing: bool,
-}
-
-fn main() -> anyhow::Result<()> {
-    env_logger::init();
-    dotenv::dotenv()?;
-
-    let args = Args::parse();
-    let base_path = dotenv::var("BASE_PATH")?;
-    let base = PathBuf::from(&base_path);
-
-    println!("EXIF Migration Tool");
-    println!("===================");
-    println!("Base path: {}", base.display());
-    if args.skip_existing {
-        println!("Mode: Skip existing (incremental)");
-    } else {
-        println!("Mode: Upsert (insert new, update existing)");
-    }
-    println!();
-
-    // Collect all image files that support EXIF
-    println!("Scanning for images...");
-    let image_files: Vec<PathBuf> = WalkDir::new(&base)
-        .into_iter()
-        .filter_map(|e| e.ok())
-        .filter(|e| e.file_type().is_file())
-        .filter(|e| exif::supports_exif(e.path()))
-        .map(|e| e.path().to_path_buf())
-        .collect();
-
-    println!("Found {} images to process", image_files.len());
-
-    if image_files.is_empty() {
-        println!("No EXIF-supporting images found. Exiting.");
-        return Ok(());
-    }
-
-    println!();
-    println!("Extracting EXIF data...");
-
-    // Create a thread-safe DAO
-    let dao = Arc::new(Mutex::new(SqliteExifDao::new()));
-
-    // Process in parallel using rayon
-    let results: Vec<_> = image_files
-        .par_iter()
-        .map(|path| {
-            // Create context for this processing iteration
-            let context = opentelemetry::Context::new();
-
-            let relative_path = match path.strip_prefix(&base) {
-                Ok(p) => p.to_str().unwrap().to_string(),
-                Err(_) => {
-                    eprintln!(
-                        "Error: Could not create relative path for {}",
-                        path.display()
-                    );
-                    return Err(anyhow::anyhow!("Path error"));
-                }
-            };
-
-            // Check if EXIF data already exists
-            let existing = if let Ok(mut dao_lock) = dao.lock() {
-                dao_lock.get_exif(&context, &relative_path).ok().flatten()
-            } else {
-                eprintln!("✗ {} - Failed to acquire database lock", relative_path);
-                return Err(anyhow::anyhow!("Lock error"));
-            };
-
-            // Skip if exists and skip_existing flag is set
-            if args.skip_existing && existing.is_some() {
-                return Ok(("skip".to_string(), relative_path));
-            }
-
-            match exif::extract_exif_from_path(path) {
-                Ok(exif_data) => {
-                    let timestamp = Utc::now().timestamp();
-                    let insert_exif = InsertImageExif {
-                        file_path: relative_path.clone(),
-                        camera_make: exif_data.camera_make,
-                        camera_model: exif_data.camera_model,
-                        lens_model: exif_data.lens_model,
-                        width: exif_data.width,
-                        height: exif_data.height,
-                        orientation: exif_data.orientation,
-                        gps_latitude: exif_data.gps_latitude.map(|v| v as f32),
-                        gps_longitude: exif_data.gps_longitude.map(|v| v as f32),
-                        gps_altitude: exif_data.gps_altitude.map(|v| v as f32),
-                        focal_length: exif_data.focal_length.map(|v| v as f32),
-                        aperture: exif_data.aperture.map(|v| v as f32),
-                        shutter_speed: exif_data.shutter_speed,
-                        iso: exif_data.iso,
-                        date_taken: exif_data.date_taken,
-                        created_time: existing
-                            .as_ref()
-                            .map(|e| e.created_time)
-                            .unwrap_or(timestamp),
-                        last_modified: timestamp,
-                    };
-
-                    // Store or update in database
-                    if let Ok(mut dao_lock) = dao.lock() {
-                        let result = if existing.is_some() {
-                            // Update existing record
-                            dao_lock
-                                .update_exif(&context, insert_exif)
-                                .map(|_| "update")
-                        } else {
-                            // Insert new record
-                            dao_lock.store_exif(&context, insert_exif).map(|_| "insert")
-                        };
-
-                        match result {
-                            Ok(action) => {
-                                if action == "update" {
-                                    println!("↻ {} (updated)", relative_path);
-                                } else {
-                                    println!("✓ {} (inserted)", relative_path);
-                                }
-                                Ok((action.to_string(), relative_path))
-                            }
-                            Err(e) => {
-                                eprintln!("✗ {} - Database error: {:?}", relative_path, e);
-                                Err(anyhow::anyhow!("Database error"))
-                            }
-                        }
-                    } else {
-                        eprintln!("✗ {} - Failed to acquire database lock", relative_path);
-                        Err(anyhow::anyhow!("Lock error"))
-                    }
-                }
-                Err(e) => {
-                    eprintln!("✗ {} - No EXIF data: {:?}", relative_path, e);
-                    Err(e)
-                }
-            }
-        })
-        .collect();
-
-    // Count results
-    let mut success_count = 0;
-    let mut inserted_count = 0;
-    let mut updated_count = 0;
-    let mut skipped_count = 0;
-
-    for (action, _) in results.iter().flatten() {
-        success_count += 1;
-        match action.as_str() {
-            "insert" => inserted_count += 1,
-            "update" => updated_count += 1,
-            "skip" => skipped_count += 1,
-            _ => {}
-        }
-    }
-
-    let error_count = results.len() - success_count - skipped_count;
-
-    println!();
-    println!("===================");
-    println!("Migration complete!");
-    println!("Total images processed: {}", image_files.len());
-
-    if inserted_count > 0 {
-        println!("  New EXIF records inserted: {}", inserted_count);
-    }
-    if updated_count > 0 {
-        println!("  Existing records updated: {}", updated_count);
-    }
-    if skipped_count > 0 {
-        println!("  Skipped (already exists): {}", skipped_count);
-    }
-    if error_count > 0 {
-        println!("  Errors (no EXIF data or failures): {}", error_count);
-    }
-
-    Ok(())
-}
@@ -1,16 +1,22 @@
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};

 use clap::Parser;
+use log::warn;
 use walkdir::WalkDir;

+use image_api::ai::apollo_client::ApolloClient;
 use image_api::ai::{InsightGenerator, OllamaClient, SmsApiClient};
+use image_api::bin_progress;
 use image_api::database::{
    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
    SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao,
    SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
+    connect,
 };
+use image_api::faces::{FaceDao, SqliteFaceDao};
 use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
+use image_api::libraries::{self, Library};
 use image_api::tags::{SqliteTagDao, TagDao};

 #[derive(Parser, Debug)]
@@ -19,7 +25,13 @@ use image_api::tags::{SqliteTagDao, TagDao};
    about = "Batch populate the knowledge base by running the agentic insight loop over a folder"
 )]
 struct Args {
-    /// Directory to scan. Defaults to BASE_PATH from .env
+    /// Restrict to a single library by numeric id or name. Defaults to all
+    /// configured libraries.
+    #[arg(long)]
+    library: Option<String>,
+
+    /// Optional subdirectory to scan instead of full library roots. Must be
+    /// an absolute path under one of the selected libraries.
    #[arg(long)]
    path: Option<String>,

@@ -67,10 +79,57 @@ async fn main() -> anyhow::Result<()> {

    let args = Args::parse();

-    let base_path = dotenv::var("BASE_PATH")?;
-    let scan_path = args.path.as_deref().unwrap_or(&base_path).to_string();
+    // Load libraries from the DB. Patch the placeholder row from BASE_PATH
+    // first when present so a fresh install still gets a valid root.
+    let env_base_path = dotenv::var("BASE_PATH").ok();
+    let mut seed_conn = connect();
+    if let Some(base) = env_base_path.as_deref() {
+        libraries::seed_or_patch_from_env(&mut seed_conn, base);
+    }
+    let all_libs = libraries::load_all(&mut seed_conn);
+    drop(seed_conn);
+    if all_libs.is_empty() {
+        anyhow::bail!("No libraries configured");
+    }

-    // Ollama config from env with CLI overrides
+    // Resolve --library to a concrete subset.
+    let selected_libs: Vec<Library> = match args.library.as_deref() {
+        None => all_libs.clone(),
+        Some(raw) => {
+            let raw = raw.trim();
+            let matched = if let Ok(id) = raw.parse::<i32>() {
+                all_libs.iter().find(|l| l.id == id).cloned()
+            } else {
+                all_libs.iter().find(|l| l.name == raw).cloned()
+            };
+            match matched {
+                Some(lib) => vec![lib],
+                None => anyhow::bail!("Unknown library: {}", raw),
+            }
+        }
+    };
+
+    // Resolve --path to (target_library, walk_root). When provided, the path
+    // must live under exactly one of the selected libraries.
+    let scan_targets: Vec<(Library, PathBuf)> = match args.path.as_deref() {
+        None => selected_libs
+            .iter()
+            .map(|lib| (lib.clone(), PathBuf::from(&lib.root_path)))
+            .collect(),
+        Some(raw) => {
+            let abs = PathBuf::from(raw);
+            let matched = selected_libs
+                .iter()
+                .find(|lib| abs.starts_with(&lib.root_path))
+                .cloned();
+            match matched {
+                Some(lib) => vec![(lib, abs)],
+                None => anyhow::bail!("--path {} is not under any selected library root", raw),
+            }
+        }
+    };
+
+    // Ollama config from env with CLI overrides.
    let primary_url = std::env::var("OLLAMA_PRIMARY_URL")
        .or_else(|_| std::env::var("OLLAMA_URL"))
        .unwrap_or_else(|_| "http://localhost:11434".to_string());
@@ -106,8 +165,8 @@ async fn main() -> anyhow::Result<()> {
        std::env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
    let sms_api_token = std::env::var("SMS_API_TOKEN").ok();
    let sms_client = SmsApiClient::new(sms_api_url, sms_api_token);
+    let apollo_client = ApolloClient::new(std::env::var("APOLLO_API_BASE_URL").ok());

-    // Wire up all DAOs
    let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
        Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
    let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> =
@@ -124,10 +183,20 @@ async fn main() -> anyhow::Result<()> {
        Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
    let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
        Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
+    let face_dao: Arc<Mutex<Box<dyn FaceDao>>> =
+        Arc::new(Mutex::new(Box::new(SqliteFaceDao::new())));
+    let persona_dao: Arc<Mutex<Box<dyn image_api::database::PersonaDao>>> = Arc::new(Mutex::new(
+        Box::new(image_api::database::SqlitePersonaDao::new()),
+    ));

+    // Pass the full library set so `resolve_full_path` probes every root,
+    // even when --library restricts the walk. A rel_path shared across
+    // libraries will resolve against the first existing match.
    let generator = InsightGenerator::new(
        ollama,
+        None,
        sms_client,
+        apollo_client,
        insight_dao.clone(),
        exif_dao,
        daily_summary_dao,
@@ -135,13 +204,18 @@ async fn main() -> anyhow::Result<()> {
        location_dao,
        search_dao,
        tag_dao,
+        face_dao,
        knowledge_dao,
-        base_path.clone(),
+        persona_dao,
+        all_libs.clone(),
    );

    println!("Knowledge Base Population");
    println!("=========================");
-    println!("Scan path:      {}", scan_path);
+    for (lib, root) in &scan_targets {
+        println!("Library:        {} (id={})", lib.name, lib.id);
+        println!("Scan root:      {}", root.display());
+    }
    println!("Model:          {}", primary_model);
    println!("Max iterations: {}", args.max_iterations);
    println!("Timeout:        {}s", args.timeout_secs);
@@ -170,30 +244,56 @@ async fn main() -> anyhow::Result<()> {
    );
    println!();

-    // Collect all image and video files
    let all_extensions: Vec<&str> = IMAGE_EXTENSIONS
        .iter()
        .chain(VIDEO_EXTENSIONS.iter())
        .copied()
        .collect();

-    println!("Scanning {}...", scan_path);
-    let files: Vec<PathBuf> = WalkDir::new(&scan_path)
-        .into_iter()
-        .filter_map(|e| e.ok())
-        .filter(|e| e.file_type().is_file())
-        .filter(|e| {
-            e.path()
+    // Collect (library, abs_path, rel_path) for every media file across all
+    // scan targets so the progress counter spans the full job.
+    let mut files: Vec<(Library, PathBuf, String)> = Vec::new();
+    for (lib, walk_root) in &scan_targets {
+        let lib_root = Path::new(&lib.root_path);
+        let scan_pb = bin_progress::spinner(format!("scanning {}", walk_root.display()));
+        let count_before = files.len();
+        for entry in WalkDir::new(walk_root).into_iter().filter_map(|e| e.ok()) {
+            if !entry.file_type().is_file() {
+                continue;
+            }
+            let abs_path = entry.path().to_path_buf();
+            let ext_ok = abs_path
                .extension()
                .and_then(|ext| ext.to_str())
                .map(|ext| all_extensions.contains(&ext.to_lowercase().as_str()))
-                .unwrap_or(false)
-        })
-        .map(|e| e.path().to_path_buf())
-        .collect();
+                .unwrap_or(false);
+            if !ext_ok {
+                continue;
+            }
+            let rel = match abs_path.strip_prefix(lib_root) {
+                Ok(p) => p.to_string_lossy().replace('\\', "/"),
+                Err(_) => {
+                    warn!(
+                        "{} is not under library root {}; skipping",
+                        abs_path.display(),
+                        lib_root.display()
+                    );
+                    continue;
+                }
+            };
+            files.push((lib.clone(), abs_path, rel));
+            scan_pb.inc(1);
+        }
+        let added = files.len() - count_before;
+        scan_pb.finish_with_message(format!(
+            "scanned {} ({} media files)",
+            walk_root.display(),
+            added
+        ));
+    }

    let total = files.len();
-    println!("Found {} files\n", total);
+    println!("\nTotal files to consider: {}\n", total);

    if total == 0 {
        println!("Nothing to process.");
@@ -205,35 +305,29 @@ async fn main() -> anyhow::Result<()> {
    let mut skipped = 0usize;
    let mut errors = 0usize;

-    for (i, path) in files.iter().enumerate() {
-        let relative = match path.strip_prefix(&base_path) {
-            Ok(p) => p.to_string_lossy().replace('\\', "/"),
-            Err(_) => path.to_string_lossy().replace('\\', "/"),
-        };
+    let pb = bin_progress::determinate(total as u64, "");

-        let prefix = format!("[{}/{}]", i + 1, total);
+    for (lib, _abs_path, relative) in files.iter() {
+        pb.set_message(format!("{}: {}", lib.name, relative));

-        // Check for existing insight unless --reprocess
        if !args.reprocess {
            let has_insight = insight_dao
                .lock()
                .unwrap()
-                .get_insight(&cx, &relative)
+                .get_insight(&cx, relative)
                .unwrap_or(None)
                .is_some();

            if has_insight {
-                println!("{} skip  {}", prefix, relative);
                skipped += 1;
+                pb.inc(1);
                continue;
            }
        }

-        println!("{} start {}", prefix, relative);
-
        match generator
            .generate_agentic_insight_for_photo(
-                &relative,
+                relative,
                args.model.clone(),
                None,
                args.num_ctx,
@@ -242,20 +336,25 @@ async fn main() -> anyhow::Result<()> {
                args.top_k,
                args.min_p,
                args.max_iterations,
+                None,
+                Vec::new(),
+                Vec::new(),
+                1, // operator user_id — populate_knowledge is single-user offline tool
+                "default".to_string(),
            )
            .await
        {
-            Ok(_) => {
-                println!("{} done  {}", prefix, relative);
-                processed += 1;
-            }
+            Ok(_) => processed += 1,
            Err(e) => {
-                eprintln!("{} error {} — {:?}", prefix, relative, e);
+                pb.println(format!("error  {}: {} — {:?}", lib.name, relative, e));
                errors += 1;
            }
        }
+        pb.inc(1);
    }

+    pb.finish_and_clear();
+
    println!();
    println!("=========================");
    println!("Complete");
@@ -0,0 +1,250 @@
+//! Probe binary for RAM++ auto-tagging.
+//!
+//! No DB writes. Walks a library's `image_exif` rows, sends a sample
+//! through Apollo's `/api/internal/tags/auto`, and prints `(path, tags)`
+//! to stdout so the operator can eyeball whether the model's vocabulary
+//! and threshold defaults are appropriate for this library before
+//! committing to the persistence phase (new table, per-tick drain, UI).
+//!
+//! Usage:
+//!     cargo run --release --bin probe_auto_tags -- \
+//!         --library 1 --limit 50 --threshold 0.7
+//!
+//! Env: standard ImageApi `.env`. Requires either
+//! `APOLLO_TAG_API_BASE_URL` or `APOLLO_API_BASE_URL` to be set
+//! (otherwise the client is disabled and the probe bails).
+
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Mutex};
+use std::time::Instant;
+
+use clap::Parser;
+use log::{info, warn};
+
+use image_api::ai::tag_client::{TagClient, TagDetectError, TagMeta};
+use image_api::database::{ExifDao, SqliteExifDao, connect};
+use image_api::exif;
+use image_api::file_types;
+use image_api::libraries::{self, Library};
+
+// Command-line arguments for the probe, parsed by clap's derive API.
+// NOTE(review): the `///` comments on the fields below are presumably what
+// clap renders as `--help` text, so rewording them changes user-visible
+// output — confirm before editing.
+#[derive(Parser, Debug)]
+#[command(name = "probe_auto_tags")]
+#[command(about = "Print RAM++ auto-tags for a sample of image_exif rows")]
+struct Args {
+    // Required: there is no default, and `main` errors out when no loaded
+    // library has this id.
+    /// Library id to sample from.
+    #[arg(long)]
+    library: i32,
+
+    // Counts every file actually sent to the tag endpoint, including calls
+    // that come back as permanent/transient failures (see `main`).
+    /// Max files to probe. The binary scans more rows internally because
+    /// non-image rows (videos, junk) are skipped client-side.
+    #[arg(long, default_value_t = 25)]
+    limit: usize,
+
+    // Forwarded on every request as `TagMeta::threshold`.
+    /// Per-call threshold sent to Apollo. Overrides the engine default.
+    /// Lower = more tags per photo, more noise. 0.5–0.75 is the useful
+    /// sweep range for ram_plus_swin_large_14m.
+    #[arg(long, default_value_t = 0.65)]
+    threshold: f32,
+
+    // Signed because it is handed straight to the paginated DAO call in
+    // `main`, which takes an i64 offset.
+    // NOTE(review): negative values are not rejected at parse time.
+    /// Offset into the library's rel_path listing (sorted by id ASC).
+    /// Bump on re-runs to sample a different slice.
+    #[arg(long, default_value_t = 0)]
+    offset: i64,
+
+    // Compared against the running count of fetched rows, which grows one
+    // whole page at a time, so the cap is only checked between pages.
+    // NOTE(review): negative values are not rejected at parse time.
+    /// How many DB rows to scan before giving up on hitting the limit.
+    /// Useful when a library is mostly videos.
+    #[arg(long, default_value_t = 2000)]
+    max_scan: i64,
+}
+
+/// Load the bytes that will be uploaded for tagging.
+///
+/// Local copy of `face_watch::read_image_bytes_for_detect`: that function is
+/// `pub(crate)` and therefore not importable from a binary target, and for a
+/// throwaway probe duplicating it is preferable to widening its visibility.
+///
+/// When `file_types::needs_ffmpeg_thumbnail` flags the path and an embedded
+/// JPEG preview can be extracted, the preview bytes are returned; in every
+/// other case the file is read from disk as-is.
+///
+/// # Errors
+/// Returns the `std::fs::read` error when the fallback read fails.
+fn read_image_bytes(path: &Path) -> std::io::Result<Vec<u8>> {
+    // The preview extractor is only consulted for flagged files.
+    let embedded = if file_types::needs_ffmpeg_thumbnail(path) {
+        exif::extract_embedded_jpeg_preview(path)
+    } else {
+        None
+    };
+    match embedded {
+        Some(jpeg) => Ok(jpeg),
+        None => std::fs::read(path),
+    }
+}
+
+/// Entry point: pages through one library's rel_paths, sends each on-disk
+/// image to the tag endpoint, prints the returned tags per file, and ends
+/// with a run summary (counts, elapsed time, throughput).
+///
+/// # Errors
+/// Fails when the tag client is disabled (at start-up or mid-run), when the
+/// requested library id is not among the loaded libraries, or when a page of
+/// rel_paths cannot be listed. Unreadable files are logged and skipped;
+/// permanent/transient tag failures are printed and counted, and the run
+/// continues.
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    // Load `.env` before initialising the logger: env_logger reads `RUST_LOG`
+    // when it is initialised, so a value that lives only in `.env` would
+    // otherwise be ignored and the info!/warn! diagnostics below filtered out.
+    dotenv::dotenv().ok();
+    env_logger::init();
+
+    let args = Args::parse();
+
+    let client = TagClient::from_env();
+    if !client.is_enabled() {
+        anyhow::bail!(
+            "TagClient disabled: set APOLLO_TAG_API_BASE_URL or APOLLO_API_BASE_URL in .env"
+        );
+    }
+
+    // Quick health probe so a misconfiguration is visible up front. This is
+    // best-effort: a failed probe is only logged and the run continues.
+    match client.health().await {
+        Ok(h) => info!(
+            "tag engine: loaded={} device={} model={} threshold_default={}",
+            h.loaded, h.device, h.model_version, h.threshold
+        ),
+        Err(e) => warn!("health probe failed (continuing): {e}"),
+    }
+
+    // NOTE(review): `seed_or_patch_from_env` presumably inserts or patches a
+    // library row from BASE_PATH — confirm that is acceptable given the
+    // "No DB writes" statement in this file's module docs.
+    let mut seed_conn = connect();
+    if let Some(base) = dotenv::var("BASE_PATH").ok().as_deref() {
+        libraries::seed_or_patch_from_env(&mut seed_conn, base);
+    }
+    let libs = libraries::load_all(&mut seed_conn);
+    drop(seed_conn);
+    let lib: Library = libs
+        .into_iter()
+        .find(|l| l.id == args.library)
+        .ok_or_else(|| anyhow::anyhow!("library id {} not found", args.library))?;
+    info!("probing library #{} ({}) at {}", lib.id, lib.name, lib.root_path);
+
+    let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
+    let ctx = opentelemetry::Context::new();
+
+    // Paginate through (id, rel_path) for this library, filter to images
+    // on disk, take `limit`. Page size is tuned so we don't slam the DB
+    // when a library is video-heavy.
+    const PAGE: i64 = 500;
+    let mut offset = args.offset;
+    let mut scanned: i64 = 0;
+    let mut probed = 0usize;
+    let mut ok_count = 0usize;
+    let mut empty_count = 0usize;
+    let mut perm_fail = 0usize;
+    let mut transient_fail = 0usize;
+    let started = Instant::now();
+    let root = PathBuf::from(&lib.root_path);
+
+    'outer: loop {
+        // The cap is checked once per page, so the final page may take
+        // `scanned` past `max_scan` by up to PAGE - 1 rows.
+        if scanned >= args.max_scan {
+            warn!(
+                "scan cap ({}) reached before hitting limit ({}); bump --max-scan to scan deeper",
+                args.max_scan, args.limit
+            );
+            break;
+        }
+        // Scope the guard so the std mutex is released before any `.await`.
+        let rows = {
+            let mut guard = dao.lock().expect("dao lock");
+            guard
+                .list_rel_paths_for_library_page(&ctx, lib.id, PAGE, offset)
+                .map_err(|e| anyhow::anyhow!("list rel_paths: {:?}", e))?
+        };
+        if rows.is_empty() {
+            info!("no more rows after offset {}", offset);
+            break;
+        }
+        offset += rows.len() as i64;
+        scanned += rows.len() as i64;
+
+        for (_id, rel_path) in rows {
+            if probed >= args.limit {
+                break 'outer;
+            }
+            let abs = root.join(&rel_path);
+            // Skip non-images and videos at the path level — same logic
+            // the face backlog drain uses, just inlined.
+            if !file_types::is_image_file(&abs) {
+                continue;
+            }
+            if !abs.exists() {
+                continue;
+            }
+            // Unreadable files are logged and skipped without counting
+            // towards `probed`, since no request was made for them.
+            let bytes = match read_image_bytes(&abs) {
+                Ok(b) => b,
+                Err(e) => {
+                    warn!("read {rel_path}: {e}");
+                    continue;
+                }
+            };
+            // The probe doesn't need a real content_hash — Apollo only
+            // logs it. Pass an empty marker so we don't trip on no-hash
+            // image_exif rows.
+            let meta = TagMeta {
+                content_hash: String::new(),
+                library_id: lib.id,
+                rel_path: rel_path.clone(),
+                threshold: Some(args.threshold),
+            };
+
+            // Every completed call — success or failure — counts towards
+            // `probed`, so `--limit` bounds the number of requests made.
+            let call_start = Instant::now();
+            match client.auto_tag(bytes, meta).await {
+                Ok(resp) => {
+                    probed += 1;
+                    if resp.tags.is_empty() {
+                        empty_count += 1;
+                        println!(
+                            "[{:>3}] (no tags) {}ms  {}",
+                            probed, resp.duration_ms, rel_path
+                        );
+                    } else {
+                        ok_count += 1;
+                        let preview = resp
+                            .tags
+                            .iter()
+                            .map(|t| format!("{}({:.2})", t.name, t.confidence))
+                            .collect::<Vec<_>>()
+                            .join(", ");
+                        println!(
+                            "[{:>3}] {} tags {}ms  {}\n      {}",
+                            probed,
+                            resp.tags.len(),
+                            resp.duration_ms,
+                            rel_path,
+                            preview
+                        );
+                    }
+                }
+                Err(TagDetectError::Permanent(e)) => {
+                    probed += 1;
+                    perm_fail += 1;
+                    println!(
+                        "[{:>3}] PERMANENT FAIL ({:>4}ms) {}\n      {}",
+                        probed,
+                        call_start.elapsed().as_millis(),
+                        rel_path,
+                        e
+                    );
+                }
+                Err(TagDetectError::Transient(e)) => {
+                    probed += 1;
+                    transient_fail += 1;
+                    println!(
+                        "[{:>3}] TRANSIENT FAIL ({:>4}ms) {}\n      {}",
+                        probed,
+                        call_start.elapsed().as_millis(),
+                        rel_path,
+                        e
+                    );
+                }
+                Err(TagDetectError::Disabled) => {
+                    anyhow::bail!("tag client became disabled mid-run; impossible");
+                }
+            }
+        }
+    }
+
+    let elapsed = started.elapsed();
+    println!();
+    println!("── summary ───────────────────────────────────────");
+    println!("scanned rows         : {scanned}");
+    println!("probed files         : {probed}");
+    println!("  with tags          : {ok_count}");
+    println!("  empty (no tags)    : {empty_count}");
+    println!("  permanent failures : {perm_fail}");
+    println!("  transient failures : {transient_fail}");
+    println!("elapsed              : {:.1}s", elapsed.as_secs_f32());
+    if probed > 0 {
+        // Clamp the divisor so a near-instant run cannot divide by ~zero.
+        println!(
+            "throughput           : {:.2} photos/s",
+            probed as f32 / elapsed.as_secs_f32().max(0.001)
+        );
+    }
+    Ok(())
+}
@@ -1,7 +1,10 @@
 use anyhow::Result;
 use chrono::NaiveDate;
 use clap::Parser;
-use image_api::ai::{OllamaClient, SmsApiClient, strip_summary_boilerplate};
+use image_api::ai::{
+    EMBEDDING_MODEL, OllamaClient, SmsApiClient, build_daily_summary_prompt,
+    strip_summary_boilerplate, user_display_name,
+};
 use image_api::database::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
 use std::env;
 use std::sync::{Arc, Mutex};
@@ -25,6 +28,26 @@ struct Args {
    #[arg(short, long)]
    model: Option<String>,

+    /// Context window size passed as Ollama `num_ctx`. Omit for server default.
+    #[arg(long)]
+    num_ctx: Option<i32>,
+
+    /// Sampling temperature. Omit for server default.
+    #[arg(long)]
+    temperature: Option<f32>,
+
+    /// Top-p (nucleus) sampling. Omit for server default.
+    #[arg(long)]
+    top_p: Option<f32>,
+
+    /// Top-k sampling. Omit for server default.
+    #[arg(long)]
+    top_k: Option<i32>,
+
+    /// Min-p sampling. Omit for server default.
+    #[arg(long)]
+    min_p: Option<f32>,
+
    /// Test mode: Generate but don't save to database (shows output only)
    #[arg(short = 't', long, default_value_t = false)]
    test_mode: bool,
@@ -86,12 +109,28 @@ async fn main() -> Result<()> {
            .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string())
    });

-    let ollama = OllamaClient::new(
+    let mut ollama = OllamaClient::new(
        ollama_primary_url,
        ollama_fallback_url.clone(),
        model_to_use.clone(),
        Some(model_to_use), // Use same model for fallback
    );
+    if let Some(ctx) = args.num_ctx {
+        ollama.set_num_ctx(Some(ctx));
+    }
+    if args.temperature.is_some()
+        || args.top_p.is_some()
+        || args.top_k.is_some()
+        || args.min_p.is_some()
+    {
+        ollama.set_sampling_params(args.temperature, args.top_p, args.top_k, args.min_p);
+    }
+
+    // Surface what's actually configured so comparison runs are auditable.
+    println!(
+        "num_ctx={:?} temperature={:?} top_p={:?} top_k={:?} min_p={:?}",
+        args.num_ctx, args.temperature, args.top_p, args.top_k, args.min_p
+    );

    let sms_api_url =
        env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
@@ -160,9 +199,14 @@ async fn main() -> Result<()> {
        println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");

        if args.verbose {
+            let user_name = user_display_name();
            println!("\nMessage preview:");
            for (i, msg) in messages.iter().take(3).enumerate() {
-                let sender = if msg.is_sent { "Me" } else { &msg.contact };
+                let sender: &str = if msg.is_sent {
+                    &user_name
+                } else {
+                    &msg.contact
+                };
                let preview = msg.body.chars().take(60).collect::<String>();
                println!("  {}. {}: {}...", i + 1, sender, preview);
            }
@@ -172,64 +216,11 @@ async fn main() -> Result<()> {
            println!();
        }

-        // Format messages for LLM
-        let messages_text: String = messages
-            .iter()
-            .take(200)
-            .map(|m| {
-                if m.is_sent {
-                    format!("Me: {}", m.body)
-                } else {
-                    format!("{}: {}", m.contact, m.body)
-                }
-            })
-            .collect::<Vec<_>>()
-            .join("\n");
-
-        let prompt = format!(
-            r#"Summarize this day's conversation between me and {}.
-
-CRITICAL FORMAT RULES:
- Do NOT start with "Based on the conversation..." or "Here is a summary..." or similar preambles
- Do NOT repeat the date at the beginning
- Start DIRECTLY with the content - begin with a person's name or action
- Write in past tense, as if recording what happened
-
-NARRATIVE (3-5 sentences):
- What specific topics, activities, or events were discussed?
- What places, people, or organizations were mentioned?
- What plans were made or decisions discussed?
- Clearly distinguish between what "I" did versus what {} did
-
-KEYWORDS (comma-separated):
-5-10 specific keywords that capture this conversation's unique content:
- Proper nouns (people, places, brands)
- Specific activities ("drum corps audition" not just "music")
- Distinctive terms that make this day unique
-
-Date: {} ({})
-Messages:
-{}
-
-YOUR RESPONSE (follow this format EXACTLY):
-Summary: [Start directly with content, NO preamble]
-
-Keywords: [specific, unique terms]"#,
-            args.contact,
-            args.contact,
-            date.format("%B %d, %Y"),
-            weekday,
-            messages_text
-        );
+        let (prompt, system_prompt) = build_daily_summary_prompt(&args.contact, date, messages);

        println!("Generating summary...");

-        let summary = ollama
-            .generate(
-                &prompt,
-                Some("You are a conversation summarizer. Create clear, factual summaries with precise subject attribution AND extract distinctive keywords. Focus on specific, unique terms that differentiate this conversation from others."),
-            )
-            .await?;
+        let summary = ollama.generate(&prompt, Some(system_prompt)).await?;

        println!("\n📝 GENERATED SUMMARY:");
        println!("─────────────────────────────────────────");
@@ -256,8 +247,7 @@ Keywords: [specific, unique terms]"#,
                message_count: messages.len() as i32,
                embedding,
                created_at: chrono::Utc::now().timestamp(),
-                // model_version: "nomic-embed-text:v1.5".to_string(),
-                model_version: "mxbai-embed-large:335m".to_string(),
+                model_version: EMBEDDING_MODEL.to_string(),
            };

            let mut dao = summary_dao.lock().expect("Unable to lock DailySummaryDao");
@@ -0,0 +1,34 @@
+//! Shared progress-bar styling for the utility binaries. Centralised so every
+//! `cargo run --bin ...` tool gets the same look and feel.
+
+use indicatif::{ProgressBar, ProgressStyle};
+
+/// indicatif template for bars with a known total: spinner, elapsed time, a
+/// wide bar, position/length, percent, rate, ETA, then the caller's message.
+/// The trailing `\` is a string continuation, so the newline and the next
+/// line's leading whitespace are not part of the template.
+const DETERMINATE_TEMPLATE: &str = "{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] \
+     {human_pos}/{human_len} ({percent}%) {per_sec} eta {eta} {msg}";
+
+/// indicatif template for open-ended spinners: spinner, elapsed time,
+/// position, rate, and message — no bar, length, percent, or ETA.
+const SPINNER_TEMPLATE: &str = "{spinner:.green} [{elapsed_precise}] {human_pos} {per_sec} {msg}";
+
+/// Build a progress bar for work whose size is known before it starts.
+///
+/// `total` is the bar's length and `message` is the initial trailing message.
+/// The bar uses `DETERMINATE_TEMPLATE` with `"=> "` as its fill characters.
+///
+/// # Panics
+/// Panics if the hard-coded template fails to parse.
+pub fn determinate(total: u64, message: impl Into<String>) -> ProgressBar {
+    let bar = ProgressBar::new(total);
+    let style = ProgressStyle::with_template(DETERMINATE_TEMPLATE)
+        .expect("hard-coded template parses")
+        .progress_chars("=> ");
+    bar.set_style(style);
+    bar.set_message(message.into());
+    bar
+}
+
+/// Build a spinner for work with no known total (e.g. paginated DB scans
+/// that loop until a page comes back empty).
+///
+/// The spinner uses `SPINNER_TEMPLATE`, which shows throughput via
+/// `{per_sec}`. A steady 120 ms tick is enabled so it keeps animating even
+/// when the work itself arrives in bursts.
+///
+/// # Panics
+/// Panics if the hard-coded template fails to parse.
+pub fn spinner(message: impl Into<String>) -> ProgressBar {
+    let tick_every = std::time::Duration::from_millis(120);
+    let bar = ProgressBar::new_spinner();
+    let style =
+        ProgressStyle::with_template(SPINNER_TEMPLATE).expect("hard-coded template parses");
+    bar.set_style(style);
+    bar.set_message(message.into());
+    bar.enable_steady_tick(tick_every);
+    bar
+}
@@ -1,8 +1,9 @@
+use crate::bin_progress;
 use crate::cleanup::database_updater::DatabaseUpdater;
 use crate::cleanup::types::{CleanupConfig, CleanupStats};
 use crate::file_types::IMAGE_EXTENSIONS;
 use anyhow::Result;
-use log::{error, warn};
+use log::error;
 use std::path::PathBuf;

 // All supported image extensions to try
@@ -25,15 +26,17 @@ pub fn resolve_missing_files(

    stats.files_checked = all_paths.len();

-    println!("Checking file existence...");
    let mut missing_count = 0;
    let mut resolved_count = 0;

+    let pb = bin_progress::determinate(stats.files_checked as u64, "checking");
+
    for path_str in all_paths {
        let full_path = config.base_path.join(&path_str);

        // Check if file exists
        if full_path.exists() {
+            pb.inc(1);
            continue;
        }

@@ -43,16 +46,16 @@ pub fn resolve_missing_files(
        // Try to find the file with different extensions
        match find_file_with_alternative_extension(&config.base_path, &path_str) {
            Some(new_path_str) => {
-                println!(
-                    "✓ {} → found as {} {}",
+                pb.println(format!(
+                    "✓ {} → found as {}{}",
                    path_str,
                    new_path_str,
                    if config.dry_run {
-                        "(dry-run, not updated)"
+                        " (dry-run, not updated)"
                    } else {
                        ""
                    }
-                );
+                ));

                if !config.dry_run {
                    // Update database
@@ -71,11 +74,18 @@ pub fn resolve_missing_files(
                }
            }
            None => {
-                warn!("✗ {} → not found with any extension", path_str);
+                pb.println(format!("✗ {} — not found with any extension", path_str));
            }
        }
+        pb.set_message(format!(
+            "missing={} resolved={}",
+            missing_count, resolved_count
+        ));
+        pb.inc(1);
    }

+    pb.finish_and_clear();
+
    println!("\nResults:");
    println!("- Files checked: {}", stats.files_checked);
    println!("- Missing files: {}", missing_count);
@@ -1,7 +1,9 @@
+use crate::bin_progress;
 use crate::cleanup::database_updater::DatabaseUpdater;
 use crate::cleanup::file_type_detector::{detect_file_type, should_rename};
 use crate::cleanup::types::{CleanupConfig, CleanupStats};
 use anyhow::Result;
+use indicatif::ProgressBar;
 use log::{error, warn};
 use std::fs;
 use std::path::{Path, PathBuf};
@@ -32,16 +34,20 @@ pub fn validate_file_types(
    println!("Files found: {}\n", files.len());
    stats.files_checked = files.len();

-    println!("Detecting file types...");
    let mut mismatches_found = 0;
    let mut files_renamed = 0;
    let mut user_skipped = 0;

+    let pb = bin_progress::determinate(files.len() as u64, "detecting");
+
    for file_path in files {
        // Get current extension
        let current_ext = match file_path.extension() {
            Some(ext) => ext.to_str().unwrap_or(""),
-            None => continue, // Skip files without extensions
+            None => {
+                pb.inc(1);
+                continue;
+            }
        };

        // Detect actual file type
@@ -57,14 +63,15 @@ pub fn validate_file_types(
                        Ok(rel) => rel.to_str().unwrap_or(""),
                        Err(_) => {
                            error!("Failed to get relative path for {:?}", file_path);
+                            pb.inc(1);
                            continue;
                        }
                    };

-                    println!("\nFile type mismatch:");
-                    println!("  Path: {}", relative_path);
-                    println!("  Current: .{}", current_ext);
-                    println!("  Actual: .{}", detected_ext);
+                    pb.println(format!(
+                        "mismatch: {}  .{} → .{}",
+                        relative_path, current_ext, detected_ext
+                    ));

                    // Calculate new path
                    let new_file_path = file_path.with_extension(&detected_ext);
@@ -72,6 +79,7 @@ pub fn validate_file_types(
                        Ok(rel) => rel.to_str().unwrap_or(""),
                        Err(_) => {
                            error!("Failed to get new relative path for {:?}", new_file_path);
+                            pb.inc(1);
                            continue;
                        }
                    };
@@ -83,22 +91,26 @@ pub fn validate_file_types(
                            "Destination exists for {}: {}",
                            relative_path, new_relative_path
                        ));
+                        pb.inc(1);
                        continue;
                    }

                    // Determine if we should proceed
                    let should_proceed = if config.dry_run {
-                        println!("  (dry-run mode - would rename to {})", new_relative_path);
+                        pb.println(format!(
+                            "  (dry-run — would rename to {})",
+                            new_relative_path
+                        ));
                        false
                    } else if skip_all {
-                        println!("  Skipped (skip all)");
                        user_skipped += 1;
                        false
                    } else if auto_fix_all {
                        true
                    } else {
-                        // Interactive prompt
-                        match prompt_for_rename(new_relative_path) {
+                        // Interactive prompt — suspend the bar so the prompt is visible.
+                        let decision = pb.suspend(|| prompt_for_rename(new_relative_path, &pb));
+                        match decision {
                            RenameDecision::Yes => true,
                            RenameDecision::No => {
                                user_skipped += 1;
@@ -120,8 +132,6 @@ pub fn validate_file_types(
                        // Rename the file
                        match fs::rename(&file_path, &new_file_path) {
                            Ok(_) => {
-                                println!("✓ Renamed file");
-
                                // Update database
                                match db_updater.update_file_path(relative_path, new_relative_path)
                                {
@@ -160,8 +170,15 @@ pub fn validate_file_types(
                warn!("Failed to detect type for {:?}: {:?}", file_path, e);
            }
        }
+        pb.set_message(format!(
+            "mismatches={} renamed={} skipped={}",
+            mismatches_found, files_renamed, user_skipped
+        ));
+        pb.inc(1);
    }

+    pb.finish_and_clear();
+
    println!("\nResults:");
    println!("- Files scanned: {}", stats.files_checked);
    println!("- Mismatches found: {}", mismatches_found);
@@ -195,8 +212,9 @@ enum RenameDecision {
    SkipAll,
 }

-/// Prompt the user for rename decision
-fn prompt_for_rename(new_path: &str) -> RenameDecision {
+/// Prompt the user for rename decision. Caller must `pb.suspend` so the
+/// progress bar isn't redrawing over the prompt.
+fn prompt_for_rename(new_path: &str, _pb: &ProgressBar) -> RenameDecision {
    println!("\nRename to {}?", new_path);
    println!("  [y] Yes");
    println!("  [n] No (default)");
@@ -0,0 +1,143 @@
+//! Content-based file identity used to dedup derivative outputs
+//! (thumbnails, HLS segments) across libraries.
+//!
+//! Hashes are computed with blake3 streaming so that network-mounted
+//! libraries don't need to load whole files into memory. The result is
+//! a 64-character hex string; we shard derivative directories on the
+//! first two characters to keep any single directory's fanout bounded.
+
+use std::fs::File;
+use std::io::{self, Read};
+use std::path::{Path, PathBuf};
+
+/// Size of the read buffer used when streaming a file through blake3.
+/// 1 MiB trades a bit of RSS for fewer syscalls on slow network mounts.
+const HASH_BUFFER_SIZE: usize = 1024 * 1024;
+
+/// Hash identity of a file, together with its byte length.
+///
+/// Built by [`compute`]. The derived `PartialEq` compares both fields, so
+/// two identities are equal only when digest and size both match.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct FileIdentity {
+    /// Hex-encoded blake3 digest of the file's contents.
+    pub content_hash: String,
+    /// File length in bytes, stored signed.
+    /// NOTE(review): presumably `i64` to match the SQLite column type — confirm.
+    pub size_bytes: i64,
+}
+
+/// Stream a file through blake3 and return the hex-encoded digest + size.
+///
+/// The file is read in `HASH_BUFFER_SIZE` chunks so it is never held in
+/// memory whole. `size_bytes` is taken from the open handle's metadata,
+/// not from the number of bytes fed to the hasher.
+///
+/// # Errors
+///
+/// Returns any I/O error raised while opening, stat-ing, or reading the
+/// file. Reads that fail with `ErrorKind::Interrupted` are retried rather
+/// than surfaced, as the `Read::read` contract recommends. A file length
+/// that does not fit in `i64` is reported as `ErrorKind::InvalidData`
+/// instead of silently wrapping to a negative size.
+pub fn compute(path: &Path) -> io::Result<FileIdentity> {
+    let mut file = File::open(path)?;
+    let size_bytes = i64::try_from(file.metadata()?.len())
+        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+
+    let mut hasher = blake3::Hasher::new();
+    let mut buf = vec![0u8; HASH_BUFFER_SIZE];
+    loop {
+        match file.read(&mut buf) {
+            // Zero bytes read means end of file.
+            Ok(0) => break,
+            Ok(n) => {
+                hasher.update(&buf[..n]);
+            }
+            // A signal landed mid-syscall; nothing was consumed, so retry.
+            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+            Err(e) => return Err(e),
+        }
+    }
+
+    Ok(FileIdentity {
+        content_hash: hasher.finalize().to_hex().to_string(),
+        size_bytes,
+    })
+}
+
+/// Content-addressed thumbnail location:
+/// `<thumbs_dir>/<hash[..2]>/<hash>.jpg`.
+///
+/// Generation and serving both consult this first; the legacy mirrored
+/// path acts as a fallback for pre-backfill rows.
+pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
+    let file_name = format!("{}.jpg", hash);
+    let mut location = thumbs_dir.join(shard_prefix(hash));
+    location.push(file_name);
+    location
+}
+
+/// Content-addressed HLS output directory:
+/// `<video_dir>/<hash[..2]>/<hash>/`.
+///
+/// The playlist lives at `playlist.m3u8` inside this directory and its
+/// segments are co-located so HLS relative references Just Work.
+///
+/// Allow-dead until Branch B/C rewires the HLS pipeline to use it; the
+/// helper lives here today so Branch A's path layout decisions stay
+/// adjacent to thumbnail/legacy ones.
+#[allow(dead_code)]
+pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf {
+    let mut output_dir = video_dir.join(shard_prefix(hash));
+    output_dir.push(hash);
+    output_dir
+}
+
+/// Library-scoped legacy mirrored path:
+/// `<derivative_dir>/<library_id>/<rel_path>`. Used as the fallback when
+/// `content_hash` isn't available — the library prefix prevents the
+/// "lib1 wrote `vacation/IMG.jpg` first, lib2 sees thumb_path.exists()
+/// and serves the wrong image" failure mode.
+///
+/// Existing single-library deployments may already have thumbnails at the
+/// bare-legacy `<derivative_dir>/<rel_path>` shape; serving code is
+/// expected to check both this scoped path and the bare-legacy path so
+/// nothing 404s during the transition.
+///
+/// `rel_path` is expected to be relative. If it carries a root or a
+/// drive/UNC prefix (e.g. `/etc/passwd`), those leading components are
+/// dropped so the result still lands under
+/// `<derivative_dir>/<library_id>/`. A plain `join` would otherwise
+/// discard the base and return the rooted path as-is. `..` components
+/// are not normalised here.
+pub fn library_scoped_legacy_path(
+    derivative_dir: &Path,
+    library_id: i32,
+    rel_path: impl AsRef<Path>,
+) -> PathBuf {
+    use std::path::Component;
+
+    let rel = rel_path.as_ref();
+    let mut scoped = derivative_dir.join(library_id.to_string());
+
+    // Components that make `PathBuf::push` replace the path instead of
+    // appending to it.
+    let is_anchor =
+        |part: &Component<'_>| matches!(part, Component::Prefix(_) | Component::RootDir);
+
+    if rel.components().any(|part| is_anchor(&part)) {
+        scoped.extend(rel.components().filter(|part| !is_anchor(part)));
+    } else {
+        // Already relative: append verbatim so the output is unchanged
+        // from the plain-join behaviour.
+        scoped.push(rel);
+    }
+    scoped
+}
+
+/// First two characters of `hash`, used as the shard directory name.
+/// Hashes shorter than three characters are returned whole. The cut is
+/// made on a character boundary, so non-ASCII input cannot panic.
+fn shard_prefix(hash: &str) -> &str {
+    match hash.char_indices().nth(2) {
+        // Byte offset where the third character starts = end of the prefix.
+        Some((third_char_start, _)) => &hash[..third_char_start],
+        None => hash,
+    }
+}
+
+// Unit tests for hashing and the derivative path layout helpers.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Two files with the same bytes must produce the same identity
+    // (digest and size), regardless of file name.
+    #[test]
+    fn identical_content_yields_identical_hash() {
+        let dir = tempfile::tempdir().unwrap();
+        let a = dir.path().join("a.bin");
+        let b = dir.path().join("b.bin");
+        std::fs::write(&a, b"hello world").unwrap();
+        std::fs::write(&b, b"hello world").unwrap();
+        let ha = compute(&a).unwrap();
+        let hb = compute(&b).unwrap();
+        assert_eq!(ha, hb);
+        // "hello world" is 11 bytes.
+        assert_eq!(ha.size_bytes, 11);
+    }
+
+    // Same length, different bytes: identities must differ.
+    #[test]
+    fn different_content_yields_different_hash() {
+        let dir = tempfile::tempdir().unwrap();
+        let a = dir.path().join("a.bin");
+        let b = dir.path().join("b.bin");
+        std::fs::write(&a, b"aaa").unwrap();
+        std::fs::write(&b, b"bbb").unwrap();
+        assert_ne!(compute(&a).unwrap(), compute(&b).unwrap());
+    }
+
+    // Both derivative layouts use the first two characters of the hash
+    // as the shard directory.
+    #[test]
+    fn derivative_paths_shard_by_first_two_hex() {
+        let thumbs = Path::new("/tmp/thumbs");
+        let p = thumbnail_path(thumbs, "abcdef0123");
+        assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg"));
+
+        let video = Path::new("/tmp/video");
+        let d = hls_dir(video, "1234deadbeef");
+        assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef"));
+    }
+
+    // The legacy mirrored path is namespaced by library id.
+    #[test]
+    fn library_scoped_legacy_path_prefixes_with_library_id() {
+        let thumbs = Path::new("/tmp/thumbs");
+        let p = library_scoped_legacy_path(thumbs, 7, "vacation/IMG.jpg");
+        assert_eq!(p, PathBuf::from("/tmp/thumbs/7/vacation/IMG.jpg"));
+
+        // Same rel_path, different library — different output. This is
+        // the whole point: lib 1 and lib 2 don't clobber each other.
+        let p1 = library_scoped_legacy_path(thumbs, 1, "vacation/IMG.jpg");
+        let p2 = library_scoped_legacy_path(thumbs, 2, "vacation/IMG.jpg");
+        assert_ne!(p1, p2);
+    }
+}
@@ -102,6 +102,12 @@ pub struct PhotosResponse {
    pub photos: Vec<String>,
    pub dirs: Vec<String>,

+    /// Library id for each entry in `photos`, same length and ordering.
+    /// Parallel array rather than an object per row to keep the payload
+    /// small and backwards-compatible with older clients.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub photo_libraries: Vec<i32>,
+
    // Pagination metadata (only present when limit is set)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_count: Option<i64>,
@@ -155,6 +161,19 @@ pub struct FilesRequest {
    // Pagination parameters (optional - backward compatible)
    pub limit: Option<i64>,
    pub offset: Option<i64>,
+
+    /// Optional library filter. Accepts a library id (e.g. "1") or name
+    /// (e.g. "main"). When omitted, results span all libraries.
+    pub library: Option<String>,
+
+    /// When true, include rows soft-marked as duplicates of another file
+    /// (i.e. `image_exif.duplicate_of_hash IS NOT NULL`). Default false —
+    /// the standard /photos listing hides demoted siblings so the grid
+    /// silently shrinks after a resolve. The Apollo duplicates modal
+    /// passes `true` so it can show both survivors and demoted members
+    /// inside a group.
+    #[serde(default)]
+    pub include_duplicates: Option<bool>,
 }

 #[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
@@ -187,7 +206,12 @@ pub struct ThumbnailRequest {
    #[allow(dead_code)] // Part of API contract, may be used in future
    pub(crate) format: Option<ThumbnailFormat>,
    #[serde(default)]
+    #[allow(dead_code)] // Part of API contract, may be used in future
    pub(crate) shape: Option<ThumbnailShape>,
+    /// Optional library filter. Accepts a library id (e.g. "1") or name
+    /// (e.g. "main"). When omitted, defaults to the primary library.
+    #[serde(default)]
+    pub(crate) library: Option<String>,
 }

 #[derive(Debug, Deserialize, PartialEq)]
@@ -231,6 +255,8 @@ pub struct MetadataResponse {
    pub size: u64,
    pub exif: Option<ExifMetadata>,
    pub filename_date: Option<i64>, // Date extracted from filename
+    pub library_id: Option<i32>,
+    pub library_name: Option<String>,
 }

 impl From<fs::Metadata> for MetadataResponse {
@@ -247,6 +273,8 @@ impl From<fs::Metadata> for MetadataResponse {
            size: metadata.len(),
            exif: None,
            filename_date: None, // Will be set in endpoint handler
+            library_id: None,
+            library_name: None,
        }
    }
 }
@@ -258,6 +286,16 @@ pub struct ExifMetadata {
    pub gps: Option<GpsCoordinates>,
    pub capture_settings: Option<CaptureSettings>,
    pub date_taken: Option<i64>,
+    /// Which step of the canonical-date waterfall populated `date_taken`:
+    /// `"exif" | "exiftool" | "filename" | "fs_time" | "manual"`. NULL when
+    /// `date_taken` itself is NULL.
+    pub date_taken_source: Option<String>,
+    /// When `date_taken_source = "manual"`, the prior `date_taken` snapshot.
+    /// Used by the UI's revert affordance and to label "manually overridden;
+    /// originally X" in the details modal.
+    pub original_date_taken: Option<i64>,
+    /// When `date_taken_source = "manual"`, the prior source.
+    pub original_date_taken_source: Option<String>,
 }

 #[derive(Debug, Serialize)]
@@ -342,6 +380,9 @@ impl From<ImageExif> for ExifMetadata {
                None
            },
            date_taken: exif.date_taken,
+            date_taken_source: exif.date_taken_source,
+            original_date_taken: exif.original_date_taken,
+            original_date_taken_source: exif.original_date_taken_source,
        }
    }
 }
@@ -371,6 +412,40 @@ pub struct GpsPhotosResponse {
    pub total: usize,
 }

+/// Single-row projection of `image_exif` rich enough to drive Apollo's
+/// photo-to-track matcher (and any similar window-scoped consumer) without
+/// a per-file `/image/metadata` round-trip. Returned by `/photos/exif`.
+///
+/// Derives only `Serialize`; it is an outbound response shape.
+#[derive(Debug, Serialize)]
+pub struct ExifSummary {
+    /// NOTE(review): presumably the library-relative path (`rel_path`) — confirm.
+    pub file_path: String,
+    /// Id of the library the row belongs to.
+    pub library_id: i32,
+    /// NOTE(review): presumably `None` when the id cannot be resolved to a
+    /// configured library — confirm against the handler.
+    pub library_name: Option<String>,
+    pub camera_model: Option<String>,
+    pub width: Option<i32>,
+    pub height: Option<i32>,
+    pub gps_latitude: Option<f64>,
+    pub gps_longitude: Option<f64>,
+    /// NOTE(review): presumably unix seconds, like `image_exif.date_taken` — confirm.
+    pub date_taken: Option<i64>,
+}
+
+/// Response envelope carrying a batch of [`ExifSummary`] rows.
+#[derive(Debug, Serialize)]
+pub struct ExifBatchResponse {
+    /// The summaries being returned.
+    pub photos: Vec<ExifSummary>,
+    /// NOTE(review): presumably `photos.len()` — confirm against the handler.
+    pub total: usize,
+}
+
+/// Filter parameters for a batch EXIF query. Every field is optional.
+/// NOTE(review): presumably the query string of `/photos/exif` — confirm
+/// against the handler.
+#[derive(Debug, Deserialize)]
+pub struct ExifBatchRequest {
+    /// Lower bound (inclusive) for `image_exif.date_taken`, unix seconds.
+    pub date_from: Option<i64>,
+    /// Upper bound (inclusive). Same semantics as `date_to` on `/photos`.
+    pub date_to: Option<i64>,
+    /// Restrict results to a single library by id. Omit (or "" / "all") for
+    /// union mode — the default. Filtered post-query in the handler so the
+    /// existing `query_by_exif` DAO trait stays untouched.
+    pub library: Option<String>,
+}
+
 #[derive(Deserialize)]
 pub struct PreviewClipRequest {
    pub path: String,
@@ -422,11 +497,8 @@ mod tests {
        );

        match err.unwrap_err().into_kind() {
-            ErrorKind::ExpiredSignature => assert!(true),
-            kind => {
-                println!("Unexpected error: {:?}", kind);
-                assert!(false)
-            }
+            ErrorKind::ExpiredSignature => {}
+            kind => panic!("Unexpected error: {:?}", kind),
        }
    }

@@ -435,11 +507,8 @@ mod tests {
        let err = Claims::from_str("uni-֍ՓՓՓՓՓՓՓՓՓՓՓՓՓՓՓ");

        match err.unwrap_err().into_kind() {
-            ErrorKind::InvalidToken => assert!(true),
-            kind => {
-                println!("Unexpected error: {:?}", kind);
-                assert!(false)
-            }
+            ErrorKind::InvalidToken => {}
+            kind => panic!("Unexpected error: {:?}", kind),
        }
    }

@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use serde::Serialize;
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use chrono::NaiveDate;
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
@@ -73,6 +75,11 @@ pub trait DailySummaryDao: Sync + Send {
        context: &opentelemetry::Context,
        contact: &str,
    ) -> Result<i64, DbError>;
+
+    /// Cheap presence check — returns true iff at least one daily summary row
+    /// exists. Used by gating logic that only needs "is the table empty?",
+    /// avoiding a `COUNT(*)` full scan on large corpora.
+    fn has_any_summaries(&mut self, context: &opentelemetry::Context) -> Result<bool, DbError>;
 }

 pub struct SqliteDailySummaryDao {
@@ -266,7 +273,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
                .into_iter()
                .take(limit)
                .map(|(similarity, summary)| {
-                    log::info!(
+                    log::debug!(
                        "Summary match: similarity={:.3}, date={}, contact={}, summary=\"{}\"",
                        similarity,
                        summary.date,
@@ -386,7 +393,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
                .into_iter()
                .take(limit)
                .map(|(combined, similarity, days, summary)| {
-                    log::info!(
+                    log::debug!(
                        "Summary match: combined={:.3} (sim={:.3}, days={}), date={}, contact={}, summary=\"{}\"",
                        combined,
                        similarity,
@@ -452,6 +459,30 @@ impl DailySummaryDao for SqliteDailySummaryDao {
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
+
+    /// Presence probe: `true` iff `daily_conversation_summaries` holds at
+    /// least one row. Issues a `SELECT 1 ... LIMIT 1` rather than a count.
+    fn has_any_summaries(&mut self, context: &opentelemetry::Context) -> Result<bool, DbError> {
+        // Row shape for the probe query; the value itself is never read.
+        #[derive(QueryableByName)]
+        struct ProbeResult {
+            #[diesel(sql_type = diesel::sql_types::Integer)]
+            #[allow(dead_code)]
+            one: i32,
+        }
+
+        let probe = trace_db_call(context, "query", "has_any_summaries", |_span| {
+            let mut guard = self
+                .connection
+                .lock()
+                .expect("Unable to get DailySummaryDao");
+
+            diesel::sql_query("SELECT 1 as one FROM daily_conversation_summaries LIMIT 1")
+                .load::<ProbeResult>(guard.deref_mut())
+                .map(|hits| !hits.is_empty())
+                .map_err(|e| anyhow::anyhow!("Failed to probe daily summaries: {}", e))
+        });
+
+        probe.map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
 }

 // Helper structs for raw SQL queries
@@ -21,12 +21,49 @@ pub trait InsightDao: Sync + Send {
        file_path: &str,
    ) -> Result<Option<PhotoInsight>, DbError>;

+    /// Library-scoped variant of `get_insight`. The default `get_insight`
+    /// finds any `is_current=true` row matching `file_path` across
+    /// libraries — fine for the photo-grid metadata fetch (cross-library
+    /// merge), wrong for the chat path: a regenerate on lib1 flips lib1's
+    /// row to `is_current=false` and inserts a new lib1 row, but
+    /// lib2's untouched `is_current=true` row for the same rel_path
+    /// would still satisfy the path-only query and shadow the regen on
+    /// the next history fetch. Always pass a library_id when you have
+    /// one (chat / insight write paths always do).
+    fn get_current_insight_for_library(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        file_path: &str,
+    ) -> Result<Option<PhotoInsight>, DbError>;
+
+    /// Return the most recent current insight whose rel_path is one of
+    /// `paths`. Used for content-hash sharing: the caller expands a
+    /// single file into all rel_paths with the same content_hash, then
+    /// asks here for any existing insight attached to any of them.
+    fn get_insight_for_paths(
+        &mut self,
+        context: &opentelemetry::Context,
+        paths: &[String],
+    ) -> Result<Option<PhotoInsight>, DbError>;
+
+    #[allow(dead_code)]
    fn get_insight_history(
        &mut self,
        context: &opentelemetry::Context,
        file_path: &str,
    ) -> Result<Vec<PhotoInsight>, DbError>;

+    /// Fetch a single insight by primary key, regardless of `is_current`.
+    /// Used by the few-shot injection flow where the caller picks specific
+    /// historical insights (which may have been superseded) as training
+    /// exemplars for a fresh generation.
+    fn get_insight_by_id(
+        &mut self,
+        context: &opentelemetry::Context,
+        insight_id: i32,
+    ) -> Result<Option<PhotoInsight>, DbError>;
+
    fn delete_insight(
        &mut self,
        context: &opentelemetry::Context,
@@ -49,6 +86,17 @@ pub trait InsightDao: Sync + Send {
        &mut self,
        context: &opentelemetry::Context,
    ) -> Result<Vec<PhotoInsight>, DbError>;
+
+    /// Replace the `training_messages` JSON blob on the current row for
+    /// `(library_id, rel_path)`. Used by chat-turn append mode to persist
+    /// the extended conversation without inserting a new insight version.
+    fn update_training_messages(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        file_path: &str,
+        training_messages_json: &str,
+    ) -> Result<(), DbError>;
 }

 pub struct SqliteInsightDao {
@@ -69,6 +117,7 @@ impl SqliteInsightDao {
    }

    #[cfg(test)]
+    #[allow(dead_code)]
    pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
        SqliteInsightDao { connection: conn }
    }
@@ -78,18 +127,39 @@ impl InsightDao for SqliteInsightDao {
    fn store_insight(
        &mut self,
        context: &opentelemetry::Context,
-        insight: InsertPhotoInsight,
+        mut insight: InsertPhotoInsight,
    ) -> Result<PhotoInsight, DbError> {
        trace_db_call(context, "insert", "store_insight", |_span| {
            use schema::photo_insights::dsl::*;

            let mut connection = self.connection.lock().expect("Unable to get InsightDao");

+            // Eagerly populate content_hash so this insight follows the
+            // bytes (CLAUDE.md "Multi-library data model"). Caller-
+            // supplied hash wins; otherwise look it up from image_exif
+            // for the (library_id, rel_path) tuple. None is acceptable —
+            // reconciliation backfills it once the hash lands.
+            if insight.content_hash.is_none() {
+                use schema::image_exif as ie;
+                insight.content_hash = ie::table
+                    .filter(ie::library_id.eq(insight.library_id))
+                    .filter(ie::rel_path.eq(&insight.file_path))
+                    .filter(ie::content_hash.is_not_null())
+                    .select(ie::content_hash)
+                    .first::<Option<String>>(connection.deref_mut())
+                    .ok()
+                    .flatten();
+            }
+
            // Mark all existing insights for this file as no longer current
-            diesel::update(photo_insights.filter(file_path.eq(&insight.file_path)))
-                .set(is_current.eq(false))
-                .execute(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Update is_current error"))?;
+            diesel::update(
+                photo_insights
+                    .filter(library_id.eq(insight.library_id))
+                    .filter(rel_path.eq(&insight.file_path)),
+            )
+            .set(is_current.eq(false))
+            .execute(connection.deref_mut())
+            .map_err(|_| anyhow::anyhow!("Update is_current error"))?;

            // Insert the new insight as current
            diesel::insert_into(photo_insights)
@@ -99,7 +169,8 @@ impl InsightDao for SqliteInsightDao {

            // Retrieve the inserted record (is_current = true)
            photo_insights
-                .filter(file_path.eq(&insight.file_path))
+                .filter(library_id.eq(insight.library_id))
+                .filter(rel_path.eq(&insight.file_path))
                .filter(is_current.eq(true))
                .first::<PhotoInsight>(connection.deref_mut())
                .map_err(|_| anyhow::anyhow!("Query error"))
@@ -118,7 +189,7 @@ impl InsightDao for SqliteInsightDao {
            let mut connection = self.connection.lock().expect("Unable to get InsightDao");

            photo_insights
-                .filter(file_path.eq(path))
+                .filter(rel_path.eq(path))
                .filter(is_current.eq(true))
                .first::<PhotoInsight>(connection.deref_mut())
                .optional()
@@ -127,6 +198,57 @@ impl InsightDao for SqliteInsightDao {
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

+    /// Fetch the `is_current = true` insight for one `(library, rel_path)`
+    /// pair. Returns `Ok(None)` when no such row exists.
+    fn get_current_insight_for_library(
+        &mut self,
+        context: &opentelemetry::Context,
+        lib_id: i32,
+        path: &str,
+    ) -> Result<Option<PhotoInsight>, DbError> {
+        let traced = trace_db_call(
+            context,
+            "query",
+            "get_current_insight_for_library",
+            |_span| {
+                use schema::photo_insights::dsl::*;
+
+                let mut guard = self.connection.lock().expect("Unable to get InsightDao");
+
+                // Scope by library first so another library's row for the
+                // same rel_path can never match.
+                let scoped_current = photo_insights
+                    .filter(library_id.eq(lib_id))
+                    .filter(rel_path.eq(path))
+                    .filter(is_current.eq(true));
+
+                scoped_current
+                    .first::<PhotoInsight>(guard.deref_mut())
+                    .optional()
+                    .map_err(|_| anyhow::anyhow!("Query error"))
+            },
+        );
+
+        traced.map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
+    /// Newest current insight whose `rel_path` is any of `paths`, or
+    /// `Ok(None)` when nothing matches. An empty `paths` slice returns
+    /// `Ok(None)` without touching the database.
+    fn get_insight_for_paths(
+        &mut self,
+        context: &opentelemetry::Context,
+        paths: &[String],
+    ) -> Result<Option<PhotoInsight>, DbError> {
+        if paths.is_empty() {
+            return Ok(None);
+        }
+
+        let traced = trace_db_call(context, "query", "get_insight_for_paths", |_span| {
+            use schema::photo_insights::dsl::*;
+
+            let mut guard = self.connection.lock().expect("Unable to get InsightDao");
+
+            // Most recently generated row first; only that one is returned.
+            let newest_first = photo_insights
+                .filter(rel_path.eq_any(paths))
+                .filter(is_current.eq(true))
+                .order(generated_at.desc());
+
+            newest_first
+                .first::<PhotoInsight>(guard.deref_mut())
+                .optional()
+                .map_err(|_| anyhow::anyhow!("Query error"))
+        });
+
+        traced.map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
    fn get_insight_history(
        &mut self,
        context: &opentelemetry::Context,
@@ -138,7 +260,7 @@ impl InsightDao for SqliteInsightDao {
            let mut connection = self.connection.lock().expect("Unable to get InsightDao");

            photo_insights
-                .filter(file_path.eq(path))
+                .filter(rel_path.eq(path))
                .order(generated_at.desc())
                .load::<PhotoInsight>(connection.deref_mut())
                .map_err(|_| anyhow::anyhow!("Query error"))
@@ -146,6 +268,25 @@ impl InsightDao for SqliteInsightDao {
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

+    /// Primary-key lookup. No `is_current` filter is applied, so
+    /// superseded rows are returned too. `Ok(None)` when the id is unknown.
+    fn get_insight_by_id(
+        &mut self,
+        context: &opentelemetry::Context,
+        insight_id: i32,
+    ) -> Result<Option<PhotoInsight>, DbError> {
+        let traced = trace_db_call(context, "query", "get_insight_by_id", |_span| {
+            use schema::photo_insights::dsl::*;
+
+            let mut guard = self.connection.lock().expect("Unable to get InsightDao");
+            let by_key = photo_insights.find(insight_id);
+
+            by_key
+                .first::<PhotoInsight>(guard.deref_mut())
+                .optional()
+                .map_err(|_| anyhow::anyhow!("Query error"))
+        });
+
+        traced.map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
    fn delete_insight(
        &mut self,
        context: &opentelemetry::Context,
@@ -156,7 +297,7 @@ impl InsightDao for SqliteInsightDao {

            let mut connection = self.connection.lock().expect("Unable to get InsightDao");

-            diesel::delete(photo_insights.filter(file_path.eq(path)))
+            diesel::delete(photo_insights.filter(rel_path.eq(path)))
                .execute(connection.deref_mut())
                .map(|_| ())
                .map_err(|_| anyhow::anyhow!("Delete error"))
@@ -195,7 +336,7 @@ impl InsightDao for SqliteInsightDao {

            diesel::update(
                photo_insights
-                    .filter(file_path.eq(path))
+                    .filter(rel_path.eq(path))
                    .filter(is_current.eq(true)),
            )
            .set(approved.eq(Some(is_approved)))
@@ -224,4 +365,30 @@ impl InsightDao for SqliteInsightDao {
        })
        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
+
+    /// Overwrite `training_messages` on the current row for
+    /// `(lib_id, path)`. The affected-row count is discarded, so a call
+    /// that matches no row still returns `Ok(())`.
+    fn update_training_messages(
+        &mut self,
+        context: &opentelemetry::Context,
+        lib_id: i32,
+        path: &str,
+        training_messages_json: &str,
+    ) -> Result<(), DbError> {
+        let traced = trace_db_call(context, "update", "update_training_messages", |_span| {
+            use schema::photo_insights::dsl::*;
+
+            let mut guard = self.connection.lock().expect("Unable to get InsightDao");
+
+            let target_row = photo_insights
+                .filter(library_id.eq(lib_id))
+                .filter(rel_path.eq(path))
+                .filter(is_current.eq(true));
+            let new_blob = Some(training_messages_json.to_string());
+
+            diesel::update(target_row)
+                .set(training_messages.eq(new_blob))
+                .execute(guard.deref_mut())
+                .map(|_| ())
+                .map_err(|_| anyhow::anyhow!("Update error"))
+        });
+
+        traced.map_err(|_| DbError::new(DbErrorKind::UpdateError))
+    }
 }
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use serde::Serialize;
@@ -1,6 +1,6 @@
 use crate::database::schema::{
-    entities, entity_facts, entity_photo_links, favorites, image_exif, photo_insights, users,
-    video_preview_clips,
+    entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, personas,
+    photo_insights, users, video_preview_clips,
 };
 use serde::Serialize;

@@ -23,6 +23,7 @@ pub struct User {
 #[diesel(table_name = favorites)]
 pub struct InsertFavorite<'a> {
    pub userid: &'a i32,
+    #[diesel(column_name = rel_path)]
    pub path: &'a str,
 }

@@ -30,12 +31,15 @@ pub struct InsertFavorite<'a> {
 pub struct Favorite {
    pub id: i32,
    pub userid: i32,
+    #[diesel(column_name = rel_path)]
    pub path: String,
 }

 #[derive(Insertable)]
 #[diesel(table_name = image_exif)]
 pub struct InsertImageExif {
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
    pub file_path: String,
    pub camera_make: Option<String>,
    pub camera_model: Option<String>,
@@ -53,11 +57,26 @@ pub struct InsertImageExif {
    pub date_taken: Option<i64>,
    pub created_time: i64,
    pub last_modified: i64,
+    pub content_hash: Option<String>,
+    pub size_bytes: Option<i64>,
+    /// 64-bit pHash (DCT) packed as i64. NULL for videos and decode failures.
+    pub phash_64: Option<i64>,
+    /// 64-bit dHash (gradient). NULL for videos and decode failures.
+    pub dhash_64: Option<i64>,
+    /// Which step of the canonical-date waterfall populated `date_taken`:
+    /// `"exif"` | `"exiftool"` | `"filename"` | `"fs_time"`. NULL when
+    /// `date_taken` is NULL (no source resolved it). The per-tick backfill
+    /// drain re-resolves rows whose source is `"fs_time"` once exiftool
+    /// has had a chance to run.
+    pub date_taken_source: Option<String>,
 }

+// Field order matches the post-migration column order in `image_exif`.
 #[derive(Serialize, Queryable, Clone, Debug)]
 pub struct ImageExif {
    pub id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
    pub file_path: String,
    pub camera_make: Option<String>,
    pub camera_model: Option<String>,
@@ -75,11 +94,33 @@ pub struct ImageExif {
    pub date_taken: Option<i64>,
    pub created_time: i64,
    pub last_modified: i64,
+    pub content_hash: Option<String>,
+    pub size_bytes: Option<i64>,
+    pub phash_64: Option<i64>,
+    pub dhash_64: Option<i64>,
+    /// When non-null, this row is a soft-marked duplicate of the file
+    /// whose `content_hash` matches this value. The default `/photos`
+    /// listing filters such rows out.
+    pub duplicate_of_hash: Option<String>,
+    /// Unix seconds at which the resolve was committed.
+    pub duplicate_decided_at: Option<i64>,
+    /// Which step of the canonical-date waterfall populated `date_taken`.
+    /// Plus `"manual"` when the operator has set it via POST /image/exif/date.
+    pub date_taken_source: Option<String>,
+    /// Snapshot of the prior `date_taken` taken on first manual override.
+    /// NULL when no override is active. POST /image/exif/date/clear restores
+    /// `date_taken` from this column and nulls it back out.
+    pub original_date_taken: Option<i64>,
+    /// Snapshot of the prior `date_taken_source` taken on first manual
+    /// override. NULL when no override is active.
+    pub original_date_taken_source: Option<String>,
 }

 #[derive(Insertable)]
 #[diesel(table_name = photo_insights)]
 pub struct InsertPhotoInsight {
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
    pub file_path: String,
    pub title: String,
    pub summary: String,
@@ -87,11 +128,28 @@ pub struct InsertPhotoInsight {
    pub model_version: String,
    pub is_current: bool,
    pub training_messages: Option<String>,
+    /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
+    pub backend: String,
+    /// JSON array of insight ids whose `training_messages` were compressed
+    /// and injected into the system prompt as few-shot exemplars when this
+    /// row was generated. `None` means no few-shot was used (pristine
+    /// generation). Used downstream to filter out contaminated rows when
+    /// assembling an unbiased training / evaluation set.
+    pub fewshot_source_ids: Option<String>,
+    /// Bytes-keyed identity. When present, this insight is considered
+    /// to belong to the content rather than the path — see CLAUDE.md
+    /// "Multi-library data model". The DAO populates this from
+    /// `image_exif.content_hash` at insert time when known; rows
+    /// inserted before the hash is available stay null and the
+    /// reconciliation pass backfills them.
+    pub content_hash: Option<String>,
 }

 #[derive(Serialize, Queryable, Clone, Debug)]
 pub struct PhotoInsight {
    pub id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
    pub file_path: String,
    pub title: String,
    pub summary: String,
@@ -100,6 +158,44 @@ pub struct PhotoInsight {
    pub is_current: bool,
    pub training_messages: Option<String>,
    pub approved: Option<bool>,
+    /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
+    pub backend: String,
+    pub fewshot_source_ids: Option<String>,
+    pub content_hash: Option<String>,
+}
+
+// --- Libraries ---
+
+/// Database row for a configured library, loaded via Diesel `Queryable`.
+/// NOTE(review): `Queryable` maps columns by position, so field order
+/// here presumably mirrors the `libraries` table — confirm against schema.
+#[derive(Serialize, Queryable, Clone, Debug)]
+pub struct LibraryRow {
+    pub id: i32,
+    pub name: String,
+    pub root_path: String,
+    /// NOTE(review): presumably unix seconds — confirm.
+    pub created_at: i64,
+    /// Operator kill switch. `false` = the watcher skips this library
+    /// entirely (no probe, no ingest, no maintenance) and orphan-GC
+    /// treats it as out-of-scope for the all-online consensus rule.
+    /// Toggle via SQL today — there is intentionally no HTTP endpoint
+    /// for library mutation (see CLAUDE.md "Multi-library data model").
+    pub enabled: bool,
+    /// Per-library excluded paths/patterns, stored comma-separated
+    /// (same shape as the global `EXCLUDED_DIRS` env var). NULL = no
+    /// extra excludes for this library; the global env var still
+    /// applies. The runtime `Library` struct parses this into a
+    /// `Vec<String>` and the walker applies the union of (global,
+    /// library) excludes when scanning. Use case: mount a parent
+    /// directory while another library covers a child subtree.
+    pub excluded_dirs: Option<String>,
+}
+
+/// Insert payload for the `libraries` table. Borrows its string fields
+/// for lifetime `'a`; there is no `id` field.
+#[derive(Insertable)]
+#[diesel(table_name = libraries)]
+pub struct InsertLibrary<'a> {
+    pub name: &'a str,
+    pub root_path: &'a str,
+    /// NOTE(review): presumably unix seconds — confirm.
+    pub created_at: i64,
+    pub enabled: bool,
+    /// Comma-separated per-library excludes, or `None` for no extra excludes.
+    /// NOTE(review): format assumed to match `LibraryRow::excluded_dirs` — confirm.
+    pub excluded_dirs: Option<&'a str>,
+}

 // --- Knowledge memory models ---
@@ -142,6 +238,44 @@ pub struct InsertEntityFact {
    pub confidence: f32,
    pub status: String,
    pub created_at: i64,
+    /// Which persona authored this fact. Shared entities, persona-tagged
+    /// facts: each persona accumulates its own voice over the same
+    /// real-world referents. Defaults to `'default'` for legacy rows
+    /// (see migration 2026-05-09-000000).
+    pub persona_id: String,
+    /// Author's user_id. Required for the composite FK to
+    /// `personas(user_id, persona_id)` (migration 2026-05-10-000000) and
+    /// for cross-user fact isolation: two users with the same 'default'
+    /// persona must not see each other's facts. Always paired with
+    /// `persona_id` — they're a unit.
+    pub user_id: i32,
+    /// Real-world period the fact is/was true (unix seconds). NULL on
+    /// either side = unbounded — `valid_from IS NULL` reads as
+    /// "always-true-back-to-the-beginning", `valid_until IS NULL` as
+    /// "still-true-now-or-unknown". Distinguishes valid time from
+    /// transaction time (`created_at` is when we recorded the fact,
+    /// not when it was true in the world). See migration
+    /// 2026-05-10-000100.
+    pub valid_from: Option<i64>,
+    pub valid_until: Option<i64>,
+    /// Points at the entity_facts.id that replaced this one. Set by
+    /// the supersede endpoint; status flips to 'superseded' in the
+    /// same transaction. See migration 2026-05-10-000200.
+    pub superseded_by: Option<i32>,
+    /// Provenance for model audit — see migration 2026-05-10-000300.
+    /// `created_by_model` is the LLM identifier (e.g. "qwen2.5:7b",
+    /// "anthropic/claude-sonnet-4") or NULL for legacy / manual rows.
+    /// `created_by_backend` is "local" / "hybrid" / "manual" / NULL.
+    pub created_by_model: Option<String>,
+    pub created_by_backend: Option<String>,
+    /// Audit trail for mutations after creation — see migration
+    /// 2026-05-10-000500. `last_modified_*` stamp on any update
+    /// (status flip, valid-time edit, supersede, manual PATCH);
+    /// `last_modified_at` is unix seconds. NULL on rows that have
+    /// never been touched since creation.
+    pub last_modified_by_model: Option<String>,
+    pub last_modified_by_backend: Option<String>,
+    pub last_modified_at: Option<i64>,
 }

 #[derive(Serialize, Queryable, Clone, Debug)]
@@ -156,12 +290,24 @@ pub struct EntityFact {
    pub confidence: f32,
    pub status: String,
    pub created_at: i64,
+    pub persona_id: String,
+    pub user_id: i32,
+    pub valid_from: Option<i64>,
+    pub valid_until: Option<i64>,
+    pub superseded_by: Option<i32>,
+    pub created_by_model: Option<String>,
+    pub created_by_backend: Option<String>,
+    pub last_modified_by_model: Option<String>,
+    pub last_modified_by_backend: Option<String>,
+    pub last_modified_at: Option<i64>,
 }

 #[derive(Insertable)]
 #[diesel(table_name = entity_photo_links)]
 pub struct InsertEntityPhotoLink {
    pub entity_id: i32,
+    pub library_id: i32,
+    // The Rust field keeps its old name; the attribute below stores it
+    // in the `rel_path` column.
+    #[diesel(column_name = rel_path)]
    pub file_path: String,
    pub role: String,
 }
@@ -170,13 +316,56 @@ pub struct InsertEntityPhotoLink {
 pub struct EntityPhotoLink {
    pub id: i32,
    pub entity_id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
    pub file_path: String,
    pub role: String,
 }

+// --- Personas ---
+
+/// Insert payload for the `personas` table. String fields are borrowed
+/// for lifetime `'a`. There is no `id` field — presumably the database
+/// assigns it.
+#[derive(Insertable)]
+#[diesel(table_name = personas)]
+pub struct InsertPersona<'a> {
+    /// Owning user. Paired with `persona_id` when looking a persona up.
+    pub user_id: i32,
+    /// Caller-chosen identifier (e.g. `"default"`, `"custom-1"`).
+    pub persona_id: &'a str,
+    pub name: &'a str,
+    pub system_prompt: &'a str,
+    pub is_built_in: bool,
+    pub include_all_memories: bool,
+    // `SqlitePersonaDao::create_persona` stamps both timestamps with
+    // `chrono::Utc::now().timestamp_millis()`.
+    pub created_at: i64,
+    pub updated_at: i64,
+    /// "Strict mode" — agent reads only see facts with status =
+    /// 'reviewed' (human-verified). Default false. See migration
+    /// 2026-05-10-000400.
+    pub reviewed_only_facts: bool,
+    /// Gate for the agent's update_fact / supersede_fact tools.
+    /// Default false — fresh personas let the agent create but not
+    /// alter or replace. Operator opts in once a model has earned
+    /// trust. See migration 2026-05-10-000500.
+    pub allow_agent_corrections: bool,
+}
+
+/// A persona row as loaded by the persona DAO.
+///
+/// `Queryable` maps columns to fields by position, so the field order
+/// must stay in step with the `personas` column order. The fields
+/// after `id` mirror `InsertPersona` one-for-one.
+#[derive(Serialize, Queryable, Clone, Debug)]
+pub struct Persona {
+    pub id: i32,
+    pub user_id: i32,
+    pub persona_id: String,
+    pub name: String,
+    pub system_prompt: String,
+    pub is_built_in: bool,
+    pub include_all_memories: bool,
+    // Written by the DAO from `chrono::Utc::now().timestamp_millis()`;
+    // rows created via bulk import carry the client-supplied
+    // `created_at` instead.
+    pub created_at: i64,
+    pub updated_at: i64,
+    // See the matching fields on `InsertPersona` for the meaning of
+    // the two flags below.
+    pub reviewed_only_facts: bool,
+    pub allow_agent_corrections: bool,
+}
+
 #[derive(Insertable)]
 #[diesel(table_name = video_preview_clips)]
 pub struct InsertVideoPreviewClip {
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
    pub file_path: String,
    pub status: String,
    pub created_at: String,
@@ -186,6 +375,8 @@ pub struct InsertVideoPreviewClip {
 #[derive(Serialize, Queryable, Clone, Debug)]
 pub struct VideoPreviewClip {
    pub id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = rel_path)]
    pub file_path: String,
    pub status: String,
    pub duration_seconds: Option<f32>,
@@ -0,0 +1,447 @@
+#![allow(dead_code)]
+
+use diesel::prelude::*;
+use diesel::sqlite::SqliteConnection;
+use std::ops::DerefMut;
+use std::sync::{Arc, Mutex};
+
+use crate::database::models::{InsertPersona, Persona};
+use crate::database::schema;
+use crate::database::{DbError, DbErrorKind, connect};
+use crate::otel::trace_db_call;
+
+/// Patch shape for update_persona. None = leave field alone. Built-ins are
+/// allowed to flip `include_all_memories` but should reject name/prompt
+/// edits at the handler layer (built-in copy lives in the migration).
+///
+/// The DAO itself applies whatever is `Some`, built-in or not, and
+/// bumps `updated_at` for every field it writes.
+pub struct PersonaPatch {
+    /// New display name.
+    pub name: Option<String>,
+    /// New system prompt text.
+    pub system_prompt: Option<String>,
+    /// New value for the `include_all_memories` flag.
+    pub include_all_memories: Option<bool>,
+    /// New value for the `reviewed_only_facts` flag.
+    pub reviewed_only_facts: Option<bool>,
+    /// New value for the `allow_agent_corrections` flag.
+    pub allow_agent_corrections: Option<bool>,
+}
+
+/// One row of a bulk migration upload. Fields named to match the JSON
+/// shape the mobile client uploads (`POST /personas/migrate`).
+///
+/// `include_all_memories` is not part of the upload: `bulk_import`
+/// writes `0` for it and stamps `updated_at` with the server's clock.
+pub struct ImportPersona {
+    pub persona_id: String,
+    pub name: String,
+    pub system_prompt: String,
+    pub is_built_in: bool,
+    // Stored as supplied. NOTE(review): presumably epoch milliseconds
+    // to match what `create_persona` writes — confirm with the client.
+    pub created_at: i64,
+}
+
+/// Storage interface for personas. Every method takes the owning
+/// `user_id`, and personas are addressed by the `(user_id, persona_id)`
+/// pair. The per-method notes below describe what `SqlitePersonaDao`
+/// does; other implementations are expected to match.
+pub trait PersonaDao: Sync + Send {
+    /// All personas belonging to `user_id`, ordered by `created_at`
+    /// ascending. An unknown user yields an empty list, not an error.
+    fn list_personas(
+        &mut self,
+        cx: &opentelemetry::Context,
+        user_id: i32,
+    ) -> Result<Vec<Persona>, DbError>;
+
+    /// Looks up a single persona. `Ok(None)` when no row matches.
+    fn get_persona(
+        &mut self,
+        cx: &opentelemetry::Context,
+        user_id: i32,
+        persona_id: &str,
+    ) -> Result<Option<Persona>, DbError>;
+
+    /// Inserts a persona and returns the stored row. The two
+    /// agent-facing flags (`reviewed_only_facts`,
+    /// `allow_agent_corrections`) start out `false`. Fails if the
+    /// insert is rejected — the tests rely on a duplicate
+    /// `(user_id, persona_id)` being an error.
+    fn create_persona(
+        &mut self,
+        cx: &opentelemetry::Context,
+        user_id: i32,
+        persona_id: &str,
+        name: &str,
+        system_prompt: &str,
+        is_built_in: bool,
+        include_all_memories: bool,
+    ) -> Result<Persona, DbError>;
+
+    /// Applies the `Some` fields of `patch` and returns the row as it
+    /// stands afterwards. `Ok(None)` when the persona does not exist.
+    /// No built-in guard is applied at this layer.
+    fn update_persona(
+        &mut self,
+        cx: &opentelemetry::Context,
+        user_id: i32,
+        persona_id: &str,
+        patch: PersonaPatch,
+    ) -> Result<Option<Persona>, DbError>;
+
+    /// Deletes a persona. `Ok(true)` if a row was removed, `Ok(false)`
+    /// if nothing matched.
+    fn delete_persona(
+        &mut self,
+        cx: &opentelemetry::Context,
+        user_id: i32,
+        persona_id: &str,
+    ) -> Result<bool, DbError>;
+
+    /// Idempotent bulk import. INSERT OR IGNORE on (user_id, persona_id)
+    /// — re-uploading the same set is a no-op. Returns the number of rows
+    /// actually inserted (skipped duplicates don't count).
+    fn bulk_import(
+        &mut self,
+        cx: &opentelemetry::Context,
+        user_id: i32,
+        personas: &[ImportPersona],
+    ) -> Result<usize, DbError>;
+}
+
+/// SQLite-backed `PersonaDao`. Holds the connection behind an
+/// `Arc<Mutex<_>>`; each method locks it for the duration of the call.
+pub struct SqlitePersonaDao {
+    connection: Arc<Mutex<SqliteConnection>>,
+}
+
+/// `default()` is simply `new()`: it opens a connection via `connect()`.
+impl Default for SqlitePersonaDao {
+    fn default() -> Self {
+        SqlitePersonaDao::new()
+    }
+}
+
+impl SqlitePersonaDao {
+    /// Builds a DAO around a freshly opened connection from `connect()`.
+    pub fn new() -> Self {
+        let fresh = Arc::new(Mutex::new(connect()));
+        Self::from_connection(fresh)
+    }
+
+    /// Builds a DAO around an existing, possibly shared, connection.
+    /// The tests use this to inject an in-memory database.
+    pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
+        SqlitePersonaDao { connection: conn }
+    }
+}
+
+impl PersonaDao for SqlitePersonaDao {
+    /// Returns every persona owned by `uid`, oldest first
+    /// (`created_at` ascending).
+    fn list_personas(
+        &mut self,
+        cx: &opentelemetry::Context,
+        uid: i32,
+    ) -> Result<Vec<Persona>, DbError> {
+        trace_db_call(cx, "query", "list_personas", |_span| {
+            use schema::personas::dsl::*;
+            let mut conn = self.connection.lock().expect("PersonaDao lock");
+            personas
+                .filter(user_id.eq(uid))
+                .order(created_at.asc())
+                .load::<Persona>(conn.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
+    /// Looks up the persona `(uid, pid)`. `Ok(None)` when it does not
+    /// exist; `Err` only for real query failures.
+    fn get_persona(
+        &mut self,
+        cx: &opentelemetry::Context,
+        uid: i32,
+        pid: &str,
+    ) -> Result<Option<Persona>, DbError> {
+        trace_db_call(cx, "query", "get_persona", |_span| {
+            use schema::personas::dsl::*;
+            let mut conn = self.connection.lock().expect("PersonaDao lock");
+            personas
+                .filter(user_id.eq(uid))
+                .filter(persona_id.eq(pid))
+                .first::<Persona>(conn.deref_mut())
+                .optional()
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
+    /// Inserts a persona and reads the stored row back.
+    ///
+    /// `created_at` / `updated_at` are both set to the current time in
+    /// epoch milliseconds; the two agent-facing flags start `false`.
+    /// Any insert failure (including a duplicate `(uid, pid)`) surfaces
+    /// as `DbErrorKind::InsertError`.
+    fn create_persona(
+        &mut self,
+        cx: &opentelemetry::Context,
+        uid: i32,
+        pid: &str,
+        nm: &str,
+        prompt: &str,
+        builtin: bool,
+        include_all: bool,
+    ) -> Result<Persona, DbError> {
+        trace_db_call(cx, "insert", "create_persona", |_span| {
+            use schema::personas::dsl::*;
+            let mut conn = self.connection.lock().expect("PersonaDao lock");
+            let now = chrono::Utc::now().timestamp_millis();
+
+            diesel::insert_into(personas)
+                .values(InsertPersona {
+                    user_id: uid,
+                    persona_id: pid,
+                    name: nm,
+                    system_prompt: prompt,
+                    is_built_in: builtin,
+                    include_all_memories: include_all,
+                    created_at: now,
+                    updated_at: now,
+                    reviewed_only_facts: false,
+                    allow_agent_corrections: false,
+                })
+                .execute(conn.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Insert error: {}", e))?;
+
+            // Read back under the same lock so the caller gets the
+            // row exactly as stored (including the generated `id`).
+            personas
+                .filter(user_id.eq(uid))
+                .filter(persona_id.eq(pid))
+                .first::<Persona>(conn.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+        })
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+    }
+
+    /// Applies the `Some` fields of `patch` to persona `(uid, pid)` and
+    /// returns the resulting row (`Ok(None)` if the persona does not
+    /// exist).
+    ///
+    /// All field updates and the final read run inside one
+    /// transaction: if any statement fails, none of the patch is kept,
+    /// so an `Err` never leaves the row half-updated.
+    fn update_persona(
+        &mut self,
+        cx: &opentelemetry::Context,
+        uid: i32,
+        pid: &str,
+        patch: PersonaPatch,
+    ) -> Result<Option<Persona>, DbError> {
+        trace_db_call(cx, "update", "update_persona", |_span| {
+            use schema::personas::dsl::*;
+            let mut conn = self.connection.lock().expect("PersonaDao lock");
+            let now = chrono::Utc::now().timestamp_millis();
+
+            conn.transaction::<_, anyhow::Error, _>(|tx| {
+                // Apply each field as its own UPDATE — keeps types simple
+                // (Diesel's tuple updates don't compose cleanly across
+                // optional columns) and matches the pattern already in
+                // use for entities (knowledge_dao.rs::update_entity).
+                if let Some(ref new_name) = patch.name {
+                    diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
+                        .set((name.eq(new_name), updated_at.eq(now)))
+                        .execute(tx)
+                        .map_err(|e| anyhow::anyhow!("Update name error: {}", e))?;
+                }
+                if let Some(ref new_prompt) = patch.system_prompt {
+                    diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
+                        .set((system_prompt.eq(new_prompt), updated_at.eq(now)))
+                        .execute(tx)
+                        .map_err(|e| anyhow::anyhow!("Update prompt error: {}", e))?;
+                }
+                if let Some(new_include_all) = patch.include_all_memories {
+                    diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
+                        .set((include_all_memories.eq(new_include_all), updated_at.eq(now)))
+                        .execute(tx)
+                        .map_err(|e| anyhow::anyhow!("Update include_all error: {}", e))?;
+                }
+                if let Some(new_reviewed_only) = patch.reviewed_only_facts {
+                    diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
+                        .set((
+                            reviewed_only_facts.eq(new_reviewed_only),
+                            updated_at.eq(now),
+                        ))
+                        .execute(tx)
+                        .map_err(|e| anyhow::anyhow!("Update reviewed_only_facts error: {}", e))?;
+                }
+                if let Some(new_allow_corrections) = patch.allow_agent_corrections {
+                    diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
+                        .set((
+                            allow_agent_corrections.eq(new_allow_corrections),
+                            updated_at.eq(now),
+                        ))
+                        .execute(tx)
+                        .map_err(|e| {
+                            anyhow::anyhow!("Update allow_agent_corrections error: {}", e)
+                        })?;
+                }
+
+                personas
+                    .filter(user_id.eq(uid))
+                    .filter(persona_id.eq(pid))
+                    .first::<Persona>(tx)
+                    .optional()
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+            })
+        })
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+    }
+
+    /// Deletes persona `(uid, pid)`. `Ok(true)` if a row was removed,
+    /// `Ok(false)` if nothing matched.
+    fn delete_persona(
+        &mut self,
+        cx: &opentelemetry::Context,
+        uid: i32,
+        pid: &str,
+    ) -> Result<bool, DbError> {
+        trace_db_call(cx, "delete", "delete_persona", |_span| {
+            use schema::personas::dsl::*;
+            let mut conn = self.connection.lock().expect("PersonaDao lock");
+            let n = diesel::delete(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
+                .execute(conn.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))?;
+            Ok(n > 0)
+        })
+        // NOTE(review): failures are reported as QueryError; no
+        // delete-specific DbErrorKind is visible from this file.
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+    }
+
+    /// Imports `rows` for user `uid`, skipping any whose
+    /// `(user_id, persona_id)` already exists. Returns how many rows
+    /// were actually inserted.
+    ///
+    /// The whole batch runs in one transaction: a failure part-way
+    /// through rolls back the rows inserted so far, so an `Err` means
+    /// nothing was imported. It also avoids one commit per row.
+    fn bulk_import(
+        &mut self,
+        cx: &opentelemetry::Context,
+        uid: i32,
+        rows: &[ImportPersona],
+    ) -> Result<usize, DbError> {
+        trace_db_call(cx, "insert", "bulk_import_personas", |_span| {
+            let mut conn = self.connection.lock().expect("PersonaDao lock");
+            let now = chrono::Utc::now().timestamp_millis();
+
+            conn.transaction::<_, anyhow::Error, _>(|tx| {
+                let mut inserted = 0usize;
+
+                // INSERT OR IGNORE on the (user_id, persona_id) UNIQUE so
+                // re-running migrate is a no-op for personas already on
+                // the server. Columns not listed here (the two
+                // agent-facing flags) presumably fall back to their
+                // column defaults — confirm against the migrations.
+                for p in rows {
+                    let n = diesel::sql_query(
+                        "INSERT OR IGNORE INTO personas (user_id, persona_id, name, system_prompt, \
+                         is_built_in, include_all_memories, created_at, updated_at) \
+                         VALUES (?, ?, ?, ?, ?, 0, ?, ?)",
+                    )
+                    .bind::<diesel::sql_types::Integer, _>(uid)
+                    .bind::<diesel::sql_types::Text, _>(&p.persona_id)
+                    .bind::<diesel::sql_types::Text, _>(&p.name)
+                    .bind::<diesel::sql_types::Text, _>(&p.system_prompt)
+                    .bind::<diesel::sql_types::Bool, _>(p.is_built_in)
+                    .bind::<diesel::sql_types::BigInt, _>(p.created_at)
+                    .bind::<diesel::sql_types::BigInt, _>(now)
+                    .execute(tx)
+                    .map_err(|e| anyhow::anyhow!("Insert error: {}", e))?;
+                    // `n` is 1 for a fresh row, 0 for an ignored duplicate.
+                    inserted += n;
+                }
+                Ok(inserted)
+            })
+        })
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+    }
+}
+
+// Unit tests for SqlitePersonaDao, run against an in-memory database.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::database::test::in_memory_db_connection;
+
+    /// Builds a DAO over a fresh in-memory database containing one
+    /// user named `username`, and returns it with that user's id.
+    fn dao_with_user(username: &str) -> (SqlitePersonaDao, i32) {
+        use crate::database::schema::users::dsl as u;
+        let conn = Arc::new(Mutex::new(in_memory_db_connection()));
+        diesel::insert_into(u::users)
+            .values((u::username.eq(username), u::password.eq("x")))
+            .execute(conn.lock().unwrap().deref_mut())
+            .unwrap();
+        let user_id: i32 = u::users
+            .filter(u::username.eq(username))
+            .select(u::id)
+            .first(conn.lock().unwrap().deref_mut())
+            .unwrap();
+        (SqlitePersonaDao::from_connection(conn), user_id)
+    }
+
+    // A created persona comes back from create_persona and shows up in
+    // list_personas.
+    #[test]
+    fn create_and_list_round_trip() {
+        let cx = opentelemetry::Context::new();
+        let (mut dao, uid) = dao_with_user("alice");
+
+        // The migration seeds 3 built-ins for any existing user; alice
+        // was created post-migration so she starts empty.
+        let p = dao
+            .create_persona(&cx, uid, "custom-1", "Custom A", "prompt A", false, false)
+            .unwrap();
+        assert_eq!(p.persona_id, "custom-1");
+        assert_eq!(p.user_id, uid);
+        assert!(!p.is_built_in);
+
+        let list = dao.list_personas(&cx, uid).unwrap();
+        assert_eq!(list.len(), 1);
+        assert_eq!(list[0].persona_id, "custom-1");
+    }
+
+    // Creating the same (user, persona_id) twice must fail.
+    #[test]
+    fn unique_constraint_blocks_duplicate_persona_id() {
+        let cx = opentelemetry::Context::new();
+        let (mut dao, uid) = dao_with_user("bob");
+
+        dao.create_persona(&cx, uid, "x", "X", "p", false, false)
+            .unwrap();
+        let err = dao.create_persona(&cx, uid, "x", "X2", "p2", false, false);
+        assert!(
+            err.is_err(),
+            "second insert with same persona_id should fail"
+        );
+    }
+
+    // Importing the same rows twice inserts them once and reports 0
+    // the second time.
+    #[test]
+    fn bulk_import_is_idempotent() {
+        let cx = opentelemetry::Context::new();
+        let (mut dao, uid) = dao_with_user("carol");
+
+        let rows = vec![
+            ImportPersona {
+                persona_id: "custom-a".into(),
+                name: "A".into(),
+                system_prompt: "p1".into(),
+                is_built_in: false,
+                created_at: 1,
+            },
+            ImportPersona {
+                persona_id: "custom-b".into(),
+                name: "B".into(),
+                system_prompt: "p2".into(),
+                is_built_in: false,
+                created_at: 2,
+            },
+        ];
+
+        let first = dao.bulk_import(&cx, uid, &rows).unwrap();
+        assert_eq!(first, 2);
+        let second = dao.bulk_import(&cx, uid, &rows).unwrap();
+        assert_eq!(second, 0, "re-import should insert nothing");
+
+        assert_eq!(dao.list_personas(&cx, uid).unwrap().len(), 2);
+    }
+
+    #[test]
+    fn dao_update_does_not_block_built_ins() {
+        // Documenting contract: the DAO is intentionally permissive —
+        // `update_persona` will apply name/system_prompt edits to ANY
+        // row, including built-ins. The guard against editing built-in
+        // identity (name + systemPrompt) lives in the HTTP handler
+        // (src/personas.rs::update_persona). If you find yourself
+        // wanting to add the guard here too, prefer that — defence in
+        // depth — but keep this test passing so anyone who removes
+        // the handler guard gets a failing call site, not silent data
+        // corruption.
+        let cx = opentelemetry::Context::new();
+        let (mut dao, uid) = dao_with_user("eve");
+
+        dao.create_persona(&cx, uid, "default", "Default", "old", true, false)
+            .unwrap();
+        let updated = dao
+            .update_persona(
+                &cx,
+                uid,
+                "default",
+                PersonaPatch {
+                    name: Some("Renamed".into()),
+                    system_prompt: Some("new prompt".into()),
+                    include_all_memories: None,
+                    reviewed_only_facts: None,
+                    allow_agent_corrections: None,
+                },
+            )
+            .unwrap()
+            .unwrap();
+        assert_eq!(updated.name, "Renamed");
+        assert_eq!(updated.system_prompt, "new prompt");
+        assert!(
+            updated.is_built_in,
+            "is_built_in flag should be unchanged by patch"
+        );
+    }
+
+    // A patch carrying only include_all_memories flips that flag (here
+    // on a built-in persona created with it off).
+    #[test]
+    fn update_toggles_include_all_memories() {
+        let cx = opentelemetry::Context::new();
+        let (mut dao, uid) = dao_with_user("dan");
+
+        dao.create_persona(&cx, uid, "j", "Journal", "p", true, false)
+            .unwrap();
+        let updated = dao
+            .update_persona(
+                &cx,
+                uid,
+                "j",
+                PersonaPatch {
+                    name: None,
+                    system_prompt: None,
+                    include_all_memories: Some(true),
+                    reviewed_only_facts: None,
+                    allow_agent_corrections: None,
+                },
+            )
+            .unwrap()
+            .unwrap();
+        assert!(updated.include_all_memories);
+    }
+}
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use std::ops::DerefMut;
@@ -84,6 +86,7 @@ impl PreviewDao for SqlitePreviewDao {

            diesel::insert_or_ignore_into(video_preview_clips)
                .values(InsertVideoPreviewClip {
+                    library_id: 1,
                    file_path: file_path_val.to_string(),
                    status: status_val.to_string(),
                    created_at: now.clone(),
@@ -111,7 +114,7 @@ impl PreviewDao for SqlitePreviewDao {
            let mut connection = self.connection.lock().expect("Unable to get PreviewDao");
            let now = chrono::Utc::now().to_rfc3339();

-            diesel::update(video_preview_clips.filter(file_path.eq(file_path_val)))
+            diesel::update(video_preview_clips.filter(rel_path.eq(file_path_val)))
                .set((
                    status.eq(status_val),
                    duration_seconds.eq(duration),
@@ -137,7 +140,7 @@ impl PreviewDao for SqlitePreviewDao {
            let mut connection = self.connection.lock().expect("Unable to get PreviewDao");

            match video_preview_clips
-                .filter(file_path.eq(file_path_val))
+                .filter(rel_path.eq(file_path_val))
                .first::<VideoPreviewClip>(connection.deref_mut())
            {
                Ok(clip) => Ok(Some(clip)),
@@ -163,7 +166,7 @@ impl PreviewDao for SqlitePreviewDao {
            let mut connection = self.connection.lock().expect("Unable to get PreviewDao");

            video_preview_clips
-                .filter(file_path.eq_any(file_paths))
+                .filter(rel_path.eq_any(file_paths))
                .load::<VideoPreviewClip>(connection.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
@@ -0,0 +1,382 @@
+//! Reconciliation pass for hash-keyed derived data.
+//!
+//! As `backfill_unhashed_backlog` populates `image_exif.content_hash`
+//! for legacy rows, we want the matching `tagged_photo` and
+//! `photo_insights` rows — which were inserted before the hash was
+//! known — to inherit the hash too. Otherwise reads keep falling back
+//! to the rel_path path even when a hash is now available.
+//!
+//! Two passes:
+//!   1. **Hash backfill** — for every `tagged_photo` / `photo_insights`
+//!      row with NULL `content_hash`, look up the matching
+//!      `image_exif.content_hash` and write it. SQL-only; idempotent;
+//!      a no-op once everything is hashed.
+//!   2. **Insight scalar merge** — when multiple `photo_insights` rows
+//!      share a `content_hash` with `is_current = true`, only the
+//!      earliest `generated_at` keeps `is_current = true` (per the
+//!      "earliest wins" rule in CLAUDE.md → "Multi-library data
+//!      model"). Others are demoted, not deleted, so they remain
+//!      visible in history endpoints.
+//!
+//! Tags are set-valued under the policy (union on read), so there's no
+//! analogous "collapse" pass — duplicate `(tag_id, content_hash)` rows
+//! across libraries are harmless and correctly de-duped at read time
+//! by the existing `DISTINCT` queries.
+//!
+//! The pass operates on the database alone — no filesystem access —
+//! so it doesn't need the library availability gate.
+
+// The lib doesn't call into this module directly — the watcher (in the
+// bin) does. Dead-code analysis at the lib level can't see that, so
+// suppress at the module level. Tests still exercise every function.
+#![allow(dead_code)]
+
+use diesel::prelude::*;
+use diesel::sql_query;
+use diesel::sqlite::SqliteConnection;
+use log::{debug, info, warn};
+
+/// Outcome of a reconciliation tick. Tracked so the watcher can log
+/// progress when something changed and stay quiet when nothing did.
+///
+/// Each counter is the affected-row count reported by the matching
+/// SQL step in `run`, or 0 if that step failed.
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+pub struct ReconcileStats {
+    /// `tagged_photo` rows whose NULL `content_hash` was filled in.
+    pub tagged_photo_hashes_filled: usize,
+    /// `photo_insights` rows whose NULL `content_hash` was filled in.
+    pub photo_insights_hashes_filled: usize,
+    /// `photo_insights` rows flipped from `is_current = 1` to `0`.
+    pub photo_insights_demoted: usize,
+}
+
+impl ReconcileStats {
+    /// `true` when at least one counter is non-zero, i.e. the tick
+    /// actually wrote something.
+    pub fn changed(&self) -> bool {
+        let counters = [
+            self.tagged_photo_hashes_filled,
+            self.photo_insights_hashes_filled,
+            self.photo_insights_demoted,
+        ];
+        counters.iter().any(|&count| count > 0)
+    }
+}
+
+/// Execute one reconciliation tick and report what it changed.
+///
+/// Idempotent, so the watcher can call it on every tick. The three
+/// steps always run in the same order — `tagged_photo` hash backfill,
+/// `photo_insights` hash backfill, then the `is_current` collapse.
+/// A failing step is logged at `warn` and counted as zero; the error
+/// is never propagated, because reconciliation is best-effort and a
+/// transient DB hiccup must not stall the watcher.
+pub fn run(conn: &mut SqliteConnection) -> ReconcileStats {
+    let tagged_photo_hashes_filled = backfill_tagged_photo_hashes(conn).unwrap_or_else(|e| {
+        warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
+        0
+    });
+
+    let photo_insights_hashes_filled =
+        backfill_photo_insights_hashes(conn).unwrap_or_else(|e| {
+            warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
+            0
+        });
+
+    let photo_insights_demoted = collapse_insight_currents(conn).unwrap_or_else(|e| {
+        warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
+        0
+    });
+
+    let stats = ReconcileStats {
+        tagged_photo_hashes_filled,
+        photo_insights_hashes_filled,
+        photo_insights_demoted,
+    };
+
+    // Quiet path first: nothing was written, so only a debug line.
+    if !stats.changed() {
+        debug!("reconcile: no changes this tick");
+        return stats;
+    }
+
+    info!(
+        "reconcile: filled {} tagged_photo hash(es), {} photo_insights hash(es); demoted {} non-current insight row(s)",
+        stats.tagged_photo_hashes_filled,
+        stats.photo_insights_hashes_filled,
+        stats.photo_insights_demoted,
+    );
+    stats
+}
+
+/// Fill in `tagged_photo.content_hash` wherever it is still NULL,
+/// matching on `rel_path` against `image_exif`. Returns the number of
+/// rows updated.
+///
+/// `tagged_photo` has no `library_id`, so when the same path exists in
+/// several libraries with different content the match is genuinely
+/// ambiguous; any one non-null hash for that path is used. Same
+/// trade-off as the migration backfill — see
+/// `migrations/2026-05-01-000000_hash_keyed_derived_data`.
+fn backfill_tagged_photo_hashes(conn: &mut SqliteConnection) -> QueryResult<usize> {
+    // The EXISTS guard limits the UPDATE to rows that have a hashed
+    // counterpart. Without it every NULL row would be rewritten
+    // (NULL -> NULL) and counted, so the returned total would never
+    // settle at zero.
+    const FILL_SQL: &str = "UPDATE tagged_photo \
+         SET content_hash = ( \
+             SELECT content_hash FROM image_exif \
+             WHERE image_exif.rel_path = tagged_photo.rel_path \
+               AND image_exif.content_hash IS NOT NULL \
+             LIMIT 1 \
+         ) \
+         WHERE content_hash IS NULL \
+           AND EXISTS ( \
+               SELECT 1 FROM image_exif \
+               WHERE image_exif.rel_path = tagged_photo.rel_path \
+                 AND image_exif.content_hash IS NOT NULL \
+           )";
+
+    sql_query(FILL_SQL).execute(conn)
+}
+
+/// Fill in `photo_insights.content_hash` wherever it is still NULL,
+/// matching on `(library_id, rel_path)` against `image_exif`. Returns
+/// the number of rows updated.
+///
+/// Because `photo_insights` carries `library_id`, the match is
+/// unambiguous.
+fn backfill_photo_insights_hashes(conn: &mut SqliteConnection) -> QueryResult<usize> {
+    // The EXISTS guard limits the UPDATE to rows that have a hashed
+    // counterpart, so rows with nothing to inherit are neither
+    // touched nor counted.
+    const FILL_SQL: &str = "UPDATE photo_insights \
+         SET content_hash = ( \
+             SELECT content_hash FROM image_exif \
+             WHERE image_exif.library_id = photo_insights.library_id \
+               AND image_exif.rel_path = photo_insights.rel_path \
+               AND image_exif.content_hash IS NOT NULL \
+             LIMIT 1 \
+         ) \
+         WHERE content_hash IS NULL \
+           AND EXISTS ( \
+               SELECT 1 FROM image_exif \
+               WHERE image_exif.library_id = photo_insights.library_id \
+                 AND image_exif.rel_path = photo_insights.rel_path \
+                 AND image_exif.content_hash IS NOT NULL \
+           )";
+
+    sql_query(FILL_SQL).execute(conn)
+}
+
+/// Scalar-merge step: among `photo_insights` rows that share a
+/// `content_hash` and all claim `is_current = true`, keep only the one
+/// with the earliest `generated_at` (lowest `id` on a tie, so the
+/// choice is deterministic) and demote the rest. Returns the number of
+/// rows demoted.
+///
+/// Demotion only flips `is_current`; the row's data stays. Clients of
+/// `/insights/history` still see the full sequence — only the
+/// "current" pointer becomes unique per hash. Rows with a NULL
+/// `content_hash` are left alone.
+fn collapse_insight_currents(conn: &mut SqliteConnection) -> QueryResult<usize> {
+    // Innermost subquery: earliest generated_at among the current rows
+    // for this hash. Middle subquery: lowest id at that timestamp —
+    // the single survivor. Outer UPDATE: demote everyone else.
+    const COLLAPSE_SQL: &str = "UPDATE photo_insights \
+         SET is_current = 0 \
+         WHERE is_current = 1 \
+           AND content_hash IS NOT NULL \
+           AND id NOT IN ( \
+               SELECT MIN(p2.id) FROM photo_insights p2 \
+               WHERE p2.is_current = 1 \
+                 AND p2.content_hash = photo_insights.content_hash \
+                 AND p2.generated_at = ( \
+                     SELECT MIN(p3.generated_at) FROM photo_insights p3 \
+                     WHERE p3.is_current = 1 \
+                       AND p3.content_hash = p2.content_hash \
+                 ) \
+           )";
+
+    sql_query(COLLAPSE_SQL).execute(conn)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::database::test::in_memory_db_connection;
+
+    fn ensure_library(conn: &mut SqliteConnection, library_id: i32) {
+        // Migration seeds library id=1; tests that reference id>1 must
+        // create those rows themselves, otherwise FK enforcement (added
+        // in the tags-edit migration) rejects image_exif inserts.
+        diesel::sql_query(
+            "INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
+             VALUES (?, 'test-' || ?, '/tmp/test-' || ?, 0)",
+        )
+        .bind::<diesel::sql_types::Integer, _>(library_id)
+        .bind::<diesel::sql_types::Integer, _>(library_id)
+        .bind::<diesel::sql_types::Integer, _>(library_id)
+        .execute(conn)
+        .unwrap();
+    }
+
+    /// Insert a minimal `image_exif` row for `(library_id, rel_path)` with
+    /// zeroed timestamps and the given (possibly NULL) `content_hash`.
+    /// Creates the referenced library row first so FK enforcement accepts
+    /// the insert. Panics if the statement fails.
+    fn insert_image_exif(
+        conn: &mut SqliteConnection,
+        library_id: i32,
+        rel_path: &str,
+        content_hash: Option<&str>,
+    ) {
+        ensure_library(conn, library_id);
+        // Raw SQL keeps the fixture independent of the Insertable structs;
+        // only the columns the reconcile tests care about are populated.
+        diesel::sql_query(
+            "INSERT INTO image_exif (library_id, rel_path, created_time, last_modified, content_hash) \
+             VALUES (?, ?, 0, 0, ?)",
+        )
+        .bind::<diesel::sql_types::Integer, _>(library_id)
+        .bind::<diesel::sql_types::Text, _>(rel_path)
+        .bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(content_hash)
+        .execute(conn)
+        .unwrap();
+    }
+
+    /// Insert a `tagged_photo` row linking `rel_path` to `tag_id` with a
+    /// zero `created_time`. `content_hash` is not part of the insert, so
+    /// the row starts without one. Panics if the statement fails.
+    fn insert_tagged_photo(conn: &mut SqliteConnection, rel_path: &str, tag_id: i32) {
+        diesel::sql_query(
+            "INSERT INTO tagged_photo (rel_path, tag_id, created_time) VALUES (?, ?, 0)",
+        )
+        .bind::<diesel::sql_types::Text, _>(rel_path)
+        .bind::<diesel::sql_types::Integer, _>(tag_id)
+        .execute(conn)
+        .unwrap();
+    }
+
+    /// Insert a `tags` row with an explicit `id` and `name` (zero
+    /// `created_time`) so tests can reference the tag by a known id.
+    /// Panics if the statement fails.
+    fn insert_tag(conn: &mut SqliteConnection, id: i32, name: &str) {
+        diesel::sql_query("INSERT INTO tags (id, name, created_time) VALUES (?, ?, 0)")
+            .bind::<diesel::sql_types::Integer, _>(id)
+            .bind::<diesel::sql_types::Text, _>(name)
+            .execute(conn)
+            .unwrap();
+    }
+
+    /// Insert a `photo_insights` row with placeholder text columns
+    /// (`'t'`, `'s'`, `'v'`, backend `'local'`) and the supplied
+    /// `generated_at` / `is_current`, returning the new row's id.
+    /// `content_hash` is not part of the insert, so the row starts without
+    /// one. Creates the referenced library row first. Panics on any SQL
+    /// failure.
+    fn insert_insight(
+        conn: &mut SqliteConnection,
+        library_id: i32,
+        rel_path: &str,
+        generated_at: i64,
+        is_current: bool,
+    ) -> i32 {
+        ensure_library(conn, library_id);
+        diesel::sql_query(
+            "INSERT INTO photo_insights (library_id, rel_path, title, summary, generated_at, model_version, is_current, backend) \
+             VALUES (?, ?, 't', 's', ?, 'v', ?, 'local')",
+        )
+        .bind::<diesel::sql_types::Integer, _>(library_id)
+        .bind::<diesel::sql_types::Text, _>(rel_path)
+        .bind::<diesel::sql_types::BigInt, _>(generated_at)
+        .bind::<diesel::sql_types::Bool, _>(is_current)
+        .execute(conn)
+        .unwrap();
+        // Read the id back on the same connection, immediately after the
+        // insert, so it refers to the row just written.
+        diesel::sql_query("SELECT last_insert_rowid() AS id")
+            .get_result::<TestId>(conn)
+            .map(|r| r.id)
+            .unwrap()
+    }
+
+    /// Row shape for `SELECT last_insert_rowid() AS id`.
+    #[derive(QueryableByName)]
+    struct TestId {
+        #[diesel(sql_type = diesel::sql_types::Integer)]
+        id: i32,
+    }
+
+    /// Row shape for queries that select only a nullable `content_hash`.
+    #[derive(QueryableByName, Debug)]
+    struct HashOnly {
+        #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
+        content_hash: Option<String>,
+    }
+
+    /// Row shape for `SELECT id, is_current FROM photo_insights ...`.
+    #[derive(QueryableByName, Debug)]
+    struct CurrentRow {
+        #[diesel(sql_type = diesel::sql_types::Integer)]
+        id: i32,
+        #[diesel(sql_type = diesel::sql_types::Bool)]
+        is_current: bool,
+    }
+
+    // A tagged_photo row picks up its content_hash from the image_exif row
+    // with the same rel_path, but only once such a row (with a hash) exists;
+    // running again afterwards changes nothing.
+    #[test]
+    fn backfill_fills_tagged_photo_hash_when_image_exif_has_one() {
+        let mut conn = in_memory_db_connection();
+        insert_tag(&mut conn, 1, "vacation");
+        insert_tagged_photo(&mut conn, "trip/IMG.jpg", 1);
+        // No image_exif row yet — backfill no-op.
+        let stats = run(&mut conn);
+        assert_eq!(stats.tagged_photo_hashes_filled, 0);
+
+        // image_exif row appears with a hash; next reconcile fills it.
+        insert_image_exif(&mut conn, 1, "trip/IMG.jpg", Some("hashabc"));
+        let stats = run(&mut conn);
+        assert_eq!(stats.tagged_photo_hashes_filled, 1);
+
+        // Confirm the stored value, not just the counter.
+        let row = diesel::sql_query(
+            "SELECT content_hash FROM tagged_photo WHERE rel_path = 'trip/IMG.jpg'",
+        )
+        .get_result::<HashOnly>(&mut conn)
+        .unwrap();
+        assert_eq!(row.content_hash.as_deref(), Some("hashabc"));
+
+        // Idempotent: a second run is a no-op.
+        let stats = run(&mut conn);
+        assert_eq!(stats.tagged_photo_hashes_filled, 0);
+    }
+
+    // A matching image_exif row whose own content_hash is NULL gives the
+    // backfill nothing to copy, so the counter stays at zero.
+    #[test]
+    fn backfill_skips_tagged_photo_when_image_exif_has_no_hash() {
+        let mut conn = in_memory_db_connection();
+        insert_tag(&mut conn, 1, "vacation");
+        insert_tagged_photo(&mut conn, "trip/IMG.jpg", 1);
+        // image_exif exists but its hash is null.
+        insert_image_exif(&mut conn, 1, "trip/IMG.jpg", None);
+
+        let stats = run(&mut conn);
+        assert_eq!(stats.tagged_photo_hashes_filled, 0);
+    }
+
+    // The photo_insights backfill must match on (library_id, rel_path), not
+    // on rel_path alone.
+    #[test]
+    fn backfill_fills_photo_insights_hash_scoped_by_library() {
+        let mut conn = in_memory_db_connection();
+        // The same rel_path exists in two libraries with *different*
+        // hashes. The insight lives in library 1 only, so it must pick up
+        // library 1's hash and never library 2's. Without the library-2
+        // row an unscoped backfill would pass this test unnoticed.
+        insert_image_exif(&mut conn, 1, "shared.jpg", Some("hash-lib1"));
+        insert_image_exif(&mut conn, 2, "shared.jpg", Some("hash-lib2"));
+        let id1 = insert_insight(&mut conn, 1, "shared.jpg", 100, true);
+
+        let stats = run(&mut conn);
+        assert_eq!(stats.photo_insights_hashes_filled, 1);
+
+        let row = diesel::sql_query("SELECT content_hash FROM photo_insights WHERE id = ?")
+            .bind::<diesel::sql_types::Integer, _>(id1)
+            .get_result::<HashOnly>(&mut conn)
+            .unwrap();
+        assert_eq!(row.content_hash.as_deref(), Some("hash-lib1"));
+    }
+
+    // When several current insights share a content_hash, only the one
+    // with the earliest generated_at stays current.
+    #[test]
+    fn collapse_keeps_earliest_is_current_per_hash() {
+        let mut conn = in_memory_db_connection();
+        // Two libraries, same content_hash via image_exif. Insights
+        // were generated independently in each library, both currently
+        // is_current = true. The earlier one wins.
+        insert_image_exif(&mut conn, 1, "a.jpg", Some("h1"));
+        insert_image_exif(&mut conn, 2, "a.jpg", Some("h1"));
+        let earlier = insert_insight(&mut conn, 1, "a.jpg", 100, true);
+        let later = insert_insight(&mut conn, 2, "a.jpg", 200, true);
+
+        // A single run() both backfills the content_hash values and then
+        // collapses the duplicates, so both counters move in this one call.
+        let stats = run(&mut conn);
+        assert_eq!(stats.photo_insights_hashes_filled, 2);
+        assert_eq!(stats.photo_insights_demoted, 1);
+
+        let rows = diesel::sql_query("SELECT id, is_current FROM photo_insights ORDER BY id")
+            .get_results::<CurrentRow>(&mut conn)
+            .unwrap();
+        let earlier_row = rows.iter().find(|r| r.id == earlier).unwrap();
+        let later_row = rows.iter().find(|r| r.id == later).unwrap();
+        assert!(
+            earlier_row.is_current,
+            "earlier insight should remain current"
+        );
+        assert!(!later_row.is_current, "later insight should be demoted");
+
+        // Idempotent.
+        let stats = run(&mut conn);
+        assert_eq!(stats.photo_insights_demoted, 0);
+    }
+
+    // A content_hash with exactly one current insight must be left alone:
+    // nothing is demoted and the row stays current.
+    #[test]
+    fn collapse_does_not_demote_a_solo_current_row() {
+        let mut conn = in_memory_db_connection();
+        insert_image_exif(&mut conn, 1, "a.jpg", Some("h1"));
+        let solo = insert_insight(&mut conn, 1, "a.jpg", 100, true);
+
+        let stats = run(&mut conn);
+        assert_eq!(stats.photo_insights_demoted, 0);
+
+        let row = diesel::sql_query("SELECT id, is_current FROM photo_insights WHERE id = ?")
+            .bind::<diesel::sql_types::Integer, _>(solo)
+            .get_result::<CurrentRow>(&mut conn)
+            .unwrap();
+        assert!(row.is_current);
+    }
+}
@@ -57,6 +57,16 @@ diesel::table! {
        confidence -> Float,
        status -> Text,
        created_at -> BigInt,
+        persona_id -> Text,
+        user_id -> Integer,
+        valid_from -> Nullable<BigInt>,
+        valid_until -> Nullable<BigInt>,
+        superseded_by -> Nullable<Integer>,
+        created_by_model -> Nullable<Text>,
+        created_by_backend -> Nullable<Text>,
+        last_modified_by_model -> Nullable<Text>,
+        last_modified_by_backend -> Nullable<Text>,
+        last_modified_at -> Nullable<BigInt>,
    }
 }

@@ -64,23 +74,45 @@ diesel::table! {
    entity_photo_links (id) {
        id -> Integer,
        entity_id -> Integer,
-        file_path -> Text,
+        library_id -> Integer,
+        rel_path -> Text,
        role -> Text,
    }
 }

+diesel::table! {
+    face_detections (id) {
+        id -> Integer,
+        library_id -> Integer,
+        content_hash -> Text,
+        rel_path -> Text,
+        bbox_x -> Nullable<Float>,
+        bbox_y -> Nullable<Float>,
+        bbox_w -> Nullable<Float>,
+        bbox_h -> Nullable<Float>,
+        embedding -> Nullable<Binary>,
+        confidence -> Nullable<Float>,
+        source -> Text,
+        person_id -> Nullable<Integer>,
+        status -> Text,
+        model_version -> Text,
+        created_at -> BigInt,
+    }
+}
+
 diesel::table! {
    favorites (id) {
        id -> Integer,
        userid -> Integer,
-        path -> Text,
+        rel_path -> Text,
    }
 }

 diesel::table! {
    image_exif (id) {
        id -> Integer,
-        file_path -> Text,
+        library_id -> Integer,
+        rel_path -> Text,
        camera_make -> Nullable<Text>,
        camera_model -> Nullable<Text>,
        lens_model -> Nullable<Text>,
@@ -97,18 +129,26 @@ diesel::table! {
        date_taken -> Nullable<BigInt>,
        created_time -> BigInt,
        last_modified -> BigInt,
+        content_hash -> Nullable<Text>,
+        size_bytes -> Nullable<BigInt>,
+        phash_64 -> Nullable<BigInt>,
+        dhash_64 -> Nullable<BigInt>,
+        duplicate_of_hash -> Nullable<Text>,
+        duplicate_decided_at -> Nullable<BigInt>,
+        date_taken_source -> Nullable<Text>,
+        original_date_taken -> Nullable<BigInt>,
+        original_date_taken_source -> Nullable<Text>,
    }
 }

 diesel::table! {
-    knowledge_embeddings (id) {
+    libraries (id) {
        id -> Integer,
-        keyword -> Text,
-        description -> Text,
-        category -> Nullable<Text>,
-        embedding -> Binary,
+        name -> Text,
+        root_path -> Text,
        created_at -> BigInt,
-        model_version -> Text,
+        enabled -> Bool,
+        excluded_dirs -> Nullable<Text>,
    }
 }

@@ -130,22 +170,40 @@ diesel::table! {
 }

 diesel::table! {
-    message_embeddings (id) {
+    personas (id) {
        id -> Integer,
-        contact -> Text,
-        body -> Text,
-        timestamp -> BigInt,
-        is_sent -> Bool,
-        embedding -> Binary,
+        user_id -> Integer,
+        persona_id -> Text,
+        name -> Text,
+        system_prompt -> Text,
+        is_built_in -> Bool,
+        include_all_memories -> Bool,
        created_at -> BigInt,
-        model_version -> Text,
+        updated_at -> BigInt,
+        reviewed_only_facts -> Bool,
+        allow_agent_corrections -> Bool,
+    }
+}
+
+diesel::table! {
+    persons (id) {
+        id -> Integer,
+        name -> Text,
+        cover_face_id -> Nullable<Integer>,
+        entity_id -> Nullable<Integer>,
+        created_from_tag -> Bool,
+        notes -> Nullable<Text>,
+        created_at -> BigInt,
+        updated_at -> BigInt,
+        is_ignored -> Bool,
    }
 }

 diesel::table! {
    photo_insights (id) {
        id -> Integer,
-        file_path -> Text,
+        library_id -> Integer,
+        rel_path -> Text,
        title -> Text,
        summary -> Text,
        generated_at -> BigInt,
@@ -153,6 +211,9 @@ diesel::table! {
        is_current -> Bool,
        training_messages -> Nullable<Text>,
        approved -> Nullable<Bool>,
+        backend -> Text,
+        fewshot_source_ids -> Nullable<Text>,
+        content_hash -> Nullable<Text>,
    }
 }

@@ -171,9 +232,10 @@ diesel::table! {
 diesel::table! {
    tagged_photo (id) {
        id -> Integer,
-        photo_name -> Text,
+        rel_path -> Text,
        tag_id -> Integer,
        created_time -> BigInt,
+        content_hash -> Nullable<Text>,
    }
 }

@@ -196,7 +258,8 @@ diesel::table! {
 diesel::table! {
    video_preview_clips (id) {
        id -> Integer,
-        file_path -> Text,
+        library_id -> Integer,
+        rel_path -> Text,
        status -> Text,
        duration_seconds -> Nullable<Float>,
        file_size_bytes -> Nullable<Integer>,
@@ -208,7 +271,15 @@ diesel::table! {

 diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
 diesel::joinable!(entity_photo_links -> entities (entity_id));
+diesel::joinable!(entity_photo_links -> libraries (library_id));
+diesel::joinable!(face_detections -> libraries (library_id));
+diesel::joinable!(face_detections -> persons (person_id));
+diesel::joinable!(image_exif -> libraries (library_id));
+diesel::joinable!(personas -> users (user_id));
+diesel::joinable!(persons -> entities (entity_id));
+diesel::joinable!(photo_insights -> libraries (library_id));
 diesel::joinable!(tagged_photo -> tags (tag_id));
+diesel::joinable!(video_preview_clips -> libraries (library_id));

 diesel::allow_tables_to_appear_in_same_query!(
    calendar_events,
@@ -216,11 +287,13 @@ diesel::allow_tables_to_appear_in_same_query!(
    entities,
    entity_facts,
    entity_photo_links,
+    face_detections,
    favorites,
    image_exif,
-    knowledge_embeddings,
+    libraries,
    location_history,
-    message_embeddings,
+    personas,
+    persons,
    photo_insights,
    search_history,
    tagged_photo,
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use diesel::prelude::*;
 use diesel::sqlite::SqliteConnection;
 use serde::Serialize;
@@ -0,0 +1,507 @@
+//! Canonical `date_taken` resolution for ingest and the per-tick backfill
+//! drain.
+//!
+//! The waterfall (in order; first hit wins):
+//!
+//! 1. **kamadak-exif** — fast in-process EXIF read. Already done by
+//!    `exif::extract_exif_from_path` for image-bearing formats; callers
+//!    pass that result in via `prior_exif_date` so we don't re-parse.
+//! 2. **exiftool** — shell-out fallback that reaches places kamadak-exif
+//!    can't: QuickTime/MP4 (`MediaCreateDate`, `TrackCreateDate`,
+//!    `CreateDate`), Apple's `ContentCreateDate`, MakerNote sub-IFDs.
+//!    Required for videos to land a real date; degrades silently when
+//!    `exiftool` isn't on PATH.
+//! 3. **filename regex** — `memories::extract_date_from_filename` covers
+//!    common screenshot / chat-export / timestamp-named patterns.
+//! 4. **earliest filesystem time** — `utils::earliest_fs_time` picks the
+//!    earlier of created / modified, which on copied-from-backup files is
+//!    a better proxy for content age than either alone.
+//!
+//! `DateSource` records which step won so the per-tick drain can re-resolve
+//! weak sources (`fs_time`) once exiftool becomes available, and so the
+//! UI/debug surface can answer "why does this photo show up under this
+//! date." Note that the previous `/memories` request-time logic preferred
+//! filename even when EXIF was present; this resolver inverts that — EXIF
+//! is authoritative when it exists, on the theory that an EXIF
+//! `DateTimeOriginal` is more reliable than a filename pattern that may
+//! reflect import time rather than capture time.
+
+use std::collections::HashMap;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::sync::OnceLock;
+
+use chrono::{DateTime, Utc};
+use log::{debug, trace, warn};
+use serde::Deserialize;
+
+use crate::utils::earliest_fs_time;
+
+/// Which step of the `date_taken` waterfall produced a resolved timestamp.
+/// Variants are declared in the same order the resolver tries them.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum DateSource {
+    /// kamadak-exif read DateTime/DateTimeOriginal directly.
+    Exif,
+    /// exiftool fallback caught a video / MakerNote / QuickTime tag.
+    Exiftool,
+    /// `extract_date_from_filename` matched a known pattern.
+    Filename,
+    /// Fell through to `earliest_fs_time(metadata)`.
+    FsTime,
+}
+
+impl DateSource {
+    /// Lowercase string identifier for this source: `"exif"`, `"exiftool"`,
+    /// `"filename"` or `"fs_time"`.
+    // NOTE(review): presumably the value persisted in the
+    // `date_taken_source` column — confirm against the writer before
+    // renaming any of these strings.
+    pub fn as_str(self) -> &'static str {
+        match self {
+            DateSource::Exif => "exif",
+            DateSource::Exiftool => "exiftool",
+            DateSource::Filename => "filename",
+            DateSource::FsTime => "fs_time",
+        }
+    }
+}
+
+/// Outcome of the waterfall: the chosen timestamp plus the step that
+/// supplied it.
+#[derive(Copy, Clone, Debug)]
+pub struct ResolvedDate {
+    /// Unix seconds for the exiftool (`-d %s`), filename and fs-time steps
+    /// (`DateTime::timestamp`). A caller-supplied EXIF value is passed
+    /// through unchanged, so it is presumably in the same unit — TODO
+    /// confirm against the EXIF extractor.
+    pub timestamp: i64,
+    /// Waterfall step that produced `timestamp`.
+    pub source: DateSource,
+}
+
+/// Resolve the canonical date for a single file, given an already-extracted
+/// kamadak-exif date if available. Returns `None` only if every step in the
+/// waterfall fails — for files that exist on disk this should be vanishingly
+/// rare (the fs-time fallback alone almost always succeeds).
+///
+/// Steps are tried in order and the first hit is returned together with the
+/// `DateSource` that produced it.
+pub fn resolve_date_taken(path: &Path, prior_exif_date: Option<i64>) -> Option<ResolvedDate> {
+    // Step 1: a date the caller already extracted wins outright; the file
+    // itself is not touched in this case.
+    if let Some(ts) = prior_exif_date {
+        return Some(ResolvedDate {
+            timestamp: ts,
+            source: DateSource::Exif,
+        });
+    }
+    // Step 2: exiftool subprocess (returns None when exiftool is missing
+    // or reports no usable tag).
+    if let Some(ts) = exiftool_date_single(path) {
+        return Some(ResolvedDate {
+            timestamp: ts,
+            source: DateSource::Exiftool,
+        });
+    }
+    // Step 3: date embedded in the file name. Skipped when the path has no
+    // final component or the name is not valid UTF-8.
+    if let Some(dt) = path
+        .file_name()
+        .and_then(|f| f.to_str())
+        .and_then(crate::memories::extract_date_from_filename)
+    {
+        return Some(ResolvedDate {
+            timestamp: dt.timestamp(),
+            source: DateSource::Filename,
+        });
+    }
+    // Step 4: filesystem timestamps. This is the only step that requires
+    // the file to be stat-able.
+    if let Ok(meta) = std::fs::metadata(path)
+        && let Some(t) = earliest_fs_time(&meta)
+    {
+        let dt: DateTime<Utc> = t.into();
+        return Some(ResolvedDate {
+            timestamp: dt.timestamp(),
+            source: DateSource::FsTime,
+        });
+    }
+    None
+}
+
+/// Batch waterfall. exiftool runs once over the whole batch (single
+/// subprocess); everything else is per-file and runs only on misses.
+/// `prior_exif_dates` lets the caller pass in already-known kamadak dates
+/// keyed by path; entries without a prior date fall through to exiftool
+/// and the rest of the waterfall.
+///
+/// The per-tick backfill drain is the primary caller — it loads ~500 rows
+/// at a time and uses one exiftool subprocess to drain the lot.
+///
+/// The returned map is keyed by the input paths. A path for which every
+/// step fails is simply absent from the map, and a path listed more than
+/// once in `paths` yields a single entry.
+pub fn resolve_dates_batch(
+    paths: &[PathBuf],
+    prior_exif_dates: &HashMap<PathBuf, i64>,
+) -> HashMap<PathBuf, ResolvedDate> {
+    let mut out: HashMap<PathBuf, ResolvedDate> = HashMap::new();
+    let mut needs_exiftool: Vec<&Path> = Vec::with_capacity(paths.len());
+
+    // Pass 1: accept caller-supplied EXIF dates as-is; queue everything
+    // else for the exiftool batch.
+    for path in paths {
+        if let Some(&ts) = prior_exif_dates.get(path) {
+            out.insert(
+                path.clone(),
+                ResolvedDate {
+                    timestamp: ts,
+                    source: DateSource::Exif,
+                },
+            );
+        } else {
+            needs_exiftool.push(path.as_path());
+        }
+    }
+
+    // Pass 2: one exiftool subprocess for all queued paths.
+    if !needs_exiftool.is_empty() {
+        let exiftool_results = exiftool_dates_batch(&needs_exiftool);
+        for path in &needs_exiftool {
+            // NOTE(review): the result map is keyed by the `SourceFile`
+            // string exiftool reports, so this lookup only hits when
+            // exiftool echoes the path exactly as it was handed over.
+            // A path exiftool rewrites would silently fall through to the
+            // filename / fs-time steps — confirm on the target platform.
+            if let Some(&ts) = exiftool_results.get(*path) {
+                out.insert(
+                    path.to_path_buf(),
+                    ResolvedDate {
+                        timestamp: ts,
+                        source: DateSource::Exiftool,
+                    },
+                );
+            }
+        }
+    }
+
+    // Pass 3: per-file fallbacks for anything still unresolved — filename
+    // pattern first, then filesystem timestamps.
+    for path in paths {
+        if out.contains_key(path) {
+            continue;
+        }
+        if let Some(dt) = path
+            .file_name()
+            .and_then(|f| f.to_str())
+            .and_then(crate::memories::extract_date_from_filename)
+        {
+            out.insert(
+                path.clone(),
+                ResolvedDate {
+                    timestamp: dt.timestamp(),
+                    source: DateSource::Filename,
+                },
+            );
+            continue;
+        }
+        if let Ok(meta) = std::fs::metadata(path)
+            && let Some(t) = earliest_fs_time(&meta)
+        {
+            let dt: DateTime<Utc> = t.into();
+            out.insert(
+                path.clone(),
+                ResolvedDate {
+                    timestamp: dt.timestamp(),
+                    source: DateSource::FsTime,
+                },
+            );
+        }
+    }
+
+    out
+}
+
+/// Tags requested from exiftool, listed in priority order. The first tag
+/// with a positive (`> 0`) value wins.
+///
+/// This constant only controls which tags are put on the exiftool command
+/// line. The fall-through order itself is the `tags` array inside
+/// `parse_exiftool_json`, and `ExiftoolEntry` needs one field per tag —
+/// keep all three in sync when adding or reordering tags.
+///
+/// Photos: `DateTimeOriginal` (original capture) and `SubSecDateTimeOriginal`
+/// are most authoritative. `CreateDate` is a common alias and a sane fallback.
+///
+/// Videos: `MediaCreateDate` / `TrackCreateDate` are the QuickTime/MP4
+/// timestamps. `ContentCreateDate` is Apple's iOS-set tag; it often
+/// reflects local capture time on iPhone exports better than the others.
+///
+/// Notably absent: `FileModifyDate` / `FileAccessDate` — those are
+/// filesystem-derived and the resolver covers them via the `fs_time`
+/// fallback. Letting exiftool pull them here would mask "no real EXIF
+/// date" with a `source = exiftool` row that's no better than fs_time.
+const EXIFTOOL_DATE_TAGS: &[&str] = &[
+    "DateTimeOriginal",
+    "SubSecDateTimeOriginal",
+    "CreateDate",
+    "MediaCreateDate",
+    "TrackCreateDate",
+    "ContentCreateDate",
+];
+
+/// Cache the "exiftool exists on PATH" check across the process lifetime so
+/// the per-tick backfill doesn't fork a doomed subprocess every iteration on
+/// deploys without exiftool installed.
+///
+/// The probe (`exiftool -ver`) runs at most once. A negative result is
+/// cached just like a positive one, so installing exiftool while the
+/// process is running has no effect until the process is restarted.
+fn exiftool_available() -> bool {
+    static AVAIL: OnceLock<bool> = OnceLock::new();
+    *AVAIL.get_or_init(|| {
+        // Spawn failure (binary not found) and a non-zero exit are both
+        // treated as "not available"; the probe's output is discarded.
+        let ok = Command::new("exiftool")
+            .arg("-ver")
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .status()
+            .map(|s| s.success())
+            .unwrap_or(false);
+        if !ok {
+            warn!("exiftool not on PATH; date_taken waterfall skips that step");
+        }
+        ok
+    })
+}
+
+/// One-file exiftool invocation. Used by the upload + GPS-write paths,
+/// which deal with one file at a time. The batch path uses
+/// `exiftool_dates_batch` so we don't pay subprocess startup per row.
+///
+/// Returns the highest-priority positive timestamp exiftool reported for
+/// `path`, or `None` when exiftool is unavailable, cannot be spawned,
+/// exits non-zero, or reports no usable date tag.
+///
+/// Notably absent: `-fast` / `-fast2`. For QuickTime/MP4 files whose
+/// `moov` atom sits at the end (non-faststart, common for Snapchat
+/// exports and any MP4 muxed without `-movflags +faststart`), `-fast2`
+/// causes exiftool to skip the trailer and return no `CreateDate` /
+/// `MediaCreateDate`, dropping us to the `fs_time` fallback for files
+/// that actually have a real capture date. We pre-filter to files that
+/// kamadak-exif couldn't read, so the JPEG fast-path is already covered
+/// — paying full-scan cost on the residual is the right trade.
+fn exiftool_date_single(path: &Path) -> Option<i64> {
+    if !exiftool_available() {
+        return None;
+    }
+    let mut cmd = Command::new("exiftool");
+    // `-j`: JSON output, `-q`: quiet, `-d %s`: format dates as epoch
+    // seconds so the parser only has to read integers.
+    cmd.arg("-j").arg("-q").arg("-d").arg("%s");
+    for tag in EXIFTOOL_DATE_TAGS {
+        cmd.arg(format!("-{}", tag));
+    }
+    cmd.arg(path);
+    let output = cmd.output().ok()?;
+    if !output.status.success() {
+        trace!("exiftool exited non-zero for {:?}", path);
+        return None;
+    }
+    // Only one file was passed, so at most one entry is expected; take the
+    // first and drop its SourceFile.
+    parse_exiftool_json(&output.stdout)
+        .into_iter()
+        .next()
+        .map(|(_, ts)| ts)
+}
+
+/// Drain a batch via a single exiftool subprocess. Paths are fed on stdin
+/// via `-@ -`, so the argv stays short regardless of batch size — safe for
+/// libraries with very long path components.
+///
+/// Returns a map from the `SourceFile` exiftool reported to the chosen
+/// timestamp. Files with no usable date tag are absent. Every failure mode
+/// (exiftool missing, spawn/wait error, unparseable output) degrades to a
+/// smaller or empty map rather than an error.
+fn exiftool_dates_batch(paths: &[&Path]) -> HashMap<PathBuf, i64> {
+    let mut out = HashMap::new();
+    if paths.is_empty() || !exiftool_available() {
+        return out;
+    }
+
+    let mut cmd = Command::new("exiftool");
+    // No `-fast2` — see exiftool_date_single for the rationale (QuickTime
+    // moov-at-end files miss CreateDate / MediaCreateDate when the trailer
+    // is skipped).
+    cmd.arg("-j").arg("-q").arg("-d").arg("%s");
+    for tag in EXIFTOOL_DATE_TAGS {
+        cmd.arg(format!("-{}", tag));
+    }
+    // `-@ -`: read further arguments (here: one path per line) from stdin.
+    cmd.arg("-@").arg("-");
+    cmd.stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::null());
+
+    let mut child = match cmd.spawn() {
+        Ok(c) => c,
+        Err(e) => {
+            warn!("exiftool batch spawn failed: {}", e);
+            return out;
+        }
+    };
+
+    // Taking the handle moves it into this block; it is dropped at the
+    // closing brace, which closes the pipe and signals end-of-input.
+    if let Some(mut stdin) = child.stdin.take() {
+        for p in paths {
+            // exiftool's argfile reader treats each line as one path; OS
+            // path bytes don't always survive a String round-trip, but
+            // every path we get here originated from rel_path / root_path
+            // strings already, so to-string-lossy is a non-event.
+            if let Err(e) = writeln!(stdin, "{}", p.display()) {
+                // Stop feeding on the first write error; whatever was
+                // already written may still produce results.
+                warn!("exiftool batch stdin write failed: {}", e);
+                break;
+            }
+        }
+    }
+
+    let output = match child.wait_with_output() {
+        Ok(o) => o,
+        Err(e) => {
+            warn!("exiftool batch wait failed: {}", e);
+            return out;
+        }
+    };
+    // A non-zero exit is not fatal here: stdout is parsed regardless.
+    if !output.status.success() {
+        debug!(
+            "exiftool batch exit status {:?}; partial output may still parse",
+            output.status.code()
+        );
+    }
+    for (source, ts) in parse_exiftool_json(&output.stdout) {
+        out.insert(PathBuf::from(source), ts);
+    }
+    out
+}
+
+/// One row per input file. exiftool emits any tag we asked for that was
+/// present, plus the `SourceFile` it was reading. Tags are JSON values
+/// because `-d %s` returns the timestamp as a *string* of digits, not a
+/// number, when the date parses; absent tags are simply missing keys.
+///
+/// Each optional field corresponds to one entry of `EXIFTOOL_DATE_TAGS`;
+/// a tag without a field here would be requested but never read.
+#[derive(Debug, Deserialize)]
+struct ExiftoolEntry {
+    // Path as reported by exiftool; used as the result key.
+    #[serde(rename = "SourceFile")]
+    source_file: String,
+    #[serde(rename = "DateTimeOriginal")]
+    date_time_original: Option<serde_json::Value>,
+    #[serde(rename = "SubSecDateTimeOriginal")]
+    sub_sec_date_time_original: Option<serde_json::Value>,
+    #[serde(rename = "CreateDate")]
+    create_date: Option<serde_json::Value>,
+    #[serde(rename = "MediaCreateDate")]
+    media_create_date: Option<serde_json::Value>,
+    #[serde(rename = "TrackCreateDate")]
+    track_create_date: Option<serde_json::Value>,
+    #[serde(rename = "ContentCreateDate")]
+    content_create_date: Option<serde_json::Value>,
+}
+
+/// Parse exiftool's `-j` output (a JSON array of per-file objects) into
+/// `(SourceFile, unix_seconds)` pairs, in the order exiftool emitted them.
+///
+/// For each entry the tags are tried in priority order and the first value
+/// that coerces to an integer greater than zero is used. Entries with no
+/// such value are omitted. Output that does not deserialize yields an
+/// empty vector (with a warning unless the input was empty).
+fn parse_exiftool_json(stdout: &[u8]) -> Vec<(String, i64)> {
+    let entries: Vec<ExiftoolEntry> = match serde_json::from_slice(stdout) {
+        Ok(v) => v,
+        Err(e) => {
+            // Empty stdout on total failure isn't a parse error worth
+            // logging at warn — the caller already noted the non-zero
+            // exit status.
+            if !stdout.is_empty() {
+                warn!("exiftool JSON parse failed: {}", e);
+            }
+            return Vec::new();
+        }
+    };
+
+    let mut out = Vec::with_capacity(entries.len());
+    for entry in entries {
+        // Walk the priority list. exiftool sometimes returns the literal
+        // string "0000:00:00 00:00:00" for missing-but-allocated date
+        // slots; with `-d %s` that becomes the unix epoch (0). Reject
+        // anything <= 0 so we fall through to the next tag.
+        //
+        // The order of this array is what actually defines tag priority.
+        let tags = [
+            entry.date_time_original.as_ref(),
+            entry.sub_sec_date_time_original.as_ref(),
+            entry.create_date.as_ref(),
+            entry.media_create_date.as_ref(),
+            entry.track_create_date.as_ref(),
+            entry.content_create_date.as_ref(),
+        ];
+        let mut chosen: Option<i64> = None;
+        // `flatten` skips tags that were absent from the JSON object.
+        for tag in tags.iter().flatten() {
+            if let Some(ts) = coerce_to_unix_seconds(tag)
+                && ts > 0
+            {
+                chosen = Some(ts);
+                break;
+            }
+        }
+        if let Some(ts) = chosen {
+            out.push((entry.source_file, ts));
+        }
+    }
+    out
+}
+
+/// `-d %s` should hand us a numeric string, but exiftool's JSON encoder
+/// will emit a number when the tag was defined as numeric in its lib —
+/// accept both shapes.
+///
+/// Returns `None` for strings that do not parse as `i64` after trimming
+/// (for example an unformatted date string), for numbers that are not
+/// representable as `i64` (floats included), and for any other JSON type.
+fn coerce_to_unix_seconds(v: &serde_json::Value) -> Option<i64> {
+    match v {
+        serde_json::Value::String(s) => s.trim().parse::<i64>().ok(),
+        serde_json::Value::Number(n) => n.as_i64(),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // With both DateTimeOriginal and CreateDate present, the
+    // higher-priority DateTimeOriginal is the one returned.
+    #[test]
+    fn parse_exiftool_json_picks_first_priority_tag() {
+        let json = br#"[{
+            "SourceFile": "/lib/IMG.jpg",
+            "DateTimeOriginal": "1500000000",
+            "CreateDate": "1400000000"
+        }]"#;
+        let parsed = parse_exiftool_json(json);
+        assert_eq!(parsed, vec![("/lib/IMG.jpg".to_string(), 1500000000)]);
+    }
+
+    // A zero in a higher-priority tag must not block a valid lower-priority
+    // one.
+    #[test]
+    fn parse_exiftool_json_falls_through_zeros() {
+        // exiftool emits "0000:00:00 00:00:00" → unix epoch 0 with -d %s.
+        // The resolver should skip those and pick the next tag.
+        let json = br#"[{
+            "SourceFile": "/lib/clip.mov",
+            "DateTimeOriginal": "0",
+            "MediaCreateDate": "1500000000"
+        }]"#;
+        let parsed = parse_exiftool_json(json);
+        assert_eq!(parsed, vec![("/lib/clip.mov".to_string(), 1500000000)]);
+    }
+
+    // A bare JSON number is accepted the same way as a string of digits.
+    #[test]
+    fn parse_exiftool_json_accepts_numeric_values() {
+        let json = br#"[{
+            "SourceFile": "/lib/a.jpg",
+            "CreateDate": 1234567890
+        }]"#;
+        let parsed = parse_exiftool_json(json);
+        assert_eq!(parsed, vec![("/lib/a.jpg".to_string(), 1234567890)]);
+    }
+
+    // An entry that carries only SourceFile produces no result tuple.
+    #[test]
+    fn parse_exiftool_json_emits_nothing_when_no_tag_present() {
+        let json = br#"[{"SourceFile": "/lib/no_dates.bin"}]"#;
+        let parsed = parse_exiftool_json(json);
+        assert!(parsed.is_empty());
+    }
+
+    // Several entries are resolved independently (each by its own best
+    // tag) and returned in input order.
+    #[test]
+    fn parse_exiftool_json_handles_multiple_entries() {
+        let json = br#"[
+            {"SourceFile": "/lib/a.jpg", "DateTimeOriginal": "100"},
+            {"SourceFile": "/lib/b.jpg", "CreateDate": "200"}
+        ]"#;
+        let parsed = parse_exiftool_json(json);
+        assert_eq!(
+            parsed,
+            vec![
+                ("/lib/a.jpg".to_string(), 100),
+                ("/lib/b.jpg".to_string(), 200)
+            ]
+        );
+    }
+
+    // Pin the exact identifier of every variant. Asserting only
+    // "non-empty" would let a typo, a rename, or two variants sharing one
+    // string slip through unnoticed.
+    #[test]
+    fn date_source_as_str_round_trip() {
+        let expected = [
+            (DateSource::Exif, "exif"),
+            (DateSource::Exiftool, "exiftool"),
+            (DateSource::Filename, "filename"),
+            (DateSource::FsTime, "fs_time"),
+        ];
+        for (src, name) in expected {
+            assert_eq!(src.as_str(), name);
+        }
+    }
+
+    // A caller-supplied EXIF date short-circuits the whole waterfall and is
+    // returned unchanged with source Exif.
+    #[test]
+    fn resolve_uses_prior_exif_when_present() {
+        // Path doesn't need to exist when prior_exif_date short-circuits.
+        let resolved =
+            resolve_date_taken(Path::new("/nonexistent/file.jpg"), Some(1700000000)).unwrap();
+        assert_eq!(resolved.timestamp, 1700000000);
+        assert_eq!(resolved.source, DateSource::Exif);
+    }
+
+    // The filename step works on the path alone, so it still resolves a
+    // date for a file that does not exist on disk.
+    #[test]
+    fn resolve_filename_when_no_exif_and_file_missing() {
+        // No prior EXIF, no exiftool match (file missing), but the filename
+        // pattern still matches so the resolver lands on Filename.
+        let resolved = resolve_date_taken(
+            Path::new("/nonexistent/Screenshot_2014-06-01-20-44-50.png"),
+            None,
+        )
+        .unwrap();
+        assert_eq!(resolved.source, DateSource::Filename);
+    }
+
+    // An empty file with an undated name has nothing but filesystem
+    // metadata to go on, so the resolver must end at FsTime.
+    #[test]
+    fn resolve_fs_time_when_only_metadata_available() {
+        let dir = tempfile::tempdir().unwrap();
+        let path = dir.path().join("plain.jpg");
+        std::fs::File::create(&path).unwrap();
+        let resolved = resolve_date_taken(&path, None).unwrap();
+        // exiftool may or may not be installed in the test env; either
+        // way the file has no EXIF and no filename date, so we should
+        // fall to fs_time.
+        assert_eq!(resolved.source, DateSource::FsTime);
+    }
+}
@@ -1,9 +1,11 @@
 use std::fs::File;
-use std::io::BufReader;
+use std::io::{BufReader, Read, Seek, SeekFrom};
 use std::path::Path;
+use std::process::Command;

 use anyhow::{Result, anyhow};
 use exif::{In, Reader, Tag, Value};
+use image::DynamicImage;
 use log::debug;
 use serde::{Deserialize, Serialize};

@@ -25,6 +27,233 @@ pub struct ExifData {
    pub date_taken: Option<i64>,
 }

+/// Reports whether `path` has one of the extensions this module treats as
+/// a TIFF-container format, i.e. one where `JPEGInterchangeFormat` offsets
+/// are taken to be absolute file offsets. The comparison is
+/// case-insensitive; a path with no extension, or a non-UTF-8 extension,
+/// yields `false`.
+pub fn is_tiff_raw(path: &Path) -> bool {
+    const TIFF_RAW_EXTENSIONS: [&str; 11] = [
+        "tiff", "tif", "nef", "cr2", "arw", "dng", "raf", "orf", "rw2", "pef", "srw",
+    ];
+
+    let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
+        return false;
+    };
+    let ext = ext.to_lowercase();
+    TIFF_RAW_EXTENSIONS.iter().any(|known| *known == ext)
+}
+
+/// Read the JPEG bytes pointed to by `JPEGInterchangeFormat` /
+/// `JPEGInterchangeFormatLength` in a single IFD. Returns `None` on any
+/// failure: tags missing, length too short to hold a JPEG, a declared
+/// range that runs past the end of the file, file read failure, or bytes
+/// that don't start with the JPEG SOI marker (some MakerNote pointers
+/// reference TIFF-wrapped previews or other non-JPEG payloads we can't
+/// load).
+///
+/// The offset is used as an absolute position from the start of `path`.
+fn read_jpeg_at_ifd(exif: &exif::Exif, path: &Path, ifd: In) -> Option<Vec<u8>> {
+    let offset = u64::from(
+        exif.get_field(Tag::JPEGInterchangeFormat, ifd)?
+            .value
+            .get_uint(0)?,
+    );
+    let length = u64::from(
+        exif.get_field(Tag::JPEGInterchangeFormatLength, ifd)?
+            .value
+            .get_uint(0)?,
+    );
+    // Anything shorter than the two-byte SOI marker can't be a JPEG.
+    if length < 2 {
+        return None;
+    }
+
+    let mut file = File::open(path).ok()?;
+
+    // Offset and length come straight from the (possibly corrupt) file.
+    // Check them against the real file size BEFORE allocating, so a bogus
+    // length can't make us zero-fill gigabytes only for `read_exact` to
+    // fail afterwards.
+    let file_len = file.metadata().ok()?.len();
+    if offset.checked_add(length)? > file_len {
+        return None;
+    }
+
+    file.seek(SeekFrom::Start(offset)).ok()?;
+    let mut buf = vec![0u8; usize::try_from(length).ok()?];
+    file.read_exact(&mut buf).ok()?;
+
+    if !buf.starts_with(&[0xFF, 0xD8]) {
+        return None;
+    }
+
+    Some(buf)
+}
+
+/// Shell out to `exiftool -j -G -n <path>` and return the per-file tag map.
+///
+/// `-j` requests JSON; the response is always an array of one element per
+/// input path. `-G` prefixes each key with the group name (`EXIF:Make`,
+/// `MakerNotes:LensInfo`, `File:FileSize`, …) so a UI can group the dump.
+/// `-n` returns numeric / raw values rather than exiftool's pretty-printed
+/// human strings, which keeps the output stable for clients that want to
+/// reformat (e.g. divide a focal-length numerator/denominator).
+///
+/// Returns:
+///  - `Ok(Some(value))` — the parsed JSON object for this file.
+///  - `Ok(None)` — exiftool ran but the array was empty, or its first
+///    element was not a JSON object.
+///  - `Err(_)` — exiftool isn't on PATH, the spawn failed, it exited with
+///    a non-zero status, or its stdout was not a JSON array. Caller
+///    surfaces a 503 / 422.
+///
+/// Used by `GET /image/exif/full` to power Apollo's DETAILS modal "FULL
+/// EXIF" pane. Per-file shell-out is fine for this on-demand surface;
+/// the indexer does NOT call this on the hot path (kamadak-exif covers
+/// the indexed columns; exiftool is the slow-path preview helper).
+pub fn read_full_exif_via_exiftool(path: &Path) -> Result<Option<serde_json::Value>> {
+    let output = Command::new("exiftool")
+        .arg("-j")
+        .arg("-G")
+        .arg("-n")
+        .arg(path)
+        .output()
+        .map_err(|e| anyhow!("exiftool spawn failed (is it on PATH?): {}", e))?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        return Err(anyhow!(
+            "exiftool exited with {}: {}",
+            output.status,
+            stderr.trim()
+        ));
+    }
+
+    let parsed: serde_json::Value = serde_json::from_slice(&output.stdout)
+        .map_err(|e| anyhow!("exiftool returned non-JSON output: {}", e))?;
+
+    // `-j` always wraps the result in an array — pull out the first
+    // element, and honour the documented contract by handing back `None`
+    // when that element is anything other than an object.
+    let arr = parsed
+        .as_array()
+        .ok_or_else(|| anyhow!("expected JSON array from exiftool -j"))?;
+    Ok(arr.first().filter(|v| v.is_object()).cloned())
+}
+
+/// Tags exiftool exposes for embedded JPEG previews, in the order they are
+/// tried. The largest valid JPEG returned by any of them wins; the order
+/// only matters as a tie-break (`extract_preview_via_exiftool` keeps the
+/// earlier tag's bytes when two candidates have the same length).
+/// Different camera makers stash their largest preview under different
+/// names: Nikon's full-res preview lives under `PreviewImage` in the
+/// MakerNote `PreviewIFD`, Canon / Sony often expose theirs as
+/// `JpgFromRaw`, and `OtherImage` is a catch-all some sub-IFD chains use.
+const EXIFTOOL_PREVIEW_TAGS: &[&str] = &["PreviewImage", "JpgFromRaw", "OtherImage"];
+
+/// Runs `exiftool -b -<tag> <path>` and hands back its stdout when that
+/// looks like a JPEG. Yields `None` when the process can't be spawned,
+/// exits unsuccessfully, or prints bytes that don't begin with the JPEG
+/// SOI marker `FF D8` (some MakerNote tags hold TIFF-wrapped previews or
+/// other non-JPEG payloads we can't load).
+fn extract_exiftool_tag(path: &Path, tag: &str) -> Option<Vec<u8>> {
+    let tag_flag = format!("-{}", tag);
+    let mut cmd = Command::new("exiftool");
+    cmd.arg("-b").arg(tag_flag).arg(path);
+
+    match cmd.output() {
+        Ok(out) if out.status.success() && out.stdout.starts_with(&[0xFF, 0xD8]) => {
+            Some(out.stdout)
+        }
+        _ => None,
+    }
+}
+
+/// Asks exiftool for every tag in `EXIFTOOL_PREVIEW_TAGS`, in order, and
+/// keeps the longest valid JPEG; on equal length the earlier tag's bytes
+/// are kept. When `exiftool` isn't on PATH each spawn fails, so the result
+/// is simply `None` and callers fall back to whatever the IFD0/IFD1 fast
+/// path found.
+fn extract_preview_via_exiftool(path: &Path) -> Option<Vec<u8>> {
+    EXIFTOOL_PREVIEW_TAGS
+        .iter()
+        .filter_map(|tag| extract_exiftool_tag(path, tag))
+        .fold(None, |best: Option<Vec<u8>>, candidate| match best {
+            // Strictly-larger replaces; ties keep the earlier candidate.
+            Some(current) if current.len() >= candidate.len() => Some(current),
+            _ => Some(candidate),
+        })
+}
+
+/// Returns the bytes of the embedded JPEG preview in a TIFF-based RAW or
+/// TIFF file. Used to thumbnail formats whose RAW pixel data can't be decoded
+/// by our normal tools (e.g. Sony ARW), and to serve a usable full-size
+/// image for clients that can't decode the RAW container directly. Returns
+/// `None` if the extension isn't one `is_tiff_raw` accepts, the file can't
+/// be opened, or no candidate looks like a valid JPEG.
+///
+/// Strategy:
+///   1. Fast path: read `JPEGInterchangeFormat` from IFD0 (PRIMARY) and IFD1
+///      (THUMBNAIL) directly via kamadak-exif. No subprocess, no external
+///      dependency. If kamadak-exif can't parse the container at all, this
+///      path simply contributes no candidates.
+///   2. Slow path: shell out to `exiftool -b -<tag>` for each of
+///      `PreviewImage` / `JpgFromRaw` / `OtherImage`. kamadak-exif can't
+///      reach SubIFDs or MakerNote sub-IFDs, but most modern Nikon bodies
+///      stash their large preview JPEG in the Nikon MakerNote's PreviewIFD;
+///      Canon / Sony often use `JpgFromRaw` in a SubIFD chain. Skipped
+///      gracefully if exiftool isn't on PATH.
+///
+/// All candidates are pooled and the largest valid JPEG wins, so a deploy
+/// without exiftool degrades to "fast-path only" behavior rather than
+/// breaking outright, and a container kamadak-exif rejects degrades to
+/// "exiftool only".
+pub fn extract_embedded_jpeg_preview(path: &Path) -> Option<Vec<u8>> {
+    if !is_tiff_raw(path) {
+        return None;
+    }
+
+    // If we can't even open the file, exiftool won't be able to either.
+    let file = File::open(path).ok()?;
+    let mut bufreader = BufReader::new(file);
+
+    // A parse failure must NOT short-circuit the whole function: the two
+    // paths are documented as independent candidate sources, and some of
+    // the extensions `is_tiff_raw` accepts may not be containers
+    // kamadak-exif recognises (NOTE(review): confirm which ones against
+    // the kamadak-exif version in use).
+    let (primary, thumbnail) = match Reader::new().read_from_container(&mut bufreader) {
+        Ok(exif) => (
+            read_jpeg_at_ifd(&exif, path, In::PRIMARY),
+            read_jpeg_at_ifd(&exif, path, In::THUMBNAIL),
+        ),
+        Err(e) => {
+            debug!(
+                "EXIF fast path unavailable for {}: {}; trying exiftool only",
+                path.display(),
+                e
+            );
+            (None, None)
+        }
+    };
+    let exiftool = extract_preview_via_exiftool(path);
+
+    [primary, thumbnail, exiftool]
+        .into_iter()
+        .flatten()
+        .max_by_key(|v| v.len())
+}
+
+/// Stamps GPS latitude/longitude into `path`'s EXIF in place by invoking
+/// exiftool. Only the four GPS tags below are passed, so no other tag is
+/// requested to change. `-overwrite_original` is passed so no backup copy
+/// is left behind — callers who want rollback must back the file up
+/// themselves — and `-P` is passed alongside it.
+///
+/// Coordinates are sent as non-negative decimals with the hemisphere
+/// carried by the `*Ref` tags (N/S, E/W). exiftool also accepts signed
+/// decimals, but the explicit ref form is unambiguous across exiftool
+/// versions and matches what cameras write.
+///
+/// # Errors
+/// Fails when `supports_exif` rejects the path, when `lat` is outside
+/// `-90..=90` or `lon` outside `-180..=180` (NaN included), when exiftool
+/// can't be spawned, or when exiftool exits with a non-zero status (its
+/// stderr is included in the message).
+pub fn write_gps(path: &Path, lat: f64, lon: f64) -> Result<()> {
+    if !supports_exif(path) {
+        return Err(anyhow!(
+            "Format does not support EXIF GPS write: {}",
+            path.display()
+        ));
+    }
+
+    let lat_in_range = (-90.0..=90.0).contains(&lat);
+    let lon_in_range = (-180.0..=180.0).contains(&lon);
+    if !(lat_in_range && lon_in_range) {
+        return Err(anyhow!("GPS coordinates out of range: {}, {}", lat, lon));
+    }
+
+    let (lat_ref, lon_ref) = (
+        if lat >= 0.0 { "N" } else { "S" },
+        if lon >= 0.0 { "E" } else { "W" },
+    );
+    let gps_args = [
+        format!("-GPSLatitude={}", lat.abs()),
+        format!("-GPSLatitudeRef={}", lat_ref),
+        format!("-GPSLongitude={}", lon.abs()),
+        format!("-GPSLongitudeRef={}", lon_ref),
+    ];
+
+    let output = Command::new("exiftool")
+        .args(["-overwrite_original", "-P"])
+        .args(&gps_args)
+        .arg(path)
+        .output()
+        .map_err(|e| anyhow!("exiftool spawn failed (is it on PATH?): {}", e))?;
+
+    if output.status.success() {
+        return Ok(());
+    }
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    Err(anyhow!(
+        "exiftool failed (exit {}): {}",
+        output.status.code().unwrap_or(-1),
+        stderr.trim()
+    ))
+}
+
 pub fn supports_exif(path: &Path) -> bool {
    if let Some(ext) = path.extension() {
        let ext_lower = ext.to_string_lossy().to_lowercase();
@@ -123,6 +352,37 @@ pub fn extract_exif_from_path(path: &Path) -> Result<ExifData> {
    Ok(data)
 }

+/// Fetches only the EXIF Orientation tag from the primary IFD of `path`.
+/// Lighter than a full `extract_exif_from_path` for callers that need
+/// nothing else — e.g. the thumbnail pipeline, which must bake the
+/// rotation into the resized pixels because the saved thumb has no EXIF
+/// chunk for the browser to apply. Gives `None` when the file can't be
+/// opened, carries no parseable EXIF, or has no usable Orientation value.
+pub fn read_orientation(path: &Path) -> Option<i32> {
+    let mut reader = BufReader::new(File::open(path).ok()?);
+    let exif = Reader::new().read_from_container(&mut reader).ok()?;
+    exif.get_field(Tag::Orientation, In::PRIMARY)
+        .and_then(|field| get_u32_value(field))
+        .map(|raw| raw as i32)
+}
+
+/// Bakes an EXIF Orientation value into a `DynamicImage`, producing the
+/// canonically-oriented image. The transform is a rotation followed by an
+/// optional mirror:
+///   1 → as-is, 2 → flipH, 3 → rot180, 4 → flipV,
+///   5 → rot90CW + flipH, 6 → rot90CW, 7 → rot270CW + flipH, 8 → rot270CW.
+/// Any other value (missing tag, garbage) leaves the image untouched.
+pub fn apply_orientation(img: DynamicImage, orientation: i32) -> DynamicImage {
+    // Step 1: the rotation component, if any.
+    let rotated = match orientation {
+        3 => img.rotate180(),
+        5 | 6 => img.rotate90(),
+        7 | 8 => img.rotate270(),
+        _ => img,
+    };
+    // Step 2: the mirror component, if any.
+    match orientation {
+        2 | 5 | 7 => rotated.fliph(),
+        4 => rotated.flipv(),
+        _ => rotated,
+    }
+}
+
 fn get_string_value(field: &exif::Field) -> Option<String> {
    match &field.value {
        Value::Ascii(vec) => {
@@ -0,0 +1,590 @@
+//! Face-detection pass for the file watcher.
+//!
+//! `process_new_files` calls [`run_face_detection_pass`] after the EXIF
+//! registration loop. We walk the candidates (images, not yet face-scanned,
+//! not excluded by EXCLUDED_DIRS), fan out parallel detect calls to Apollo,
+//! and persist the results — detected faces, `no_faces` markers when Apollo
+//! found nothing, `failed` markers on permanent decode errors, no marker on
+//! transient failures so the next scan retries.
+//!
+//! The watcher runs in a plain `std::thread`, so we build a short-lived
+//! tokio runtime per pass and `block_on` a join of K detect futures. K is
+//! configurable via `FACE_DETECT_CONCURRENCY` (default 8). Apollo's
+//! threadpool is bounded to 1–2 workers anyway, so the runs queue
+//! server-side; the client-side fan-out is purely about overlapping IO
+//! (file read + JSON encode) with someone else's inference.
+
+use crate::ai::face_client::{DetectMeta, FaceClient, FaceDetectError};
+use crate::exif;
+use crate::faces::{self, FaceDao, InsertFaceDetectionInput};
+use crate::file_types;
+use crate::libraries::Library;
+use crate::memories::PathExcluder;
+use crate::tags::TagDao;
+use log::{debug, info, warn};
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+use tokio::sync::Semaphore;
+
+/// One file the watcher would like to face-scan. Built by the caller from
+/// the EXIF batch (we need `content_hash` to key everything against).
+#[derive(Debug, Clone)]
+pub struct FaceCandidate {
+    /// Path of the file relative to the library root; `process_one` joins
+    /// it onto the library's `root_path` to locate the bytes on disk.
+    pub rel_path: String,
+    /// Content hash of the file, passed through to Apollo and to every
+    /// `FaceDao` write made for this candidate.
+    pub content_hash: String,
+}
+
+/// Synchronous entry point. Returns once every candidate has been
+/// processed (or definitively skipped). When `face_client.is_enabled()`
+/// is false this is a no-op so the watcher can call unconditionally.
+///
+/// Candidates are first narrowed by [`filter_excluded`]; the survivors are
+/// each handed to `process_one` on a short-lived tokio runtime, with at
+/// most `FACE_DETECT_CONCURRENCY` (default 8; unset, unparseable or zero
+/// values fall back to the default) in flight at a time. A task that
+/// panics or is cancelled is logged and does not abort the pass.
+pub fn run_face_detection_pass(
+    library: &Library,
+    excluded_dirs: &[String],
+    face_client: &FaceClient,
+    face_dao: Arc<Mutex<Box<dyn FaceDao>>>,
+    tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
+    candidates: Vec<FaceCandidate>,
+) {
+    if !face_client.is_enabled() {
+        return;
+    }
+    if candidates.is_empty() {
+        return;
+    }
+
+    let base = Path::new(&library.root_path);
+    let filtered = filter_excluded(base, excluded_dirs, candidates, Some(&library.name));
+    if filtered.is_empty() {
+        return;
+    }
+
+    let concurrency: usize = std::env::var("FACE_DETECT_CONCURRENCY")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .filter(|n: &usize| *n > 0)
+        .unwrap_or(8);
+
+    info!(
+        "face_watch: running detection on {} candidates (library '{}', concurrency {})",
+        filtered.len(),
+        library.name,
+        concurrency
+    );
+
+    // Per-pass tokio runtime. The watcher thread isn't in any pre-existing
+    // async context — building one here keeps the rest of the watcher
+    // sync-only. Worker count is small; the parallelism we care about is
+    // task-level (semaphore) not thread-level.
+    let rt = match tokio::runtime::Builder::new_multi_thread()
+        .worker_threads(2)
+        .enable_all()
+        .build()
+    {
+        Ok(rt) => rt,
+        Err(e) => {
+            warn!("face_watch: failed to build tokio runtime: {e}");
+            return;
+        }
+    };
+
+    let library_id = library.id;
+    let library_root = library.root_path.clone();
+    rt.block_on(async move {
+        let sem = Arc::new(Semaphore::new(concurrency));
+        let mut handles = Vec::with_capacity(filtered.len());
+        for cand in filtered {
+            let permit_sem = sem.clone();
+            let face_client = face_client.clone();
+            let face_dao = face_dao.clone();
+            let tag_dao = tag_dao.clone();
+            let library_root = library_root.clone();
+            handles.push(tokio::spawn(async move {
+                // acquire_owned would let us drop the permit explicitly
+                // before await points; for a one-shot call into Apollo
+                // the simpler bounded acquire is enough.
+                let _permit = permit_sem.acquire().await.expect("face semaphore");
+                process_one(
+                    library_id,
+                    &library_root,
+                    cand,
+                    &face_client,
+                    face_dao,
+                    tag_dao,
+                )
+                .await;
+            }));
+        }
+        for h in handles {
+            // A JoinError means the task panicked or was cancelled.
+            // process_one contains `expect` calls (e.g. on a poisoned dao
+            // mutex) that would otherwise vanish without a trace, so
+            // surface the failure here but keep draining the rest.
+            if let Err(e) = h.await {
+                warn!("face_watch: detect task did not complete: {e}");
+            }
+        }
+    });
+}
+
+/// Run face detection for a single candidate and persist the outcome.
+///
+/// Steps: read the image bytes (off the async workers), send them to
+/// Apollo via `face_client.detect`, then
+///  - on success with no faces: write a `no_faces` marker;
+///  - on success with faces: store one `detected` row per face whose
+///    embedding decodes, then attempt auto-binding for the stored rows;
+///  - on a permanent error: write a `failed` marker so the file is not
+///    retried;
+///  - on a transient error or a read failure: write nothing, so the next
+///    pass retries.
+///
+/// Must be awaited from inside a tokio runtime (it uses `spawn_blocking`).
+async fn process_one(
+    library_id: i32,
+    library_root: &str,
+    cand: FaceCandidate,
+    face_client: &FaceClient,
+    face_dao: Arc<Mutex<Box<dyn FaceDao>>>,
+    tag_dao: Arc<Mutex<Box<dyn TagDao>>>,
+) {
+    let abs = Path::new(library_root).join(&cand.rel_path);
+    // Read the bytes on tokio's blocking pool. The read is synchronous and
+    // is not always cheap: for RAW inputs it may extract an embedded
+    // preview, which can shell out to exiftool several times. Doing that
+    // inline would park one of the runtime's two workers and stall every
+    // other in-flight detect future scheduled on it.
+    let read_result = tokio::task::spawn_blocking(move || read_image_bytes_for_detect(&abs)).await;
+    let bytes = match read_result {
+        Ok(Ok(b)) => b,
+        Ok(Err(e)) => {
+            // Don't mark — file may have been moved/renamed mid-scan; let
+            // the next pass try again. Future-bug check: a permanently
+            // unreadable file would loop forever; we accept that for v1
+            // because process_new_files already prunes vanished rows on
+            // full scans.
+            warn!(
+                "face_watch: read failed for {} ({}): {}",
+                cand.rel_path, library_id, e
+            );
+            return;
+        }
+        Err(e) => {
+            // The blocking read task itself panicked or was cancelled.
+            // Treat it like a read failure: no marker, retry next pass.
+            warn!(
+                "face_watch: read task aborted for {} ({}): {}",
+                cand.rel_path, library_id, e
+            );
+            return;
+        }
+    };
+
+    let meta = DetectMeta {
+        content_hash: cand.content_hash.clone(),
+        library_id,
+        rel_path: cand.rel_path.clone(),
+        orientation: None,
+        model_version: None,
+    };
+    let ctx = opentelemetry::Context::current();
+
+    match face_client.detect(bytes, meta).await {
+        Ok(resp) => {
+            // Stage 1: persist detections, holding the dao lock only
+            // across synchronous DB writes.
+            let mut stored_for_autobind: Vec<(i32, Vec<f32>)> = Vec::new();
+            {
+                let mut dao = face_dao.lock().expect("face dao");
+                if resp.faces.is_empty() {
+                    if let Err(e) = dao.mark_status(
+                        &ctx,
+                        library_id,
+                        &cand.content_hash,
+                        &cand.rel_path,
+                        "no_faces",
+                        &resp.model_version,
+                    ) {
+                        warn!(
+                            "face_watch: mark no_faces failed for {}: {:?}",
+                            cand.rel_path, e
+                        );
+                    }
+                    debug!(
+                        "face_watch: {} → no faces (model {})",
+                        cand.rel_path, resp.model_version
+                    );
+                } else {
+                    let face_count = resp.faces.len();
+                    for face in &resp.faces {
+                        let emb = match face.decode_embedding() {
+                            Ok(b) => b,
+                            Err(e) => {
+                                warn!("face_watch: bad embedding for {}: {:?}", cand.rel_path, e);
+                                continue;
+                            }
+                        };
+                        // Decode the f32 vector once for auto-bind comparison.
+                        let emb_floats = faces::decode_embedding_bytes(&emb);
+                        match dao.store_detection(
+                            &ctx,
+                            InsertFaceDetectionInput {
+                                library_id,
+                                content_hash: cand.content_hash.clone(),
+                                rel_path: cand.rel_path.clone(),
+                                bbox: Some((face.bbox.x, face.bbox.y, face.bbox.w, face.bbox.h)),
+                                embedding: Some(emb),
+                                confidence: Some(face.confidence),
+                                source: "auto".to_string(),
+                                person_id: None,
+                                status: "detected".to_string(),
+                                model_version: resp.model_version.clone(),
+                            },
+                        ) {
+                            Ok(row) => {
+                                if let Some(floats) = emb_floats {
+                                    stored_for_autobind.push((row.id, floats));
+                                }
+                            }
+                            Err(e) => warn!(
+                                "face_watch: store_detection failed for {}: {:?}",
+                                cand.rel_path, e
+                            ),
+                        }
+                    }
+                    info!(
+                        "face_watch: {} → {} face(s) ({}ms, {})",
+                        cand.rel_path, face_count, resp.duration_ms, resp.model_version
+                    );
+                }
+            }
+
+            // Stage 2: auto-bind newly-stored faces against same-named
+            // people-tags. Done outside the dao lock so the lookups don't
+            // serialize with concurrent detect tasks.
+            if !stored_for_autobind.is_empty() {
+                try_auto_bind(
+                    &ctx,
+                    &cand.rel_path,
+                    &resp.model_version,
+                    stored_for_autobind,
+                    &tag_dao,
+                    &face_dao,
+                );
+            }
+        }
+        Err(FaceDetectError::Permanent(e)) => {
+            warn!(
+                "face_watch: permanent failure on {}: {} — marking failed",
+                cand.rel_path, e
+            );
+            let mut dao = face_dao.lock().expect("face dao");
+            // model_version is best-effort here — the engine that rejected
+            // the bytes may not have echoed one. Empty string is fine; this
+            // row is purely a "don't retry" sentinel.
+            if let Err(e) = dao.mark_status(
+                &ctx,
+                library_id,
+                &cand.content_hash,
+                &cand.rel_path,
+                "failed",
+                "",
+            ) {
+                warn!(
+                    "face_watch: mark failed errored for {}: {:?}",
+                    cand.rel_path, e
+                );
+            }
+        }
+        Err(FaceDetectError::Transient(e)) => {
+            // Don't mark anything; next scan tick retries naturally.
+            // Demoted to debug because OOM and engine-not-ready are noisy
+            // and self-resolving.
+            debug!(
+                "face_watch: transient on {}: {} (will retry next pass)",
+                cand.rel_path, e
+            );
+        }
+        Err(FaceDetectError::Disabled) => {
+            // Caller already checked is_enabled(); this branch is defensive.
+        }
+    }
+}
+
+/// Auto-bind newly-detected faces to a same-named person, when a tag on the
+/// photo unambiguously identifies one. Driven by `FACE_AUTOBIND_MIN_COS`
+/// (default 0.4; values outside `0.0..=1.0` or unparseable fall back to the
+/// default): the new face's embedding must reach this cosine similarity
+/// against the person's reference embedding for the same `model_version`.
+/// The first face for a person binds unconditionally — there's nothing to
+/// compare against, and the alternative ("never bind without a reference")
+/// would mean bootstrap never kicks off.
+///
+/// Bootstrap binds exactly ONE face per call: once the first face has been
+/// assigned, its embedding becomes the reference for the remaining faces
+/// of the same photo, so a group shot tagged with one name does not bind
+/// every face in it to that person.
+///
+/// Multi-match (the photo carries tags for two different known persons)
+/// is intentionally a no-op — we can't tell which face is which without
+/// additional matching. Those faces stay unassigned for the cluster
+/// suggester (Phase 6) to handle.
+///
+/// All failures are logged and swallowed; this function never panics on a
+/// DAO error (it does panic if a DAO mutex is poisoned).
+fn try_auto_bind(
+    ctx: &opentelemetry::Context,
+    rel_path: &str,
+    model_version: &str,
+    new_faces: Vec<(i32, Vec<f32>)>, // (face_id, decoded embedding)
+    tag_dao: &Arc<Mutex<Box<dyn TagDao>>>,
+    face_dao: &Arc<Mutex<Box<dyn FaceDao>>>,
+) {
+    // 1. Pull the photo's tags.
+    let tag_names: Vec<String> = {
+        let mut td = tag_dao.lock().expect("tag dao");
+        match td.get_tags_for_path(ctx, rel_path) {
+            Ok(tags) => tags.into_iter().map(|t| t.name).collect(),
+            Err(e) => {
+                warn!(
+                    "face_watch: get_tags_for_path failed for {}: {:?}",
+                    rel_path, e
+                );
+                return;
+            }
+        }
+    };
+    if tag_names.is_empty() {
+        return;
+    }
+
+    // 2. Find tags that map to existing persons (case-insensitive).
+    let person_for_tag: std::collections::HashMap<String, i32> = {
+        let mut fd = face_dao.lock().expect("face dao");
+        match fd.find_persons_by_names_ci(ctx, &tag_names) {
+            Ok(m) => m,
+            Err(e) => {
+                warn!(
+                    "face_watch: find_persons_by_names_ci failed for {}: {:?}",
+                    rel_path, e
+                );
+                return;
+            }
+        }
+    };
+
+    // 3. Multi-match: ambiguous, skip. Single match: candidate person.
+    let unique_person_ids: std::collections::HashSet<i32> =
+        person_for_tag.values().copied().collect();
+    if unique_person_ids.len() != 1 {
+        if !unique_person_ids.is_empty() {
+            debug!(
+                "face_watch: {} carries tags for {} different persons; skipping auto-bind",
+                rel_path,
+                unique_person_ids.len()
+            );
+        }
+        return;
+    }
+    let person_id = *unique_person_ids.iter().next().expect("nonempty set");
+
+    let threshold: f32 = std::env::var("FACE_AUTOBIND_MIN_COS")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .filter(|t: &f32| *t >= 0.0 && *t <= 1.0)
+        .unwrap_or(0.4);
+
+    // 4. Reference embedding (if any) under the same model_version.
+    // Mutable: a successful bootstrap bind below installs the bound face's
+    // embedding as the reference for the rest of this batch.
+    let mut reference: Option<Vec<f32>> = {
+        let mut fd = face_dao.lock().expect("face dao");
+        match fd.person_reference_embedding(ctx, person_id, model_version) {
+            Ok(r) => r,
+            Err(e) => {
+                warn!(
+                    "face_watch: person_reference_embedding failed for person {}: {:?}",
+                    person_id, e
+                );
+                return;
+            }
+        }
+    };
+
+    // 5. Bind each new face that meets the criterion. Hold the lock once
+    // for the whole batch; assign_face_to_person uses its own short
+    // transaction internally.
+    let mut fd = face_dao.lock().expect("face dao");
+    for (face_id, emb) in new_faces {
+        let bootstrap = reference.is_none();
+        let bind = match &reference {
+            None => {
+                // Person has no faces yet — first one wins so bootstrap
+                // can ever produce a usable reference. Later faces in
+                // this same batch are evaluated against it (see below).
+                debug!(
+                    "face_watch: auto-binding first face {} → person {} (no reference yet)",
+                    face_id, person_id
+                );
+                true
+            }
+            Some(ref_vec) => {
+                let sim = faces::cosine_similarity(&emb, ref_vec);
+                if sim >= threshold {
+                    debug!(
+                        "face_watch: auto-binding face {} → person {} (cos={:.3} ≥ {:.3})",
+                        face_id, person_id, sim, threshold
+                    );
+                    true
+                } else {
+                    debug!(
+                        "face_watch: leaving face {} unassigned (cos={:.3} < {:.3} for person {})",
+                        face_id, sim, threshold, person_id
+                    );
+                    false
+                }
+            }
+        };
+        if !bind {
+            continue;
+        }
+        match fd.assign_face_to_person(ctx, face_id, person_id) {
+            Ok(_) => {
+                if bootstrap {
+                    // The person now has exactly one face, so that face's
+                    // embedding is the reference for the remaining faces
+                    // in this photo. Without this, every face of a
+                    // multi-face photo would take the unconditional
+                    // bootstrap branch and bind to the same person.
+                    reference = Some(emb);
+                }
+            }
+            Err(e) => warn!(
+                "face_watch: assign_face_to_person failed (face={}, person={}): {:?}",
+                face_id, person_id, e
+            ),
+        }
+    }
+}
+
+/// Narrows a candidate list down to the files worth sending to Apollo.
+///
+/// Two gates run per candidate, in this order, against `base` joined with
+/// the candidate's `rel_path`:
+///   1. it must be an image according to `file_types::is_image_file` —
+///      the backlog drain pulls every hashed row in `image_exif`, videos
+///      included, and sending those to Apollo just produces `failed`
+///      markers and inflates the FAILED stat;
+///   2. it must not match the watcher's `EXCLUDED_DIRS` rules (the same
+///      `PathExcluder` /memories uses), so @eaDir / .thumbnails /
+///      user-defined paths never burn a detect call or Apollo's GPU
+///      memory. This gate is skipped entirely when `excluded_dirs` is
+///      empty.
+///
+/// Input order is preserved. `library_name` is only used in debug logs
+/// (`<unknown>` when absent). Kept separate for unit testing.
+pub(crate) fn filter_excluded(
+    base: &Path,
+    excluded_dirs: &[String],
+    candidates: Vec<FaceCandidate>,
+    library_name: Option<&str>,
+) -> Vec<FaceCandidate> {
+    let excluder = (!excluded_dirs.is_empty()).then(|| PathExcluder::new(base, excluded_dirs));
+
+    let mut kept = candidates;
+    kept.retain(|cand| {
+        let abs = base.join(&cand.rel_path);
+
+        if !file_types::is_image_file(&abs) {
+            debug!(
+                "face_watch: skipping non-image path {} (library {})",
+                cand.rel_path,
+                library_name.unwrap_or("<unknown>")
+            );
+            return false;
+        }
+
+        let excluded = excluder.as_ref().is_some_and(|ex| ex.is_excluded(&abs));
+        if excluded {
+            debug!(
+                "face_watch: skipping excluded path {} (library {})",
+                cand.rel_path,
+                library_name.unwrap_or("<unknown>")
+            );
+        }
+        !excluded
+    });
+    kept
+}
+
+/// Produces the bytes to send to Apollo for `path`.
+///
+/// Insightface (via opencv) can't decode RAW or HEIC, so for files that
+/// `file_types::needs_ffmpeg_thumbnail` flags we first try the embedded
+/// JPEG preview, the way the thumbnail pipeline does. Everything else —
+/// plain JPEG/PNG/WebP/etc., and flagged files for which no preview could
+/// be extracted — is read from disk verbatim. In the no-preview case
+/// Apollo will then 422 and the caller marks the row failed; that's fine,
+/// we tried.
+///
+/// Only the final `std::fs::read` can produce an `Err`.
+pub(crate) fn read_image_bytes_for_detect(path: &Path) -> std::io::Result<Vec<u8>> {
+    let embedded_preview = if file_types::needs_ffmpeg_thumbnail(path) {
+        exif::extract_embedded_jpeg_preview(path)
+    } else {
+        None
+    };
+
+    match embedded_preview {
+        Some(preview) => Ok(preview),
+        None => std::fs::read(path),
+    }
+}
+
+/// Unit tests for the pure helpers in this module: candidate filtering
+/// and the byte-reading fallback. Nothing here talks to Apollo or a DB.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+
+    /// Build a candidate whose hash is derived from its path, so each
+    /// fixture entry is distinct without needing real file contents.
+    fn cand(rel_path: &str) -> FaceCandidate {
+        FaceCandidate {
+            rel_path: rel_path.to_string(),
+            content_hash: format!("hash-{rel_path}"),
+        }
+    }
+
+    #[test]
+    fn filter_excluded_pattern_drops_dir_components() {
+        // A pattern matches a path *component* under base, not a substring.
+        // Phase 3 needs this for @eaDir / .thumbnails skipping.
+        //
+        // Every fixture path carries an image extension on purpose:
+        // filter_excluded runs the image-extension gate BEFORE the
+        // excluder, so an extension-less @eaDir entry would be dropped as
+        // "not an image" and this test would pass without the excluder
+        // ever being consulted.
+        let tmp = tempfile::tempdir().unwrap();
+        let base = tmp.path();
+        let candidates = vec![
+            cand("photos/a.jpg"),                      // keep
+            cand("photos/@eaDir/SYNOPHOTO_THUMB.jpg"), // drop (component match)
+            cand("photos/eaDir-not-a-thing.jpg"),      // keep (substring, not component)
+        ];
+        let kept = filter_excluded(base, &["@eaDir".to_string()], candidates, Some("test"));
+        let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect();
+        assert_eq!(
+            kept_paths,
+            vec!["photos/a.jpg", "photos/eaDir-not-a-thing.jpg"]
+        );
+    }
+
+    #[test]
+    fn filter_excluded_absolute_dir_drops_subtree() {
+        // Absolute (under-base) entries drop the whole subtree.
+        let tmp = tempfile::tempdir().unwrap();
+        let base = tmp.path();
+        let candidates = vec![
+            cand("public/a.jpg"),
+            cand("private/a.jpg"),
+            cand("private/sub/b.jpg"),
+        ];
+        let kept = filter_excluded(base, &["/private".to_string()], candidates, None);
+        let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect();
+        assert_eq!(kept_paths, vec!["public/a.jpg"]);
+    }
+
+    #[test]
+    fn filter_excluded_empty_rules_passes_all() {
+        // EXCLUDED_DIRS unset still lets every image through — only the
+        // PathExcluder is skipped, the image-extension gate still runs.
+        let tmp = tempfile::tempdir().unwrap();
+        let base = tmp.path();
+        let candidates = vec![cand("a.jpg"), cand("b.jpg")];
+        let kept = filter_excluded(base, &[], candidates, None);
+        assert_eq!(kept.len(), 2);
+    }
+
+    #[test]
+    fn filter_excluded_drops_videos_and_non_media() {
+        // Backlog drain pulls every hashed row in image_exif (videos
+        // included). Videos must never reach Apollo — opencv can't
+        // decode them, every call would 422 and write a `failed` marker.
+        let tmp = tempfile::tempdir().unwrap();
+        let base = tmp.path();
+        let candidates = vec![
+            cand("photos/a.jpg"),
+            cand("photos/clip.mp4"),
+            cand("photos/clip.MOV"),
+            cand("photos/notes.txt"),
+            cand("photos/b.heic"),
+        ];
+        let kept = filter_excluded(base, &[], candidates, Some("test"));
+        let kept_paths: Vec<_> = kept.iter().map(|c| c.rel_path.as_str()).collect();
+        assert_eq!(kept_paths, vec!["photos/a.jpg", "photos/b.heic"]);
+    }
+
+    #[test]
+    fn read_bytes_passes_through_for_jpeg() {
+        // JPEG goes through plain read — we DON'T want to lose orientation
+        // metadata or re-encode here; insightface's exif_transpose handles
+        // orientation on its end.
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("test.jpg");
+        let mut buf = Vec::new();
+        // Tiny 4x4 grey JPEG — encoded by image crate so we know it round-trips.
+        let img = image::DynamicImage::ImageRgb8(image::RgbImage::from_pixel(
+            4,
+            4,
+            image::Rgb([128, 128, 128]),
+        ));
+        img.write_to(
+            &mut std::io::Cursor::new(&mut buf),
+            image::ImageFormat::Jpeg,
+        )
+        .unwrap();
+        fs::write(&path, &buf).unwrap();
+
+        let read = read_image_bytes_for_detect(&path).expect("read jpeg");
+        assert_eq!(read, buf, "JPEG bytes must pass through verbatim");
+    }
+
+    #[test]
+    fn read_bytes_falls_back_when_raw_has_no_preview() {
+        // A `.nef` file with non-RAW bytes won't have an embedded preview —
+        // the helper falls through to plain read rather than refusing. This
+        // matches the docstring contract; Apollo will then 422 and we'll
+        // mark the row as failed.
+        let tmp = tempfile::tempdir().unwrap();
+        let path = tmp.path().join("not_really.nef");
+        fs::write(&path, b"definitely-not-a-raw-file").unwrap();
+
+        let read = read_image_bytes_for_detect(&path).expect("fallback read");
+        assert_eq!(read, b"definitely-not-a-raw-file");
+    }
+}
@@ -0,0 +1,235 @@
+//! File enumeration for the indexer pass.
+//!
+//! Walks a library root and returns the `(absolute_path, forward_slash_rel_path)`
+//! pairs that belong in `image_exif`. Pruning `EXCLUDED_DIRS` happens here at
+//! WalkDir time via `filter_entry` so whole subtrees (Synology's `@eaDir`,
+//! `.thumbnails`, the operator's configured excludes) are never descended —
+//! vs walking the full tree and discarding leaves, which on a Synology mount
+//! with thousands of `@eaDir` subdirs is the difference between scanning N
+//! files and N×3.
+//!
+//! Previously inlined in `main.rs::process_new_files` without the exclusion
+//! filter — paths like `<lib>/@eaDir/.../SYNOFILE_THUMB_*.jpg` ended up in
+//! `image_exif` and looped through `face_watch::filter_excluded` every tick,
+//! since no `face_detections` row would ever be written for a path dropped
+//! at runtime.
+
+use std::path::{Path, PathBuf};
+use std::time::SystemTime;
+
+use walkdir::{DirEntry, WalkDir};
+
+use crate::file_types;
+use crate::memories::PathExcluder;
+
+/// Collect every file entry under `base_path`, whatever its extension,
+/// skipping any subtree that `excluded_dirs` (`EXCLUDED_DIRS`) matches.
+/// This is the shared primitive for code that walks a library root —
+/// thumbnail generation, media counts, orphan-playlist reverse lookups,
+/// the indexer happy-path, etc. Higher-level helpers such as
+/// `enumerate_indexable_files` add media-type / mtime filters on top.
+///
+/// Exclusion is applied through `filter_entry`, so an excluded directory
+/// is never descended (on a Synology mount full of `@eaDir` dirs: N files
+/// visited instead of ~3N). Entries the walker fails to read are skipped.
+pub fn walk_library_files(base_path: &Path, excluded_dirs: &[String]) -> Vec<DirEntry> {
+    let excluder = PathExcluder::new(base_path, excluded_dirs);
+    // Depth 0 (the root) is always admitted, so a pathological config that
+    // excludes the base itself cannot prune the walk at its very first entry.
+    let keep = move |entry: &DirEntry| entry.depth() == 0 || !excluder.is_excluded(entry.path());
+    WalkDir::new(base_path)
+        .into_iter()
+        .filter_entry(keep)
+        .filter_map(Result::ok)
+        .filter(|entry| entry.file_type().is_file())
+        .collect()
+}
+
+/// Walk `base_path`, prune `EXCLUDED_DIRS` subtrees, and return
+/// `(absolute_path, forward_slash_rel_path)` for every image / video file
+/// that should be indexed. Paths that are not valid UTF-8 are skipped.
+///
+/// `modified_since` keeps only files modified at or after the instant —
+/// used by the watcher's quick-scan tick to skip the long tail. Files
+/// whose metadata can't be read are kept; the caller's batch EXIF lookup
+/// dedups against existing rows.
+pub fn enumerate_indexable_files(
+    base_path: &Path,
+    excluded_dirs: &[String],
+    modified_since: Option<SystemTime>,
+) -> Vec<(PathBuf, String)> {
+    walk_library_files(base_path, excluded_dirs)
+        .into_iter()
+        // Media-type test first so `metadata()` only runs on image / video files.
+        .filter(|entry| {
+            file_types::direntry_is_image(entry) || file_types::direntry_is_video(entry)
+        })
+        .filter(|entry| match modified_since {
+            Some(since) => entry
+                .metadata()
+                .ok()
+                .and_then(|meta| meta.modified().ok())
+                .map(|mtime| mtime >= since)
+                .unwrap_or(true),
+            None => true,
+        })
+        .filter_map(|entry| {
+            // Forward slashes on every OS so DB comparisons against the
+            // batch EXIF lookup line up.
+            let rel = entry
+                .path()
+                .strip_prefix(base_path)
+                .ok()?
+                .to_str()?
+                .replace('\\', "/");
+            Some((entry.into_path(), rel))
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use std::time::Duration;
+
+    /// Create a fresh tempdir and touch an empty file at every relative path
+    /// in `paths`, making any missing parent directories along the way.
+    fn make_tree(paths: &[&str]) -> tempfile::TempDir {
+        let root = tempfile::tempdir().expect("tempdir");
+        paths.iter().map(|rel| root.path().join(rel)).for_each(|file| {
+            // Parent directories first, then the (empty) file itself.
+            if let Some(folder) = file.parent() {
+                fs::create_dir_all(folder).expect("mkdir -p");
+            }
+            fs::File::create(&file).expect("touch");
+        });
+        root
+    }
+
+    fn rel_paths(found: &[(PathBuf, String)]) -> Vec<String> {
+        let mut sorted: Vec<String> = found.iter().map(|pair| pair.1.clone()).collect();
+        sorted.sort(); // so assertions never depend on the order entries arrive in
+        sorted
+    }
+
+    #[test]
+    fn excludes_eadir_subtree() {
+        // Regression guard: unpruned, Synology's @eaDir is walked into and
+        // its SYNOFILE_THUMB_*.jpg leaves end up in image_exif.
+        let excluded = ["@eaDir".to_string()];
+        let tree = make_tree(&[
+            "vacation/IMG_0001.jpg",
+            "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg",
+            "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_XL.jpg",
+            "@eaDir/top_level_thumb.jpg",
+        ]);
+        let indexed = enumerate_indexable_files(tree.path(), &excluded, None);
+        assert_eq!(rel_paths(&indexed), ["vacation/IMG_0001.jpg"]);
+    }
+
+    #[test]
+    fn excludes_nested_pattern() {
+        // A bare `.thumbnails` is a component pattern: it prunes at any depth.
+        let tree = make_tree(&[
+            "a/b/photo.jpg",
+            "a/.thumbnails/cached.jpg",
+            "a/b/.thumbnails/nested.jpg",
+        ]);
+        let indexed = enumerate_indexable_files(tree.path(), &[".thumbnails".to_owned()], None);
+        assert_eq!(rel_paths(&indexed), ["a/b/photo.jpg"]);
+    }
+
+    #[test]
+    fn excludes_absolute_under_base() {
+        // An entry with a leading '/' names a path under the library root
+        // (see PathExcluder::new).
+        let tree = make_tree(&["private/secret.jpg", "public/keep.jpg"]);
+        let indexed = enumerate_indexable_files(tree.path(), &["/private".to_owned()], None);
+        assert_eq!(rel_paths(&indexed), ["public/keep.jpg"]);
+    }
+
+    #[test]
+    fn filters_non_media() {
+        // Only files with an image / video extension are indexed; the
+        // fixtures are empty files, so nothing here depends on content.
+        let tree = make_tree(&[
+            "a.jpg",     // image: kept
+            "b.mp4",     // video: kept
+            "c.txt",     // not media
+            "d",         // no extension
+            "e.jpg.bak", // wrong ext
+        ]);
+        let no_excludes: &[String] = &[];
+        let indexed = enumerate_indexable_files(tree.path(), no_excludes, None);
+        assert_eq!(rel_paths(&indexed), ["a.jpg", "b.mp4"]);
+    }
+
+    #[test]
+    fn modified_since_filters_old_files() {
+        let dir = make_tree(&["old.jpg", "new.jpg"]);
+        // Both files now exist with mtimes from their creation just above.
+        // The rest of the test places a cutoff strictly between the two
+        // files without ever touching old.jpg again:
+        //
+        //   make_tree -> sleep -> cutoff = now -> sleep -> rewrite new.jpg
+        //
+        // old.jpg therefore stays older than the cutoff, while new.jpg's
+        // mtime is bumped past it, so only new.jpg may survive the
+        // `modified_since` filter.
+        let new_path = dir.path().join("new.jpg");
+        // Sleep on both sides of the cutoff. NOTE(review): 20 ms assumes a
+        // fine-grained mtime clock; coarser filesystems may need a longer gap.
+        std::thread::sleep(Duration::from_millis(20));
+        let cutoff = SystemTime::now();
+        std::thread::sleep(Duration::from_millis(20));
+        // Bump new.jpg's mtime past the cutoff by rewriting it.
+        fs::write(&new_path, b"x").expect("rewrite");
+
+        let found = enumerate_indexable_files(dir.path(), &[], Some(cutoff));
+        assert_eq!(rel_paths(&found), vec!["new.jpg".to_string()]);
+    }
+
+    #[test]
+    fn walk_library_files_excludes_subtrees_and_returns_all_extensions() {
+        // The lower-level primitive keeps files of every extension and only
+        // prunes excluded subtrees. Thumbnail gen and the media-count gauges
+        // rely on that, since they need non-media files too (e.g. sidecar
+        // XMPs sitting alongside the photos).
+        let tree = make_tree(&[
+            "vacation/IMG_0001.jpg",
+            "vacation/IMG_0001.xmp",
+            "vacation/@eaDir/IMG_0001.jpg/SYNOFILE_THUMB_S.jpg",
+            "notes.txt",
+        ]);
+        let root = tree.path();
+
+        // Reduce every hit to a forward-slash path relative to the root,
+        // then sort so the comparison is independent of visit order.
+        let mut walked = Vec::new();
+        for entry in walk_library_files(root, &["@eaDir".to_owned()]) {
+            let rel = entry.path().strip_prefix(root).unwrap();
+            walked.push(rel.to_string_lossy().replace('\\', "/"));
+        }
+        walked.sort();
+
+        // The @eaDir thumbnail is pruned; the .xmp sidecar and the .txt
+        // file survive next to the photo.
+        let expected = [
+            "notes.txt",
+            "vacation/IMG_0001.jpg",
+            "vacation/IMG_0001.xmp",
+        ];
+        assert_eq!(walked, expected);
+    }
+
+    #[test]
+    fn rel_path_is_forward_slash() {
+        // Nested-path sanity check. On Unix the separator is already '/';
+        // the assertions guard a future Windows port from regressing.
+        let tree = make_tree(&["a/b/c.jpg"]);
+        let indexed = enumerate_indexable_files(tree.path(), &[], None);
+        let rel = indexed[0].1.as_str();
+        assert_eq!(rel, "a/b/c.jpg");
+        assert!(!rel.contains('\\'));
+    }
+}
@@ -3,9 +3,22 @@ use walkdir::DirEntry;

 /// Supported image file extensions
 pub const IMAGE_EXTENSIONS: &[&str] = &[
-    "jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef",
+    "jpg", "jpeg", "png", "webp", "tiff", "tif", "heif", "heic", "avif", "nef", "arw",
 ];

+/// Extensions the `image` crate cannot decode — we fall back to ffmpeg to
+/// extract an embedded preview or decode the frame. Entries are lowercase.
+pub const FFMPEG_THUMBNAIL_EXTENSIONS: &[&str] = &["heif", "heic", "nef", "arw"];
+
+/// Returns true if thumbnail generation should go through ffmpeg instead of
+/// the `image` crate (RAW, HEIF/HEIC). Extension match ignores ASCII case.
+pub fn needs_ffmpeg_thumbnail(path: &Path) -> bool {
+    match path.extension().and_then(|e| e.to_str()) {
+        Some(ext) => FFMPEG_THUMBNAIL_EXTENSIONS.iter().any(|k| k.eq_ignore_ascii_case(ext)),
+        None => false,
+    }
+}
+
 /// Supported video file extensions
 pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];

--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`ALTER TABLE photo_insights ADD COLUMN backend TEXT NOT NULL DEFAULT 'local';`
				`@@ -0,0 +1 @@`
				`ALTER TABLE photo_insights ADD COLUMN fewshot_source_ids TEXT;`
				`@@ -0,0 +1 @@`
				`ALTER TABLE personas DROP COLUMN reviewed_only_facts;`