240 Commits

Author SHA1 Message Date
Cameron Cordes 48a1b753f0 AI: add enable_thinking reasoning toggle plumbed to llama.cpp
New optional SamplingOverride forwarded to llama-server as
chat_template_kwargs.enable_thinking (gates Qwen3-style reasoning
blocks). None leaves the template default; other backends ignore it.
Wired through the agentic-insight and chat-turn request bodies/handlers.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-17 18:14:44 -04:00
Cameron Cordes f2ab8d3740 Unified search: use ANY-mode tag matching, not ALL
ALL-mode over-constrains NL queries — the model maps several query words to
tags and few photos carry every one, zeroing the candidate set. Switch to
ANY (a photo matches if it has any named tag); the semantic CLIP ranking
provides precision within that pool. Exclude tags still filter out.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 02:25:24 -04:00
Cameron Cordes 6e5898e766 Unified search: rank within filtered set instead of pre-thresholding CLIP
When structured filters are present they're the constraint and CLIP only ranks
within the candidate set, so drop the global similarity threshold for that
case. Previously the 0.2 whole-library threshold ran BEFORE intersecting with
the filters, discarding filter-matching photos that scored just under it (e.g.
a 2022 beach photo at 0.18) — producing after_struct_filter=0 even when matches
existed. Plain semantic (no filters) keeps the user's threshold.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 02:20:06 -04:00
Cameron Cordes 6c315edacc clip_client: log encode_text failures (URL + status/body or network error)
The CLIP encode failure reason was only ever returned in the HTTP response
body, never logged server-side, making 502s from /photos/search opaque. Log
the underlying cause — network error to the URL, or the Apollo HTTP status +
response body — so CLIP-service problems are diagnosable from the ImageApi log.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 02:02:57 -04:00
Cameron Cordes 0a40e78528 Unified search: UNIFIED_SEARCH_MODEL env override for the translation step
Pin the NL->structured translation to a small, fast model that can stay
co-resident with CLIP (and the chat model) so it never evicts them on a tight
VRAM budget. Precedence: UNIFIED_SEARCH_MODEL env > client-selected model >
configured default. Logs the effective model (backend.model()) so model A/B
tests are visible. Documented in .env.example.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 01:58:48 -04:00
Cameron Cordes e56235acc5 Unified search: stage-by-stage logging to debug empty results
Log the translated query (semantic/tags/place/date/media + has_struct), the
tag-filter file count, candidate-row + allowed-hash counts, and the CLIP
considered/hits/after-filter counts. Pinpoints which stage drops results to
zero (over-extracted filter, tag path mismatch, Any/All over-constraint, or
CLIP threshold). info-level for now while debugging.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 01:29:21 -04:00
Cameron Cordes fcbd7e2733 Unified search: accept client model override (avoid model swapping)
Add an optional `model` query param to /photos/search/unified, passed into
resolve_backend's overrides. The client sends the user's currently-selected
local model so the translation step reuses an already-loaded model instead of
forcing a llama-swap eviction + cold start. Falls back to the configured
default when absent. Still local only (no hybrid).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 01:19:53 -04:00
Cameron Cordes e4c875f473 Unified NL search Phase 2: /photos/search/unified endpoint
Composes the two existing engines (Path A orchestration):
- Translate NL -> StructuredQuery via local LLM, respecting LLM_BACKEND
  (resolve_backend(Local) -> ollama or llama-swap; no hybrid).
- Forward-geocode the place name into a gps circle.
- Structured filters (tags/EXIF/geo/date/media) build a candidate set of EXIF
  rows; CLIP ranks within it, joined by content_hash. Degenerate cases match
  existing behavior: semantic-only -> plain CLIP; filters-only -> date-sorted.
- Echoes the interpreted query (incl. resolved place) for editable client chips.

Refactor: extracted reusable cores from clip_search (score_photos, resolve_hits,
parse_library_scope, score_error_response) shared by both endpoints. Removed the
Phase 1 allow-until-wired attributes now that nl_query + geo are consumed.

fmt + clippy clean; 23 backend tests pass (7 geo, 12 nl_query, 4 unified).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 01:03:43 -04:00
Cameron Cordes 50ed780844 Unified NL search Phase 1: NL→structured-query translator + forward geocoding
Foundation for the /photos/search/unified endpoint (Phase 2). Two new,
fully unit-tested pieces, not yet wired into a route (allow-until-wired,
mirroring llm_client.rs):

- ai/nl_query.rs: translate a free-text query into a StructuredQuery via one
  grounded LLM call. Two-stage — the model emits names/ISO dates, then a pure
  resolve step maps tag names against the real vocab and converts dates to
  unix seconds. Hallucinated (non-vocab) tags are surfaced in unmatched_tags
  rather than silently used as hard filters — the anti-noise guard. 12 tests.

- geo::forward_geocode + bbox_to_circle: resolve a place name to a circle via
  Nominatim /search, collapsing the bounding box to centroid + circumscribing
  radius so "Portland" and "Italy" both map onto the existing gps circle
  filter with no schema change. Radius is the max centroid-to-corner distance
  (corners aren't equidistant on a sphere). 4 tests.

fmt + clippy clean; 19 new tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 00:44:16 -04:00
Cameron Cordes 7e21213181 Reels: bound disk/ledger growth (pre-gen prune + on-demand cache sweep)
Nothing reaped reels before, so the on-disk cache and ledger grew
unbounded — each night's daily reel is a new ~4MB file + ledger row that's
stale within ~26h.

- Pre-gen self-prune: after recording a reel, prune_superseded keeps the
  newest PREGEN_KEEP_PER_SCOPE (2) rows per (span, library) and unlinks the
  superseded reels' mp4+sidecar. Caps the ledger/disk at ~spans×libraries×2.
- On-disk sweeper (spawn_reel_cache_sweeper): every 24h, removes reel mp4s
  with no ledger row and no live job older than REEL_CACHE_MAX_AGE_DAYS (7) —
  bounding the on-demand cache, which has no ledger row and otherwise grows
  forever — plus crashed-render cruft (.mp4.tmp/.concat.txt/orphan sidecars).
  Runs regardless of REEL_PREGEN_ENABLED; disable with REEL_CACHE_SWEEP_ENABLED=0.
- New DAO methods prune_superseded + all_cache_keys (with tests); env knobs
  documented in .env.example.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 23:27:32 -04:00
Cameron Cordes 664b3694f8 Reels pre-gen: always render the agentic reel, don't adopt on-demand mp4
Past the key-aware dedup, any mp4 already at the cache key was not
pre-generated by us (no matching ledger row) — typically an on-demand
fast-scripted reel sharing the key after the max_segments alignment.
Adopting it recorded a ledger row pointing at the fast reel, silently
defeating agentic pre-gen. Drop the adopt-existing-mp4 shortcut and
always produce_reel (atomic overwrite). Worst case is one redundant
re-render if a prior run crashed between render and ledger write.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 23:16:14 -04:00
Cameron Cordes b52b1eb323 Reels pre-gen: make dedup cache-key-aware so key changes regenerate
exists_fresh only matched (span, library, render_version, age), so a
cache-key change that doesn't bump RENDER_VERSION (e.g. the max_segments
alignment, or any future selection-logic tweak) left last night's ledger
row looking 'fresh' — the nightly run would skip and the orphaned reel
would persist. Dedup now compares the stored cache_key to the freshly
computed key (and confirms the mp4 exists), so a changed key forces a
regen within the freshness window. exists_fresh stays as the HTTP
endpoint's fast gate.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 23:14:39 -04:00
Cameron Cordes 19fc1bbdf8 Reels pre-gen: use DEFAULT_MAX_SEGMENTS so cache keys match on-demand
pregen_one hardcoded max_segments: 24 while create_reel_handler defaults
to DEFAULT_MAX_SEGMENTS (40). Since the cache key encodes the raw
max_segments, the pre-generated reel's key never matched the client's
on-demand request, so POST /reels cache-hit an older max=40 reel and the
agentic pre-gen file was left orphaned. Align to DEFAULT_MAX_SEGMENTS (as
the plan specified) so the on-demand cache-hit path serves the pre-gen
reel. Content is unchanged — the actual beat count is duration-budgeted
either way; only the key descriptor differed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 23:12:54 -04:00
Cameron Cordes ca007a618d Reels pre-gen: record true media count + real upsert for user_ai_prefs
- pregen_one recorded media_count as planned.len() (beat count); record
  the actual media item total (media.len(), photos + clips) in both the
  cache-hit and freshly-rendered ledger paths. Drops the redundant
  photo_count binding.
- Replace upsert_prefs's insert-then-catch-error-then-update dance with a
  single atomic INSERT ... ON CONFLICT(id) DO UPDATE. Explicit id=1 makes
  the conflict target deterministic; explicit column .set((...)) keeps
  None -> NULL overwrite semantics so the row mirrors the latest request
  exactly, and genuine insert errors surface instead of being swallowed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 15:19:41 -04:00
Cameron Cordes e4d8d374fb Reels pre-gen: fix runtime breakers from review (1-5)
1. Drop the unregistered prefs_dao/reel_dao web::Data extractors from
   create_reel_handler / precomputed_reel_handler and read the DAOs off
   AppState instead (consistent with the scheduler). Missing app_data
   would have 500'd every POST /reels and /reels/precomputed at runtime.
2. Restore the dropped 'return' in the cache-hit branch — without it a
   cache hit fell through, overwrote the Done job with Queued, and
   re-ran the whole TTS+render pipeline on every request.
3. Make secs_until_next_run_hour minute/second-accurate so a batch that
   finishes inside the run hour sleeps ~24h instead of busy-looping
   (wake, re-run, sleep 0) for the rest of the hour. Tests updated.
4. Prune photo/user-bound tools (get_file_tags, get_faces_in_photo,
   recall_facts_for_photo, recall_facts_for_entity) from the agentic
   reel scripter's allow-list — they no-op/error with the empty
   file/user context and only burn iterations.
5. Align AGENTIC_SYSTEM_PROMPT's advertised tool list with the actual
   (pruned) allow-list.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 15:14:36 -04:00
Cameron Cordes 5c9ee56527 Fix agentic reel audit issues: midnight bug, DAO wiring, dead code, DST timezone, validation
Blocking fixes:
- secs_until_next_run_hour: same-hour now returns 0 instead of 24h
- capture_prefs: called at both handler return points, never fails request
- capture_prefs: resolves library param, upserts to user_ai_prefs via DAO
- Scheduler: uses AppState DAOs instead of separate connections
- Pregen dedup: uses resolved library param instead of hardcoded 'all'
- run_readonly_tool_loop: added #[allow(dead_code)] (used in main.rs only)
- run_readonly_tool_loop: removed dead messages.push() call
- InsightGenerator: added exif_dao() getter for scheduler reuse

Medium fixes:
- Input validation: run_hour clamped 0-23, week_dow clamped 0-6
- DST-sensitive timezone: fixed_tz_offset() with env var config

Low fixes:
- Documented REEL_PREGEN_MAX_TOOL_ITERS and REEL_PREGEN_TZ_FIXED_MINUTES
- Removed dead test_app_state function and unused imports

Also fix: UpsertUserAiPrefs import path, chrono::Local::with_ymd_and_hms
requires TimeZone trait + .single(), unwrap_or_else closure simplification
2026-06-13 14:59:00 -04:00
Cameron Cordes f707353807 feat: nightly agentic pre-generation of memory reels
Implement end-to-end nightly pre-generation of memory reels with agentic
scripting that grounds narration in calendar, location, messages, and RAG.

Sections A-E from the plan:

A. Extract produce_reel pipeline core from run_reel_job with
   ScripterMode::Fast/Agentic and progress callbacks.

B. Agentic scripter: factor run_readonly_tool_loop from the insight
   generator, build read-only tool gate, prompt builder with GPS, and
   generate_script_agentic with fallback to fast path.

C. Precomputed reels ledger (SQLite table + DAO), GET /reels/precomputed
   handler with validity gate, GET /reels/by-key/{key}/video streaming,
   and normalize_library_key helper.

D. Nightly scheduler: spawn_pregen_scheduler with configurable hour,
   run_pregen_batch (day/week/month spans), pregen_one with dedup and
   disk-check, secs_until_next_run_hour time math.

E. user_ai_prefs passive mirror table + DAO for param capture in
   create_reel_handler and replay in the scheduler.

Also fixes resolve_library_param signature to take &[Library] and adds
resolve_library_param_state wrapper for AppState callers.

New files: migrations/2026-06-13-000000_add_precomputed_reels/,
  migrations/2026-06-13-000010_add_user_ai_prefs/,
  src/database/precomputed_reel_dao.rs,
  src/database/user_ai_prefs_dao.rs
2026-06-13 14:29:34 -04:00
Cameron Cordes b30c8c16d0 Reels: clips play through the beat instead of freezing early
A clip beat capped playback at CLIP_SECONDS and filled the rest of the
narration with a tpad freeze-frame, so a clip stopped dead on its last
frame for a second or two before the transition — a glitchy pause that
stills don't have. Extract clip_beat_plan: the clip now plays for as
much of its beat as the source footage covers, and we freeze only when
the source is genuinely shorter than the narration. Bump RENDER_VERSION.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 11:00:01 -04:00
Cameron Cordes f5581edf5e Reels: ease burst fade 0.08s → 0.12s
0.08s read as too abrupt; 0.12s keeps the burst clearly snappier than the
0.35s held-shot fade without jarring. Bumps RENDER_VERSION.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-13 00:07:41 -04:00
Cameron Cordes 65793a2dda Reels: mixed-media (video clip beats) + faster burst fade
Videos in a span now appear as clip beats: the first few seconds of the
video (capped at CLIP_SECONDS=5, and to the source length) filled to the
portrait canvas like photos, with its live audio ducked under the
narration (amix at 0.35). If the narration outlasts the clip, the last
frame is held (tpad); clips with no audio track just play under narration.

Selection splits the beat budget between photo beats and clip beats —
clips get up to half (≥1 when present), photos the rest — then merges
both back into chronological order. SegmentMedia gains a Clip variant;
beats carry `media` (photos or one clip) and the cache key tags P/C so a
path used as a still vs a clip differ.

Also drops the burst fade from 0.15s to 0.08s so a quick burst reads
clearly differently from a held shot. Bumps RENDER_VERSION.

The clip filtergraph (fill + duck-mix + last-frame hold) is unit-tested
but, like the rest of the ffmpeg path, wants a real render check on the
GPU host.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-13 00:02:51 -04:00
Cameron Cordes 299e32b014 Bump version to 1.4.0
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 23:45:24 -04:00
Cameron Cordes 6e90f24307 Reels: burst beats + duration budget for week/month, plus step logging
Restructures a reel around beats — one narration line over one or more
photos — instead of one line per photo. A single-photo beat is a held
shot; a multi-photo beat is a quick burst that flashes through several
moments of an event while the line is read. So a week/month reel can show
everything it spans without a narrated (and timed) segment per photo.

Selection (selector.rs):
- Duration budget: cap the number of narrated beats to ~REEL_TARGET_SECONDS
  (default 90, env-tunable) so week/month reels don't run minutes long.
- Event clustering by time gap; when there are more events than the beat
  budget, adjacent events merge so the whole span stays covered. Each beat
  bursts up to MAX_BURST_PHOTOS (an even spread), so a 40-shot dinner
  contributes a handful of quick frames, not forty narrated seconds.

Render (render.rs): a beat renders its photos as a concat of per-photo
fills (blurred-bg portrait, fps-before-fade) under one muxed narration;
burst photos get a snappier fade. beat_durations splits the narration
across the photos, stretching only if a long burst would flash too fast.

Adds high-level info logs across the steps (request → script → per-beat
narrate/render → join → done with elapsed) for visibility. Bumps
RENDER_VERSION to re-render cached reels.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 23:43:18 -04:00
Cameron Cordes 740fc4d841 Reels: fix steppy fade (fps before fade) and ease the expression bump
The fade looked steppy/low-frame-rate because the filtergraph normalized
fps AFTER the fade filters: the brightness ramp was sampled at the looped
still's coarse input cadence, then duplicated up to 30fps. Move fps ahead
of the fades, pin the still's input framerate (-framerate), and force CFR
output (-r) so the dip ramps across a full 30 frames and plays steadily.

Ease narration expressiveness from 0.7 to 0.6 (still tunable via
REEL_TTS_EXAGGERATION). Bump RENDER_VERSION so existing reels re-render.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 23:20:52 -04:00
Cameron Cordes 7715a7a905 Reels: portrait canvas with blurred fill, fade transitions, warmer TTS
Fixes the "image is tiny" problem: a 1920x1080 landscape reel letterboxes
to a ~25%-height band on a portrait phone. Switch to a portrait 1080x1920
canvas and fill it per photo with a blurred, zoomed copy of the image
behind the sharp fitted photo — so the frame is always full regardless of
the photo's orientation, with no black bars and no cropping of the subject.

Add a quick 0.35s fade in/out baked into each segment so concatenated
photos dip smoothly instead of hard-cutting (fade-out lands in the
narration's silent tail, so speech isn't clipped). Drop the unused
Ken Burns branch — motion can return deliberately later.

Warm up the narration a touch: thread Chatterbox's `exaggeration` through
synthesize_serialized and default reels to 0.7 (tunable via
REEL_TTS_EXAGGERATION). Bump RENDER_VERSION so existing landscape reels
re-render.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 23:10:26 -04:00
Cameron Cordes 42453d5786 Fix reel concat: force -f mp4 for the .tmp output path
The concat stage wrote to <key>.mp4.tmp (for an atomic publish-rename),
but ffmpeg infers the muxer from the output extension and can't map
.tmp to a format — "Unable to choose an output format". Force the mp4
muxer explicitly so the temp extension is irrelevant. Segment render,
NVENC, TTS, and scripting were already working end-to-end; this was the
only failure, at the final join.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 22:56:48 -04:00
Cameron Cordes e3f731b3b2 Add memory-reel backend: on-demand narrated photo slideshow
New POST /reels + GET /reels/{id} (+ /video) build an MP4 slideshow of a
memory span (day/week/month), narrated by the LLM in a cloned voice.

Pipeline (src/reels/): a selector resolves which photos + reel metadata,
the scripter writes one narration line per photo via a single LLM call
(reusing each photo's cached insight as context — no fresh vision calls,
so reel generation stays off the GPU's vision slot), each line is
synthesized to speech, and the renderer assembles stills + narration via
ffmpeg. Jobs run in the background (mirroring the TTS speech-job
registry) since a reel takes minutes; the finished MP4 is cached on disk
keyed by the selection so a repeat request is instant.

The segment model is media-typed (Photo today) so a video-clip segment
(phase 2) and a nightly pre-render (phase 3) slot in without reworking
the pipeline. Ken Burns motion is implemented but defaulted off pending a
visual check on the GPU box.

Supporting changes:
- memories: extract gather_memory_items() so the reel selector reuses the
  exact window/exclusion/tz/sort logic behind /memories.
- ai::tts: add synthesize_serialized() so reel narration honors the same
  single-GPU permit + write lease as user TTS requests.
- video::ffmpeg: make get_duration_seconds() pub for narration timing.
- AppState: reels_path (REELS_DIRECTORY, defaults beside preview clips).

Pure logic (cache key, script parsing, ffmpeg arg/filter construction,
even sampling, segment timing) is unit-tested (26 tests). The runtime
path (ffmpeg render, TTS, LLM) needs a real run on the GPU host to verify
end-to-end — not exercisable in CI.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 22:31:08 -04:00
cameron 98274c3301 Merge pull request 'Feature/tts voice management' (#105) from feature/tts-voice-management into master
Reviewed-on: #105
2026-06-13 02:01:37 +00:00
Cameron Cordes 1017fe73af Include start offset in voice-name window tag
Clones that don't start at 0:00 are tagged with where the reference
window begins (grandma-at1m32s-30s), so voices cloned from different
sections of the same source are distinguishable in the voice list.
Zero-start names keep the existing -30s form.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 16:21:41 -04:00
Cameron Cordes 1dec34540d Add start/duration window selection for voice-clone reference clips
Both voice creation endpoints (upload + from-library) now accept optional
start_seconds/duration_seconds, threaded to ffmpeg as -ss/-t, so the
reference window can target clean speech anywhere in a long recording
instead of always the first N seconds. Duration is clamped to the
LLAMA_SWAP_TTS_REF_SECONDS cap and the voice-name tag reflects the
actual window length.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 16:09:03 -04:00
Cameron Cordes 2e0f78aa1b Add user-configurable TTS pronunciation overrides
A JSON map (TTS_PRONUNCIATIONS_PATH, default tts_pronunciations.json)
rewrites mispronounced words — place names, initialisms, dotted
abbreviations — to phonetic spellings before synthesis, applied after
markdown cleanup in both /tts/speech paths. Whole-word smartcase
matching (lowercase keys match any casing, uppercase keys exact),
longest key wins, hot-reloaded on mtime change with last-good fallback
on parse errors. See tts_pronunciations.example.json.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 23:06:18 -04:00
Cameron Cordes 3fa4fa8501 Strip markdown decoration from model-emitted insight titles
Models wrap the title line despite the prompt — "**Title: A Day in the
Woods**", "## Title: ...", bold around just the label — which made
parse_title_body's bare "Title:" prefix match fall through to the
fallbacks and leak asterisks into the stored title.

strip_title_markdown trims bold/italic markers, heading hashes,
backticks, and quotes from both ends; applied to the label line, the
extracted title, both fallback paths, and generate_photo_title (which
previously stripped only quotes).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 22:18:43 -04:00
Cameron Cordes efd05db523 Make the embedding model swappable via env for A/B testing
Trialing Qwen3-Embedding-0.6B (1024-dim, instruct-prefixed queries)
against nomic required code changes at every hardcoded seam; now it's a
config flip plus a reembed_embeddings run.

- EMBEDDING_DIM env (default 768) replaces every hardcoded dim check:
  daily summary / calendar / search / location DAOs, Ollama batch
  validation, reembed_embeddings
- entities gains the dim guard it never had — a wrong-dim vector
  silently kills dedup/recall (cosine over mismatched lengths is 0),
  so store None and warn instead
- embed_query / embed_document split with EMBED_QUERY_PREFIX /
  EMBED_DOCUMENT_PREFIX (literal \n expanded): retrieval models treat
  the two sides differently — nomic wants search_query:/search_document:,
  Qwen3 wants Instruct:...\nQuery: on queries only. All query-side
  call sites and all corpus writers now declare their side.
- document the contract in CLAUDE.md: change the model or any of these
  vars → re-run reembed_embeddings or search is garbage

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 21:40:40 -04:00
Cameron Cordes b1493f5aca Wait out TTS GPU hold before the insight job timeout starts
The GPU lease keeps per-request reqwest budgets from burning behind a
cross-model swap, but the job-level INSIGHT_GENERATION_TIMEOUT_SECS
wall-clock started at spawn — an insight queued behind a running TTS
synthesis parked its first chat call on the lease and timed out
("timeout after 180s") before chatterbox even finished loading.

Acquire-and-drop an LLM read lease before starting the job clock in
both insight handlers: the wait for the GPU happens before the
timeout begins, mirroring the per-request lease semantics. Dropped
immediately — holding it across the generation would deadlock the
chat calls' own lease acquisitions.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 19:15:38 -04:00
Cameron Cordes a022a3d15d Fix RAG vector-space mismatch and search_rag retrieval quality
Queries embedded via llama-swap were searching corpora embedded via
Ollama (measured: spaces diverged). Introduce LocalLlm — the local
Ollama + llama-swap pair with LLM_BACKEND dispatch baked in — and route
all embedding writers through it; anything embedding via a concrete
client reintroduces the bug.

- search_rag: embed the model's query verbatim (no metadata boilerplate),
  make date optional — no time-decay when omitted, so "when did X
  happen?" queries rank purely by similarity across all time
- reembed_embeddings bin: re-embed summaries / calendar / search /
  knowledge entities via the active backend, with old-new cosine report
  per table and truncate-and-retry for inputs over the embed server's
  physical batch size
- import_calendar, import_search_history: embed through LocalLlm
- search_messages / get_sms_messages: render sender → recipient so sent
  messages are attributable to a conversation
- insight job failures: store the one-line anyhow context chain ({:#})
  instead of the Debug dump the client was shown verbatim
- serialize env_dispatch tests behind a lock (parallel-runner flake)

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 19:06:52 -04:00
Cameron Cordes 0accc4ef2f Add GPU lease coordinating LLM and TTS requests through llama-swap
llama-swap runs chat/vision/Chatterbox as a mutually-exclusive set on
one GPU and HOLDS a request for a non-resident model until the resident
model drains, then swaps. That hold burned the holder's reqwest timeout
(measured: a queued TTS lost 77s behind one LLM turn; an LLM request
behind a synthesis waited the entire remaining synth), so concurrent
insight + read-aloud timed out instead of queueing.

ai::gpu adds a fair RwLock lease acquired before each request is sent,
so cross-model waits happen before the HTTP timeout starts: chat/vision
share the read lease, TTS synthesis and voice-library ops (which spin
Chatterbox up) take the write lease, and embeddings take none (the
embed slot is in llama-swap's always-resident group). Speech jobs now
flip queued->running only after acquiring the GPU, letting the client
anchor its poll deadline to that transition.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 18:20:06 -04:00
Cameron Cordes 03699f7413 Add TTS voice deletion, async speech jobs, voice-list cache, ref-seconds name tags
- DELETE /tts/voices/{name}: remove a cloned voice via the llama-swap
  passthrough (upstream chatterbox-tts-api exposes DELETE /voices/{name}).
- POST/GET/DELETE /tts/speech/jobs: durable job flow for long syntheses —
  dispatch returns 202 + job id, the synth queues on the GPU permit instead
  of fast-failing 429, and clients poll for the result (kept ~10 min).
- GET /tts/voices now serves an in-memory cache so listing voices doesn't
  make llama-swap spin up the TTS model (evicting the resident LLM);
  invalidated on create/delete, ?refresh=1 forces an upstream re-query.
- Created voice names are tagged with LLAMA_SWAP_TTS_REF_SECONDS (e.g.
  grandma-30s) so the library shows which ref length produced each clone.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 17:36:15 -04:00
cameron c78e751743 Merge pull request 'Feature/insight history' (#104) from feature/insight-history into master
Reviewed-on: #104
2026-06-10 19:01:14 +00:00
Cameron Cordes 31904fef80 Raise chat truncation default num_ctx to 32k, env-overridable
The history-truncation budget assumed an 8192-token context whenever a
chat request omitted num_ctx, while the llama-swap chat slots serve
20k-131k. Replayed transcripts past ~6k tokens were silently gutted
every turn — losing conversation history and destroying llama.cpp
KV-cache prefix reuse (full SWA re-prefill per turn).

Default is now 32768 (real conversations top out around 16k), with
AGENTIC_CHAT_DEFAULT_NUM_CTX to override per deploy, floored at
headroom + 1024. Explicit per-request num_ctx still wins.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-09 19:14:02 -04:00
Cameron Cordes 13f3635db2 Fix clippy lints in backfill and libraries tests
Keep `cargo clippy --tests` clean alongside the agentic-loop changes:
alias backfill's five-element setup() tuple as SetupFixture
(type_complexity) and build the single-library health map via
std::slice::from_ref instead of cloning (unnecessary clone-to-slice).
No behavior change.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-09 18:29:44 -04:00
Cameron Cordes b711252c23 Resolve persona prompts server-side; drop synthetic prompt in chat_turn
A request carrying persona_id but no system_prompt used to fall back to
the neutral default voice. Both agentic generation
(generate_agentic_insight_handler) and chat bootstrap now resolve the
persona's stored prompt from the persona store, with precedence:
explicit non-blank client system_prompt > persona store lookup >
existing default ("default" persona id behaves the same — used if the
store has a row, neutral default otherwise). Resolution happens at the
handler / bootstrap entry where the DAO is reachable; internals are
unchanged. resolve_bootstrap_system_prompt takes the resolved persona
prompt as a second argument, with precedence tests.

Also in insight_chat:

- Sync chat_turn no longer persists the synthetic "Please write your
  final answer now without calling any more tools." user message pushed
  on iteration exhaustion — extracted both streaming variants'
  synthetic_idx pattern into push/remove_synthetic_final_prompt (the
  remove is a defensive no-op on index drift) and applied it to all
  three loops; round-trip test included.
- Strip leaked <think> blocks from the final content persisted as the
  reply in chat_turn and both streaming AgenticLoopOutcomes (mid-stream
  TextDeltas are untouched; the raw transcript keeps the block).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-09 18:29:35 -04:00
Cameron Cordes 091982bdfc Add recall_facts_for_entity tool; fix generation gates and tool output
Agentic-loop fixes in the generator:

- New recall_facts_for_entity tool (always-on, like recall_entities):
  fetches facts for one entity by id so the model can follow up on
  entities surfaced by recall_entities that aren't photo-linked
  (recall_facts_for_photo only covers linked entities). Mirrors that
  tool's persona scoping (PersonaFilter::Single) and the persona's
  reviewed_only_facts filter exactly, and renders in the same
  "Entity: ... / - predicate object" style. Wired through execute_tool
  and the trajectory summarizer.
- Generation now resolves gates persona-aware:
  current_gate_opts_for_persona(images_inline, Some((user_id,
  persona_id))) instead of the None-defaulting wrapper, so a persona's
  allow_agent_corrections opens propose_correction during generation the
  same way chat turns already did. The now-unused current_gate_opts
  wrapper is removed.
- Strip leaked <think> blocks from the final assistant content before
  parse_title_body / store_insight (raw training transcript keeps them).
- Honest truncation labels: get_sms_messages and get_location_history
  said "Found N ..." while listing only the first K; found_header now
  emits "Found N ... (showing first K):" when truncated, and the
  summarizer still parses the count.
- Clamp days_radius in get_calendar_events and get_location_history to
  1..=30, matching get_sms_messages.
- persona_system_prompt helper (persona store lookup, blank-prompt ->
  None) for server-side persona resolution; callers land in the next
  commit.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-09 18:29:20 -04:00
Cameron Cordes 592dfcb42c Accumulate streamed tool calls across chunks in Ollama streaming
Ollama >=0.8 can stream tool_calls incrementally across NDJSON chunks;
chat_with_tools_stream did `tool_calls = Some(tcs)` per chunk, so only
the last chunk's calls survived assembly and earlier calls were silently
dropped. Append into the accumulator instead.

- ollama: append_streamed_tool_calls helper + tests covering two calls
  arriving in separate chunks and the single-chunk batch case.
- llamacpp: the SSE delta assembly was already correct (per-index
  BTreeMap, same-index argument fragments concatenate, distinct indexes
  accumulate); extracted it into apply_tool_call_deltas /
  finalize_tool_calls and added tests pinning that behavior.
- llm_client: new shared strip_think_blocks (moved from ollama's private
  extract_final_answer, which now delegates) so the tool-calling final
  content paths can reuse it; unit tests for tagged/plain/unclosed/empty
  cases.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-09 18:29:06 -04:00
Cameron Cordes 8e4f91561b Add per-file insight history endpoint and rate-by-id
Expose GET /insights/history?path=... returning every generated version
of a photo's insight (current plus superseded), newest-first, backing the
mobile per-file insight history view.

- New get_insight_history_handler; reuses the existing get_insight_history
  DAO method (removed its dead_code allow).
- impl From<PhotoInsight> for PhotoInsightResponse, collapsing the mapping
  that was duplicated across the single-get and all-insights handlers.
- rate_insight_by_id DAO method + optional insight_id on RateInsightRequest
  so previously generated versions can be approved/rejected (the path-based
  rate only touches the current row).
- DAO tests for history ordering/scoping and id-targeted rating.
- cargo fmt normalized a multi-line assert in insight_chat.rs tests.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 18:28:22 -04:00
cameron 750a8de6b1 Merge pull request 'Feature/tts integration' (#103) from feature/tts-integration into master
Reviewed-on: #103
2026-06-07 21:35:49 +00:00
Cameron Cordes 412da2ce8e Collapse blank lines to a single break in TTS text cleaning
Chatterbox inserts a long pause — sometimes ~20s of silence — for each
blank line it sees, and insight text is markdown full of paragraph
breaks. clean_for_tts previously preserved paragraph structure
(\n{3,} -> \n\n), so every paragraph boundary still reached the model
as a double newline. Now any run of 2+ newlines, including
whitespace-only blank lines, collapses to a single newline so the
worst pause a break can cause is a normal line-break pause.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 09:12:43 -04:00
Cameron Cordes dec6f21af9 Bump version to 1.3.0
TTS feature release: /tts/speech + voice library endpoints (Chatterbox via
llama-swap), input cleaning, tuning knobs, WAV-normalized voice cloning,
OTel spans, dedicated synth timeout, and single-flight serialization.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 14:07:10 -04:00
Cameron Cordes cab867da60 Serialize /tts/speech with a single permit; 429 when busy
The Chatterbox wrapper has no internal lock or cancellation, so concurrent
synth requests contend on the single GPU and abandoned (timed-out) jobs
cascade into stacked slowness. Gate synthesis behind a one-permit semaphore
and fast-fail concurrent requests with 429 instead of queueing.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 14:02:56 -04:00
Cameron Cordes d8dd260c6b Give TTS synthesis its own (longer) request timeout
Long insights are chunked + synthesized server-side and can run past the shared
180s chat/embedding client timeout, causing spurious timeouts. /tts/speech now
uses a per-request timeout from LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS
(default 600), overriding the client default without affecting chat/embeddings.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 10:25:06 -04:00
Cameron Cordes 9978b28b52 Document TTS endpoints + env in CLAUDE.md
Sync CLAUDE.md with the Chatterbox TTS feature: the /tts/* endpoints and the
LLAMA_SWAP_TTS_MODEL / _VOICE / _REF_SECONDS env vars (only need LLAMA_SWAP_URL).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 23:15:39 -04:00
Cameron Cordes ccacfe1113 Instrument TTS handlers with OTel spans (codebase standard)
Each /tts handler now opens an http.tts.* span via extract_context_from_request
+ global_tracer().start_with_context, sets Status::Ok / Status::error on every
outcome, and records useful attributes (model, format, voice_name, byte counts)
— matching the insight handlers. Prometheus request metrics were already
covered by the app-wide actix-web-prom middleware.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 23:10:43 -04:00
Cameron Cordes 62d517dcda Normalize voice-clone reference audio to WAV via ffmpeg
Chatterbox validates the reference clip by file extension and rejects formats
like .aac/.opus. Always transcode the reference (upload bytes and library
files alike) to mono 24 kHz WAV with ffmpeg before forwarding, so any source
format is accepted and the from-library audio/video paths are unified.

The reference length cap is now configurable via LLAMA_SWAP_TTS_REF_SECONDS
(default 30) — Chatterbox is zero-shot, so a clean ~10-20s clip is the sweet
spot. Drops the now-unused mime guesser.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 22:50:08 -04:00
Cameron Cordes 35c5ecb427 Document TTS endpoints and env in README + .env.example
Adds the /tts/speech and /tts/voices* endpoints plus LLAMA_SWAP_TTS_MODEL /
LLAMA_SWAP_TTS_VOICE (TTS only needs LLAMA_SWAP_URL, not LLM_BACKEND=llamacpp).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 22:34:34 -04:00
Cameron Cordes 51be5df214 Clean insight text for TTS and pass through Chatterbox tuning knobs
/tts/speech now normalizes input before synthesis: unwraps markdown
links/images to visible text, drops heading/list/blockquote/emphasis
markers and URLs, strips emoji (which non-turbo Chatterbox mispronounces
or skips), and collapses whitespace. Centralized in clean_for_tts so the
app, WebUI, and curl all get clean audio. Bracketed tags are deliberately
preserved for a future Turbo (paralinguistic) switch.

Adds optional exaggeration / cfg_weight / temperature to the request,
clamped to Chatterbox's documented ranges and forwarded on the speech
body. Unit tests cover markdown/emoji/URL stripping and tag preservation.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 22:15:05 -04:00
Cameron Cordes 69268d03fe Add TTS endpoints backed by Chatterbox via llama-swap
LlamaCppClient gains text_to_speech (OpenAI /audio/speech), list_voices and
create_voice (voice library at the swap-root /upstream/<model>/voices
passthrough), plus a tts_model slot configured via LLAMA_SWAP_TTS_MODEL
(default "chatterbox").

New Claims-gated routes:
- POST /tts/speech        -> { audio_base64, format } for data: URI playback
- GET  /tts/voices        -> voice library passthrough
- POST /tts/voices/upload -> clone a voice from an uploaded clip (multipart)
- POST /tts/voices/from-library -> clone from a library file (ffmpeg-extracts
  audio from video; audio forwarded as-is)

Security: voice_name sanitized to [A-Za-z0-9_-] (it becomes an upstream
filename), 25 MB upload cap, library refs restricted to real audio/video,
path confined via is_valid_full_path. Adds is_audio_file + unit tests for the
sanitizer, mime guesser, and swap-root derivation.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 22:04:42 -04:00
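The input checks named under "Security" in commit 69268d03fe above, sketched as an added example; this is not the project's code. The function names, the reject-instead-of-strip behaviour, the 64-character name cap, the extension list and the purely lexical path check are assumptions (the project's own is_valid_full_path and is_audio_file are not shown on this page).

```rust
use std::path::{Component, Path, PathBuf};

// The 25 MB figure is from the commit message (taken as MiB here); the name-length
// cap and the extension list are constructed for this example.
pub const MAX_UPLOAD_BYTES: usize = 25 * 1024 * 1024;
pub const MAX_VOICE_NAME_LEN: usize = 64;
const MEDIA_EXTENSIONS: &[&str] = &["wav", "mp3", "m4a", "flac", "ogg", "mp4", "mov", "mkv"];

#[derive(Debug, PartialEq, Eq)]
pub enum InputError {
    EmptyName,
    BadNameChar(char),
    NameTooLong,
    UploadTooLarge { got: usize },
    EmptyPath,
    PathEscapesRoot,
    NotAudioOrVideo,
}

/// voice_name becomes an upstream filename, so only [A-Za-z0-9_-] is accepted.
/// Rejecting (instead of stripping) keeps "../a" from silently turning into "a".
pub fn sanitize_voice_name(raw: &str) -> Result<&str, InputError> {
    if raw.is_empty() {
        return Err(InputError::EmptyName);
    }
    if let Some(bad) = raw
        .chars()
        .find(|c| !(c.is_ascii_alphanumeric() || *c == '_' || *c == '-'))
    {
        return Err(InputError::BadNameChar(bad));
    }
    if raw.len() > MAX_VOICE_NAME_LEN {
        return Err(InputError::NameTooLong);
    }
    Ok(raw)
}

/// Enforce the cap while the multipart body streams in, before buffering the chunk,
/// so an oversized upload is refused without ever holding more than `cap` bytes.
pub fn append_chunk(buf: &mut Vec<u8>, chunk: &[u8], cap: usize) -> Result<(), InputError> {
    let total = buf.len().saturating_add(chunk.len());
    if total > cap {
        return Err(InputError::UploadTooLarge { got: total });
    }
    buf.extend_from_slice(chunk);
    Ok(())
}

/// Confine a client-supplied library reference to `root` and require a media extension.
/// Lexical only: symlinks are not resolved, so a handler that opens the file would
/// also canonicalize the result and re-check that it still starts with `root`.
pub fn validate_library_ref(root: &Path, rel: &str) -> Result<PathBuf, InputError> {
    // Treat '\' as a separator on every platform so "..\x" cannot slip past on Unix.
    let rel = rel.replace('\\', "/");
    let mut full = root.to_path_buf();
    let mut depth = 0usize;
    for comp in Path::new(&rel).components() {
        match comp {
            Component::Normal(seg) => {
                full.push(seg);
                depth += 1;
            }
            Component::CurDir => {}
            // "..", a leading "/", or a Windows drive prefix all leave the library root.
            _ => return Err(InputError::PathEscapesRoot),
        }
    }
    if depth == 0 {
        return Err(InputError::EmptyPath);
    }
    let ext = full
        .extension()
        .and_then(|e| e.to_str())
        .map(|e| e.to_ascii_lowercase());
    match ext {
        Some(e) if MEDIA_EXTENSIONS.contains(&e.as_str()) => Ok(full),
        _ => Err(InputError::NotAudioOrVideo),
    }
}

fn main() {
    // Constructed inputs, not values from the project.
    for name in ["grandpa_2024-v2", "../../etc/cron.d/x", "a b", ""] {
        println!("{:?} -> {:?}", name, sanitize_voice_name(name));
    }
    let root = Path::new("/photos");
    for rel in ["2024/trip/clip.MP4", "../secrets/key.wav", "a\\..\\..\\x.wav", "notes.txt"] {
        println!("{:?} -> {:?}", rel, validate_library_ref(root, rel));
    }
    let mut body = Vec::new();
    println!("{:?}", append_chunk(&mut body, b"RIFF", MAX_UPLOAD_BYTES));
}
```
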
cameron 015dc976e3 Merge pull request 'feature/insight-jobs' (#102) from feature/insight-jobs into master
Reviewed-on: #102
2026-06-02 23:41:36 +00:00
Cameron Cordes b9b6e51af1 Stop ffprobe walking every frame in video stream probe
probe_video_stream_meta requested a bare `side_data_list` section in
-show_entries. On modern ffprobe that's the *frame* side-data section,
so ffprobe enumerated every frame to collect it — reading the entire
mdat. For non-faststart phone clips on the SMB mount this turned a
metadata probe into a full-file read: /video/generate took 10-32s per
open (0% CPU, time proportional to file size).

Switch to `stream_side_data_list`, which reads the Display Matrix
rotation from the stream header (moov) without touching frames. Codec,
frame rate, and rotation are unchanged; the existing rotation parser
already reads streams[0].side_data_list[].rotation. Fixes both the
open-path probe and the transcode actor's probe. Cold opens now return
near-instantly.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-30 13:19:47 -04:00
Cameron Cordes 16ae82ba70 Normalize video rel_path lookup to forward slashes on Windows
generate_video built the rel_path for its image_exif lookup by stripping
the library root from the absolute path, leaving backslashes on Windows
(Melissa\clip.mp4). file_scan stores rel_paths forward-slash and
get_exif_batch matches exactly with no normalization, so the lookup
missed and the handler re-hashed the entire video file on every request.

Extract rel_path_for_lookup and normalize separators with replace('\\',
'/'). Adds tests for Windows/Unix separators, file-at-root, leading
separator stripping, and the no-match fallback.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-30 12:51:44 -04:00
Cameron Cordes a542ea411b Exclude inlined image bytes from chat context budget
The truncation budget estimated message size by serializing the full
ChatMessage array, including the base64 image persisted in the first
user message. A 1024px JPEG is hundreds of KB of base64 characters —
8-19x the entire ~24KB text budget at the default num_ctx — and the
image lives in the protected prefix that's never dropped. The budget
check was therefore essentially always over, dropping all tool history
and firing the "trimmed context" banner on every turn for vision
backends that inline images.

estimate_bytes now strips image payloads before counting and charges a
flat IMAGE_TOKENS_EACH per image instead, so the budget reflects real
text token pressure. Adds a regression test covering a short
conversation with one large image.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-30 11:51:57 -04:00
Cameron Cordes 962f7bf05c Add reconnectable async chat-turn flow with in-memory TurnRegistry
Replace the one-shot SSE chat stream with an async dispatch + reconnectable
replay flow so the mobile client survives backgrounding, network blips, and
OS-killed sockets without losing an in-flight agentic turn.

- TurnRegistry/TurnEntry: in-memory per-turn event buffer (cap 500, front
  eviction) shared by the agentic loop (writer) and SSE replay readers.
  ReplayOutcome + replay_from/next_batch distinguish Events/CaughtUp/Gone;
  next_batch registers the Notify before reading state (no lost wakeup) and
  drains every buffered event before signaling terminal, so the final
  Done/Error is never dropped and the stream closes cleanly.
- Endpoints: POST /insights/chat/turn (202 + turn_id), GET
  /insights/chat/turn/{id} (SSE replay, ?skip_before= resume, per-event seq,
  410 on eviction), DELETE /insights/chat/turn/{id} (real task abort +
  cooperative is_running() check at each loop boundary).
- Cancellation actually stops the task (AbortHandle stored on the entry) and
  emits a Done{cancelled:true}; callers skip persistence on cancel.
- Background sweeper drops stale turns; interval clamped to <=300s.
- OpenTelemetry spans: ai.chat.turn.execute/replay/cancel.
- Legacy POST /insights/chat/stream path preserved unchanged.

Tests: registry coverage for terminal delivery (race guard), waiting, Gone,
abort, eviction; handler integration tests for 404/410, skip_before, seq
stamping, completed replay, and cancel.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 19:50:25 -04:00
Cameron Cordes 0c1c1c6792 fix: split token count columns into separate migration
A previous commit added prompt_eval_count and eval_count to the
existing 2026-05-27-000002_add_insight_generation_params migration,
but Diesel won't re-run an already-applied migration. Environments
that applied the original version of 000002 never got these two
columns, causing "no such column: photo_insights.prompt_eval_count"
on every insight read.

- Revert 000002 up.sql to its original 7-column form
- Add 000003_add_insight_token_counts for the two missing columns

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 22:34:44 -04:00
Cameron Cordes cdd981fe64 fix: inline DB error source into DbError struct
The previous fix logged the underlying error in a separate log line,
but the error that propagated up still showed just "DbError { kind:
InsertError }" at the call site. Now the source message is captured
on the struct itself, so Debug/Display output at any call site shows
the actual Diesel error inline.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 22:30:19 -04:00
Cameron Cordes dad0220587 fix: stop swallowing DB errors across the entire DAO layer
Every map_err(|_| DbError::new(...)) and map_err(|_| anyhow!("..."))
in the database layer was discarding the actual Diesel/SQLite error,
making failures impossible to diagnose from logs.

- Add DbError::log() that logs the source error before converting
- Replace all ~130 swallowed outer map_err closures with DbError::log
- Replace all ~47 swallowed inner anyhow closures to include the
  source error in the message

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:56:48 -04:00
Cameron Cordes 39ad83f55b fix: surface actual Diesel error in store_insight instead of generic InsertError
The previous map_err closures discarded the Diesel error, making
failures like missing columns impossible to diagnose from logs.
Now the underlying error is logged before converting to DbError.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:53:54 -04:00
Cameron Cordes 9654d256f4 fix: persist token counts and fix agentic insight_id mapping
- Add prompt_eval_count and eval_count columns to photo_insights so
  token usage from llama-swap/Ollama is stored and returned by the API
- Fix agentic generator return: was (prompt_eval_count, eval_count),
  handler destructured first element as insight_id — now returns
  (insight_id, prompt_eval_count, eval_count)
- Wire prompt_eval_count/eval_count from DB into PhotoInsightResponse
  instead of hardcoded None

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:47:57 -04:00
Cameron Cordes 449ce1fda1 chore: resolve all clippy warnings and formatting
- Replace impl ToString with impl Display for InsightJobStatus and
  InsightGenerationType
- Rename from_str → parse to avoid confusion with std::str::FromStr
- Collapse nested if statements (handlers, insight_chat, insight_generator,
  image handlers)
- Use is_multiple_of() instead of manual modulo checks
- Suppress deprecated diesel::dsl::count_distinct (no drop-in replacement
  available in current Diesel version)
- Scope MutexGuard in synthesize_merge to drop before await
- Allow dead_code on generate_no_think, enumerate_indexable_files,
  total_deleted (intended for future use)
- Allow type_complexity on Diesel query result tuples

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:13:48 -04:00
Cameron Cordes a410683edf fix: fail fast when LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured
Previously embed_one() silently fell back to Ollama embeddings,
which would load nomic-embed-text into VRAM alongside llama-swap —
wasting memory on an unintended model. Now returns an error with
an actionable message instead.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:02:42 -04:00
Cameron Cordes 2818936739 fix: audit fixes for async insight jobs + persist generation params
- Fix query param mismatch: rename GenerationStatusQuery.file_path to
  path so the client's app-resume buildQuery({ path: ... }) resolves
  correctly instead of always getting 400
- Remove dead _lib_id bindings from both generate handlers
- Return 202 Accepted instead of 200 from generate endpoints
- Restore OpenTelemetry span instrumentation on generate handlers
- Remove stale UNIQUE constraint from initial migration (incompatible
  with plain-INSERT DAO)
- Add tests for status guard: complete_job/fail_job are no-ops when
  job is already cancelled, and cancel_job by id
- Persist generation params (num_ctx, temperature, top_p, top_k, min_p,
  system_prompt, persona_id) on the photo_insights table for auditing

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 13:02:15 -04:00
Cameron Cordes b87eb4e690 feat: async insight generation with SQLite job tracking
- Add insight_generation_jobs table migration and DAO
- Implement job lifecycle: create_or_get_active, complete, fail, cancel
- Refactor POST /insights/generate and /agentic to async spawn with timeout
- Add GET /insights/generation/status endpoint with job_id and file_path lookup
- Use String for enum fields in Diesel models to avoid private Bound type
- Add from_str() helpers on InsightJobStatus and InsightGenerationType
- Fix update_training_messages to return Result<usize, DbError>
- 7/7 DAO unit tests passing
2026-05-27 10:02:18 -04:00
cameron 5a75d1a28c Merge pull request 'feature/llamacpp-backend' (#101) from feature/llamacpp-backend into master
Reviewed-on: #101
2026-05-26 18:58:47 +00:00
Cameron Cordes b03ee60342 fix: prevent hybrid mode from leaking OpenRouter model to local llamacpp client
When backend=hybrid with LLM_BACKEND=llamacpp, the user-selected model
(an OpenRouter id like "google/gemini-3-flash-preview") was being applied
to the local LlamaCppClient's primary_model and vision_model. This caused
describe_image to send the OpenRouter model name to llama-swap, which
returned 400 because it has no such slot.

Guard the local-client model override with !is_hybrid so it only applies
in local-only mode (where the user is selecting a different local model).
Bump to v1.2.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 09:55:16 -04:00
Cameron Cordes 0a627f4880 Add contact name filter to SMS search tool + misc improvements
- sms search tool: accept contact name, trim/validate, skip when
  contact_id is set, pass to API client
- sms_client: new contact field in SmsSearchParams, URL-encode on wire
- Tool description clarifies contact_id takes precedence when both given
- Add parse_title_body helper for LLM response parsing
- llamacpp backend improvements
2026-05-25 21:46:18 -04:00
cameron b9175e2718 image: add xlarge (4096px) on-demand preview tier
New `PhotoSize::XLarge` variant sits between `Large` (2048px) and
`Full` (original). On-demand generated and disk-cached at
`_xlarge/<hash>.jpg`, same waterfall as `Large` (embedded RAW preview
→ ffmpeg → image crate). Sources below 4096px serve at native size.

Reduces decoded bitmap memory from ~192MB (48MP full) to ~64MB for
the mobile viewer's zoom tier.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 15:33:03 -04:00
Cameron Cordes 9dba659d1e test: add llamacpp model-slot consistency and content-null tests
Cover the properties that prevent mid-turn model swaps in llama-swap
exclusive mode: vision_model defaults to primary, cloned local client
mirrors the user-selected model, embeddings stay on their own slot.
Also test the content:null serialization for tool-calling messages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 19:29:51 -04:00
Cameron Cordes 208344ad98 ai: mirror chat model on local client to prevent mid-turn model swap
When the user selects a model from the picker, the local client's
primary_model and vision_model now match the chat model. Prevents
llama-swap exclusive mode from swapping models when describe_photo
or rerank fires during an agentic turn.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 19:27:29 -04:00
Cameron Cordes fb388c29d7 docs: update env + CLAUDE.md for direct-vision llamacpp + ResolvedBackend
llamacpp models now receive images directly instead of
describe-then-inline. LLAMA_SWAP_VISION_MODEL defaults to the
primary model. Document the ResolvedBackend dispatch pattern.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 15:03:12 -04:00
Cameron Cordes a8a661f70a ai: extract ResolvedBackend, remove ~480 lines of duplicated dispatch
Replace 5 copies of the ~80-line backend resolution pattern with a
single InsightGenerator::resolve_backend() builder that returns a
ResolvedBackend (chat + local clients, BackendKind enum, images_inline
flag). Tool dispatch now takes &ResolvedBackend instead of
&OllamaClient + model + backend strings.

Remove duplicated ollama/openrouter/llamacpp fields from
InsightChatService — InsightGenerator owns them and resolve_backend
uses them. Delete build_chat_clients (replaced by resolve_backend).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 15:00:50 -04:00
Cameron Cordes 0631820fbf ai: send images directly to llamacpp chat models + add ResolvedBackend
llamacpp models now receive images via OpenAI content-parts instead of
the describe-then-inline strategy (hybrid mode unchanged). Fixes
assistant messages with tool_calls emitting content: null instead of ""
to satisfy strict Jinja template role-alternation checks. Adds debug
logging of message role sequences on llamacpp requests.

Introduces BackendKind enum, SamplingOverrides, and ResolvedBackend in
a new backend.rs module. InsightGenerator::resolve_backend centralises
client construction + vision capability detection — next step wires the
existing inline dispatch through it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 14:00:37 -04:00
Cameron Cordes be51421b38 ai: collapse llamacpp into LLM_BACKEND env switch
Reverts the per-request backend="llamacpp" value. Chat/vision/embedding
backend is now a deploy-time decision (LLM_BACKEND=ollama|llamacpp),
applied globally across chat, vision describe, and embeddings — so
embedding vectors stay in one space across the index.

- Per-request backend whitelist back to "local"|"hybrid". A request
  arriving with backend="llamacpp" is rejected.
- LLM_BACKEND=llamacpp swaps the entire local stack to llama-swap:
  chat hits the chat slot, describe hits the vision slot, embeddings
  hit the embed slot. Hybrid mode still routes chat to OpenRouter
  but uses LLM_BACKEND for the describe pass.
- Drops env vars HYBRID_VISION_BACKEND, LLAMA_SWAP_VISION_MODELS,
  EMBEDDING_BACKEND (the last never shipped). Drops the
  LlamaCppClient.vision_models allowlist — capability inference now
  reports has_vision only for the configured vision_model slot.
- Drops the /insights/llamacpp/models handler. /insights/models is
  the single endpoint; returns Ollama servers under LLM_BACKEND=ollama
  and llama-swap slots (from LLAMA_SWAP_ALLOWED_MODELS) under
  LLM_BACKEND=llamacpp. Same envelope shape either way.
- New ai::embed_one helper routes embeddings through llama-swap when
  LLM_BACKEND=llamacpp (else Ollama). Wires it into the four
  insight_generator embedding sites.
- Cross-replay matrix simplifies to pre-llamacpp shape (local↔local,
  hybrid↔hybrid, hybrid→local allowed; local→hybrid rejected).
2026-05-21 11:36:58 -04:00
Cameron Cordes d14df63f19 env.example: document LLAMA_SWAP_* + HYBRID_VISION_BACKEND vars
Mirrors the section added to CLAUDE.md so deploys can opt into the
llamacpp backend from the template alone.
2026-05-20 17:54:08 -04:00
Cameron Cordes f0927f5355 ai: add llamacpp backend (llama-swap) as third LLM client
Wires a new LlamaCppClient (OpenAI-compatible /v1 wire format) alongside
OllamaClient and OpenRouterClient. Per-slot routing for chat/vision/embed
via env (LLAMA_SWAP_URL + *_MODEL vars); capability inference uses an
env allowlist since /v1/models doesn't report modality.

InsightGenerator + InsightChatService gain three-way dispatch on
chat_backend = "local" | "hybrid" | "llamacpp". Hybrid and llamacpp
share the describe-then-inline path (text-only chat after a separate
vision describe). HYBRID_VISION_BACKEND=llamacpp lets hybrid route its
describe pass through llama-swap's vision slot while chat still goes
to OpenRouter.

Cross-replay matrix added (validate_cross_replay): local<->llamacpp
and hybrid<->llamacpp allowed; local->hybrid and llamacpp->hybrid
rejected. New /insights/llamacpp/models handler mirrors the OpenRouter
shape.
2026-05-20 17:52:33 -04:00
cameron d04b86e32c Merge pull request 'image: add on-demand size=large preview tier (~2048px JPEG q85)' (#100) from feature/image-large-preview into master
Reviewed-on: #100
2026-05-19 21:51:08 +00:00
Cameron Cordes 19798184f0 image: add on-demand size=large preview tier (~2048px JPEG q85)
Adds a third PhotoSize between Thumb (200px) and Full (original). The
viewer placeholder and map callout previously upscaled a 200px thumb
into a full-screen / full-width view, which looked visibly blocky on
3× devices. The new tier is generated on-demand, disk-cached, and
served via the existing /image endpoint.

Storage layout mirrors the Thumb branch's lookup chain:
  1. hash-keyed: <thumbs>/_large/<hash[..2]>/<hash>.jpg (shared across
     libraries when content_hash is known)
  2. library-scoped legacy: <thumbs>/_large/<lib_id>/<rel_path>

Generation pipeline mirrors generate_image_thumbnail:
  - RAW: decode the embedded JPEG preview, apply EXIF orientation,
         resize to 2048-long-edge, encode JPEG q85
  - HEIC/HEIF: ffmpeg with scale + q:v 5 (≈ q85)
  - everything else: image crate decode + thumbnail() + JpegEncoder
Never upscales — sources below the 2048 cap re-encode at native size.

Handler offloads decode/resize to web::block to keep the actix worker
free (a 24MP source takes 100–500ms). Writes via tempfile+rename so
concurrent readers can't observe a half-written JPEG. On any
generation failure, falls through to the Full branch (which itself
serves the RAW embedded preview for unrenderable RAW containers).

Video requests for size=large fall back to the existing thumb pipeline
since there's no useful 2048px video tier.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 13:14:49 -04:00
cameron c3c6cd03db Merge pull request 'file_types: filter macOS AppleDouble + .DS_Store from media predicates' (#99) from feature/filter-fs-metadata into master
Reviewed-on: #99
2026-05-18 17:12:42 +00:00
Cameron Cordes b843a4a366 file_types: filter macOS AppleDouble + .DS_Store from media predicates
Symptom: Apollo's logs showed bursts of 422 decode_failed from
ImageApi's CLIP backfill — e.g. `._DSC_2182-S.jpg`. macOS writes
`._<name>` AppleDouble sidecars when copying to non-HFS volumes
(SMB, FAT, exFAT), and they carry the original file's extension
even though their bytes are extended-attribute metadata, not the
image. ImageApi's walker matched them via the extension predicate,
sent them through the ingest pipeline, and accumulated failed rows
in face_detections + clip_embedding while pinning Apollo's eviction
timer with the 422 burst.

Fix: predicate-level guard in is_image_file / is_video_file (and
by inheritance is_media_file). Every walker that already gates on
these (face_watch, backfill, clip_watch, watcher, files,
probe_clip_search) inherits the skip without per-callsite edits.
Narrow scope on purpose — `._*` prefix + the exact `.DS_Store`
basename — rather than blanket dotfile filtering, because a user
could plausibly name a cover image `.cover.jpg`.

Existing rows are not cleaned by this change. To purge what
already accumulated (one-shot, run from your DB shell after
deploying):

  DELETE FROM image_exif
   WHERE file_path LIKE '%/._%' OR file_path LIKE '%/.DS_Store';
  DELETE FROM face_detections
   WHERE rel_path LIKE '%/._%' OR rel_path LIKE '%/.DS_Store';
  DELETE FROM tagged_photo
   WHERE file_path LIKE '%/._%' OR file_path LIKE '%/.DS_Store';
  DELETE FROM favorites
   WHERE path LIKE '%/._%' OR path LIKE '%/.DS_Store';

The maintenance pipeline's missing-file scan would NOT catch these
on its own — the files exist on disk (they're real macOS metadata,
just not images), so stat() returns Ok and the row sticks.
2026-05-17 20:10:16 -04:00
cameron d275150db6 Merge pull request 'feature/video-frame-rate' (#98) from feature/video-frame-rate into master
Reviewed-on: #98
2026-05-18 00:09:35 +00:00
Cameron acdffc1558 cargo fmt: drop trailing blank line in actors.rs
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 21:14:30 -04:00
Cameron bd61e10158 chore: add .gitattributes + unit tests for ffprobe rational parser
LF normalization across OSes; *.sql pinned to LF for stable diffs.

Tests cover the rational frame-rate parser (NTSC 29.97, integer fps,
slow-mo 240, ffprobe's 0/0 unknown sentinel, malformed and out-of-range
inputs). Extracted the closure into a free fn for the test seam.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 21:13:06 -04:00
Cameron 1b70a6f0b4 video: probe frame rate via ffprobe and return on /video/generate
Adds frame_rate to GenerateVideoResponse so the mobile scrubber can step
at the source's real fps instead of a hardcoded 30. probe_video_stream_meta
gains a frame_rate field (avg_frame_rate preferred, r_frame_rate fallback,
nonsense values rejected) and is now pub so the handler can reuse it.
Cost is one ffprobe per /video/generate call; degrades silently to None
on probe failure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 21:03:21 -04:00
cameron 3162a4f477 Merge pull request 'clip-search: accept library_ids (multi-select whitelist) on /photos/search' (#97) from feature/clip-search-library-ids into master
Reviewed-on: #97
2026-05-16 13:38:00 +00:00
Cameron Cordes 87093a63d7 clip-search: accept library_ids (multi-select whitelist) on /photos/search
Previously the endpoint only accepted `library=<id>` (single id) — multi-
select scopes had to be filtered upstream by Apollo, which kept the
filter logic out of FileViewer-React's reach (it calls ImageApi
directly and got no scoping for 2+ active libraries).

Adds `library_ids` (comma-separated id list, e.g. `?library_ids=1,3`).
Parsed inside the existing scope decision: `library_ids` wins when
both are supplied; either / both empty falls back to "every enabled
library" (historical default). Malformed entries return 400.

Dedupes ids while preserving order so a stray `library_ids=1,1,3`
doesn't double-pass to the DAO. The single-id path still works
unchanged for older clients.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 09:30:46 -04:00
cameron dd7b4befb6 Merge pull request 'feature/clip-semantic-search' (#96) from feature/clip-semantic-search into master
Reviewed-on: #96
2026-05-16 00:32:32 +00:00
Cameron Cordes 922f7df8d3 clip-search: offset-based pagination on /photos/search
Adds `offset` query param (default 0) and `total_matching` + `offset`
response fields. Backend already computes the full sorted list of
above-threshold matches per query; pagination just slices it at
[offset, offset+limit) instead of always returning the top window.
Offsets past the end return an empty page cleanly so the client can
stop fetching naturally.

Re-scores on every page rather than caching the sorted list — at
personal-library scale (~14k embeddings, 768d) the dot-product loop
is sub-100ms and the lack of state means no eviction / staleness
concerns.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:56:10 -04:00
Cameron Cordes ee2ed3005b clip-search: document env knobs in .env.example
APOLLO_CLIP_API_BASE_URL (falls back to APOLLO_API_BASE_URL),
CLIP_BACKLOG_MAX_PER_TICK, CLIP_ENCODE_CONCURRENCY, and
CLIP_REQUEST_TIMEOUT_SEC — all of which the code already reads.
Apollo's side was documented earlier; this closes the parity gap.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:10:52 -04:00
Cameron Cordes 66267cc345 clip-search: fmt + clippy clamp + test AppState arg
Pulls cargo fmt + clippy pass over the new files only — pre-existing
files left untouched even though fmt has drift on them. clamp(1,200)
swaps a manual min/max chain that clippy flagged. test AppState
constructor needed ClipClient::new(None) so the lib-test target
compiles.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:10:52 -04:00
Cameron Cordes 32195ed89e clip-search: backlog drain + /photos/search endpoint
Wires the persistence layer for CLIP semantic search. The watcher's
per-tick drain encodes any image_exif row with a known content_hash
but no clip_embedding via Apollo (cap CLIP_BACKLOG_MAX_PER_TICK,
default 32). On a query, /photos/search encodes the text via Apollo
and reranks every stored embedding in-memory.

ExifDao additions:
- list_clip_unencoded_candidates — partial-index scan for drain
- backfill_clip_embedding — touches only the two new columns
- list_clip_index — dedup'd (hash, embedding) pull for search

clip_watch::run_clip_encoding_pass is the parallel fan-out — tokio
runtime per pass with CLIP_ENCODE_CONCURRENCY (default 4). No marker
rows for permanent failures yet; per-tick cap bounds the retry cost.

/photos/search params: q, limit, threshold (default 0.20), library,
model_version. Response is intentionally minimal (path + score) so
the frontend joins against existing photo-metadata routes lazily.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:10:52 -04:00
Cameron Cordes 8d9e76cf15 clip-search: migration + client + probe binary
Probe-phase scaffolding for CLIP semantic search. Adds the column
that will hold per-photo embeddings, the HTTP client to Apollo's
inference service, and a throwaway probe binary so we can eyeball
search-result quality on the live library before building the
persistence layer (backlog drain, /photos/search endpoint, UI).

- migrations/2026-05-14-000000_add_clip_embedding/ — adds
  image_exif.clip_embedding (BLOB) and clip_model_version (TEXT),
  plus a partial index on (clip_embedding IS NULL AND content_hash
  IS NOT NULL) for the future backfill drain.
- src/database/models.rs — extends ImageExif struct to match.
- src/ai/clip_client.rs — encode_image / encode_text / health,
  same Permanent/Transient/Disabled taxonomy as face_client.
- src/bin/probe_clip_search.rs — --query <q> --library N --limit M
  --top K. Encodes a sample and prints top-K cosine similarities.
  No DB writes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:10:52 -04:00
cameron 26ffc15c8b Merge pull request 'feature/hls-content-hash' (#95) from feature/hls-content-hash into master
Reviewed-on: #95
2026-05-15 20:09:48 +00:00
Cameron Cordes 0168a4b574 hls: remove legacy /video/stream + /video/{path} routes
The hash-keyed `/video/hls/{hash}/{file}` route fully covers HLS
playback now and both clients (Apollo, FileViewer-React) have
shipped updates that use it directly. Keeping the basename-keyed
fallback only encouraged stale URLs to keep flowing — every legacy
file was deleted by the startup migration, so the routes were
guaranteed 404 machines.

Dropped:
- `stream_video` handler (`GET /video/stream?path=…`) — the original
  basename-keyed playlist serve.
- `get_video_part` handler (`GET /video/{path}`) — bare-filename
  segment serve. The new layout's segments live in
  `<shard>/<hash>/segment_NNN.ts` and reach the client via
  `stream_hls_file`.
- `legacy_path` field on `GenerateVideoResponse` (serialised as
  `playlist`). The field always pointed at a file the migration had
  deleted; current clients ignore it entirely.
- Their service registrations in `main.rs`.
- The body-side `filename` extraction in `generate_video` (existed
  only to construct `legacy_path`) and the now-unused `global`
  opentelemetry import in `handlers/video.rs`.

All 707 tests still pass. Same hand-rolled validators (`is_valid_hash`
/ `is_allowed_hls_filename`) keep the new route's defense-in-depth
intact.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:00:19 -04:00
Cameron c30cadde02 ai: fix UTF-8 byte-slice panics in insight_generator log/truncation paths
Switch four `&s[..N]` / `&s[..s.len().min(N)]` sites to
`chars().take(N).collect::<String>()` so truncation lands on character
boundaries instead of mid-codepoint. The agentic summary preview log
was panicking when generated content hit an em-dash at byte 200; the
few-shot passage cap, brief_json_args debug formatter, and a test
assertion message had the same latent bug.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 15:10:02 -04:00
Cameron Cordes 8503ef7884 chore: cargo fmt + clippy --fix sweep across the crate
Pure mechanical cleanup of accumulated drift in files outside the
HLS-content-hash branch's main change set. No behavior change.

- `cargo fmt` on every previously-misformatted file
  (`ai/insight_generator.rs`, `database/knowledge_dao.rs`,
  `faces.rs`, `knowledge.rs`, `libraries.rs`).
- `cargo clippy --fix`:
  - `needless_borrow`: `&library` → `library` in `handlers/image.rs`
    (two sites in the photo-listing path).
- Manual clippy pass for warnings clippy emits but can't auto-apply:
  - `field_reassign_with_default` in `database/reconcile.rs::run` —
    consolidated into a struct-literal initializer.
  - `needless_range_loop` in `database/knowledge_dao.rs::union_perceptual_tags`
    — inner `for b in (a+1)..indices.len() { let ib = indices[b]; ... }`
    becomes `for &ib in &indices[a + 1..] { ... }`.
  - Doc-list indentation: continuation lines under nested bullets in
    `database/mod.rs::get_memories_in_window` and
    `database/knowledge_dao.rs::build_entity_graph` realigned to the
    list-item content column.

Deliberately not touched (each deserves its own focused commit, with
testing, rather than getting bundled into a sweep):
- 4× `deprecated count_distinct` in `faces.rs` — diesel API migration
  to `AggregateExpressionMethods::aggregate_distinct` may shift result
  types; needs verification against the existing stats queries.
- `await_holding_lock` in `knowledge.rs:807` — `std::sync::Mutex` held
  across `ollama.generate(...).await`. Genuine concurrency bug; fix
  requires understanding the surrounding flow before just dropping
  the guard.
- 2× `type_complexity` in `database/mod.rs` — cosmetic, would need a
  `type` alias and corresponding callers updated.
- Dead `total_deleted` on `library_maintenance::GcStats` and
  `file_scan::enumerate_indexable_files` — both are public surface
  retained for future use; deletion is a separate decision.

All 707 tests still pass. Release build clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 16:25:05 -04:00
Cameron Cordes 8c91bf554b hls: cargo fmt + clippy::cloned_ref_to_slice_refs
Pure mechanical pass on the files this branch added/modified:
rustfmt reflow of a few long lines / chains, and the one
non-pre-existing clippy warning — replacing
`&[rel_path.clone()]` with `std::slice::from_ref(&rel_path)` in
`handlers::video::generate_video` to avoid the alloc + clone for a
single-element slice.

All 707 tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 16:01:16 -04:00
Cameron Cordes 7cd1ea3cf8 hls: per-library readiness gauges + GET /hls/stats endpoint
The hash-keyed pipeline transcodes lazily, so a freshly mounted (or
freshly upgraded) library is "mostly pending" for the first hour
while the watcher works through the backlog. The operator wants a
live read on remaining work so they can tune `HLS_CONCURRENCY` and
know when to stop waiting.

Adds:

- `src/hls_stats.rs` — pure compute path (`stats_from_rows`) and an
  Arc<Mutex<dyn ExifDao>> wrapper (`compute_and_publish`). Per
  library: `total`, `with_playlist`, `pending`, `unsupported`,
  `hashless_videos`. Dedup is by content_hash so duplicate-bytes-at-
  N-paths counts once (same domain rule as `faces::stats`).
  `hashless_videos` is a separate counter so the operator can see
  the "hash backfill, then transcode" pipeline depth instead of
  having NULL-hash rows just hide.

- Prometheus gauges labeled by library name:
  `imageserver_hls_videos_total`, `..._with_playlist`, `..._pending`,
  `..._unsupported`. Updated by the watcher at the end of every full-
  scan tick *and* on every `/hls/stats` hit, so whichever surface the
  operator is watching stays fresh. Registered in `main` alongside
  the existing image/video gauges.

- `GET /hls/stats` — Claims-protected JSON snapshot of the same data
  plus a top-level cross-library aggregate. Runs on a blocking pool
  so it doesn't pin the actix worker; per-call cost is one
  `list_paths_and_hashes_for_library` SQL query per library plus a
  `stat()` per distinct video hash. Bounded — never invoked from
  middleware, only from the explicit endpoint and the full-scan
  tick. The watcher's end-of-tick `info!` summary line mirrors the
  endpoint output for operators tailing the log.

- New `ExifDao::list_paths_and_hashes_for_library` method:
  `SELECT rel_path, content_hash FROM image_exif WHERE library_id =
  ?`. Single round-trip; callers filter to video extensions
  client-side because the schema doesn't carry media-type. Mock
  impl in `files.rs` returns an empty vec.

Tests in `hls_stats::tests` exercise stats_from_rows directly (videos-
only filter, hash dedup, playlist vs sentinel decision, NULL-hash
hashless counting) plus a publish_gauges round-trip that reads the
gauge value back. Full suite (347 lib + 360 bin = 707) passes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 15:58:46 -04:00
Cameron Cordes 7c153596fe hls: hash-keyed HTTP routes for /video/generate and serving
`POST /video/generate` is reshaped to return a JSON object instead of
a bare string. New fields:

- `playlist_url`: stable hash-keyed URL of the form
  `/video/hls/<hash>/playlist.m3u8`. Use this with hls.js / native
  players — relative segment refs inside the playlist resolve to
  `/video/hls/<hash>/segment_NNN.ts` because the URL is path-based.
- `content_hash`: the blake3 hex digest that identifies the bytes.
  Stable across libraries, archive ingests, renames; clients can
  cache the URL by hash.
- `ready`: true iff the playlist file is already on disk. False means
  a transcode was just queued; the client should retry the URL after
  a short delay (or rely on hls.js's built-in retry).
- `playlist` (legacy): basename-keyed path string, echoed under the
  old field name so clients that destructure `response.playlist` keep
  working during the rollout. The startup migration deletes the
  underlying file, so this URL will 404; clients should migrate to
  `playlist_url`. Field is slated for removal once Apollo / File
  Viewer ship the update.

The handler:
- resolves the source path across libraries (same logic as before),
- looks up `image_exif.content_hash` for that (library_id, rel_path),
- falls back to inline `content_hash::compute` when the row is mid-
  backfill — pure read, no library mutation,
- sends a single-element `QueueVideosMessage` to `VideoPlaylistManager`
  if the playlist isn't already on disk and there's no
  `playlist.unsupported` sentinel,
- returns the URL immediately. The actor pipeline owns transcoding.

New route `GET /video/hls/{hash}/{file}`:
- strict validation: hash must be 64 ascii-hex chars; file must be
  `playlist.m3u8` or `segment_NNN.ts` (digits only). Anything else
  returns 400 so we never have to rely on path canonicalisation
  alone to defend against traversal,
- belt-and-suspenders canonicalize() guard verifies the resolved
  file lives under `$VIDEO_PATH`,
- serves with the standard `NamedFile::into_response` machinery.

Cleanup in `actors.rs`:
- `ProcessMessage` + its `StreamActor` handler had no senders after
  the rewire — removed. `StreamActor` itself stays (still handles
  `RefreshThumbnailsMessage` from `files.rs`).
- `create_playlist`, `playlist_file_for`,
  `playlist_unsupported_sentinel` are gone — the legacy on-demand
  transcode helper and the migration-only path helpers had no
  remaining users (the migration uses its own classify() function).
- Imports tightened: dropped `Child`, `ExitStatus`, `trace`.

Tests cover both new validators (`is_valid_hash`,
`is_allowed_hls_filename`) including the strings that motivated the
defence-in-depth (traversal attempts, internal `.tmp`/`.unsupported`
artifacts, malformed segment names).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 15:51:01 -04:00
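The two-layer check this commit message describes for `GET /video/hls/{hash}/{file}`, as an added example that is not the project's own code: an allowlist on both path parameters, then a canonicalize guard. The function names `is_valid_hash` and `is_allowed_hls_filename` come from the commit message; their bodies, `Reject`, `resolve_hls_file`, `make_fixture`, the lowercase-only hash rule and the "one or more digits" segment rule are assumptions, and the fixture files are constructed.

```rust
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Why a request was refused (hypothetical names, not the project's types).
#[derive(Debug, PartialEq)]
pub enum Reject {
    BadHash,     // would map to HTTP 400
    BadFile,     // would map to HTTP 400
    NotFound,    // nothing on disk at the resolved path
    OutsideRoot, // resolved path escaped the video root
}

/// 64 hex characters, the length of a blake3 hex digest.
/// Assumption: lowercase only, so one digest has exactly one URL.
pub fn is_valid_hash(s: &str) -> bool {
    s.len() == 64 && s.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
}

/// Allowlist: `playlist.m3u8` or `segment_<digits>.ts`.
/// Assumption: any non-empty run of ASCII digits is accepted.
pub fn is_allowed_hls_filename(s: &str) -> bool {
    if s == "playlist.m3u8" {
        return true;
    }
    match s.strip_prefix("segment_").and_then(|rest| rest.strip_suffix(".ts")) {
        Some(digits) => !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()),
        None => false,
    }
}

/// Validate first, then build `<root>/<hash[..2]>/<hash>/<file>` and confirm
/// the canonical path is still under the canonical root.
pub fn resolve_hls_file(video_root: &Path, hash: &str, file: &str) -> Result<PathBuf, Reject> {
    if !is_valid_hash(hash) {
        return Err(Reject::BadHash);
    }
    if !is_allowed_hls_filename(file) {
        return Err(Reject::BadFile);
    }
    // Slicing is safe: the hash was just proven to be 64 ASCII bytes.
    let candidate = video_root.join(&hash[..2]).join(hash).join(file);
    let root = fs::canonicalize(video_root).map_err(|_| Reject::NotFound)?;
    let real = fs::canonicalize(&candidate).map_err(|_| Reject::NotFound)?;
    // Path::starts_with compares whole components, so a sibling directory
    // whose name merely begins with the root's name does not pass.
    if real.starts_with(&root) {
        Ok(real)
    } else {
        Err(Reject::OutsideRoot)
    }
}

#[cfg(unix)]
fn link_dir(target: &Path, link: &Path) -> io::Result<()> {
    std::os::unix::fs::symlink(target, link)
}

#[cfg(not(unix))]
fn link_dir(_target: &Path, _link: &Path) -> io::Result<()> {
    Ok(()) // no symlink case on this platform; the lookup reports NotFound
}

/// Constructed fixture. Returns (video root, hash with a real directory,
/// hash whose directory is a symlink to a location outside the root).
pub fn make_fixture() -> io::Result<(PathBuf, String, String)> {
    let base = std::env::temp_dir().join(format!("hls_guard_{}", std::process::id()));
    let _ = fs::remove_dir_all(&base);
    let root = base.join("video");

    let good = "ab".repeat(32);
    let good_dir = root.join(&good[..2]).join(&good);
    fs::create_dir_all(&good_dir)?;
    fs::write(good_dir.join("playlist.m3u8"), "#EXTM3U\n")?;

    let linked = "cd".repeat(32);
    let outside = base.join("outside");
    fs::create_dir_all(&outside)?;
    fs::write(outside.join("playlist.m3u8"), "#EXTM3U\n")?;
    fs::create_dir_all(root.join(&linked[..2]))?;
    link_dir(&outside, &root.join(&linked[..2]).join(&linked))?;

    Ok((root, good, linked))
}

fn main() -> io::Result<()> {
    let (root, good, linked) = make_fixture()?;
    let cases = [
        (good.as_str(), "playlist.m3u8"),
        (good.as_str(), "segment_000.ts"),
        (good.as_str(), "playlist.m3u8.tmp"),
        (good.as_str(), "../../outside/playlist.m3u8"),
        ("..", "playlist.m3u8"),
        (linked.as_str(), "playlist.m3u8"),
    ];
    for (hash, file) in cases.iter() {
        println!("{:>12} {:<30} -> {:?}", &hash[..2], file, resolve_hls_file(&root, hash, file));
    }
    Ok(())
}
```

The symlinked hash directory passes both string validators and is stopped only by the canonicalize comparison, which is the case the second layer exists for.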
Cameron Cordes 78fabc2b32 hls: retire legacy basename-keyed HLS files on startup
Adds `video::legacy_migration::retire_legacy_hls_output`, called once
from `main` right after the diesel migrations run and before the
actor pipeline starts. Walks `$VIDEO_PATH` at depth 1, deletes every
`.m3u8` / `.m3u8.tmp` / `.m3u8.unsupported` / `.ts` file at root, and
logs a single info line with per-class counts. Skips directories
(the new layout's `<shard>/<hash>/` lives there) and unknown
extensions, so an operator's stashed README or `.tmp` from a
different tool is safe.

Why this needs its own one-shot pass rather than letting the rewritten
`cleanup_orphaned_playlists` handle it: the cleanup walk deliberately
only looks at `<shard>/<hash>/` dirs (so it can't accidentally `rm`
operator-stashed content), so without this migration the legacy files
would sit at root forever, never served, never refreshed. Operator
complaint count from the previous IMG_NNNN.MOV collision: ~10
duplicate-basename hits on one library alone; total .m3u8 count was
699 vs a much larger video count — i.e. the loser of every collision
was a permanent orphan. This pass collects all of them, then the
running watcher writes hash-keyed playlists going forward.

Idempotent — a second boot finds nothing and reports zero deletions,
so the call site can stay in `main` across releases until the module
is removed in a later cleanup commit. Tests cover the happy path
(legacy artifacts gone, hash dir untouched, unrelated files left
alone), idempotency, and the missing-directory case.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 15:43:13 -04:00
Cameron Cordes b8e17e05b7 hls: rewrite orphan cleanup for hash-keyed layout
The cleanup walk previously looked for `$VIDEO_PATH/<basename>.m3u8`
and matched each file's stem against a recursive walk of every
library. With the hash-keyed layout now in place, every playlist's
file_stem is the literal string "playlist" — the old logic would
treat every hash-keyed playlist as orphaned on its next run and wipe
them all in one tick (default cleanup interval is 24h, so this is a
24-hour bomb on top of the prior commit).

New approach: orphan-ness is decided in the database, not on the
filesystem. The cleanup loop:

- Snapshots every distinct non-NULL `image_exif.content_hash` into a
  HashSet (new `ExifDao::list_distinct_content_hashes` method —
  `SELECT DISTINCT content_hash WHERE content_hash IS NOT NULL`).
- Walks `$VIDEO_PATH` two levels deep: top-level entries are filtered
  to 2-char lowercase hex shard dirs, each shard's children to 64-char
  hex hash dirs. Anything else (legacy `.m3u8` at root from the
  pre-content-hash era, operator-stashed dirs, partial writes) is left
  alone.
- Hash dirs whose hash isn't in the alive set are `remove_dir_all`'d.
  Shard dirs that emptied as a result are reaped on the same pass via
  `remove_dir` (no-op if non-empty).
- The library-stale safety gate is preserved: a stale library skips
  the cycle even though the orphan decision is DB-only, because the
  upstream missing-file scan that retires `image_exif` rows itself
  pauses for stale libraries. Belt-and-suspenders — keeping a hash
  dir for one extra 24h cycle is cheaper than wiping one whose source
  was briefly unreachable. The gate now also filters disabled
  libraries out of the stale set (they're intentionally absent from
  the health map).
- The legacy `excluded_dirs` parameter is preserved on the function
  signature but unused (the walk no longer crosses library trees);
  flagged with a leading underscore. Callers in `main.rs` stay
  unchanged.

`MockExifDao` in `files.rs` grows the new method (returns empty);
unit tests for the new `is_hash_shard` / `is_full_hash` validators
guard against an operator's stashed directory under VIDEO_PATH ever
matching the orphan-rm path. Both pass.

A follow-up commit handles the one-shot startup migration that
retires the legacy basename-keyed `.m3u8` / `.ts` files at
`$VIDEO_PATH` root.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 15:41:04 -04:00
Cameron Cordes d1667099c3 hls: rewire queue + generator to write hash-keyed playlists
Switches the watcher → VideoPlaylistManager → PlaylistGenerator path
from the basename-keyed layout
(`$VIDEO_PATH/{basename}.m3u8`) to the hash-keyed layout
(`$VIDEO_PATH/{hash[..2]}/{hash}/playlist.m3u8`) introduced in the
prior commit. Source videos that share a basename across libraries
(or across subdirs of one library) no longer overwrite each other's
playlists. The legacy HTTP endpoints in `/video/generate` /
`/video/stream` still use the basename layout — those move in a
follow-up commit alongside the stable streaming URL.

actors.rs:
- `QueueVideosMessage.video_paths: Vec<PathBuf>` →
  `videos: Vec<VideoToQueue>`. The queue handler dedups against the
  hash-keyed playlist + sentinel and forwards `GeneratePlaylistMessage`
  carrying the hash.
- `GeneratePlaylistMessage` now carries `content_hash: String`; the
  legacy `playlist_path: String` field is gone.
- `PlaylistGenerator` takes a `video_dir: PathBuf` at construction,
  computes the hash dir + playlist + sentinel + segment template via
  `hls_paths`, `mkdir -p`s the shard/hash dir before ffmpeg runs, and
  cleans up partial output on failure by walking the hash dir.
- `ScanDirectoryMessage` and its handler are retired entirely; their
  startup-walk role is taken over by the watcher's first tick (see
  `watcher.rs` below). Dropping it avoids threading an `ExifDao` into
  `VideoPlaylistManager` just so the actor can resolve hashes.
- Legacy `playlist_file_for` / `playlist_unsupported_sentinel` are
  retained behind `#[allow(dead_code)]` for the upcoming migration
  pass that retires pre-content-hash output.

watcher.rs:
- `process_new_files` keeps `content_hash` in the EXIF-batch result
  (formerly threw it away). Videos with `image_exif.content_hash =
  NULL` — mid-backfill rows — are skipped this tick rather than
  falling back to a basename-colliding playlist; they get picked up
  after `backfill_unhashed_backlog` populates the hash on a
  subsequent tick. Skipped count is logged at debug.
- The video staleness check now uses `hls_paths::playlist_for_hash`
  instead of `$VIDEO_PATH/{basename}.m3u8`.
- `last_full_scan` initialises to `UNIX_EPOCH` so the watcher's first
  tick is treated as a full scan. That covers the catch-up gap left
  by removing `ScanDirectoryMessage` — every library's existing media
  is checked once at watcher boot (≈60s after startup) instead of
  waiting up to `WATCH_FULL_INTERVAL_SECONDS` (1h default).

main.rs: removes the `ScanDirectoryMessage` import and the per-library
`do_send` loop, with a comment pointing at the watcher's first-tick
behavior.

state.rs: `PlaylistGenerator::new` now takes the video dir.

Tests: existing `video::hls_paths` (4) and `watcher::tests` (4) pass.
The basename-keyed `/video/generate` endpoint still compiles and
serves; behavior change there is deferred to the follow-up commit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 15:36:01 -04:00
Cameron Cordes c71e1cdce0 hls: add hash-keyed path helpers + VideoToQueue type
Foundation for migrating HLS playlist output from basename-keyed
(`$VIDEO_PATH/{basename}.m3u8`) to content-hash-keyed
(`$VIDEO_PATH/{hash[..2]}/{hash}/playlist.m3u8`). The basename layout
collides whenever two source videos share a filename — common with
iPhone-style sequential naming (`IMG_NNNN.MOV`) across libraries — so
the loser's playlist gets overwritten and ffmpeg keeps re-queueing the
file every scan.

This commit adds the path layout and type plumbing without touching the
actor pipeline, watcher, or HTTP handlers yet:

- `src/video/hls_paths.rs`: `playlist_for_hash`, `sentinel_for_hash`,
  `segment_template_for_hash` built on top of `content_hash::hls_dir`,
  with constants for the filenames inside the hash dir. Unit tests
  cover the sharded layout and the playlist/sentinel/segment paths
  all landing in the same directory (so HLS relative refs resolve).
- `src/content_hash::hls_dir` un-deaded — was waiting for this branch.
- `VideoToQueue` struct in `actors.rs`: pairs a source path with its
  content hash so callers that lack a hash (rows mid-backfill) skip
  the video rather than fabricate one.
- `playlist_file_for` / `playlist_unsupported_sentinel` retained as
  migration-only helpers — they're only needed by the one-shot startup
  pass that retires pre-content-hash output.

Follow-ups (separate commits on this branch): wire `hls_paths` through
the queue handler + `PlaylistGenerator`, update the watcher's
`process_new_files` to build `VideoToQueue`, switch `/video/generate`
and `/video/stream` to resolve path→hash and return stable URLs, add
the legacy-layout migration, rewrite `cleanup_orphaned_playlists` for
the new dir shape, and surface progress via Prometheus + `/hls/stats`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 15:23:31 -04:00
cameron 22ce1a20e7 Merge pull request 'feature/library-patch-endpoint' (#94) from feature/library-patch-endpoint into master
Reviewed-on: #94
2026-05-13 13:44:36 +00:00
Cameron Cordes 7ec156fc05 libraries: accept newline as an excluded_dirs separator
Splits parse_excluded_dirs_column on `,`, `\n`, AND `\r` so a textarea
submit with one entry per line works the same as comma-separated.
Mixed input (`a, b\nc`) parses cleanly too — the frontend can paste
from any source without preprocessing.

Motivated by the "forgot the comma" footgun: typing
`.thumbnails .thumbnails2` in a single-line input today stores a
never-matching component pattern. With newlines as a first-class
separator and the frontend switching to a textarea, the natural
one-per-line UX makes that mistake impossible.

The DB store form stays comma-joined (normalize_excluded_dirs_input
hasn't changed) so existing rows are unaffected and no migration is
needed. Newline support matters mostly for the inbound write path;
mirroring it on the read side keeps the parser round-trip safe in
case anything writes a newline form directly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 09:23:51 -04:00
Cameron Cordes 439532377d libraries: validate excluded_dirs entries on write
Reject the silent-footgun shapes that PathExcluder would store but
never match. The watcher would still walk past every photo as if the
exclude wasn't there, and the operator would have no signal that
their entry is dead. Caught at PATCH time with a descriptive 422.

Rules:
- Backslash anywhere → "use forward slashes" (catches \photos,
  photos\2024, \\server\share — Windows-typed entries land in the
  component-pattern bucket and never fire).
- Drive-letter prefix (Z:, Z:/...) → "relative to library root" —
  excludes are root-relative, not absolute system paths.
- Multi-segment name with no leading slash (photos/2024) →
  "did you mean /photos/2024?" — the common "I forgot the slash"
  typo, today silently stored as a component pattern that never hits.
- `..` segments in a path entry → "doesn't normalise". base.join()
  doesn't canonicalise, so the resulting prefix never matches.
- Bare "/" → "almost certainly a typo" for the library root.

Trailing slashes on path entries are stripped silently. Eight new
tests cover each rejection plus the trailing-slash normalisation
and the all-or-nothing failure mode of normalize_excluded_dirs_input
(one bad entry aborts the whole patch rather than silently applying
N-1 of N changes).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 09:02:29 -04:00
Cameron Cordes ce9fa94cb4 libraries: surface globals, normalise excluded_dirs on write
Two follow-ups to the PATCH endpoint:

1. GET /libraries now returns ``global_excluded_dirs`` alongside the
   library list — the union-with-globals semantics is invisible from
   the per-library row alone, and the admin UI needs to show what's
   already being skipped before the operator adds entries that would
   duplicate.

2. PATCH /libraries/{id} canonicalises the excluded_dirs string on
   write via the new ``normalize_excluded_dirs_input``: trims per
   entry, drops empties, dedupes preserving first-occurrence order,
   comma-joins without inner whitespace. Empty / whitespace-only →
   NULL. Round-trip stable so re-saving an entry produces an
   identical row.

Five new tests cover the empty / whitespace, trim, dedup, round-trip,
and overlap-with-globals cases. effective_excluded_dirs continues to
keep overlapping entries between globals and per-library on purpose —
PathExcluder accepts repeats and there's no behavioural reason to
dedupe at merge time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 08:58:04 -04:00
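The write-path rules from the three `excluded_dirs` commit messages above (separator handling, per-entry rejection, canonical comma-joined form), combined into one added example that is not the project's own code. `EntryError`, `validate_entry` and `normalize_excluded_dirs` are hypothetical names; doing the split, validation and normalisation in a single function, and rejecting any slash-containing entry that lacks a leading slash, are assumptions. Inputs are constructed.

```rust
/// Rejection reasons, one per rule in the commit message (hypothetical names).
#[derive(Debug, PartialEq)]
pub enum EntryError {
    Backslash,           // "use forward slashes"
    DriveLetter,         // entries are relative to the library root
    MissingLeadingSlash, // `photos/2024` was probably meant as `/photos/2024`
    DotDot,              // the joined prefix is never canonicalised, so it cannot match
    BareRoot,            // `/` would name the library root itself
}

/// Validate one trimmed, non-empty entry and return its stored form.
/// A leading slash marks a root-relative path entry; anything else is a
/// single directory-name (component) pattern.
pub fn validate_entry(entry: &str) -> Result<String, EntryError> {
    if entry.contains('\\') {
        return Err(EntryError::Backslash);
    }
    let bytes = entry.as_bytes();
    if bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':' {
        return Err(EntryError::DriveLetter);
    }
    if !entry.starts_with('/') {
        // Assumption: any slash in a component pattern is treated as the
        // "forgot the leading slash" typo.
        if entry.contains('/') {
            return Err(EntryError::MissingLeadingSlash);
        }
        return Ok(entry.to_string());
    }
    // Path entry: trailing slashes are stripped silently.
    let path = entry.trim_end_matches('/');
    if path.is_empty() {
        return Err(EntryError::BareRoot);
    }
    if path.split('/').any(|segment| segment == "..") {
        return Err(EntryError::DotDot);
    }
    Ok(path.to_string())
}

/// Split on `,`, `\n` and `\r`; trim; drop empties; validate; dedupe keeping
/// first-occurrence order; comma-join. `Ok(None)` stands for SQL NULL.
/// All-or-nothing: the first bad entry aborts the whole write and is
/// returned with its reason so the caller can build a descriptive 422.
pub fn normalize_excluded_dirs(input: &str) -> Result<Option<String>, (String, EntryError)> {
    let mut kept: Vec<String> = Vec::new();
    for raw in input.split(&[',', '\n', '\r'][..]) {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            continue;
        }
        let entry = validate_entry(trimmed).map_err(|e| (trimmed.to_string(), e))?;
        if !kept.contains(&entry) {
            kept.push(entry);
        }
    }
    if kept.is_empty() {
        Ok(None)
    } else {
        Ok(Some(kept.join(",")))
    }
}

fn main() {
    // Constructed inputs: a textarea paste, then one input per rejection rule.
    let samples = [
        " .thumbnails , /photos/2024/ ,.thumbnails\n/raw\r\n",
        "   \n , ",
        "/ok,photos/2024",
        "\\\\server\\share",
        "Z:/photos",
        "/a/../b",
        "/",
    ];
    for s in samples.iter() {
        println!("{:?} -> {:?}", s, normalize_excluded_dirs(s));
    }
}
```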
Cameron Cordes b3124437ec libraries: PATCH /libraries/{id} with live-apply
Adds an HTTP mutation surface for `libraries.enabled` and
`libraries.excluded_dirs`, replacing the SQL-only workflow noted in
CLAUDE.md. Apollo's Settings panel calls this from the LIBRARIES
section so the operator no longer has to ssh + sqlite3 to flip a
library off or edit its excludes.

Live-apply (no restart) via a new `live_libraries: Arc<RwLock<Vec<
Library>>>` field on AppState. The existing immutable `libraries`
Vec stays for hot-path handlers that only need stable id → root_path
lookups, avoiding a 19-call-site refactor. The watcher and
cleanup_orphaned_playlists now take the lock instead of a Vec
snapshot and re-read at the top of each tick, so `enabled` /
`excluded_dirs` changes are picked up within one
WATCH_QUICK_INTERVAL_SECONDS. The GET /libraries handler also reads
through the live view.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 08:47:35 -04:00
cameron 74bf693878 Merge pull request 'feature/date-backfill-null-only' (#93) from feature/date-backfill-null-only into master
Reviewed-on: #93
2026-05-12 18:42:21 +00:00
Cameron Cordes 2d56047497 Drop fs_time from date-backfill eligibility
The drain queried `date_taken IS NULL OR date_taken_source = 'fs_time'`
ORDER BY id ASC LIMIT 500 every watcher tick. The resolver is
deterministic on file bytes + filename + fs metadata, so any row that
landed on fs_time once landed there again on every retry — the drain
spun on the same lowest-id rows in perpetuity, never advancing to
rows 501+ while still logging more_remain=true.

Side effect: 500 auto-commit UPDATEs per tick sustained the SQLite
write lock long enough that other writers on separate DAO connections
hit the 5s busy_timeout. Manifested as intermittent 500s on
PATCH /image/faces/{id} that succeeded on retry.

Narrow the partial index and query predicate to `date_taken IS NULL`.
If exiftool installs or a new filename regex lands, an operator can
re-resolve fs_time rows out-of-band rather than re-introducing the
steady-state churn.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 14:37:36 -04:00
Cameron Cordes 3427c2916c Log 500-return paths in PATCH /image/faces/{id}
The four 500-return paths in update_face_handler returned e.to_string()
in the body but never logged. When a face PATCH failed with a 16-byte
body and no log entry, the cause (SQLITE_BUSY from cross-DAO writer
contention exhausting the 5s busy_timeout) was invisible. Surface the
full anyhow chain via {:#} on each path so the diesel cause is in the
log even when the response body only shows the top-level context.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 14:37:26 -04:00
cameron 6a3e37b7dc Merge pull request 'feature/split-main-rs' (#92) from feature/split-main-rs into master
Reviewed-on: #92
2026-05-12 17:02:06 +00:00
Cameron Cordes 9f8a69fc6d Split main.rs: extract watcher loop into src/watcher.rs
main.rs drops from 1200 → 346 lines (90% smaller than the pre-branch
3542). What's left is the startup wiring it was always meant to be:
.env, migrations, AppState construction, route registration, server
bind. The four background-loop functions move into src/watcher.rs:

- watch_files (310 lines) — quick/full scan tick, per-library probe,
  backfill drain dispatch, missing-file scan, back-ref refresh,
  orphan GC.
- process_new_files (351 lines) — file walk → EXIF write →
  face-candidate build → HLS / preview-clip queueing →
  reconciliation. The "biggest untested chunk" from the earlier
  audit.
- cleanup_orphaned_playlists (167 lines) — separate slower-tick
  thread.
- playlist_needs_generation — small mtime-comparison helper.

Plus 4 unit tests for playlist_needs_generation (covers missing
playlist, newer playlist, newer video, video-missing-metadata
fallback).

main.rs's imports correspondingly shrink — Addr, HashSet, WalkDir,
Utc, InsertImageExif, and the bulk of video::actors all leave with
the watcher. CLAUDE.md updated to reflect the new module layout
(layered architecture box + module map for the face-detection
section).

cargo test --bin image-api: 329 passing (no regression).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 12:54:37 -04:00
Cameron Cordes bdb69c7d37 Split main.rs: extract HTTP handlers into src/handlers/
main.rs drops from 2935 → 1200 lines, freed for startup wiring +
the watcher. The 16 route handlers move into three domain-grouped
files under src/handlers/:

- handlers/favorites.rs (128 lines): favorites, put_add_favorite,
  delete_favorite.

- handlers/video.rs (665 lines): generate_video, stream_video,
  get_video_part, get_video_preview, get_preview_status. The 5
  pre-existing get_preview_status integration tests move with the
  handler (still pass against TestPreviewDao + AppState::test_state).

- handlers/image.rs (1003 lines): get_image (with the
  hash/library-scoped/bare-legacy thumb lookup), upload_image,
  get_file_metadata, set_image_gps, get_full_exif, set_image_date,
  clear_image_date. Helpers (create_circular_thumbnail,
  build_metadata_response_for_date_mutation) and request structs
  (SetGpsRequest, SetDateRequest, ClearDateRequest, UploadQuery)
  travel with them.

main.rs's import block shrinks from ~50 lines to ~22 as everything
HTTP-specific (NamedFile, mp::Multipart, BytesMut, Span, KeyValue,
StreamExt, …) moves with the handlers. The is_video_file wrapper
also goes — remaining callers in watch_files / cleanup use
file_types::is_video_file directly.

cargo test --bin image-api: 325 passing (no regression).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 12:38:17 -04:00
Cameron Cordes bec9857426 Split main.rs: extract backfill drains and thumbnails into modules
main.rs drops from 3542 → ~2930 lines by moving:

- src/backfill.rs (new): backfill_unhashed_backlog,
  backfill_missing_date_taken, backfill_missing_content_hashes,
  build_face_candidates, process_face_backlog. Now unit-tested for
  the first time — 5 tests covering cap behavior, library-id
  filtering, missing-on-disk skip, and the video/unhashed/scanned
  filters on face-candidate selection.

- src/thumbnails.rs (new): unsupported_thumbnail_sentinel,
  generate_image_thumbnail, create_thumbnails, update_media_counts,
  is_image, is_video, plus the IMAGE_GAUGE / VIDEO_GAUGE Prometheus
  metrics. Replaces the no-op stubs that used to live in lib.rs.
  4 new unit tests for the sentinel path math and the
  walker-counts-images-vs-videos smoke path.

Supporting:
- SqliteExifDao::from_shared (test-only) so an SqliteExifDao and
  SqliteFaceDao can share one in-memory connection — required to
  test build_face_candidates against the real join.
- files.rs / video/{mod,actors}.rs import from crate::thumbnails::*
  instead of the now-removed stubs in lib.rs.

cargo test --bin image-api: 325 passing (was 314).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 12:22:02 -04:00
cameron 05ec5d0c70 Merge pull request 'feature/knowledge-curation' (#91) from feature/knowledge-curation into master
Reviewed-on: #91
2026-05-12 15:40:55 +00:00
Cameron Cordes e67e00ef8a knowledge: predicate-quality nudge + bulk-reject endpoint
Two coupled changes to fight the speech-act-predicate problem
(facts like (Cameron, expressed, "I'm tempted to...")):

1. System prompt grows an explicit predicate-quality rule. The
   agent is told to use relationship-shaped verbs (lives_in,
   works_at, attended, is_friend_of, interested_in), and is
   given an explicit DON'T list (expressed, said, mentioned,
   stated, quoted, noted, discussed, thought, wondered). Plus a
   concrete Bad / Good example contrasting the noise pattern
   with the structured paraphrase the agent should be writing.
   Stops the bleed for new insights.

2. Cleanup tools for the legacy noise that's already in the
   table:
   - get_predicate_stats(persona, limit) returns
     [(predicate, count)] sorted desc — feeds the curation UI's
     PREDICATES tab.
   - bulk_reject_facts_by_predicate(persona, predicate, audit)
     flips every ACTIVE fact under that predicate to 'rejected'
     in one transaction, stamping last_modified_* so the action
     is attributable + reversible per-fact through the entity
     detail panel. REVIEWED facts under the same predicate are
     left alone — the curator may have hand-approved an
     exception ("interested_in" might be largely noise but a
     reviewed entry is intentional).

New HTTP endpoints:
   GET  /knowledge/predicate-stats?limit=
   POST /knowledge/predicates/{predicate}/bulk-reject

Persona-scoped via the existing X-Persona-Id header.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 21:50:26 -04:00
Cameron Cordes fb078b4906 knowledge: normalize legacy entity_type values
One-shot migration that re-applies the synonym map from
`normalize_entity_type` over every existing row, so legacy
entries written before that helper landed in upsert_entity stop
needing client-side workarounds.

  person ← person | people | human | individual | contact
  place  ← place | location | venue | site | area | landmark
  event  ← event | occasion | activity | celebration
  thing  ← thing | object | item | product

Unknown types ("friend", "family", etc.) get a lowercase+trim
sweep so at minimum case variants collapse — the curator can
merge or rename them via the curation UI from there.

`UPDATE OR IGNORE` skips rows that would violate UNIQUE(name,
entity_type) after the rewrite (e.g. an existing ("Sarah",
"person") + ("Sarah", "Person") pair). The duplicate survives
unchanged so it can be merged through the normal curation flow
rather than silently disappearing.

Idempotent: every UPDATE is conditional on `entity_type !=
canonical`, so re-running the migration is a no-op. The down
migration is intentionally inert — we don't have per-row
history of the original strings and the rewritten values stay
semantically correct.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 21:42:51 -04:00
Cameron Cordes d123cde333 knowledge: entity-graph endpoint for force-directed view
New GET /knowledge/graph?type=&limit= returns the data the
curation UI's graph tab needs:
  - nodes = entities with at least one in-scope fact (rejected /
    superseded excluded). Carries fact_count for visual sizing.
    Top-N by count desc; default cap 200 (clamped 1..1000).
  - edges = relational facts (object_entity_id set) grouped by
    (subject, object, predicate) so 3 "is_friend_of" facts
    between the same pair collapse into one edge with count=3.

Two raw SQL queries: an INNER JOIN onto a persona-scoped fact-
count subquery for nodes (skips 0-fact entities entirely so the
sim doesn't waste time on disconnected islands), then a follow-
up GROUP BY over the persona-scoped fact set restricted to the
node id set via IN clauses (ids are i32 so inlining is safe).

Pairs with the Apollo-side GraphPanel that runs d3-force over
the returned payload and renders SVG with click-to-open.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 21:26:02 -04:00
Cameron Cordes 6dca0c027d fmt: cargo fmt sweep
No logic changes - line reflow, brace placement, and method-chain splits
across handlers / personas / state / faces / knowledge / insights_dao /
knowledge_dao / populate_knowledge. Picked up incidentally while running
fmt for the sms-search work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 19:21:00 -04:00
Cameron Cordes 7329cc5ce7 insights: push sms search filters server-side, render snippets, expand fts5 docs
- Refactor search_messages_with_contact -> search_messages(query, &SmsSearchParams)
  exposing date_from / date_to / offset / is_mms / has_media; drop the over-fetch
  + client-side date post-filter that could silently drop in-window hits past
  position 100.
- Surface SMS-API's <mark>-wrapped snippet for MMS messages that only matched
  via message_parts_fts (attachment text / filename) - pre-snippet, those
  rendered as a blank body preview to the LLM.
- Expose is_mms / has_media on the search_messages tool schema; expand the
  FTS5 syntax docs with worked examples for phrase / prefix / boolean / NEAR
  / grouping so the model picks the right operator.
- Unit tests for format_search_hits (body fallback, snippet preferred, MMS
  attachment-only regression, empty-snippet fallback) and strip_mark_tags.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 19:20:19 -04:00
Cameron Cordes 6620fa48d7 knowledge: consolidation proposals endpoint
Finds near-duplicate entities the upsert-time cosine guard didn't
catch — typically legacy data from before that guard landed, or
pairs whose embeddings sit between 0.85 (default proposal floor)
and 0.92 (auto-collapse threshold). Pure read-side feature; the
actual merging still goes through the existing
/knowledge/entities/merge action.

New DAO method `find_consolidation_proposals(threshold,
max_groups)`:
  - Loads every non-rejected entity with an embedding.
  - Partitions by entity_type so a person can't cluster with a
    place.
  - Pairwise cosine, edges above threshold feed a union-find for
    transitive grouping (Sara → Sarah → Sarah J. all land in one
    cluster).
  - Tracks min/max cosine per component so the UI can show "how
    tight" each cluster is before clicking in.
  - Returns groups of >= 2 sorted by size desc then max cosine
    desc; trimmed to `max_groups`.

New endpoint `GET /knowledge/consolidation-proposals?threshold=
&limit=` accepts the threshold (clamped 0.5–0.99 to prevent the
"every entity in one mega-cluster" case) and returns groups with
per-entity persona fact-count breakdowns baked in — saves the UI
a separate query per cluster member.

ConsolidationGroup is exported through database/mod.rs so the
handler can use it without depending on knowledge_dao internals.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 18:43:11 -04:00
Cameron Cordes 89d0a6527c knowledge: per-entity persona breakdown for list + detail
Entities are global; facts are persona-scoped. Under the active
persona an entity can read as "0 facts" while having plenty under
other personas the user owns — the curation UI had no way to
surface that gap. Adds a batched DAO method
`get_persona_breakdowns_for_entities` that returns
{entity_id → [(persona_id, count)]} in one query (group by
subject + persona, user-scoped, status != rejected), and wires it
into both /knowledge/entities list rows and
GET /knowledge/entities/{id}.

EntitySummary grows an optional `persona_breakdown` field
(skipped on serialization when None — keeps PATCH responses
unchanged). EntityDetailResponse carries the breakdown as a
non-optional Vec since the detail endpoint always populates it.

One extra query per list page (50 entities → 50 subject ids
batched in one IN clause); single-entity GET adds one round trip.
Indexed by (subject_entity_id, persona_id) implicitly via the
existing user-persona indexes on entity_facts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 18:29:20 -04:00
Cameron Cordes f200466508 knowledge: forbid markdown in synthesized merge descriptions
System prompt now explicitly enumerates the markdown forms the
model shouldn't emit (bold, italics, headings, bullets, lists,
code fences) on top of the existing "no preamble, no quotes"
constraints. Some local models default to markdown-shaped
output for descriptions and the curation UI is plain-text,
which would render the asterisks and hashes literally.

The output cleaning step picks up a parallel sweep: strip code
fences, leading bullets / headings, wrapping quotes, and naive
inline emphasis markers (** and __). Rare enough that the
plain-replace is fine; not trying to parse markdown.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 16:49:02 -04:00
Cameron Cordes afac02cade knowledge: synthesize-merge endpoint for LLM-curated descriptions
New POST /knowledge/entities/synthesize-merge { source_id,
target_id } that calls the local Ollama with both entities' names
+ descriptions and returns a synthesized merged-description draft.
Read-only on the database — the curation UI uses the response as
the editable seed in the merge picker; the actual merge still
requires a follow-up PATCH-target-description + POST /merge.

The handler drops the KnowledgeDao lock before the LLM call so
other knowledge reads aren't blocked while generation runs
(typically seconds). Failure mode is 503 with an explicit hint
that the UI should fall back to skip-synthesis — keeps the merge
action working when the model is offline.

Output is lightly cleaned (leading "Merged description:" /
surrounding quotes stripped) since small models reach for those
patterns even with explicit "no preamble" guidance. Heavier
parsing isn't worth it — the curator edits anyway.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 16:37:26 -04:00
Cameron Cordes fd4dd89bbb knowledge: agent self-correction with audit + per-persona gate + revert
Bundles three coupled changes so agent-side mutations stay
auditable and reversible:

1. Audit columns on entity_facts —
   `last_modified_by_model` / `last_modified_by_backend` /
   `last_modified_at`. Stamped on every mutation path
   (update_fact, supersede_fact, manual PATCH, manual supersede,
   the new revert). NULL on rows never touched since creation.
   Partial index on `last_modified_at WHERE NOT NULL` keeps the
   "show me recent edits" feed fast without bloating from legacy
   rows.

2. Per-persona gate `personas.allow_agent_corrections` (BOOLEAN,
   default 0). Defense in depth at two layers:
   - build_tool_definitions: when off, `update_fact` and
     `supersede_fact` aren't in the catalog at all, so even a
     hallucinated tool call by the model fails fast.
   - tool_update_fact / tool_supersede_fact: re-checks the persona
     flag at call time and returns an explicit "corrections
     disabled" error if it's somehow off (e.g. flag flipped mid-
     loop).
   ToolGateOpts grows the flag; current_gate_opts splits into
   `current_gate_opts` (no persona context, defaults closed) +
   `current_gate_opts_for_persona` for chat callers that have a
   persona id. Both call sites in insight_chat are updated.

3. Revert action — new DAO method `revert_supersession` +
   `POST /knowledge/facts/{id}/restore`. Flips status back to
   'active', clears `superseded_by`, clears `valid_until` (we
   don't track whether it was hand-set vs auto-stamped, so the
   safe reset is to drop it — user can re-bound after). Stamps
   `last_modified_*` so the revert itself is attributable.

Manual paths (PATCH / supersede via HTTP, plus restore) stamp the
audit columns with `("manual", "manual")`. Agent paths stamp the
loop-time chat model and backend (mirroring the existing
created_by_* convention).

FactDetail in the HTTP response now carries the audit triple
alongside the existing provenance. Apollo wires the new field set
in the matching commit.

PersonaView / UpdatePersonaRequest grow `allowAgentCorrections`;
the PersonaPatch + InsertPersona + bulk_import paths thread it.

317 lib tests pass, including unchanged update_fact / supersede
DAO tests (now passing audit=None — None means "no provenance
context to attribute", legacy semantics).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 20:56:56 -04:00
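The two-layer gate described in fd4dd89bbb, as an added sketch that is not the project's code: only `update_fact` and the `allow_agent_corrections` flag come from the commit message, while `Store`, `Fact`, `Audit`, `ToolError`, `build_catalog`, `execute` and the `recall_facts` tool name are hypothetical stand-ins. It narrows the case to one mutating tool and shows why the call-time re-check matters when the flag flips after the catalog was built.

```rust
use std::collections::HashMap;

// Added sketch, not the project's code. All types here are hypothetical.
#[derive(Debug, Clone, PartialEq)]
pub struct Audit {
    pub model: String,
    pub backend: String,
}

#[derive(Debug, Clone, PartialEq)]
pub struct Fact {
    pub value: String,
    pub last_modified_by: Option<Audit>, // None = never touched since creation
}

#[derive(Debug, PartialEq)]
pub enum ToolError {
    UnknownTool(String),
    CorrectionsDisabled,
    NoSuchFact(i32),
}

#[derive(Default)]
pub struct Store {
    pub allow_agent_corrections: HashMap<String, bool>, // persona_id -> flag
    pub facts: HashMap<i32, Fact>,
}

impl Store {
    // A persona with no row, or with the flag unset, is treated as closed.
    fn gate_open(&self, persona: &str) -> bool {
        self.allow_agent_corrections.get(persona).copied().unwrap_or(false)
    }

    /// Layer 1: the mutating tool is advertised only while the flag is on.
    pub fn build_catalog(&self, persona: &str) -> Vec<&'static str> {
        let mut tools = vec!["recall_facts"];
        if self.gate_open(persona) {
            tools.push("update_fact");
        }
        tools
    }

    /// Dispatch one model-issued call against the catalog built at loop start.
    pub fn execute(
        &mut self,
        catalog: &[&str],
        persona: &str,
        tool: &str,
        fact_id: i32,
        arg: &str,
        audit: &Audit,
    ) -> Result<String, ToolError> {
        if !catalog.iter().any(|t| *t == tool) {
            // A hallucinated call to an unadvertised tool stops here.
            return Err(ToolError::UnknownTool(tool.to_string()));
        }
        match tool {
            "recall_facts" => {
                let fact = self.facts.get(&fact_id).ok_or(ToolError::NoSuchFact(fact_id))?;
                Ok(fact.value.clone())
            }
            "update_fact" => self.tool_update_fact(persona, fact_id, arg, audit),
            other => Err(ToolError::UnknownTool(other.to_string())),
        }
    }

    /// Layer 2: re-read the flag at call time, because the catalog may be stale.
    fn tool_update_fact(
        &mut self,
        persona: &str,
        fact_id: i32,
        new_value: &str,
        audit: &Audit,
    ) -> Result<String, ToolError> {
        if !self.gate_open(persona) {
            return Err(ToolError::CorrectionsDisabled);
        }
        let fact = self.facts.get_mut(&fact_id).ok_or(ToolError::NoSuchFact(fact_id))?;
        fact.value = new_value.to_string();
        fact.last_modified_by = Some(audit.clone()); // every mutation is attributable
        Ok("updated".to_string())
    }
}

fn main() {
    // Constructed sample data.
    let mut store = Store::default();
    store.facts.insert(1, Fact { value: "lives_in NYC".into(), last_modified_by: None });
    let audit = Audit { model: "example-model".into(), backend: "local".into() };
    let catalog = store.build_catalog("default"); // flag unset, so the gate is closed
    println!("{:?}", store.execute(&catalog, "default", "update_fact", 1, "lives_in SF", &audit));
}
```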
Cameron Cordes 86c331571d knowledge: per-persona reviewed-only mode + agent reads include reviewed
Two coupled changes to the agent's recall surface:

1. Default scope expanded. recall_facts_for_photo and recall_entities
   used to filter to status='active' only — which silently dropped
   'reviewed' (human-verified) facts. Now they surface active +
   reviewed by default. Reviewed is strictly more trusted than
   active and shouldn't have been hidden. Rejected and superseded
   stay filtered.

2. New persona toggle `reviewed_only_facts` (BOOLEAN, default false,
   migration 2026-05-10-000400). When set, the agent's recall on
   that persona returns ONLY facts with status='reviewed' — strict
   mode for tasks where hallucinated agent claims are particularly
   costly. Wired:
   - schema.rs / Persona / InsertPersona / PersonaPatch grow the
     field.
   - PersonaView returns it as `reviewedOnlyFacts` (camelCase wire).
   - PUT /personas/{id} accepts it (mobile editor surfaces it).
   - InsightGenerator now carries a PersonaDao reference so
     recall_facts_for_photo can read the active persona's flag at
     start; one extra read per recall, cheap.

Composes with include_all_memories: that operates on the persona
*scope* axis (single vs hive), reviewed_only_facts on the *status*
axis. They're orthogonal.

Legacy persona rows pick up the default false on migration; no
behavior change unless explicitly toggled. The 4 existing persona
construction sites (one production, two tests, one InsertPersona in
knowledge_dao tests) all default the field. populate_knowledge bin
+ state.rs constructors also wire the new persona_dao arg.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 20:21:39 -04:00
Cameron Cordes f53338923d knowledge: stamp model + backend on facts for audit
Adds two nullable TEXT columns to entity_facts —
`created_by_model` (LLM identifier) and `created_by_backend`
("local" / "hybrid" / "manual" / NULL) — so the curator can audit
which configurations produce good fact-keeping and which produce
noise.

photo_insights already carries model_version + backend, and
entity_facts.source_insight_id links to it, but:
  - source_insight_id is set post-loop, so chat-continuation and
    regenerated-insight facts lose the link.
  - JOINing per read is more friction than embedding provenance on
    the row itself.
  - Manual facts (POST /knowledge/facts) have no insight at all and
    need their own "manual" provenance marker.

Threading: execute_tool grows `model` + `backend` params, passed
from the three call sites (agentic insight loop, chat single-turn,
chat stream) using the loop-time `chat_backend.primary_model()` +
`effective_backend` already in scope. tool_store_fact stamps the
new fact accordingly; manual create_fact stamps backend="manual".
Legacy rows leave both NULL — pre-tracking data can't be back-
filled reliably from training_messages without burning compute.

Indexes are partial (WHERE NOT NULL) so legacy rows don't bloat
them, and "show me all facts from model X" stays fast.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 20:05:14 -04:00
Cameron Cordes 85f3716379 knowledge: fact supersession + photo-date valid_from
Two Phase-2 followups in one commit since they're coupled at the
write path:

* Agent populates valid_from from the source photo's date_taken
  when calling store_fact. Loose semantics — date_taken is *evidence
  at that date*, not strictly when the fact started being true — but
  gives the curator a calendar anchor and pairs with supersession to
  close intervals cleanly. valid_until stays NULL (a single photo
  can't tell us when something stopped). Honours the existing
  upsert_fact dedup (corroborated facts keep their first-recorded
  valid_from).

* Supersession: new column entity_facts.superseded_by INTEGER
  (migration 2026-05-10-000200), new status value 'superseded',
  new DAO method supersede_fact, new HTTP endpoint
  POST /knowledge/facts/{id}/supersede.

  Marking an old fact as replaced by a new one atomically: flips
  status to 'superseded', sets superseded_by, and stamps
  valid_until from the new fact's valid_from (when not already
  set). delete_fact clears dangling supersession pointers in the
  same transaction so the column never points at a missing row —
  no FK because SQLite can't ALTER ADD with REFERENCES, but the
  DAO maintains the invariant.

Pairs with conflict detection from the previous slice: once the
old fact's valid_until is closed, its interval no longer overlaps
the new fact's, so they stop flagging — the supersede action
resolves the conflict.

Two tests pin the contract: supersede stamps valid_until from
new.valid_from while respecting an existing valid_until, and
deleting the supersedeR clears the dangling pointer while leaving
the old fact's 'superseded' status in place for history.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 19:47:06 -04:00
Cameron Cordes 01f5ad7527 knowledge: valid-time on facts + interval-aware conflict detection
Adds bitemporal support to entity_facts. Existing `created_at` is
transaction time (when we recorded the fact); the new
`valid_from` / `valid_until` BIGINT columns are valid time (when the
fact is/was true in the real world). NULL on either side = unbounded
on that side, both NULL = "always-true / unknown" — matches the
default state of every legacy row, no backfill needed.

The split matters for time-bounded predicates like
is_in_relationship_with / lives_in / works_at: recording the fact
once doesn't mean the relationship is still ongoing. Same predicate
across different windows ("lives_in NYC 2018-2020", "lives_in SF
2020-present") is no longer a conflict — the interval-aware check
in get_entity only flags pairs whose windows overlap. Facts with no
valid-time data still flag against everything (worst case for legacy
rows — user adds dates to suppress).

API surface:
- POST /knowledge/facts accepts optional valid_from / valid_until.
- PATCH /knowledge/facts/{id} accepts both with tri-state semantics:
  field omitted = leave alone, JSON null = clear to NULL, number =
  set. Implemented via a small serde helper around Option<Option>.
- GET /knowledge/entities/{id} surfaces both fields per fact and
  uses them in conflict detection.

Agent path (insight_generator) writes NULL/NULL for now — deriving
valid_from from the source photo's date_taken is slated for a
follow-up agent tool alongside Phase 2's supersession.

Test pins set + clear semantics via update_fact: setting both
bounds, leaving them alone on a subsequent patch, then clearing
valid_until back to NULL.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 19:25:55 -04:00
Cameron Cordes bcd5312953 knowledge: detect same-predicate object conflicts at read time
GET /knowledge/entities/{id} now flags facts as `in_conflict` when
another active fact shares the same predicate but disagrees on the
object (entity id or text value). Pure read-time computation in the
handler — group facts by predicate, distinct-object count > 1 flags
all members. No schema change; same shape as `is_current` on photo
insights.

The flag is intentionally a *signal*, not a hard constraint. Some
predicates are legitimately multi-valued (friend_of, tagged_in,
appears_in) — the curator UI surfaces the amber accent and lets the
user reject the stale fact, accept both, or supersede one later
once the supersession column lands.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 19:14:58 -04:00
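The read-time conflict check from bcd5312953 combined with the valid-time windows from 01f5ad7527 and the supersede step from 85f3716379, as an added sketch that is not the project's code. The `Fact` struct, the function names and the year-valued sample data are assumptions; the strict comparison (a window ending where another starts does not overlap) is inferred from the "2018-2020" / "2020-present" example and the supersession description in those commit messages.

```rust
use std::collections::BTreeSet;

// Added sketch, not the project's code. Objects are flattened to a string;
// the commits describe an entity id or a text value.
#[derive(Debug, Clone, PartialEq)]
pub struct Fact {
    pub id: i32,
    pub predicate: String,
    pub object: String,
    pub valid_from: Option<i64>,  // None = unbounded on that side
    pub valid_until: Option<i64>, // None = unbounded on that side
}

/// True when window A starts before window B ends, with None as infinity.
fn starts_before_end(from: Option<i64>, until: Option<i64>) -> bool {
    match (from, until) {
        (Some(f), Some(u)) => f < u,
        _ => true, // an unbounded side can never rule out overlap
    }
}

/// Two windows overlap when each starts before the other ends. A fact with
/// no valid-time data (None / None) therefore overlaps everything.
pub fn windows_overlap(a: &Fact, b: &Fact) -> bool {
    starts_before_end(a.valid_from, b.valid_until)
        && starts_before_end(b.valid_from, a.valid_until)
}

/// Ids of facts that share a predicate, disagree on the object, and have
/// overlapping windows. The result is a signal for the curator, not a
/// constraint: multi-valued predicates will legitimately show up here.
pub fn conflicting_ids(facts: &[Fact]) -> Vec<i32> {
    let mut flagged = BTreeSet::new();
    for (i, a) in facts.iter().enumerate() {
        for b in &facts[i + 1..] {
            if a.predicate == b.predicate && a.object != b.object && windows_overlap(a, b) {
                flagged.insert(a.id);
                flagged.insert(b.id);
            }
        }
    }
    flagged.into_iter().collect()
}

/// Supersede step: close the old fact's window at the new fact's start,
/// but keep a valid_until that was already set.
pub fn supersede(old: &mut Fact, new: &Fact) {
    if old.valid_until.is_none() {
        old.valid_until = new.valid_from;
    }
}

/// Constructed helper for building sample rows.
pub fn fact(id: i32, predicate: &str, object: &str, from: Option<i64>, until: Option<i64>) -> Fact {
    Fact {
        id,
        predicate: predicate.to_string(),
        object: object.to_string(),
        valid_from: from,
        valid_until: until,
    }
}

fn main() {
    // Constructed data: years stand in for the BIGINT valid-time columns.
    let mut old = fact(1, "lives_in", "NYC", Some(2018), None);
    let new = fact(2, "lives_in", "SF", Some(2020), None);
    println!("before supersede: {:?}", conflicting_ids(&[old.clone(), new.clone()]));
    supersede(&mut old, &new);
    println!("after supersede:  {:?}", conflicting_ids(&[old, new]));
}
```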
Cameron Cordes 0b8478a5e4 knowledge: list sort + persona-scoped fact_count per entity
Two related additions to /knowledge/entities:

- New EntitySort enum (UpdatedDesc default, NameAsc, FactCountDesc)
  surfaced via `?sort=updated|name|count`. NameAsc clusters near-
  duplicate names so dupes stand out at a glance; FactCountDesc
  surfaces heavily-used entities and demotes 0-fact noise to the
  bottom.

- New `list_entities_with_fact_counts` DAO method that returns each
  entity alongside a persona-scoped count of its non-rejected facts
  (subject side). Persona scope follows X-Persona-Id via the
  existing resolve_persona_filter chain — Single filters on
  (user_id, persona_id), All unions across the user's personas.
  Implemented as one raw SQL query with a LEFT JOIN to a fact-count
  subquery and ORDER BY tied to the chosen sort, so count-sort needs
  no second round trip.

The agent's existing list_entities call site is unchanged — it
doesn't need persona-scoped counts and the trait method stays cheap.
EntitySummary grows an Option<i64> fact_count (skip_serializing_if
none) so PATCH responses stay shaped as before.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 16:04:13 -04:00
Cameron Cordes 0e2b18224f knowledge: pre-delete relational facts so entity delete succeeds
DELETE /knowledge/entities/{id} was 500ing on any entity that was the
object of a relational fact. entity_facts.object_entity_id has
ON DELETE SET NULL, but the table also has
CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL) —
purely relational facts (subject + predicate + object_entity_id, no
object_value, like "Alice is_friend_of Bob") would have both NULL
after SET NULL fired, the CHECK would abort, and the whole DELETE
would fail with a CHECK violation. The user just saw QueryError
because the DAO swallowed the diesel error string.

Wrap delete_entity in a transaction that first deletes facts where
the entity is the object AND object_value is null, then deletes the
entity. Surviving siblings (typed facts about the entity as subject)
are CASCADE'd by the FK as before. Also start surfacing the actual
diesel error in a warn log before collapsing to DbErrorKind so future
similar issues don't masquerade as the opaque QueryError.

A schema-level fix (changing object FK to ON DELETE CASCADE via a
table-rebuild migration) is the cleaner long-term resolution and is
slated for Phase 2; the DAO-side pre-delete is sufficient and less
invasive in the meantime.

Test pins the contract: a relational fact pointing at the deleted
entity is removed, an unrelated typed fact about an unrelated entity
survives, and the entity itself is deleted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 15:44:38 -04:00
Cameron Cordes f7ce3d2b22 knowledge: include library_id in photo_links response
The PhotoLinkDetail in /knowledge/entities/{id} was dropping the
library_id field, leaving consumers no way to construct a
content-routed thumbnail URL. Apollo's curation screen was falling
through to library=0 (the FastAPI default) and getting 400s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 15:19:37 -04:00
Cameron Cordes d7aee4f228 knowledge: cosine dedup, fact create endpoint, recall nudge
Phase 1 of the knowledge curation work. Three small server-side changes
to support an Apollo-side curation surface and reduce the agent's near-
duplicate output rate going forward:

- upsert_entity grows an embedding-cosine fallback after the exact name
  match misses. New entities whose embedding sits above
  ENTITY_DEDUP_COSINE_THRESHOLD (default 0.92) against any same-type
  active entity collapse onto the existing row. Eliminates the Sarah /
  Sara / Sarah J. trio the FTS5 prefix check was missing.
- POST /knowledge/facts symmetric with the existing PATCH/DELETE so the
  curation UI can create facts directly. Persona-scoped via X-Persona-Id;
  validates subject (and optional object) entity existence; reuses
  KnowledgeDao::upsert_fact so corroboration semantics match the agent
  path.
- One sentence in build_system_content telling the agent to call
  recall_entities before store_entity when a name resembles something
  already known. Cheap; complements the DAO-layer guard.

Includes upsert_entity_collapses_near_duplicate_by_embedding test
covering both the collapse-on-near-match path and the don't-collapse-on-
unrelated-embedding path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 15:16:05 -04:00
cameron 827a78dd79 Merge pull request 'feature/persona-fk-and-guard' (#90) from feature/persona-fk-and-guard into master
Reviewed-on: #90
2026-05-10 18:42:27 +00:00
Cameron Cordes 08a5f46be1 chat: scope insight lookup by library_id to fix regen-shadow bug
When a photo exists in more than one library and the user
regenerates its insight from library A's chat, the regenerate
streams cleanly, store_insight flips library A's old row to
is_current=false, and inserts a new is_current=true row tagged
(library A, rel_path). On the next history fetch the user sees
their old transcript — the regenerate appears to vanish.

The cause: get_insight(file_path) filters on rel_path + is_current
only, so library B's untouched is_current=true row for the same
rel_path satisfies the query and gets returned by SQLite's .first()
ahead of A's new row. Because get_insight is also what
chat_turn_stream uses to decide bootstrap vs. continuation, the
next chat turn after the shadow hit also routes against the
wrong insight, so update_training_messages corrupts library B's
transcript with library A's chat.

Fix: add get_current_insight_for_library(library_id, file_path)
filtered on (library_id, rel_path, is_current=true) and route the
chat surface (load_history, chat_turn{,_stream}, rewind_history)
through it. load_history falls back to the cross-library
get_insight when the scoped lookup misses — preserves the
"scalar data merges across libraries" intent for the case where
the active library has no insight but another does. The path-only
get_insight stays for callers that don't have library context
(populate_knowledge, the photo-grid metadata fetch).

chat_history_handler stops dropping the parsed library on the
floor and threads it through. Single-library deploys see no
behaviour change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 14:03:41 -04:00
Cameron Cordes b9d9ba0320 chat: route search_messages({date}) to get_sms_messages
When the LLM calls search_messages with { date, limit } and no
query, it's making the predictable mistake of conflating the two
"messages"-shaped tools. The previous behaviour returned an error
that pointed it at get_sms_messages — correct, but burning a turn
on the misroute. Long photo-chat threads where the user asks
"what was happening that weekend?" hit this on small models
roughly half the time.

Now the date-string-without-query case transparently dispatches
to get_sms_messages with the same args (date / limit / days_radius
/ contact name all pass through unchanged) and prepends a short
"(Note: routed to get_sms_messages — prefer it directly next time)"
to the result. The model sees real data on its first try while
still learning the right tool for next time. Cases that don't have
a get_sms_messages equivalent (numeric contact_id, or start_ts /
end_ts windows) keep the original error so the model knows to
either supply a query or restructure its call.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 13:48:13 -04:00
Cameron Cordes fbd769e475 personas: composite FK + built-in update guard
Two persona-infrastructure correctness fixes that go together because
the second one (FK with CASCADE) requires the first (preventing the
persona row from being mutated out from under its facts).

1. update_persona handler refuses name/systemPrompt edits to built-ins
   (409). includeAllMemories stays editable — that's a per-user
   preference, not the persona's identity. Mirrors the existing
   delete_persona guard. The DAO is intentionally permissive so the
   guard sits at the HTTP layer; persona_dao test pins that contract.

2. Migration 2026-05-10 adds user_id to entity_facts and a composite
   FK (user_id, persona_id) -> personas(user_id, persona_id) ON DELETE
   CASCADE. This closes two issues at once:

   - Persona orphans: deleting a custom persona used to leave its
     facts dangling forever, readable only via PersonaFilter::All.
     CASCADE now wipes them with the persona row.

   - Multi-user fact leakage: PersonaFilter::Single("default") used
     to surface every user's default-scoped facts. PersonaFilter is
     now { user_id, persona_id } and all read paths
     (get_facts_for_entity, list_facts, get_recent_activity) filter
     on user_id first. upsert_fact's dedup key extends to user_id so
     identical claims under shared persona names from different
     users no longer corroborate-bump each other's confidence.

   - user_id threads from Claims.sub.parse::<i32>().unwrap_or(1) at
     the chat / insight handlers through ChatTurnRequest, the
     streaming agentic loop, execute_tool, and into the leaf tools
     (tool_store_fact, tool_recall_facts_for_photo). The ".unwrap_or(1)"
     accommodates Apollo's service token whose sub is non-numeric on
     legacy mints.

   - Backfill picks the smallest user_id matching each legacy fact's
     persona_id so the FK holds for already-stored rows.

Five new knowledge_dao tests with FK-on connection: persona scoping
isolation, All-variant union per-user, dedup not crossing users,
CASCADE delete, FK rejection of unknown personas. Plus
dao_update_does_not_block_built_ins documenting where the
HTTP-layer guard lives.

Apollo coordinates separately — the matching changes there add the
/api/personas proxy and start sending persona_id on photo-chat turns.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 13:30:35 -04:00
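The scoping rules from the commit above, as an added in-memory model (not the project's code; the real store is SQL behind a DAO). `Store`, `list_facts_by_persona_only` and the sample claims are hypothetical names and constructed data; `PersonaFilter`, `upsert_fact`, `list_facts` and the `unwrap_or(1)` fallback follow the commit message.

```rust
use std::collections::HashSet;

// Constructed model: a Vec stands in for entity_facts, a HashSet for the
// personas(user_id, persona_id) key that the composite FK points at.
#[derive(Clone, Debug, PartialEq)]
pub struct Fact {
    pub user_id: i32,
    pub persona_id: String,
    pub claim: String,
    pub confidence: u32,
}

/// Post-fix filter shape: every variant carries the caller's user_id.
pub enum PersonaFilter {
    Single { user_id: i32, persona_id: String },
    All { user_id: i32 },
}

#[derive(Default)]
pub struct Store {
    personas: HashSet<(i32, String)>,
    facts: Vec<Fact>,
}

/// Mirrors `Claims.sub.parse::<i32>().unwrap_or(1)`: a non-numeric sub
/// resolves to user 1, so such tokens share user 1's scope.
pub fn user_id_from_sub(sub: &str) -> i32 {
    sub.parse::<i32>().unwrap_or(1)
}

impl Store {
    pub fn add_persona(&mut self, user_id: i32, persona_id: &str) {
        self.personas.insert((user_id, persona_id.to_string()));
    }

    /// Insert or corroborate a claim. The (user_id, persona_id) pair must
    /// exist (the FK check) and the dedup key includes user_id.
    pub fn upsert_fact(&mut self, user_id: i32, persona_id: &str, claim: &str) -> Result<u32, String> {
        if !self.personas.contains(&(user_id, persona_id.to_string())) {
            return Err(format!("no persona ({user_id}, {persona_id})"));
        }
        if let Some(f) = self.facts.iter_mut().find(|f| {
            f.user_id == user_id && f.persona_id == persona_id && f.claim == claim
        }) {
            f.confidence += 1; // corroboration stays inside one user's rows
            return Ok(f.confidence);
        }
        self.facts.push(Fact {
            user_id,
            persona_id: persona_id.to_string(),
            claim: claim.to_string(),
            confidence: 1,
        });
        Ok(1)
    }

    /// Pre-fix read path: persona name only, so every user's rows match.
    pub fn list_facts_by_persona_only(&self, persona_id: &str) -> Vec<&Fact> {
        self.facts.iter().filter(|f| f.persona_id == persona_id).collect()
    }

    /// Post-fix read path: user_id is checked before the persona.
    pub fn list_facts(&self, filter: &PersonaFilter) -> Vec<&Fact> {
        self.facts
            .iter()
            .filter(|f| match filter {
                PersonaFilter::Single { user_id, persona_id } => {
                    f.user_id == *user_id && f.persona_id == *persona_id
                }
                PersonaFilter::All { user_id } => f.user_id == *user_id,
            })
            .collect()
    }

    /// Deleting a persona removes its facts too (the ON DELETE CASCADE effect).
    pub fn delete_persona(&mut self, user_id: i32, persona_id: &str) -> usize {
        self.personas.remove(&(user_id, persona_id.to_string()));
        let before = self.facts.len();
        self.facts.retain(|f| !(f.user_id == user_id && f.persona_id == persona_id));
        before - self.facts.len()
    }
}

fn main() {
    // Constructed scenario: two users who both own a persona named "default".
    let mut s = Store::default();
    s.add_persona(1, "default");
    s.add_persona(2, "default");
    s.upsert_fact(1, "default", "Alice likes hiking").unwrap();
    s.upsert_fact(2, "default", "Alice likes hiking").unwrap();
    let scoped = PersonaFilter::Single { user_id: 2, persona_id: "default".into() };
    println!("persona-only read: {} rows", s.list_facts_by_persona_only("default").len());
    println!("scoped read:       {} rows", s.list_facts(&scoped).len());
}
```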
cameron 79a1168724 Merge pull request 'faces: add person_id filter to /faces/embeddings; remove tag-bootstrap' (#89) from feature/faces-tab into master
Reviewed-on: #89
2026-05-10 15:49:18 +00:00
Cameron Cordes a079065ae9 faces: add person_id filter to /faces/embeddings; remove tag-bootstrap
Pairs with the Apollo FACES-tab change. The new
POST /api/persons/{id}/similar-unassigned route on Apollo needs to
fetch one person's embeddings cheaply to compute the centroid;
adding a person_id query param to /faces/embeddings keeps that to a
single round-trip instead of paging the whole detected set
client-side. When both person_id and unassigned=true are supplied,
person_id wins (the explicit filter is the more specific intent).

Tag-bootstrap removal: bootstrap_candidates_handler,
bootstrap_persons_handler, /persons/bootstrap and
/tags/people-bootstrap-candidates route registrations, and the
heuristic helpers (is_plausible_name_token, looks_like_person) plus
their tests. Only Apollo called these; the migration is complete.
The persons.created_from_tag column stays - it's informational on
existing rows and removing it would be a destructive migration for
no benefit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 11:30:37 -04:00
cameron 25233904aa Merge pull request 'personas: elevate to server with per-persona fact scoping' (#88) from feature/persona-knowledge-segmentation into master
Reviewed-on: #88
2026-05-10 03:44:26 +00:00
cameron 8c377324a1 Merge pull request 'video: handle unknown/short durations in thumb + preview gen' (#87) from fix/video-thumb-preview-edge-cases into master
Reviewed-on: #87
2026-05-10 03:12:58 +00:00
Cameron Cordes 5476ed8ac4 video: handle unknown/short durations in thumb + preview gen
`get_duration_seconds` now returns `Option<f64>` and falls back from
`format=duration` to `stream=duration`. Empty stdout no longer
parse-panics with "cannot parse float from empty string", which was
poisoning the preview-clip row with status=failed and re-queueing every
full scan (notably for GoPro LRV files). `generate_preview_clip` handles
the unknown-duration case by transcoding the whole file (capped at 10s).

`generate_video_thumbnail` seeks to ~50% of the probed duration instead
of a hardcoded `-ss 3`, with a first-frame fallback when the probe
returns nothing. Fixes the loop where short Snapchat clips (<3s) got
"missing thumbnail" logged on every scan because ffmpeg exited 0
without writing a frame, and never wrote the .unsupported sentinel
either.

Adds unit tests for `parse_ffprobe_duration` covering the empty-output,
N/A, multi-line, non-positive, and non-finite cases.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 23:08:16 -04:00
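The duration handling described in the commit above, as an added sketch (not the project's `parse_ffprobe_duration`). The function names are hypothetical, and taking the first usable line of multi-line output is an assumption; the sample strings in the test are constructed.

```rust
/// Parse ffprobe duration stdout into seconds. Returns None for empty output,
/// "N/A", non-numeric text, non-positive values and non-finite values
/// ("inf" and "nan" both parse as f64, so they are filtered explicitly).
pub fn parse_duration_output(stdout: &str) -> Option<f64> {
    stdout
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .find_map(|line| {
            line.parse::<f64>()
                .ok()
                .filter(|d| d.is_finite() && *d > 0.0)
        })
}

/// Try the container-level value first (format=duration), then fall back to
/// the per-stream value (stream=duration), as the commit describes.
pub fn resolve_duration(format_stdout: &str, stream_stdout: &str) -> Option<f64> {
    parse_duration_output(format_stdout).or_else(|| parse_duration_output(stream_stdout))
}

/// Seek target for the thumbnail frame: about half the probed duration, or
/// the first frame when the duration is unknown. A fixed 3 s seek lands past
/// the end of a clip shorter than 3 s.
pub fn thumbnail_seek_seconds(duration: Option<f64>) -> f64 {
    duration.map(|d| d / 2.0).unwrap_or(0.0)
}

/// Preview clip length: the whole file, capped at 10 s. An unknown duration
/// uses the cap so the transcode is still bounded.
pub fn preview_clip_seconds(duration: Option<f64>) -> f64 {
    duration.map(|d| d.min(10.0)).unwrap_or(10.0)
}

fn main() {
    // Constructed probe outputs: the container reports nothing, and the
    // stream query reports N/A for one stream and 2.0 s for the next.
    let d = resolve_duration("", "N/A\n2.000000\n");
    println!("duration = {:?}", d);
    println!("thumbnail seek = {}", thumbnail_seek_seconds(d));
    println!("preview length = {}", preview_clip_seconds(d));
}
```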
cameron 7350f1916a Merge pull request 'fix/manual-date-update' (#86) from fix/manual-date-update into master
Reviewed-on: #86
2026-05-10 02:53:20 +00:00
Cameron Cordes 9871c685b4 date-override: cargo fmt
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 21:23:11 -04:00
Cameron Cordes 108bbeb029 date-override: union semantics across libraries + slash forms
The date-override path used to look up `image_exif` strictly by
`(library_id, rel_path)` with only the forward-slash form, while
`/image/metadata`'s `get_exif` falls back across libraries and tries
both slash forms. A photo whose row sat under a different library_id
than its filesystem-resolved one — or whose rel_path was stored with
backslashes — rendered fine in the modal but 404'd on save.

`set_manual_date_taken` / `clear_manual_date_taken` now share a
`locate_image_exif_row` helper that mirrors `get_exif`'s union
semantics (scoped lookup first, library-agnostic fallback by rel_path
in both slash forms), then update by primary key so the write hits
exactly the row read. Inner anyhow errors are logged with
`(library_id, rel_path)` so the next failure mode is debuggable.

Handler-side: `resolve_library_param` errors no longer silently fall
back to the primary library (which would have masked the original bug
with a different "row not found"); a malformed library param now
returns 400. New `DbErrorKind::NotFound` lets the handler distinguish
genuine misses (404) from real DB failures (500).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 21:21:25 -04:00
Cameron Cordes 3e2f36a748 personas: elevate to server with per-persona fact scoping
Move personas off the mobile client into ImageApi as first-class
records, and scope entity_facts by persona so each one builds its own
voice over a shared entity graph. The new include_all_memories flag
lets a persona opt back into the full hive-mind pool for human
browsing of /knowledge/*; agentic generation always stays in-voice.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 17:59:20 -04:00
cameron 55a986c249 Merge pull request 'feature/streaming-insights' (#85) from feature/streaming-insights into master
Reviewed-on: #85
2026-05-09 20:57:16 +00:00
cameron c52a646be2 Merge pull request 'memories: restore early-era Snapchat unix-epoch filenames' (#84) from feature/snapchat-early-era-dates into master
Reviewed-on: #84
2026-05-08 20:23:35 +00:00
Cameron Cordes d32a7d7c3a memories: restore early-era Snapchat unix-epoch filenames
The recent blanket "snapchat-" prefix denylist (43f8f83) rejected ALL
Snapchat-prefixed filenames from timestamp parsing, which fixed the
sequential-ID false positives but also broke real unix-second
filenames from Snapchat's early era. `Snapchat-1383929602.jpg`
(2013-11-08 16:53:22 UTC) now falls through to fs_time — and on files
with broken filesystem metadata, fs_time pins to 1970.

Replace the blanket prefix denial with a tighter discriminator:
  - exactly 10 captured digits AND timestamp >= 2011-09-23 (Snapchat
    launch) → real unix epoch, accept
  - any other length under this prefix → sequential ID, reject

This keeps the existing rejections intact:
  Snapchat-1021849065.mp4          → 10 digits, 2002 < launch → reject
  Snapchat-1751031586660373917.jpg → 19 digits truncates to 16 → reject
And restores the regression case:
  Snapchat-1383929602.jpg          → 10 digits, 2013 ≥ launch → accept

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 16:22:57 -04:00
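The discriminator described in the commit above, as an added std-only sketch (not the project's `extract_date_from_filename`). `classify_snapchat_name` and `Verdict` are hypothetical names, and it counts the whole digit run after the prefix instead of a regex capture; the filenames in `main` are the ones quoted in the commit messages.

```rust
/// 2011-09-23 00:00:00 UTC, the Snapchat launch date named in the commit.
pub const SNAPCHAT_LAUNCH_EPOCH: i64 = 1_316_736_000;

#[derive(Debug, PartialEq)]
pub enum Verdict {
    /// Accepted as unix seconds; carries the UTC calendar time.
    Epoch { ts: i64, utc: String },
    /// Prefix matched, but the digits look like a sequential ID.
    SequentialId,
    /// No Snapchat prefix, so this rule does not apply.
    NotSnapchat,
}

/// Days since 1970-01-01 to (year, month, day) in the Gregorian calendar.
pub fn civil_from_days(days: i64) -> (i64, u32, u32) {
    let z = days + 719_468; // shift the epoch to 0000-03-01
    let era = z.div_euclid(146_097); // 400-year cycles
    let doe = z.rem_euclid(146_097); // day of era, 0..=146096
    let yoe = (doe - doe / 1_460 + doe / 36_524 - doe / 146_096) / 365;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day of March-based year
    let mp = (5 * doy + 2) / 153; // March-based month, 0..=11
    let d = (doy - (153 * mp + 2) / 5 + 1) as u32;
    let m = (if mp < 10 { mp + 3 } else { mp - 9 }) as u32;
    let y = yoe + era * 400 + if m <= 2 { 1 } else { 0 };
    (y, m, d)
}

/// Render unix seconds as a UTC wall-clock string.
pub fn format_utc(ts: i64) -> String {
    let (y, m, d) = civil_from_days(ts.div_euclid(86_400));
    let s = ts.rem_euclid(86_400);
    format!(
        "{:04}-{:02}-{:02} {:02}:{:02}:{:02} UTC",
        y, m, d, s / 3600, s % 3600 / 60, s % 60
    )
}

/// Decide whether a Snapchat-prefixed file name carries a real unix timestamp.
pub fn classify_snapchat_name(file_name: &str) -> Verdict {
    // Case-insensitive prefix match.
    let lower = file_name.to_ascii_lowercase();
    let rest = match lower.strip_prefix("snapchat-") {
        Some(r) => r,
        None => return Verdict::NotSnapchat,
    };
    let run = rest.bytes().take_while(|b| b.is_ascii_digit()).count();
    let digits = &rest[..run];
    // Any length other than 10 under this prefix is treated as an ID.
    if digits.len() != 10 {
        return Verdict::SequentialId;
    }
    let ts: i64 = match digits.parse() {
        Ok(v) => v,
        Err(_) => return Verdict::SequentialId,
    };
    // Ten digits that predate the launch cannot be a capture time.
    if ts < SNAPCHAT_LAUNCH_EPOCH {
        return Verdict::SequentialId;
    }
    Verdict::Epoch { ts, utc: format_utc(ts) }
}

fn main() {
    for name in [
        "Snapchat-1383929602.jpg",
        "Snapchat-1021849065.mp4",
        "Snapchat-1751031586660373917.jpg",
        "Snapchat-477624257.mp4",
        "IMG_20160710_012515.jpg",
    ] {
        println!("{name} -> {:?}", classify_snapchat_name(name));
    }
}
```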
Cameron Cordes 3699e059a2 insight-chat: include Date taken + GPS in bootstrap photo context
The bootstrap system message gave the model a file path and (in
hybrid mode) a visual description, but no temporal anchor. Models
defaulted to today's date when calling get_sms_messages — Nov 2014
photos were getting "2024-03-11" passed as `date`, missing every
historical message and leading the model to confidently misreport
context.

This commit folds two more EXIF-sourced facts into the
--- PHOTO CONTEXT --- block:

  Date taken: <YYYY-MM-DD or "unknown">
  GPS: <lat, lon to 4dp>           (omitted when no GPS)

Resolution waterfall for date_taken matches the documented canonical
date pipeline at the EXIF / filename steps, but intentionally stops
short of the fs-time fallback `generate_agentic_insight_for_photo`
uses — for chat we'd rather show "unknown" than mislead the model
with an inode mtime. GPS is taken straight from EXIF when both
lat/lon are populated; absent GPS suppresses the line entirely so
the model doesn't hallucinate coordinates.

InsightGenerator gains a `fetch_exif(file_path)` accessor (crate-
visible) so the chat service doesn't need its own ExifDao plumbing.

build_bootstrap_system_message picks up two new params (date,
gps); existing tests updated and 5 new tests cover:
- date present / absent / waterfall (EXIF wins, filename fallback,
  None when neither source has it)
- GPS present / absent
- ordering (path → date → visual)

Total insight_chat unit tests: 33 (up from 27).
2026-05-08 11:14:39 -04:00
Cameron Cordes a0ec1a5080 insight-chat: photo context belongs in system msg, not user turn
After refresh, the rendered transcript was showing two unwanted
artifacts in the initial user bubble:

  Photo file path: pics/DSC_5171.jpg
  please tell me about this photo and what was going on around it

  Please write your final answer now without calling any more tools.

Two distinct bugs:

1. Bootstrap was prepending `Photo file path: <path>` (and, in
   hybrid mode, the visual description block) into the user-turn
   content. The model needed it to call file_path-keyed tools, but
   the user could see it in their own bubble on replay.

2. The no-tools fallback ("Please write your final answer now…")
   was a synthetic user message we never stripped from history,
   so it persisted into training_messages, rendered as a second
   user bubble, AND wiped the prior tool-call accumulator inside
   load_history (user-turn handler clears pending_tools), which
   is why the tool invocations disappeared from the assistant
   bubble after refresh.

Fixes:

- New `build_bootstrap_system_message` helper composes the persona
  with a `--- PHOTO CONTEXT ---` block (path + optional visual
  description). Lives in the system message, not the user turn.
  The user's bubble shows only what they typed.
- Streaming agentic loop's no-tools fallback now records its
  insertion index and removes the synthetic user prompt from
  `messages` after the model responds. Final assistant content
  stays — it reads coherently on replay without the synthetic
  prompt above it. Applies to both bootstrap and continuation.

3 new tests cover the system-message composer (path-only, with
visual block, persona-trim). Total insight_chat unit tests: 27.
2026-05-08 11:07:03 -04:00
Cameron Cordes 24ecf2abd4 insight-chat: prepend Photo file path: <path> to bootstrap user turn
Bug: bootstrap user_content was just the user's typed message (plus
the hybrid visual description). Tools that take a file_path arg —
recall_facts_for_photo, get_file_tags, get_faces_in_photo — had no
way to learn the canonical path. Small models would invent
placeholders like "input_file_0.png" or call the tool with a name
guessed from a hidden multimodal input handle, neither of which
matched any real photo.

Fix: prepend a single-line "Photo file path: <normalized>\n\n" block
to user_content. Same shape generate_agentic_insight_for_photo
already uses for non-chat callers — kept the bootstrap minimal
(no date / GPS / tags pre-stuffing; the agentic loop can fetch
those via tools when needed).

Hybrid still injects the visual description block between the path
block and the user message; local mode just gets path + user text.
2026-05-08 10:59:35 -04:00
Cameron Cordes a29ff406a1 insight-chat: extract bootstrap resolution helpers + unit-test them
resolve_bootstrap_system_prompt and resolve_bootstrap_backend run on
every bootstrap turn — they pick the persisted system prompt and the
chosen backend label. They were inline conditionals before; pulling
them out makes the rules testable without spinning up the full
streaming stack.

9 new tests cover:
- system prompt fallback to BOOTSTRAP_DEFAULT_SYSTEM_PROMPT for None,
  empty string, whitespace-only
- supplied non-empty prompts pass through verbatim, with interior
  newlines / spacing preserved (Apollo personas use multi-line tool
  listings)
- backend defaults to "local" for None / empty
- "local" / "hybrid" accepted case-insensitively with edge-trim
- unknown labels return a descriptive error

Total insight_chat tests: 24 (up from 15). No behaviour change.
2026-05-08 10:56:22 -04:00
Cameron Cordes 928efe49f9 insight-chat: bootstrap insight on first Discuss message + regenerate flag
Tap-Discuss-on-no-insight previously failed silently: ImageApi's
/insights/chat/stream required an existing agentic insight, errored
when missing, and emitted the failure as `event: error` — which the
frontend SSE consumer ignored (it listens for `error_message`).

This commit closes both gaps with a server-side state machine:

- /insights/chat/stream now branches on insight presence. Missing
  insight (or `regenerate: true` in the body) → bootstrap path:
  builds [System(req.system_prompt), User(req.user_message + image)],
  runs the agentic loop, generates a title, persists a new row via
  store_insight (which auto-flips priors). Existing insight →
  continuation path (unchanged behaviour).
- New `regenerate: bool` request field forces bootstrap even when an
  insight exists. Takes precedence over `amend`.
- `done` SSE payload field-name alignment with Apollo's frontend
  convention: prompt_eval_count → prompt_tokens, eval_count →
  eval_tokens, num_ctx echo added.
- `amended_insight_id` semantics broaden — now populated whenever the
  turn produced a new row (bootstrap, regenerate, or amend). Existing
  amend clients keep working unchanged; new clients get the new row's
  id for free.
- `event: error` → `event: error_message` so frontend errors stop
  silently dropping.

Refactor: extracted run_streaming_agentic_loop, build_chat_clients,
and generate_title as shared helpers between bootstrap and
continuation. Continuation path's outer logic moves to
run_continuation_streaming with no behaviour change.

Mobile-ready: any client (Apollo backend, mobile, future) sends one
request to /insights/chat/stream and gets the right path. Apollo's
proxy stays a dumb pipe.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 10:41:50 -04:00
cameron bdafd39546 Merge pull request 'feature/insight-chat-improvements' (#83) from feature/insight-chat-improvements into master
Reviewed-on: #83
2026-05-07 22:19:12 +00:00
Cameron Cordes 8bd1a85070 insight-chat: cargo fmt sweep on the get_faces_in_photo additions
Single-line dao lock + reordered faces import. No logic changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 17:53:31 -04:00
Cameron Cordes 6f0c15d0c5 insight-chat: code-review polish on get_faces_in_photo
- Drop redundant `use anyhow::Context` inside has_any_faces (already
  imported at the module level).
- Drop dead `.unwrap_or("?")` on bound faces — the vec is filtered to
  is_some() so the fallback can never fire.
- Reorder the face_dao constructor param + initializer to match the
  struct declaration (between tag_dao and knowledge_dao). Update both
  state.rs call sites and populate_knowledge.rs to match.
- Hold face_dao lock once across the library-resolver loop instead of
  reacquiring per iteration.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 17:48:22 -04:00
Cameron Cordes b64a5bec28 insight-chat: add get_faces_in_photo agentic tool
The LLM had no path to see face_detections data — get_file_tags
returns user-applied tags, but a face that's been detected and bound
to a person via the embedding-cluster auto-bind path doesn't always
have a matching tag. The new tool joins face_detections with persons
by content_hash and returns bound names + bboxes, plus unidentified
faces (so smaller models can count people in the photo without
inferring from a visual description).

Gated on face_detections being non-empty via the same has_any_*
pattern as daily_summaries.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 17:43:16 -04:00
Cameron Cordes 388eb22cd2 Remove full plan file, just keep spec
2026-05-07 17:29:04 -04:00
Cameron Cordes eef41d4172 thumbnails: align video ffmpeg args with the image path so non-yuvj420p sources work
The bare 'ffmpeg -ss 3 -i in -vframes 1 -f image2 out' command failed on
sources whose decoded pix_fmt isn't yuvj420p (e.g. older Samsung phone
videos in yuv420p). With no -vf filter chain, the decoded frame goes
straight to the mjpeg encoder, which rejects it with 'Non full-range
YUV is non-standard' and exits non-zero.

generate_image_thumbnail_ffmpeg already handles the same class of
source for HEIC/RAW by adding -vf scale=200:-1 -c:v mjpeg — the filter
chain lets ffmpeg auto-insert the pix_fmt converter the encoder needs.
Adopt the same args here. Side benefit: video thumbnails are now 200px
wide on disk, matching image thumbnails (previously full-resolution).

Pre-existing .unsupported sentinels for videos that hit this failure
will need to be deleted manually to retry — they're under
$THUMBNAILS/<lib_id>/.../*.unsupported.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 17:20:05 -04:00
Cameron Cordes b42acbb3f3 fmt: cargo fmt sweep across drifted files
No behavior change — purely whitespace/line-break cleanup that had
accumulated since the last format run.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 16:42:41 -04:00
Cameron Cordes 2a273a3ed9 thumbnails: stop video failures from re-logging every watcher tick
generate_video_thumbnail used .output().expect(...), which only catches
spawn failure — non-zero ffmpeg exits were silently discarded. With no
thumbnail and no .unsupported sentinel left behind, the watcher
re-detected the file as missing every quick-scan tick and re-logged
"New file detected (missing thumbnail)" forever.

Mirror the image branch: return io::Result, check status.success(),
and write the sentinel from create_thumbnails on failure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 16:41:24 -04:00
Cameron Cordes a8433c2e01 insight-chat: document the new system_prompt field in CLAUDE.md
Add system_prompt to the /insights/chat body schema with a one-paragraph
note on the append-vs-amend semantics so future readers find the
contract alongside the rest of the chat-continuation docs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 15:26:32 -04:00
Cameron Cordes 1cdc0f6eb9 insight-chat: drop the dead SmsApiClient::search_messages wrapper
The post-PR-4 delegation kept it as a convenience for callers that
don't filter by contact, but nothing actually uses it. Delete to clear
the dead_code warning. search_messages_with_contact remains as the
single entry point.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 15:10:31 -04:00
Cameron Cordes e539c083c9 insight-chat: code-review polish on the tool-gating PR
- search_messages now delegates to search_messages_with_contact(.., None)
  so the two methods share a single HTTP path. Drops the dead-code
  warning and the ~30-line duplication.
- DailySummaryDao gains has_any_summaries (LIMIT 1 existence probe)
  used by current_gate_opts; the SELECT COUNT(*) get_total_summary_count
  added in the prior commit is removed (it had no other caller).
- current_gate_opts doc comment corrected to describe what the probes
  actually do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 15:07:57 -04:00
Cameron Cordes f50d32667b insight-chat: ToolGateOpts + per-tool description rewrites
Tools whose backing tables are empty (calendar, location_history,
daily_summaries) drop out of the catalog so the LLM doesn't waste
iteration budget calling them only to receive "no results found".
Vision and apollo gates already existed; this generalizes the pattern.

search_messages gains start_ts/end_ts/contact_id filters (date filter
is a client-side post-filter; SMS-API only accepts contact_id natively
on the search endpoint).

Descriptions follow a consistent convention: one sentence (what +
when), param semantics, examples for tools with non-obvious param
choices. No more all-caps headers, no more identity-prescriptive
language inside descriptions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:56:58 -04:00
Cameron Cordes b02da0d0cc insight-chat: code-review polish on the days_radius fix
- Bind effective_radius once in fetch_messages_for_contact so the log
  output and window math share a single source of truth for the clamp.
- Clamp tool-supplied days_radius to [1, 30] at the tool boundary so a
  runaway LLM value can't produce a thousand-day window.
- Split the negative-input test into a real negative-input case
  alongside the zero-input case.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:47:46 -04:00
Cameron Cordes 659e7bd973 insight-chat: get_sms_messages tool now honors days_radius
The agentic tool definition advertised a days_radius parameter but
sms_client::fetch_messages_for_contact was hardcoded to ±4 days,
silently ignoring whatever value the LLM chose. Plumb the parameter
through; default 4 retained at the tool level for back-compat.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:42:42 -04:00
Cameron Cordes 428f24b0f8 insight-chat: code-review polish on the chat system_prompt override
- Trim the override input once via Option::map(str::trim).filter(...).
- Use matches!() in restore_system_prompt_override's Prepended arm so
  it reads consistently with the Replaced arm.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:40:04 -04:00
Cameron Cordes faa289882f insight-chat: per-turn system_prompt override on chat continuation
Append mode: applied ephemerally — original system message restored
before persistence so re-opens see the baked persona. Amend mode:
override stays in place and becomes the new insight row's system
message. Pattern mirrors annotate_system_with_budget.

Adds system_prompt field on both ChatTurnHttpRequest and ChatTurnRequest;
plumbs through chat_turn and chat_turn_stream identically.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:34:08 -04:00
Cameron Cordes 177187f6a2 insight-chat: code-review polish on the system-prompt split
- Use Option::map instead of manual match-on-Option (drops clippy::manual_map).
- Drop redundant `max_iterations = max_iterations` from the format! call.
- Use captured identifiers consistently in the user_content format!.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:27:59 -04:00
Cameron Cordes 8ae4099d46 insight-chat: split generation system prompt into identity + procedural blocks
The framework no longer asserts "you are a personal photo memory
assistant" alongside a user-supplied custom_system_prompt — the
persona is the authoritative identity. The procedural block (tool-use
guidance, iteration budget) stays identity-free.

The user message also stops asking for "a detailed insight with a
title and summary" since the title is regenerated post-hoc anyway and
the wording was constraining voice for no data-model benefit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:20:45 -04:00
Cameron Cordes 204428b0c0 insight-chat: implementation plan for the spec
Five sequenced PRs:
  1. Split generation system prompt + neutralize user message
  2. system_prompt field on chat request (ephemeral / amend-persisted)
  3. fetch_messages_for_contact honors days_radius
  4. ToolGateOpts + per-tool description rewrites + search_messages
     gains start_ts/end_ts/contact_id
  5. FileViewer-React: persona system_prompt on every turn + style note

Each PR independently mergeable. Tests inline TDD per task.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:15:09 -04:00
Cameron Cordes fbece0ba9a insight-chat: design for tool catalog, system prompt, and SMS fixes
Lays out the cycle: split generation system prompt into identity vs
procedural blocks so personas drive voice/shape, add per-turn
system_prompt override on chat (ephemeral in append mode, persisted
on amend), gate optional tools on data presence, and fix the
days_radius bug in get_sms_messages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 14:04:07 -04:00
cameron 22e157411c Merge pull request 'date_resolver: drop -fast2 so MP4 moov-at-end files resolve' (#82) from fix/exiftool-mp4-moov-trailer into master
Reviewed-on: #82
2026-05-07 16:42:08 +00:00
Cameron Cordes c128596470 date_resolver: drop -fast2 so MP4 moov-at-end files resolve
For QuickTime/MP4 files whose `moov` atom sits at the end of the
file (non-faststart — common for Snapchat exports and any MP4
muxed without `-movflags +faststart`), `-fast2` causes exiftool
to skip the trailer and return no `CreateDate` /
`MediaCreateDate`, dropping the resolver to the `fs_time`
fallback for files that actually have a real capture date.

Reported cases:
  Snapchat-477624257.mp4
    fs_time: 2026-05-04 (today, file was just modified)
    real:    QuickTime CreateDate 2018-09-02
  action_compound_cc92e65b709d1deb895b4c2a9484fc6a.mp4
    fs_time: 2026-05-04
    real:    MediaCreateDate 2018-03-01

The waterfall pre-filters to files kamadak-exif couldn't read, so
the JPEG fast-path is already covered without `-fast2`. Paying
full-scan cost on the residual is the right trade. The per-tick
drain re-resolves `source = 'fs_time'` rows, so existing rows
recover automatically on the next watcher tick after deploy — no
SQL migration needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 12:40:50 -04:00
cameron ac8d17fb22 Merge pull request 'memories: deny Snapchat-prefixed filenames from timestamp parsing' (#81) from feature/filename-date-snapchat-denylist into master
Reviewed-on: #81
2026-05-07 16:20:06 +00:00
Cameron Cordes 43f8f83d80 memories: deny Snapchat-prefixed filenames from timestamp parsing
Snapchat assigns sequential IDs that happen to overlap real epoch
values, so the 10-16 digit timestamp regex matched and produced
2002-era dates for files actually saved in 2016/2021. The digits
themselves are indistinguishable from a unix timestamp, so we
dispatch on the source-app prefix instead. Case-insensitive,
extensible for future apps that exhibit the same pattern.

Reported cases:
  Snapchat-1021849065.mp4          → 2002-05-19 (actual 2021)
  Snapchat-1751031586660373917.jpg → 2002-09-09 (actual 2016)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 12:17:40 -04:00
cameron e55f6a5961 Merge pull request 'memories: reject implausible filename-derived timestamps' (#80) from feature/filename-date-plausibility into master
Reviewed-on: #80
2026-05-07 16:02:50 +00:00
Cameron Cordes feaae9b6d3 memories: reject implausible filename-derived timestamps
Filenames like `000227580005.jpg` (film-scan ID) and
`IMG_21323906751390.jpeg` were matched by the 10-16 digit timestamp
regex and resolved to 1970 / 2037, then written into
`image_exif.date_taken` with `source = 'filename'`. EXIF-less
photos showed up under those bogus dates everywhere date_taken is
read.

Two new guards in `extract_date_from_filename`:
- leading zero → reject (real epoch values don't have one at any
  sane resolution).
- resolved year outside [1995, now+1y] → reject.

Both let the date_resolver waterfall fall through to fs_time,
which is a much better proxy for content age than a fake epoch
date. Regression tests cover the two reported filenames.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 12:02:07 -04:00
cameron 95e21c8128 Merge pull request 'feature/manual-date-override' (#79) from feature/manual-date-override into master
Reviewed-on: #79
2026-05-07 15:10:37 +00:00
Cameron Cordes 7e1c4ab318 backfill_date_taken: surface the actual diesel error in warnings
The DAO swallowed every diesel::update failure as a flat
`anyhow!("Update error")`, then trace_db_call further reduced it to
`DbError { kind: UpdateError }`. Operators saw "update failed for lib
2 Snapchat/foo.mp4: DbError { kind: UpdateError }" with no clue why
(constraint violation? type mismatch? row vanished mid-flight? DB
locked?).

Two changes:
- Preserve the diesel error in the anyhow chain along with the input
  params (lib, rel_path, date_taken, source) so the cause is visible.
- Log the chain at warn-level inside the DAO before the trace wrapper
  collapses it to DbErrorKind::UpdateError, so the warning at the
  call site finally has something diagnosable next to it.
- Treat zero-row updates as a debug-level "row likely retired by the
  missing-file scan" rather than a hard failure — that case is benign
  and shouldn't poison the drain's error tally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 11:07:17 -04:00
Cameron Cordes 65af7d999e memories: parse filename dates as UTC, not server local
`extract_date_from_filename` was calling `Local::from_local_datetime`
on the parsed YYYY-MM-DD-HH-MM-SS components, then `.timestamp()` was
shifting the result by the SERVER's TZ offset to produce real UTC
seconds. That made filename-sourced timestamps disagree with EXIF-
sourced timestamps by hours: kamadak-exif's `DateTimeOriginal` is a
naive string parsed AS-IF-UTC (the project's load-bearing
"naive local reinterpreted as UTC" convention), and Apollo's photo
matcher re-anchors that naive value through the BROWSER's TZ when
matching to the track. Anything stamped in server-local instead got
double-shifted on its way through the matcher and through any
`formatNaive*` display path on the client.

Visible symptom in the Apollo DETAILS modal: a photo's CURRENT date
read correctly (1:25 AM via exif) while FROM FILENAME read 4 hours
ahead (5:25 AM in EDT) for the same `IMG_20160710_012515.jpg`.

Switch to `Utc::from_utc_datetime` so `.timestamp()` returns the
wall-clock-as-UTC unix seconds — same convention as the EXIF path.
The /memories endpoint, the canonical-date waterfall (which feeds
`image_exif.date_taken` for filename-only files), and Apollo's
DETAILS modal `filename_date` field all now line up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 20:43:18 -04:00
Cameron Cordes 16d6586b7d exif: GET /image/exif/full — exiftool dump for the DETAILS modal
The curated `image_exif` columns are a small slice of what exiftool
can read (camera/lens/GPS/capture/dates). Apollo's DETAILS modal wants
to surface everything — white balance, metering, MakerNotes, IPTC,
ICC profile, Composite tags, the lot — for an operator inspecting a
photo's provenance.

`read_full_exif_via_exiftool(path)` shells out to `exiftool -j -G -n`:
JSON output, group-prefixed keys (`EXIF:Make`, `MakerNotes:LensInfo`),
numeric values (callers can reformat). Spawned via web::block to keep
it off the actix worker — RAW with rich MakerNotes can take a few
seconds.

The endpoint is on-demand only; the indexer / file watcher does NOT
call it. Falls back to 503 with a clear message when exiftool isn't
on PATH so Apollo can render an "install exiftool" hint. Multi-library
union resolution mirrors set_image_gps / get_file_metadata.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 19:42:41 -04:00
Cameron Cordes 832b50d587 image_exif: manual date_taken override (set/clear endpoints)
Add `POST /image/exif/date` and `POST /image/exif/date/clear` so an
operator can correct a row whose canonical-date waterfall landed on the
wrong value (camera clock reset, fs_time fallback for a copied-from-
backup file, etc). New `original_date_taken` / `original_date_taken_source`
columns snapshot the prior value on first override so revert is lossless.

The waterfall source set is now `'exif' | 'exiftool' | 'filename' | 'fs_time' | 'manual'`.
The existing `idx_image_exif_date_backfill` partial index already filters
to `date_taken IS NULL OR date_taken_source = 'fs_time'`, so manual rows
are naturally excluded from the per-tick drain — no index change needed.

`ExifMetadata` now exposes `date_taken_source` + originals so a UI can
render "manually set; was X via filename".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 19:26:43 -04:00
cameron 2acc525e73 Merge pull request 'otel: revert HTTP transport, keep gRPC' (#78) from fix/otlp-revert-to-grpc into master
Reviewed-on: #78
2026-05-06 22:36:09 +00:00
Cameron Cordes ecd49fd053 otel: revert HTTP transport, keep gRPC
The HTTP/protobuf exporter never sent any traffic in prod (tcpdump
on port 4318 showed nothing) despite the receiver path being correct
and the bridge wiring being intact (logs reached journalctl via the
stdout exporter). Likely the BatchLogProcessor + reqwest-client combo
isn't getting the right runtime context, but debugging that on a live
deployment isn't worth holding up the rest of the speedups.

Restoring grpc-tonic transport so prod observability comes back. The
remaining build-time wins on this branch (mold linker, system sqlite3,
profile.dev tweaks, lockfile-only dep refresh) deliver most of the
original savings without touching telemetry. Operator: revert
OTLP_OTLS_ENDPOINT in prod from port 4318 back to 4317.

HTTP transport remains a viable follow-up — needs to be debugged
against a local SigNoz instance with internal SDK error visibility
enabled, on its own branch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 18:33:37 -04:00
cameron c7bd2226cc Merge pull request 'build: speed up debug compile loop' (#77) from feature/build-time-speedups into master
Reviewed-on: #77
2026-05-06 21:41:19 +00:00
Cameron Cordes f73db58771 build: speed up debug compile loop
- Drop libsqlite3-sys 'bundled' on Linux/macOS so the SQLite C source
  isn't recompiled every clean build; Windows keeps 'bundled' via a
  cfg(windows) target override.
- Switch opentelemetry-otlp from grpc-tonic to http-proto + reqwest-client.
  Removes the tonic + h2 + hyper-h2 stack from the build graph; reqwest
  was already a dependency. Updates otel.rs to call .with_http().
- Add [profile.dev] debug = "line-tables-only" to shrink linker work
  while keeping panics/backtraces useful.
- Add .cargo/config.toml selecting mold via gcc on x86_64-linux-gnu.
  Requires `apt install mold`. Other platforms use the default linker.
- cargo update: lockfile-only refresh of all minor/patch bumps within
  existing version constraints.

Cold debug build: ~1m 37s; touch-one-file rebuild: ~5s on Linux.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 17:36:42 -04:00
cameron 06fdcadf67 Merge pull request 'feature/canonical-date-taken' (#76) from feature/canonical-date-taken into master
Reviewed-on: #76
2026-05-06 21:15:57 +00:00
Cameron Cordes 9f1b3f6d9a date_taken_source: backfill 'exif' on legacy rows
Pre-resolver rows already had a populated `date_taken` from the old
kamadak-exif-only ingest path. The column-add migration left their
`date_taken_source` as NULL, and the drain's eligibility predicate
(`date_taken IS NULL OR date_taken_source = 'fs_time'`) skips them —
so they remain unlabelled forever and never benefit from the
resolver's exiftool fallback even if they're videos that should
upgrade.

Label them all `'exif'` in a one-shot UPDATE. Safe because every
write path that populated `date_taken` before the resolver landed was
a kamadak-exif read. Idempotent (the WHERE matches nothing on a
second run). Down.sql is a no-op — the labels stay correct under any
schema state, and the column-add migration is the right place to
revert if needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 17:05:00 -04:00
Cameron Cordes 7f12890f4b memories: single-SQL rewrite + 20-year lookback
Replaces the EXIF-loop + WalkDir-fallback pipeline that powered
`/memories` with a single per-library SQL query
(`get_memories_in_window`) that uses `strftime('%m-%d' | '%W' | '%m',
date_taken, 'unixepoch', tz_offset)` for calendar matching in the
client's timezone, plus a `years_back` lower bound and a
no-future-dates upper bound. Returns only the matching rows; the
handler applies per-library `PathExcluder` post-query and sorts.

Drops:
- `collect_exif_memories` — replaced by the single SQL query.
- `collect_filesystem_memories` — the canonical-date pipeline now
  populates `date_taken` for every row at ingest, so the WalkDir
  fallback that scanned 14k+ files each request is no longer needed.
- `get_memory_date_with_priority` and friends — request-time waterfall
  superseded by `date_resolver` running at ingest. The associated
  three priority-tests are dropped; their replacement lives in
  `date_resolver::tests`.

On a ~14k-file library this drops `/memories` from 10–15 s
(dominated by `fs::metadata` per row) to single-digit ms.

Bumps `DEFAULT_YEARS_BACK` from 15 → 20 to surface deeper archives
on matching anniversaries.

Note vs. ISO weeks: the original Rust used `chrono::iso_week().week()`
for week-span matching. SQLite's `%W` is Monday-anchored but uses week
0 for days before the first Monday, so it can disagree with ISO at
year boundaries by ±1. Acceptable for nostalgia browsing.

Adds 3 new DAO tests covering month-span filter, library scoping, and
the unknown-span-token guard. Also adds a CLAUDE.md section describing
the canonical-date pipeline end-to-end and the new
`DATE_BACKFILL_MAX_PER_TICK` env var.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 16:04:09 -04:00
Cameron Cordes 54e0635a98 date_backfill: per-tick drain for unresolved date_taken rows
Adds two ExifDao methods (`get_rows_needing_date_backfill` /
`backfill_date_taken`) and a `backfill_missing_date_taken` watcher pass
that runs on every tick alongside `backfill_unhashed_backlog`.

The drain queries the partial index for rows where `date_taken IS NULL`
or `date_taken_source = 'fs_time'`, batches up to
`DATE_BACKFILL_MAX_PER_TICK` paths (default 500), and feeds them through
`date_resolver::resolve_dates_batch` — a single exiftool subprocess
covers the whole tick. Rows that newly resolve to `exiftool` /
`filename` / `fs_time` get persisted via `backfill_date_taken` (touches
only `date_taken` + `date_taken_source` so EXIF / hash / perceptual
columns survive).

`filename`-sourced rows are intentionally not re-resolved — the regex
is authoritative when it matches and re-running exiftool wouldn't
change the answer. Files that have disappeared from disk are skipped
so a ghost row doesn't loop through the drain forever; the
missing-file scan in `library_maintenance` retires those separately.

Comes with two DAO unit tests (eligibility filter + column-isolation).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 16:03:03 -04:00
Cameron Cordes 2d14291733 ingest: stamp canonical date_taken on every InsertImageExif
Wires `date_resolver::resolve_date_taken` into the three call sites
that build `InsertImageExif`:

- `process_new_files` (file watcher) — every newly-registered file gets
  the resolver's verdict so videos and EXIF-stripped images land with a
  real date instead of NULL.
- Upload handler — same waterfall on the post-multipart-write path.
- GPS-write handler — re-runs the waterfall after exiftool writes GPS
  and re-reads the EXIF, in case a previously fs_time-sourced row now
  has a real EXIF date to upgrade to.

This is a behavior change vs. the pre-rewrite `/memories` request-time
priority: EXIF now beats filename when both are present. A photo
named `Screenshot_2014-06-01.png` whose EXIF `DateTime` is 2021 now
appears under 2021. The reverse case (no EXIF, parseable filename) is
unchanged and continues to surface the filename date with
`date_taken_source = 'filename'`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 16:00:14 -04:00
Cameron Cordes 79e258eccd date_resolver: canonical date_taken waterfall with exiftool fallback
New module that consolidates the four-step ingest waterfall:
kamadak-exif (already in process via the caller's prior result) →
exiftool fallback → filename regex → earliest_fs_time. Each step is
tagged with a `DateSource` so the caller can persist provenance.

The exiftool fallback is what makes videos and MakerNote-hosted dates
land at all — kamadak-exif can't read QuickTime/MP4 or Nikon-style
sub-IFDs. Single-file mode shells out per call; batch mode pipes paths
on stdin via `-@ -` and fans the result through one subprocess so the
upcoming per-tick drain doesn't pay startup cost per row. The
`exiftool` PATH check is cached in a `OnceLock` to keep the drain
short-circuited on deploys without exiftool installed.

`SubSecDateTimeOriginal` and `ContentCreateDate` are pulled alongside
the standard tags to capture iPhone's sub-second precision and Apple's
preferred capture-time tag respectively. `FileModifyDate` is
deliberately *not* in the tag list — it's a filesystem-derived value
the resolver already covers via the `fs_time` step, and pulling it
through exiftool would mask "no real EXIF date" with a misleading
`source = exiftool` row.

Module is registered in both `lib.rs` and `main.rs` (sibling-module
pattern the rest of the bin uses); no callers wired in yet — that
lands in the next commit. Comes with 9 unit tests covering JSON
parsing edge cases, source-priority short-circuiting, and the
fs_time-when-no-exif path.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 15:59:02 -04:00
Cameron Cordes 84326501a9 image_exif: add date_taken_source column
New nullable TEXT column tracks which step of the canonical-date
waterfall (kamadak-exif → exiftool → filename → fs_time) populated
`date_taken`. Lets a later per-tick drain re-resolve weak sources
(`fs_time`) once stronger ones become available, and gives the UI/debug
surface a way to answer "why does this photo show up under this date?".

Adds the column at all `InsertImageExif` construction sites with `None`
placeholders (the resolver wiring lands in a follow-up commit), and
extends the `update_exif` SET tuple so the column survives the GPS-write
re-read path. Partial index `idx_image_exif_date_backfill` is created
for the upcoming drain query.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 15:57:49 -04:00
cameron 5de9a322ac Merge pull request 'duplicates: folder-pair view of exact dups' (#75) from feature/folder-pair-duplicates into master
Reviewed-on: #75
2026-05-06 18:27:12 +00:00
Cameron Cordes 67cf0c7f73 duplicates: folder-pair view of exact dups
Bucket exact-dup rows by (library_id, dirname) pair on each side, then
filter by coverage = shared / min(folder_a_total, folder_b_total) and
an absolute floor on shared count. Surfaces "this folder is mostly
contained in that folder" matches that the per-file EXACT view buries
under one row each — e.g. an old phone-backup tree shadowing the
organized library, or a topic-grouped folder duplicating a date-grouped
one within the same library.

New endpoint: GET /duplicates/folder-pairs?library=&include_resolved=
&min_coverage=&min_shared=. Cached 5 min keyed on (library, include_resolved);
the user-tunable thresholds filter the cached unfiltered pair list so
slider drags don't re-bucket. Shares the resolve / unresolve flow with
the existing tabs — the frontend fans out N parallel /resolve calls,
one per shared content_hash.

Folder names carry no signal (BMW lives under Night Photos, not BMW_backup),
so bucketing is purely on (library_id, dirname) co-occurrence in
exact-dup groups. Within-folder dups (same hash twice in the same
folder) are skipped — those belong to the EXACT tab.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 12:43:29 -04:00
cameron 9ccb48233f Merge pull request 'exif: preserve filesystem mtime on GPS write' (#74) from fix/exif-preserve-mtime into master
Reviewed-on: #74
2026-05-04 20:12:08 +00:00
Cameron Cordes 1ddbca3413 exif: preserve filesystem mtime on GPS write
Pass -P to exiftool so write_gps doesn't bump the file's modification
time. For phone photos with no embedded EXIF datetime, the filesystem
mtime is often the only timestamp we have — losing it on every GPS
backfill would be data loss.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 16:09:21 -04:00
cameron 82dd21b205 Merge pull request 'feature/duplicate-detection' (#73) from feature/duplicate-detection into master
Reviewed-on: #73
2026-05-03 22:34:49 +00:00
Cameron Cordes 57b7bad086 duplicates: library-aware visibility — only hide a demoted row when its survivor is reachable
Soft-marked rows used to disappear from /photos globally, including
from a library-scoped view that didn't contain the survivor at all.
A user browsing lib A who'd promoted a file from lib B as the
survivor would silently lose visibility on their own copy in lib A,
even though lib B's file isn't reachable from lib A's view.

Library-scoped queries now keep a demoted row visible when its
survivor lives in a library outside the current scope. Implemented
as a NOT EXISTS subquery against the same image_exif table aliased
as `survivor`. The unscoped (all-libraries) view is unchanged — every
survivor is reachable, so demoted rows stay hidden as before.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 18:24:07 -04:00
Cameron Cordes 98057c98a1 duplicates: tighten perceptual cluster — entropy band, asymmetric dHash, medoid prune
Three changes against "still too loose at lowest sensitivity":

- Popcount entropy band tightened from [8, 56] to [16, 48]. The wider
  band let too much low-frequency content through (skies, scans,
  faded film) where pHash collapses to near-uniform values that
  Hamming-trivially match across hundreds of unrelated images.
- dHash check now uses an asymmetric stricter threshold
  (dhash_threshold = max(2, threshold/2)). pHash is the candidate-
  discovery signal; dHash is validation. Splitting the budget means
  a real near-dup survives both while incidental pHash collisions
  on uniform content get vetoed. Missing dHash on either side now
  rejects the edge (was: trust pHash alone).
- Single-link union-find can chain weakly-similar images via
  transitive edges. Added a medoid-validation pass: per cluster,
  pick the member with smallest summed distance to others, then
  drop any whose distance to it exceeds threshold. Two new tests
  pin both invariants.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 18:19:48 -04:00
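Added example, not code from the repository: a Rust sketch of the three rules in the commit message above (popcount band [16, 48], `dhash_threshold = max(2, threshold/2)` with a missing dHash rejecting the edge, and the medoid prune). All type and function names and the sample hashes are hypothetical, and running the medoid pass on pHash distance is an assumption.

```rust
/// Hypothetical per-image hash pair; dHash may be absent.
#[derive(Debug, Clone, Copy)]
pub struct Hashes {
    pub phash: u64,
    pub dhash: Option<u64>,
}

pub fn hamming(a: u64, b: u64) -> u32 {
    (a ^ b).count_ones()
}

/// Hashes whose popcount falls outside [16, 48] are treated as uniform content.
pub fn in_entropy_band(phash: u64) -> bool {
    (16..=48).contains(&phash.count_ones())
}

/// dHash is validation, so it gets the stricter half of the budget.
pub fn dhash_threshold(threshold: u32) -> u32 {
    (threshold / 2).max(2)
}

/// Candidate edge test applied before two images are unioned.
pub fn edge_allowed(a: &Hashes, b: &Hashes, threshold: u32) -> bool {
    if !in_entropy_band(a.phash) || !in_entropy_band(b.phash) {
        return false;
    }
    if hamming(a.phash, b.phash) > threshold {
        return false;
    }
    match (a.dhash, b.dhash) {
        (Some(x), Some(y)) => hamming(x, y) <= dhash_threshold(threshold),
        // Missing dHash on either side rejects the edge.
        _ => false,
    }
}

/// Pick the member with the smallest summed distance to the others, then
/// drop every member farther than `threshold` from it.
pub fn medoid_prune(cluster: &[u64], threshold: u32) -> Vec<u64> {
    let medoid = match cluster
        .iter()
        .copied()
        .min_by_key(|&m| cluster.iter().map(|&o| hamming(m, o)).sum::<u32>())
    {
        Some(m) => m,
        None => return Vec::new(),
    };
    cluster
        .iter()
        .copied()
        .filter(|&m| hamming(medoid, m) <= threshold)
        .collect()
}

/// Constructed sample: 32 bits set, well inside the entropy band.
pub const SAMPLE_BASE: u64 = 0x0000_0000_FFFF_FFFF;

/// Constructed single-link chain at threshold 4: the fourth member is
/// 3 bits from the third, but 5 bits from the medoid (the second).
pub fn sample_chain() -> Vec<u64> {
    vec![
        SAMPLE_BASE,
        SAMPLE_BASE ^ 0b11,
        SAMPLE_BASE ^ 0b1111,
        SAMPLE_BASE ^ 0b111_1111,
        SAMPLE_BASE ^ 0b1,
    ]
}

fn main() {
    let chain = sample_chain();
    let kept = medoid_prune(&chain, 4);
    println!("chain of {} pruned to {}", chain.len(), kept.len());
}
```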
Cameron Cordes 7ca888e95d duplicates: filter low-entropy hashes + dHash double-check, fix backfill loop
The perceptual cluster was producing one giant first group that
contained hundreds of unrelated images. Two causes:
- Solid-colour images (skies, black frames, monochrome scans) all
  hash to near-zero pHashes that Hamming-distance-zero to each other.
- Single-link clustering on pHash alone is too permissive — a chain
  of weakly-similar images all collapses into one cluster.

Fixed by skipping hashes outside the popcount [8, 56] band (uniform
content) and requiring dHash agreement within threshold before
unioning a candidate edge from the BK-tree. Two new tests pin both
invariants.

Backfill bin, separate fix: decode-failed rows kept phash_64=NULL
and got re-pulled by every batch, infinite-looping on a queue of
unbreakable formats. Persist a 0/0 sentinel on decode failure so
the row leaves the candidate set; the all-zero hash is excluded
from clustering by the same entropy filter so it doesn't pollute
results.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 18:08:05 -04:00
Cameron Cordes 7584cd8792 duplicates: perceptual hash + soft-mark resolution + upload 409
Adds pHash + dHash columns alongside the existing blake3 content_hash so
near-duplicates (re-encoded, resized, format-converted copies) become
queryable. /duplicates/{exact,perceptual} return groups; /duplicates/
{resolve,unresolve} flip a duplicate_of_hash soft-mark on losing rows
and union perceptual-only tag sets onto the survivor. The default
/photos listing filters duplicate_of_hash IS NULL so demoted siblings
stop cluttering the grid; include_duplicates=true opts back in for
Apollo's review modal. Upload now hashes bytes pre-write and returns
409 with the canonical sibling when a file's bytes already exist.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 17:36:01 -04:00
cameron 4340b164eb Merge pull request 'perf/faces-embeddings-no-clone' (#72) from perf/faces-embeddings-no-clone into master
Reviewed-on: #72
2026-05-01 23:09:22 +00:00
Cameron Cordes fb4df4b195 style: cargo fmt sweep
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 19:01:00 -04:00
Cameron Cordes 1d9b9a0bc4 faces: avoid 40 MB row clone in /faces/embeddings
list_embeddings cloned the full FaceDetectionRow inside the filter_map
just to pair it with the base64-encoded embedding. The 2 KB BLOB was
already on the row — at 20k unassigned faces that's 40 MB of pointless
heap traffic per Apollo cluster-suggest run. Move the bytes out via
Option::take() so the row drops the BLOB instead of duplicating it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 19:00:55 -04:00
cameron 7998a0c9b0 Merge pull request 'feature/per-library-excluded-dirs' (#71) from feature/per-library-excluded-dirs into master
Reviewed-on: #71
2026-05-01 20:11:10 +00:00
Cameron Cordes 58f010f302 docs(claude): pin excluded_dirs entry-form syntax
The two entry shapes for libraries.excluded_dirs / EXCLUDED_DIRS
are not symmetric:
  - /sub/path → multi-segment, library-root-anchored, recursive
  - name     → single component anywhere in the tree

Without this pinned, a reasonable read of the column doc would be
"any path-like string works" — but a multi-segment string without a
leading slash silently never matches (the no-slash form scans path
components for exact string equality, and components are
slash-free).

No code change; just documentation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 20:05:58 +00:00
Cameron Cordes 814066551e multi-library: per-library excluded_dirs
Adds a nullable comma-separated TEXT column to the libraries table.
Effective excludes for a walk = (env-var globals) ∪
(library.excluded_dirs). Empty / NULL = no library-specific
extras; the global env var still applies.

Migration (2026-05-01-110000_libraries_excluded_dirs)

  ALTER TABLE libraries ADD COLUMN excluded_dirs TEXT. NULL on every
  existing row — no behavior change on upgrade.

Library struct + helpers (libraries.rs)

  - Library gains excluded_dirs: Vec<String>, parsed from the column
    by parse_excluded_dirs_column (drops empties / whitespace,
    matches the env-var parser).
  - Library::effective_excluded_dirs(globals) returns the union.
  - From<LibraryRow> hydrates the field on AppState construction so
    /libraries surfaces it.

Watcher / walkers / memories

  Every per-library walker now consults the effective set:
    - process_new_files (file-watch ingest, RAW/EXIF/face)
    - process_face_backlog (filter_excluded inherits)
    - create_thumbnails (startup + new-file branch)
    - update_media_counts (Prometheus gauge)
    - cleanup_orphaned_playlists (per-library source-existence check)
    - memories endpoint (PathExcluder)

  Effective set is computed once per per-library iteration in the
  watcher tick and threaded through; called functions retain their
  flat &[String] signature (no per-library awareness needed inside
  the walker primitives).

Use case: mount a parent directory while a sibling library covers
a child subtree, and exclude the child subtree from the parent so
the libraries don't double-walk / double-write image_exif. With
hash-keyed derived data (Branches B/C), the duplication-avoidance
is the only cost prevented — face / tag / insight sharing was
already correct via content_hash.

Tests: 228 pass (226 from previous + 2 new in libraries::tests:
parse_excluded_dirs_column edge cases,
effective_excluded_dirs_unions_global_and_per_library).

CLAUDE.md gains a "Per-library excludes" subsection of the
multi-library data model.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 19:54:17 +00:00
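Added example, not code from the repository: a Rust sketch of the two `excluded_dirs` entry forms pinned in the docs commit above and the global/per-library union from this commit, including a check that flags entries that can never match. Function names and sample paths are hypothetical; component-wise prefix matching for the `/sub/path` form and matching the bare-name form against every path component are assumptions.

```rust
/// Parse the comma-separated column: trim, drop empties. NULL -> no entries.
pub fn parse_entries(column: Option<&str>) -> Vec<String> {
    column
        .unwrap_or("")
        .split(',')
        .map(str::trim)
        .filter(|e| !e.is_empty())
        .map(String::from)
        .collect()
}

/// Effective excludes for one library walk = globals plus library extras.
pub fn union_entries(globals: &[String], per_library: &[String]) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    for e in globals.iter().chain(per_library) {
        if !out.contains(e) {
            out.push(e.clone());
        }
    }
    out
}

fn components(path: &str) -> Vec<&str> {
    path.split('/').filter(|c| !c.is_empty()).collect()
}

/// `rel_path` is relative to the library root.
pub fn is_excluded(rel_path: &str, entries: &[String]) -> bool {
    let comps = components(rel_path);
    entries.iter().any(|entry| match entry.strip_prefix('/') {
        // "/sub/path": anchored at the library root, recursive.
        Some(anchored) => {
            let want = components(anchored);
            !want.is_empty() && comps.starts_with(&want)
        }
        // "name": exact equality with a single component anywhere.
        None => comps.iter().any(|c| *c == entry.as_str()),
    })
}

/// Entries that can never match: multi-segment without a leading slash.
/// Components are slash-free, so exact equality with such a string fails.
pub fn inert_entries(entries: &[String]) -> Vec<&str> {
    entries
        .iter()
        .map(String::as_str)
        .filter(|e| !e.starts_with('/') && e.contains('/'))
        .collect()
}

fn main() {
    // Constructed sample values.
    let globals = parse_entries(Some("@eaDir,.thumbnails"));
    let library = parse_entries(Some(" /archive/raw , ,phone/backup,"));
    let effective = union_entries(&globals, &library);
    for path in ["archive/raw/2020/a.nef", "phone/backup/b.jpg", "2020/@eaDir/c.jpg"] {
        println!("{path}: excluded={}", is_excluded(path, &effective));
    }
    println!("entries that never match: {:?}", inert_entries(&effective));
}
```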
cameron 4f17af688e Merge pull request 'multi-library: operator kill switch via libraries.enabled' (#70) from feature/library-enabled-flag into master
Reviewed-on: #70
2026-05-01 19:15:20 +00:00
Cameron Cordes 3598bb2cfe multi-library: operator kill switch via libraries.enabled
A small follow-up to Branches A/B/C. Adds a nullable-default-1
boolean column to the `libraries` table that controls whether the
watcher considers the library at all. Useful for staging a new
mount before committing to ingest, and as a maintenance kill
switch when a library needs to be quiet without being unmounted.

Migration (2026-05-01-100000_libraries_enabled_flag)

  ALTER TABLE libraries ADD COLUMN enabled BOOLEAN NOT NULL DEFAULT 1.
  Existing rows stay enabled — no behavior change on upgrade.

Watcher gate (main.rs)

  At the top of the per-library loop, if !lib.enabled { continue; }
  — runs BEFORE the availability probe. Disabled libraries don't
  enter the health map, don't get probed, don't get ingest, don't
  get any maintenance pass. The initial sweep before the loop's
  first sleep also skips disabled libraries.

Orphan-GC consensus (library_maintenance.rs)

  all_libraries_online filters disabled libraries out of the
  consensus check — they're treated as out-of-scope, not as
  blockers. Otherwise flipping enabled=false would permanently
  halt orphan GC for the rest of the system, which is the opposite
  of the intended kill-switch semantics.

Cross-library duplicates: safe by construction. Hash-keyed derived
data (face_detections, tagged_photo with hash, photo_insights with
hash) is anchored by ANY image_exif row carrying the hash. Disabling
a library does NOT delete its image_exif rows, so a hash referenced
by a disabled library's row stays anchored — derived data survives.
collect_orphan_hashes deliberately doesn't filter image_exif by
library.enabled for exactly this reason.

No HTTP endpoint. Library mutation is rare-enough infra work that a
SQL toggle is fine, and a public mutation endpoint without a role /
permission story would be poorly-prioritized exposure for a
single-user tool. Documented in CLAUDE.md.

Tests: 226 pass (225 from Branch C + 1 new
all_libraries_online_treats_disabled_as_out_of_scope, which proves
that even an explicit Stale entry on a disabled library doesn't
block the consensus).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 19:10:24 +00:00
cameron 23448cf5e6 Merge pull request 'feature/library-handoff-and-gc' (#69) from feature/library-handoff-and-gc into master
Reviewed-on: #69
2026-05-01 18:27:40 +00:00
Cameron Cordes d809ddee44 library_maintenance: clarify orphan-gc log wording
"marked 2 new" parses as "2 new files" on first read — but the
unit is content_hashes, and the action is observing them as
orphaned (becoming-deleted, not appearing). Reword:

  "{} new orphan hash(es) marked, {} revived"

instead of "marked {} new, revived {}". Also pluralize the deleted
counts ("row(s)") and append the pending-set size to the success
log so a tick that both deletes and re-marks doesn't lose the
trailing-state context.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 18:01:01 +00:00
Cameron Cordes fa98d147be library_maintenance: log orphan-gc decisions in stale-library path too
run_orphan_gc returned early on the !all_online branch before the
final debug/info log line, so the GC was effectively invisible
whenever any library was Stale — exactly the dry-run scenario where
operators most want to confirm the safety gate is firing. Add the
same conditional log inside the early-return branch (plus a
"deferred — at least one library Stale" hint in the info-level
variant when there's something newly marked).

No behavior change beyond observability.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 17:14:09 +00:00
Cameron Cordes 5f247be1f1 docs(claude): note in-place edit gap as future Branch D
The maintenance pipeline added in Branch C assumes (library_id,
rel_path) bytes are stable for as long as the file lives at that
path. In-place edits (crop, re-export to same name) bypass
process_new_files's already-indexed check, so the row's
content_hash stays pinned to the original bytes — tags / faces /
insights remain attached to that hash silently.

Document the gap and the proposed shape of the fix:
  - Stale-content detection pass: compare last_modified / size_bytes
    to fs::metadata, re-hash on mismatch, update image_exif.
  - "Content branched" semantics on hash change: faces re-run, tags
    migrate forward (user intent survives a crop), insights migrate
    + flag for re-generation, favorites follow path.
  - Apollo derived.db cache invalidation belongs in the same design
    cycle, not after.

Captured here so the design intent is clear before someone hits the
case in real life. No code change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:53:08 +00:00
Cameron Cordes 263e27e108 multi-library: handoff + orphan GC with two-tick consensus
Branch C of the multi-library data-model rollout. Implements the
operational maintenance pipeline pinned in CLAUDE.md → "Multi-library
data model" / "Library availability and safety". Branches A and B
land first; this branch builds on top.

New module: src/library_maintenance.rs

Three idempotent passes the watcher runs every tick after the
per-library ingest loop:

1. Missing-file scan (per online library)

   For each Online library, load a paginated page of image_exif rows
   (IMAGE_EXIF_MISSING_SCAN_PAGE_SIZE, default 500), stat() each one,
   and delete rows whose source file is NotFound. Permission/IO
   errors are skipped, never deleted. Capped at
   IMAGE_EXIF_MISSING_DELETE_CAP_PER_TICK (default 200) per library
   per tick — so a pathological mount that returns NotFound for
   everything can't wipe the table in one cycle. Cursor advances
   across ticks, wraps on partial-page returns, and naturally cycles
   through the entire library over many minutes. Skipped wholesale
   for Stale libraries via the existing probe gate.

2. Back-ref refresh (DB-only)

   For face_detections / tagged_photo / photo_insights: any
   hash-keyed row whose (library_id, rel_path) no longer matches an
   image_exif row, but whose content_hash does, is repointed at a
   surviving image_exif location. Pure SQL with EXISTS guards so
   rows whose hash is fully orphaned are left alone (the orphan GC
   handles those). Idempotent; no availability gate needed.

   This is what makes a recent → archive move invisible to readers:
   when pass 1 retires the lib-A row, pass 2 pivots tags / faces /
   insights to lib-B's surviving path before any client notices.

3. Orphan GC (destructive)

   Hash-keyed derived rows whose content_hash has no image_exif
   referent are GC-eligible. Two-tick consensus: a hash must be
   observed orphaned on two consecutive ticks AND every library must
   be Online for both. A single Stale tick within the window cancels
   all pending deletes (they remain marked but won't be promoted) —
   they're re-evaluated next tick. The pending set lives in
   OrphanGcState (in-memory); a watcher restart resets it, which can
   only delay a delete, never cause one. Hashes that re-appear in
   image_exif between ticks are "revived" from the pending set
   (handles transient share unmount / remount).

Two new ExifDao methods:
  - list_rel_paths_for_library_page(library_id, limit, offset) for
    the paginated missing-file scan.
  - (count_for_library landed in Branch A.)

Watcher wiring (main.rs)

Per-library: missing-file scan inside the existing per-library
loop, after process_new_files, gated by the same probe check that
already protects ingest. After the loop: reconcile (Branch B),
back-ref refresh, then run_orphan_gc. The maintenance connection is
opened once per tick (image_api::database::connect), used by all
three DB-only passes, and dropped at end of tick.

CLAUDE.md gains a "Maintenance pipeline" subsection that describes
the three passes and their interaction with the existing
availability-and-safety policy.

Tests: 225 pass (217 from Branch B + 8 new in library_maintenance
covering back-ref refresh including the fully-orphaned no-op case,
two-tick GC consensus, Stale-tick consensus reset, image_exif
re-appearance revival, multi-table delete, and the
all_libraries_online helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 16:27:53 +00:00
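Added example, not code from the repository: a Rust sketch of the two-tick consensus rule described for pass 3 (orphan GC) in the commit message above, with revival and the Stale-tick deferral. `OrphanGc`, `TickReport`, `tick` and the sample hashes are hypothetical names; tracking the window with a single "previous tick was all-online" flag is an assumption about how the rule could be implemented.

```rust
use std::collections::BTreeSet;

/// What one maintenance tick decided.
#[derive(Debug, Default, PartialEq)]
pub struct TickReport {
    pub newly_marked: Vec<String>,
    pub revived: Vec<String>,
    pub deleted: Vec<String>,
    /// True when at least one library was Stale, so nothing was promoted.
    pub deferred: bool,
}

/// In-memory pending set. `Default` is also the state after a watcher
/// restart, which can only delay a delete, never cause one.
#[derive(Debug, Default)]
pub struct OrphanGc {
    pending: BTreeSet<String>,
    prev_tick_all_online: bool,
}

impl OrphanGc {
    /// `orphaned_now`: content hashes with derived rows but no image_exif
    /// referent on this tick. `all_online`: every library probed Online.
    pub fn tick(&mut self, orphaned_now: &BTreeSet<String>, all_online: bool) -> TickReport {
        // Hashes that re-appeared in image_exif leave the pending set.
        let revived: Vec<String> = self.pending.difference(orphaned_now).cloned().collect();
        for h in &revived {
            self.pending.remove(h);
        }
        // First sighting: mark only, never delete on the same tick.
        let newly_marked: Vec<String> =
            orphaned_now.difference(&self.pending).cloned().collect();

        // Everything still pending was orphaned on the previous tick and is
        // orphaned now. Promote only if both ticks saw every library Online.
        let deleted: Vec<String> = if all_online && self.prev_tick_all_online {
            std::mem::take(&mut self.pending).into_iter().collect()
        } else {
            Vec::new()
        };

        self.pending.extend(newly_marked.iter().cloned());
        self.prev_tick_all_online = all_online;
        TickReport { newly_marked, revived, deleted, deferred: !all_online }
    }

    pub fn pending_len(&self) -> usize {
        self.pending.len()
    }
}

/// Build a constructed sample hash set.
pub fn hashes(items: &[&str]) -> BTreeSet<String> {
    items.iter().map(|s| s.to_string()).collect()
}

fn main() {
    let mut gc = OrphanGc::default();
    // Constructed ticks: (orphaned hashes, all libraries Online?)
    let ticks = vec![
        (hashes(&["h1", "h2"]), true),
        (hashes(&["h1"]), false), // share unmounted: h2 revived, h1 deferred
        (hashes(&["h1"]), true),  // window still contains the Stale tick
        (hashes(&["h1"]), true),  // two clean ticks: h1 is deleted
    ];
    for (orphans, online) in ticks {
        println!("{:?}", gc.tick(&orphans, online));
    }
}
```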
cameron a0283a6362 Merge pull request 'multi-library: hash-keyed tagged_photo + photo_insights with reconciliation' (#68) from feature/hash-keyed-derived-data into master
Reviewed-on: #68
2026-05-01 16:16:38 +00:00
Cameron Cordes 48cac8c285 multi-library: hash-keyed tagged_photo + photo_insights with reconciliation
Branch B of the multi-library data-model rollout. tagged_photo and
photo_insights now follow the bytes (content_hash), not the path,
matching the policy pinned in CLAUDE.md "Multi-library data model".
Branch A's availability probe and EXIF scoping land first; this
branch builds on top.

Migration (2026-05-01-000000_hash_keyed_derived_data)

  Adds nullable content_hash columns to tagged_photo and photo_insights,
  with partial indexes on the non-null subset to keep the index small
  during the transitional window. The migration backfills from
  image_exif:
    * tagged_photo joins on rel_path alone (no library_id available);
    * photo_insights joins on (library_id, rel_path), unambiguous.
  Rows whose image_exif hash isn't known yet stay null and the runtime
  reconciliation pass populates them as the hash backlog drains.

Insert-time population

  TagDao::tag_file looks up image_exif.content_hash by rel_path before
  inserting; the hash is written into the new column.
  InsightDao::store_insight does the same scoped to (library_id,
  rel_path). Caller-supplied hash on InsertPhotoInsight wins; otherwise
  the DAO does the lookup. Both paths fall back to None if the hash
  isn't known yet — reconciliation backfills.

Reconciliation (database/reconcile.rs)

  Three idempotent passes the watcher runs once per tick after the
  per-library backfill loop:
    1. tagged_photo NULL hashes → populate from image_exif by rel_path.
    2. photo_insights NULL hashes → populate by (library_id, rel_path).
    3. photo_insights scalar merge — when multiple is_current rows
       share a content_hash, keep the earliest generated_at as
       current; demote the rest. Demoted rows keep their data so
       /insights/history is unaffected; only the "current" pointer
       narrows to one per hash.

  No filesystem dependency, so reconcile doesn't need the availability
  gate; runs every tick. Logs once when something changed, debug
  otherwise.

  Tags are set-valued under the policy (union on read, already
  DISTINCT in queries), so there is no analogous tag-collapse pass —
  duplicate (tag_id, content_hash) rows across libraries are
  harmless.

Read paths are unchanged in this branch — lookup_tags_batch's
existing rel_path-via-hash-sibling expansion still produces the
correct merge. A follow-up can simplify reads to use the new column
directly for performance.

Tests: 217 pass (212 pre-existing + 5 new in reconcile covering
NULL-fill, hash-not-yet-known no-op, library scoping on insights,
earliest-wins collapse, idempotency).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:52:16 +00:00
cameron cce8f0c1b7 Merge pull request 'feature/multi-library-data-model' (#67) from feature/multi-library-data-model into master
Reviewed-on: #67
2026-05-01 14:40:16 +00:00
Cameron Cordes 48ed7be5d9 libraries: initial availability sweep before watcher's first sleep
new_health_map seeds every library as Online, and the watcher's tick
loop sleeps WATCH_QUICK_INTERVAL_SECONDS (default 60s) before its
first probe — meaning /libraries reported the optimistic default for
up to a minute after boot, even when a share was clearly unmounted.

Run the same refresh_health pass once at the top of the watcher
thread before entering the sleep loop. /libraries is then truthful
within milliseconds of the watcher thread starting (effectively from
the first HTTP request, since the watcher spawns well before the
server binds).

The per-tick gate inside the loop is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:33:45 +00:00
Cameron Cordes eea1bf3181 multi-library: availability probe + scoped EXIF queries + collision fixes
Branch A of the multi-library data-model rollout. Three threads of
correctness/safety work that ship together because the new mount
needs all three before it can land:

1. Library availability probe (libraries.rs, state.rs, main.rs)

   New LibraryHealth (Online | Stale { reason, since }) and a shared
   LibraryHealthMap on AppState. Probe checks root_path exists +
   is_dir + readable + non-empty (relative to a "had_data" signal so
   fresh mounts aren't downgraded). The watcher tick begins with a
   refresh_health() per library; stale libraries skip ingest, the
   hash backfill, and face-detection backlog drains for that tick.
   The orphaned-playlist cleanup also gates on every library being
   online — a missing source on a stale library is indistinguishable
   from a transient unmount, and the cleanup is destructive.

   /libraries now returns each library with its current health
   state. Logs only on Online↔Stale transitions so a long outage
   doesn't spam.

   New ExifDao::count_for_library is the "had_data" signal.

2. EXIF queries scoped by library_id (database/mod.rs, files.rs,
   main.rs, tags.rs)

   query_by_exif gains an Option<i32> library filter; /photos and
   /photos/exif now pass it. Without this, an EXIF-filtered request
   scoped to ?library=N returned cross-library results because the
   handler resolved the library but didn't push it through to SQL.

   get_exif_batch gains the same option. The watcher's per-library
   ingest, face-candidate build, and content-hash backfill all
   scope to their library; the union-mode /photos date-sort path
   and the library-agnostic tag fan-out (lookup_tags_batch, by
   design) keep using None.

3. Derivative-path collision fixes (content_hash.rs, main.rs)

   New content_hash::library_scoped_legacy_path helper:
   <derivative_dir>/<library_id>/<rel_path>. Thumbnail generation
   (startup walk + watcher needs-thumb check) and serving now use
   it; serving falls back to the bare-legacy mirrored path so
   pre-multi-library deployments keep working without
   regeneration. Without this, lib2 with the same rel_path as lib1
   would have its thumbnail request short-circuit to lib1's image.

   Orphaned-playlist cleanup walks every library when checking for
   the source video (was: BASE_PATH only). Without this, mounting
   a 2nd library and waiting 24h would delete every playlist whose
   source lived only in the 2nd library.

   The HLS playlist write path collision (filename-only basename,
   not rel_path) is left as a known issue with a TODO at the call
   site — the actor-pipeline rewrite belongs in Branch B/C.

Tests: 212 pass (cargo test --lib). New tests cover the probe
states (online / missing root / non-dir / empty-with-prior-data),
refresh_health transitions, query_by_exif scoping, get_exif_batch
keying on (library_id, rel_path), library_scoped_legacy_path, and
count_for_library.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:12:49 +00:00
Cameron Cordes 2f91891459 docs(claude): pin multi-library data model + availability/safety policy
Adds a "Multi-library data model" section that classifies each table as
intrinsic-to-bytes (hash-keyed), user-intent-about-a-photo (hash-keyed),
or library-administrative ((library_id, rel_path)). Spells out merge
semantics on read (union for set-valued, earliest-wins for scalar),
write attribution (binds to bytes, not to current library), the
transitional-state rules for hash-less rows, library handoff behavior
on archive moves, and orphan GC.

Adds a "Library availability and safety" subsection: every watcher
tick begins with a presence probe; destructive paths (move-handoff
re-keying, orphan GC) require both/all libraries online and
confirmed-clean for two consecutive ticks. A NAS reboot, USB pull, or
VPN drop must never trigger destruction — the worst case is that
derived-data work pauses until the share returns.

The face_detections table is referenced as the existing reference
implementation of the policy.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:11:42 +00:00
cameron 3d162105f7 Merge pull request 'feature/edit-tag' (#66) from feature/edit-tag into master
Reviewed-on: #66
2026-05-01 01:03:40 +00:00
Cameron 98601973f7 faces: log at the three 503 paths in update_face_handler
PATCH /image/faces/{id} can return 503 from three places (face client
disabled, transient embed error, mid-flight disable) and none of them
were logging — operator sees the status code but nothing in the Rust
log explaining why. Add warn! lines at each so future bbox-edit
failures aren't silent. Response body is unchanged so existing clients
keep working.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 20:57:51 -04:00
Cameron 862917b0d1 gitignore: SQLite WAL runtime + local docs/specs dirs
*.db-shm / *.db-wal show up in the working tree whenever the server
runs (the WAL/journal pragmas in connect()), and /docs and /specs
hold per-feature design notes that stay local per the project's
"spec docs not in git" convention.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 20:31:19 -04:00
Cameron 44d677528e tags: add edit + delete endpoints, enable FK enforcement
PUT /image/tags/{id} renames a tag globally; DELETE /image/tags/{id}
removes a tag and every photo's reference. Rename returns 200/404/409
(case-insensitive name conflict) / 400 (empty name); delete returns
204/404. New migration adds a UNIQUE COLLATE NOCASE index on
tags.name with a pre-flight pass that collapses existing case-
insensitive duplicates onto the lowest id.

The connection setup now sets PRAGMA foreign_keys = ON. The schema
already declares ON DELETE CASCADE / SET NULL on several tables —
those clauses were documentation-only because SQLite has FK
enforcement off per-connection by default. Audited every
diesel::delete site; each touches either no inbound FKs or has a
matching policy. delete_tag relies on the tagged_photo cascade
instead of doing manual cleanup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 20:26:35 -04:00
cameron 89b743ba54 Merge pull request 'faces: count distinct content_hash in stats total_photos' (#65) from face-stats-dedup-hash into master
Reviewed-on: #65
2026-04-30 22:43:58 +00:00
Cameron Cordes 323097c650 faces: count distinct content_hash in stats total_photos
face_detections is keyed on content_hash (one row per unique bytes,
shared across libraries / duplicate paths) but total_photos was
COUNT(*) over image_exif rows. A file present at multiple rel_paths or
across libraries inflated the denominator without inflating the
numerator, leaving a permanent gap (e.g. 1101/1103 with nothing
actually pending detection).

Switch total_photos to COUNT(DISTINCT content_hash) so numerator and
denominator live in the same domain. Exclude rows with NULL
content_hash from the count — they're held in the hash-backfill
backlog, not the detection backlog, and counting them pins the bar
below 100% for the duration of that pass.

CLAUDE.md: document the stats domain rule next to the rest of the
face-detection notes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 22:41:20 +00:00
cameron d0833177c7 Merge pull request 'feature/face-stats-exclude-videos' (#64) from feature/face-stats-exclude-videos into master
Reviewed-on: #64
2026-04-30 21:17:19 +00:00
Cameron Cordes 67abd8d8ff style: cargo fmt
Pre-existing whitespace drift in test bodies, normalized by rustfmt.
No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 21:16:34 +00:00
Cameron Cordes 0840d55c70 faces: exclude videos from backlog drain and SCANNED denominator
list_unscanned_candidates pulled every hashed image_exif row, including
videos. filter_excluded then dropped them client-side without writing a
marker, so the same set re-appeared every watcher tick — emitting the
"backlog drain — running detection on N candidate(s)" log forever and
producing no progress.

face_stats.total_photos counted the same video rows in the denominator,
so the SCANNED percentage was structurally capped below 100%.

Add an image-extension SQL predicate (case-insensitive, sourced from
file_types::IMAGE_EXTENSIONS) and apply it to both queries. Videos
never enter the candidate set, total_photos counts only what can
actually be scanned, and 100% becomes reachable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 21:16:30 +00:00
cameron dbb046dfa8 Merge pull request 'indexer: prune EXCLUDED_DIRS at WalkDir time, extract enumerate_indexable_files' (#63) from feature/exclude-dirs-at-index-time into master
Reviewed-on: #63
2026-04-30 20:24:18 +00:00
137 changed files with 38443 additions and 6221 deletions
+3
@@ -0,0 +1,3 @@
[target.x86_64-unknown-linux-gnu]
linker = "/usr/bin/gcc"
rustflags = ["-C", "link-arg=-fuse-ld=mold"]
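Review bot: the `linker = "/usr/bin/gcc"` and `-fuse-ld=mold` lines hard-code a host-specific toolchain for every x86_64 Linux build of this repository, so a checkout on a machine without mold installed, or with gcc at another path, fails at link time. Either document mold as a build prerequisite next to this setting or keep the override in a per-user Cargo config instead of the committed one.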
+96 -2
@@ -53,11 +53,60 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
# OPENROUTER_HTTP_REFERER=https://your-site.example # OPENROUTER_HTTP_REFERER=https://your-site.example
# OPENROUTER_APP_TITLE=ImageApi # OPENROUTER_APP_TITLE=ImageApi
# ── AI Insights — local backend switch ──────────────────────────────────
# Picks which local LLM stack the server uses for chat, vision describe,
# and embeddings. `ollama` (default) uses the OLLAMA_* settings above;
# `llamacpp` uses the LLAMA_SWAP_* settings below. The switch is global
# and applies to both `backend=local` and `backend=hybrid` (hybrid keeps
# chat on OpenRouter but still uses this stack for the describe pass).
# Don't flip mid-deploy without re-embedding existing index rows —
# mixed vector spaces break similarity search.
# LLM_BACKEND=ollama
# ── AI Insights — llama.cpp / llama-swap (optional) ─────────────────────
# Set LLAMA_SWAP_URL plus LLM_BACKEND=llamacpp to swap the local stack
# off Ollama. Talks OpenAI-compatible /v1 to a llama-swap proxy fronting
# per-slot llama-server instances. Chat models receive images directly
# via content-parts (vision-capable models assumed); a separate vision
# slot is used only by the describe_photo tool and describe-image utility.
# LLAMA_SWAP_URL=http://localhost:9292/v1
# LLAMA_SWAP_PRIMARY_MODEL=chat
# Optional dedicated vision slot for describe_image. Defaults to
# PRIMARY_MODEL so describe_photo works without extra config.
# LLAMA_SWAP_VISION_MODEL=vision
# LLAMA_SWAP_EMBEDDING_MODEL=embed
# Comma-separated allowlist surfaced by /insights/models when
# LLM_BACKEND=llamacpp. All report has_vision=true.
# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
# ── Unified search translation model (optional) ─────────────────────────
# /photos/search/unified runs one small LLM call to translate a natural-
# language query into structured filters + a semantic term, then CLIP-ranks.
# That step needs an LLM AND CLIP available at once. On a tight VRAM budget a
# large chat model can't co-reside with CLIP, so pin a small, fast model here
# (it can stay loaded alongside CLIP and the chat model). Precedence:
# UNIFIED_SEARCH_MODEL > the client's selected model > the configured default.
# Use the configured backend (LLM_BACKEND); local only — no hybrid.
# UNIFIED_SEARCH_MODEL=qwen3-0.6b
# ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ───────────────────
# TTS routes through the same llama-swap proxy (a Chatterbox model id), so it
# only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp.
# Powers POST /tts/speech and the /tts/voices* endpoints (read-aloud insights
# + voice cloning in the mobile app).
# LLAMA_SWAP_TTS_MODEL=chatterbox # TTS model id in config.yaml
# LLAMA_SWAP_TTS_VOICE=m # default voice when a request omits one
# LLAMA_SWAP_TTS_REF_SECONDS=30 # max voice-clone reference clip length (s)
# LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600 # synth timeout (long chunked text)
# ── AI Insights — sibling services (optional) ─────────────────────────── # ── AI Insights — sibling services (optional) ───────────────────────────
# Apollo (places + face inference). Single Apollo deploys typically set # Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
# only APOLLO_API_BASE_URL and let the face client fall back to it. # typically set only APOLLO_API_BASE_URL and let the face + CLIP
# clients fall back to it.
# APOLLO_API_BASE_URL=http://apollo.lan:8000 # APOLLO_API_BASE_URL=http://apollo.lan:8000
# APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000 # APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000
# APOLLO_CLIP_API_BASE_URL=http://apollo.lan:8000
# SMS_API_URL=http://localhost:8000 # SMS_API_URL=http://localhost:8000
# SMS_API_TOKEN= # SMS_API_TOKEN=
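Review bot: the example `LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed` puts the embedding slot on the allowlist that /insights/models surfaces, and the comment above it says every entry reports has_vision=true, so a client would be offered `embed` as a vision-capable chat model. Drop `embed` from the example value or say that the list should contain chat-capable slots only. The LLM_BACKEND comment warns that flipping backends without re-embedding breaks similarity search; if nothing enforces that at startup, it is worth saying so here, since the warning is then the only safeguard.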
@@ -80,6 +129,51 @@ FACE_DETECT_TIMEOUT_SEC=60
FACE_BACKLOG_MAX_PER_TICK=64 FACE_BACKLOG_MAX_PER_TICK=64
FACE_HASH_BACKFILL_MAX_PER_TICK=2000 FACE_HASH_BACKFILL_MAX_PER_TICK=2000
# ── CLIP semantic photo search ──────────────────────────────────────────
# ImageApi calls Apollo's /api/internal/clip/{encode_image,encode_text}
# to populate per-photo embeddings during the watcher's backlog drain
# and to encode user queries at /photos/search time. Disabled when
# neither APOLLO_CLIP_API_BASE_URL nor APOLLO_API_BASE_URL is set.
#
# Per-watcher-tick cap on the encode drain. Default 32 ≈ ~1 photo/sec
# on CPU, ~30 photos/sec on a single-GPU host (Apollo's threadpool
# is 1 on CUDA, so concurrency is bounded server-side regardless of
# our setting). Bump on a fresh deploy to clear the backlog faster.
CLIP_BACKLOG_MAX_PER_TICK=32
# Client-side parallel encode calls per drain pass. Apollo's GPU pool
# serializes server-side; this just overlaps file-IO with inference.
CLIP_ENCODE_CONCURRENCY=4
# Per-encode HTTP timeout. CPU-only Apollo deploys may need higher.
CLIP_REQUEST_TIMEOUT_SEC=60
# ── RAG / search ──────────────────────────────────────────────────────── # ── RAG / search ────────────────────────────────────────────────────────
# Set to `1` to enable cross-encoder reranking on /search results. # Set to `1` to enable cross-encoder reranking on /search results.
SEARCH_RAG_RERANK=0 SEARCH_RAG_RERANK=0
# ── Nightly reel pre-generation (Phase 3+) ──────────────────────────────
# Set to `1` to enable the scheduler. Disabled by default.
# REEL_PREGEN_ENABLED=1
# Hour (0-23) when the nightly batch fires. Default 3 AM.
# REEL_PREGEN_HOUR=3
# Day of week for weekly reels (0=Sun, 1=Mon, …). Default Monday.
# REEL_PREGEN_WEEK_DOW=1
# Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to
# the server's local timezone.
# REEL_PREGEN_TZ_OFFSET_MINUTES=
# Fixed timezone offset — overrides auto-detect to avoid DST shifts.
# When set, both the DB fallback and env fallback use this value.
# REEL_PREGEN_TZ_FIXED_MINUTES=-480
# Voice ID for narration (e.g., "grandma"). Falls back to the value
# stored in the user_ai_prefs DB row when set.
# REEL_PREGEN_VOICE=
# Library filter: a library id (e.g. "1") or "all" for every library.
# REEL_PREGEN_LIBRARY=all
# Max agentic tool iterations for pre-gen scripter. Default 8.
# REEL_PREGEN_MAX_TOOL_ITERS=8
#
# On-disk reel cache sweep (runs every 24h, independent of pre-gen). Removes
# reel MP4s with no ledger row + no live job that are older than the max age —
# i.e. the on-demand cache, which otherwise grows forever. Set to 0 to disable.
# REEL_CACHE_SWEEP_ENABLED=1
# Age (days) before an unreferenced reel MP4 is swept. Default 7.
# REEL_CACHE_MAX_AGE_DAYS=7
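Review bot: `REEL_PREGEN_TZ_OFFSET_MINUTES` and `REEL_PREGEN_TZ_FIXED_MINUTES` both take a minutes-from-UTC offset, and the comments do not say how they differ or which wins when both are set; "both the DB fallback and env fallback use this value" refers to fallbacks that are not described anywhere in this block. State the precedence in one line, or collapse the two into a single variable. The cache-sweep comment says "Set to 0 to disable" while the example line is commented out as `=1`, so also state the default explicitly, since the sweep deletes files.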
+9
@@ -0,0 +1,9 @@
# Normalize line endings in the repo to LF. Windows checkouts can still
# present working-copy files as CRLF; this just keeps the committed history
# stable so contributors on any OS don't see whitespace-only diffs every
# time someone touches a file.
* text=auto eol=lf
# Migrations and SQL must be LF — SQLite parsers don't care, but diffing
# is much cleaner with stable endings.
*.sql text eol=lf
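Review bot: the comment on `* text=auto eol=lf` says Windows checkouts can still present working-copy files as CRLF, but `eol=lf` sets the working-tree line ending as well, so text files are checked out with LF on every platform. Reword the comment to match, or use `text=auto` alone if CRLF working copies on Windows are intended. With the first rule in place, `*.sql text eol=lf` only adds a forced `text` classification for SQL files instead of relying on auto-detection, which its comment could say.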
+6
@@ -2,8 +2,14 @@
database/target database/target
*.db *.db
*.db.bak *.db.bak
*.db-shm
*.db-wal
.env .env
# Server-local TTS pronunciation overrides (tts_pronunciations.example.json is the template)
/tts_pronunciations.json
/tmp /tmp
/docs
/specs
# Default ignored files # Default ignored files
.idea/shelf/ .idea/shelf/
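Review bot: the new `/docs` and `/specs` entries carry no comment, unlike the `/tts_pronunciations.json` line above them, and ignoring a whole top-level `/docs` directory means documentation added there later is silently left out of commits. Add a one-line comment saying these hold local-only design notes so the exclusion reads as intentional.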
+418 -10
@@ -76,7 +76,10 @@ cargo run --bin cleanup_files -- --base-path /path/to/media --database-url ./dat
### Core Components ### Core Components
**Layered Architecture:** **Layered Architecture:**
- **HTTP Layer** (`main.rs`): Route handlers for images, videos, metadata, tags, favorites, memories - **Startup wiring** (`main.rs`): only ~350 lines — env load, migrations, AppState, route registration, server bind. Background jobs are kicked off here but defined elsewhere.
- **HTTP Layer** (`handlers/{image,video,favorites}.rs`, `files.rs`, `tags.rs`, `faces.rs`, `memories.rs`, `ai/handlers.rs`): the route handlers, grouped by domain.
- **Background loops** (`watcher.rs`): the file-watcher tick (`watch_files`, `process_new_files`) and the orphaned-playlist cleanup (`cleanup_orphaned_playlists`). Per-tick drains are factored into `backfill.rs` (`backfill_unhashed_backlog`, `backfill_missing_date_taken`, `backfill_missing_content_hashes`, `process_face_backlog`, `build_face_candidates`).
- **Thumbnails** (`thumbnails.rs`): generation pipeline + the `IMAGE_GAUGE` / `VIDEO_GAUGE` Prometheus metrics.
- **Auth Layer** (`auth.rs`): JWT token validation, Claims extraction via FromRequest trait - **Auth Layer** (`auth.rs`): JWT token validation, Claims extraction via FromRequest trait
- **Service Layer** (`files.rs`, `exif.rs`, `memories.rs`): Business logic for file operations and EXIF extraction - **Service Layer** (`files.rs`, `exif.rs`, `memories.rs`): Business logic for file operations and EXIF extraction
- **DAO Layer** (`database/mod.rs`): Trait-based data access (ExifDao, UserDao, FavoriteDao, TagDao) - **DAO Layer** (`database/mod.rs`): Trait-based data access (ExifDao, UserDao, FavoriteDao, TagDao)
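Review bot: "only ~350 lines" in the Startup wiring bullet is a figure that goes stale as soon as main.rs changes; describe what lives in main.rs and drop the count. The new HTTP Layer bullet also lists `files.rs` and `memories.rs`, which the unchanged Service Layer bullet still claims, so say which role each file plays or note that they hold both handlers and business logic.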
@@ -104,6 +107,242 @@ All database access goes through trait-based DAOs (e.g., `ExifDao`, `SqliteExifD
- `query_by_exif()`: Complex filtering by camera, GPS bounds, date ranges - `query_by_exif()`: Complex filtering by camera, GPS bounds, date ranges
- Batch operations minimize DB hits during file watching - Batch operations minimize DB hits during file watching
### Multi-library data model
ImageApi supports more than one library (a library = a `(name, root_path)`
row in the `libraries` table that maps to a mounted directory tree). The
same bytes may exist under more than one library — typical case is an
"active" library plus an "archive" library that ingests files as they age
out — and the data model is designed so that derived data follows the
**bytes**, not the path, while user-managed data does the same.
**The principle.** A photo's identity is its `content_hash` (blake3, see
`src/content_hash.rs`). Anything we compute from or attach to a photo is
keyed on that hash so it survives:
- the same file appearing in a second library (backup / archive / mirror),
- the file moving between libraries (recent → archive handoff),
- the file moving within a library (re-organized rel_path),
- intra-library duplicates (same bytes at two paths).
**Table classification.** Three categories drive the keying decision:
| Category | Key | Rationale | Tables |
|---|---|---|---|
| Intrinsic to bytes | `content_hash` | Rerunning is wasted work (or LLM cost) | `face_detections` ✓, `image_exif` (target), `photo_insights` (target), `video_preview_clips` (target) |
| User intent about a photo | `content_hash` | "Tag this photo" means the bytes, not a path | `tagged_photo` (target), `favorites` (target) |
| Library administrative | `(library_id, rel_path)` | Tied to a specific filesystem location | `libraries`, `entity_photo_links`, the `rel_path` back-ref columns on hash-keyed tables |
✓ = already implemented this way. *(target)* = today still keyed on
`(library_id, rel_path)` and slated for migration. The migration adds a
nullable `content_hash` column, populates it from `image_exif` where
known, and read paths fall back to rel_path while the hash is null.
**Carrying a `rel_path` even when hash-keyed.** Hash-keyed tables retain
`(library_id, rel_path)` columns as a denormalized **back-reference**, not
as the key. This lets a single query answer "what is at this path right
now" without joining through `image_exif`, and supports the path-only
endpoints that predate the hash. `face_detections` is the reference
implementation: hash is the truth, path is a hint.
**Merge semantics on read.** When the same hash has rows under more than
one library:
- Set-valued data (tags, favorites, faces, entity links) → **union**.
- Scalar data (current insight, EXIF row, video preview clip) → earliest
`generated_at` / `created_time` wins. The historical lib1 row beats a
re-generated lib2 row, so the user's curated insight isn't shadowed by
a re-run on archive ingest.
**Write attribution.** A new tag/favorite/insight created while viewing
under lib2 binds to the bytes, not to lib2 — so it shows up under lib1
too. This is by design, but it's the most surprising rule on first
encounter; clients should not assume tags are library-scoped.
**Hash-less rows (transitional state).** During and immediately after a
new mount, `image_exif.content_hash` is being populated by
`backfill_unhashed_backlog` (capped per tick). Rules during this window:
- Writes: if the hash is known, write hash-keyed. If not, write
`(library_id, rel_path)`-keyed and let the reconciliation job collapse
duplicates once the hash lands.
- Reads: prefer hash key, fall back to `(library_id, rel_path)`.
- Reconciliation: a one-shot pass after every backfill tick collapses
rows that now share a hash, applying the merge semantics above.
Idempotent — safe to re-run.
**Library handoff (recent → archive).** When a file moves between
libraries (e.g. operator moves `~/photos/2024/IMG.nef` to the archive
mount), the file watcher sees the disappearance under lib1 and the
appearance under lib2. Hash-keyed rows don't need migration; the
`(library_id, rel_path)` back-ref columns are updated to point to the new
location. Library administrative rows (`entity_photo_links`,
`(library_id, rel_path)` rows in `image_exif` for hash-less items) are
re-keyed by the move detector, which matches a disappearance to an
appearance by `content_hash` within a configurable window.
**Orphans (source deleted while a copy survives).** When the only
`image_exif` row for a hash is deleted (file removed from disk), the
hash-keyed derived rows survive **as long as another `image_exif` row
references the same hash**. If the last reference is gone, derived rows
are eligible for GC (deferred — the GC job runs on a slow schedule so
that a brief unmount or rename doesn't wipe history).
**Stats and counts.** When reporting "how many photos do you have," count
`DISTINCT content_hash` over `image_exif`, not row count. Faces stats
already does this (`FaceDao::stats` in `src/faces.rs`); other counters
should follow suit. Numerator and denominator must live in the same
domain — see the face-stats commentary below for the cautionary tale.
**Per-library scoping when the user asks for it.** A request scoped to
`?library=N` filters the `image_exif` view to that library, and the
hash-keyed derived data is joined through that view. The user sees only
photos that have a copy under lib N, but the derived data attached to
those photos is the merged hash-keyed view. This is the answer to "show
me archive photos with their original tags."
**Operator kill switch (`libraries.enabled`).** Setting `enabled=0` on a
library is a hard pause: the watcher skips it entirely — before the
probe, before ingest, before any maintenance pass — and the orphan-GC
all-online consensus check filters disabled libraries out (they don't
keep the GC window closed). Reads / serving are unaffected; nothing
prevents `/image?path=...` from resolving against a disabled library's
root if the file is on disk. The existing `image_exif` rows for a
disabled library are **not deleted** — they continue to anchor
hash-keyed derived data, so cross-library duplicates survive the
disable. Toggle via SQL; there is intentionally no HTTP endpoint for
library mutation (single-user tool, no role / permission story).
Typical workflows: stage a new mount with `enabled=0` then flip to `1`;
quiet a flaky NAS during maintenance without disturbing the rest of
the system.
**Per-library excludes (`libraries.excluded_dirs`).** A
comma-separated column, same shape as the global `EXCLUDED_DIRS` env
var, that's applied **in union** with the env-var globals when a
walker scans this library. Use case: mount a parent directory as a
new library while a sibling library covers a child subtree, and
exclude that child subtree from the parent so the two libraries
don't double-walk and double-write `image_exif`. Two entry forms
(parsed by `memories::PathExcluder`):
- `/sub/path` — leading slash flags it as a path under the library
root. Joins to root + matches by `path.starts_with(...)`. Works
at any depth (`/photos`, `/media/2024/raw`).
- `name` — no leading slash flags it as a component name to skip
anywhere in the tree (`@eaDir`, `.thumbnails`). Single segment
only — `media/photos/a` without a leading slash never matches
anything. Hash-keyed derived
data (faces, tags, insights) is unaffected either way — those
follow the bytes — but `image_exif` row count, walker CPU, and
thumbnail disk usage all drop to 1× instead of 2× for the overlap.
Affects: file-watch ingest (`process_new_files`), thumbnail
generation, media-count gauges, the orphaned-playlist cleanup walk,
and the `/memories` endpoint. The face-detection backlog drain
inherits via `face_watch::filter_excluded`. NULL = no extras (only
the global env var applies).
**Library availability and safety.** Libraries can be on network shares
or removable media; the file watcher must not interpret a temporary
unavailability as a mass-deletion event. Every tick begins with a
**presence probe** per library: the library is considered online iff
its `root_path` exists, is readable, and a top-level scan returns at
least one expected entry (or matches a recent file-count high-water
mark within a tolerance). The probe result gates which actions are safe
to run on that library this tick:
| Action | Requires online? |
|---|---|
| Quick / full scan ingest of new files | yes |
| EXIF / face / insight backlog drains | yes — but the work runs against any online library |
| Move-handoff detection (lib1 disappearance ↔ lib2 appearance match) | **both** libraries online |
| `(library_id, rel_path)` re-keying on detected move | **both** libraries online |
| Orphan GC of hash-keyed derived data | all libraries that have *ever* held the hash must be online and confirmed-clean for two consecutive ticks |
| Reads / serving | always allowed; falls back to whichever library is online |
A library that fails the probe enters a "stale" state: writes scoped to
it are paused, its rows are flagged stale (not deleted) in
`/libraries` status, and the watcher logs at `warn` once per
state-transition (not per tick). A library that recovers re-enters the
online set automatically; no operator action required for transient
outages. The intent is that pulling a USB drive, rebooting a NAS, or
losing a VPN never triggers a destructive code path — the worst case is
that derived-data work pauses until the share returns.
The same rule constrains the move-handoff matcher: a disappearance
under lib1 only counts as a "move" if there is a matching appearance
under another **online** library within the window. A bare
disappearance with no matching appearance is treated as
"unavailable-or-deleted, defer judgment" — it does not re-key any rows
and does not enqueue GC.
**Maintenance pipeline (`src/library_maintenance.rs`).** The watcher
runs three maintenance passes per tick that together implement the
move/handoff and orphan rules:
1. **Missing-file scan** — per online library, paginated. A page of
`image_exif` rows is loaded (`IMAGE_EXIF_MISSING_SCAN_PAGE_SIZE`,
default 500), each row's `(root_path/rel_path)` is `stat()`-ed,
and confirmed-not-found rows are deleted from `image_exif`
(capped at `IMAGE_EXIF_MISSING_DELETE_CAP_PER_TICK`, default 200).
Permission/IO errors are skipped, never deleted — only `NotFound`
triggers a deletion. The cursor wraps every time a partial page
comes back, so the whole library is swept across consecutive ticks.
Skipped wholesale for Stale libraries via the per-library probe
gate at the top of the loop iteration.
2. **Back-ref refresh** — DB-only. For `face_detections`,
`tagged_photo`, and `photo_insights`: any hash-keyed row whose
`(library_id, rel_path)` no longer matches an `image_exif` row
*but whose `content_hash` does* is repointed at the surviving
`image_exif` location. Idempotent SQL; no health gate needed.
This is what makes the recent → archive handoff invisible to
read paths: when the missing-file scan retires the lib-A row,
tags/faces/insights pivot to lib-B's path before any user
notices.
3. **Orphan GC** — destructive. Hash-keyed derived rows whose
`content_hash` no longer has any `image_exif` row are eligible.
Two-tick consensus: a hash must be observed orphaned on two
consecutive ticks AND every library must be online for both. A
single Stale tick within the window cancels all pending deletes.
The pending set is held in memory (`OrphanGcState`) — restart
resets it, which only delays a delete, never causes one. Tags,
faces, and insights for orphaned hashes are deleted in one batch
per tick.
A backup library that briefly disappears, then returns within two
ticks, never loses any derived data. A move from lib-A to lib-B
without disappearance flips through pass 1 (lib-A row retired) and
pass 2 (back-refs follow), with pass 3 noting nothing because the
hash is still present in `image_exif` (lib-B's row).
**Known gap: in-place content changes (future Branch D).** The
maintenance pipeline assumes a `(library_id, rel_path)`'s bytes are
stable for as long as the file exists at that path. If a user edits
a file in place (crop, re-export) without renaming, the watcher's
quick scan walks the file (mtime is recent) but `process_new_files`
short-circuits because `(library_id, rel_path)` already has an
`image_exif` row — no re-hash, no re-EXIF, no face redetection. The
row's `content_hash` keeps pointing at the original bytes. Tags /
faces / insights stay attached to the original hash and continue to
display because the rel_path back-ref still resolves; new faces
introduced by the edit are never detected.
The right place to fix this is a **stale-content detection pass**
that compares `image_exif.last_modified` / `size_bytes` to
`fs::metadata` for rows the quick scan would otherwise skip. On
mismatch, recompute the hash, update `image_exif`, and apply the
"content branched" semantics:
- **Faces** re-run (faces are fully derived from bytes).
- **Tags** migrate to the new hash (user intent — "this photo is
vacation" survives a crop). Insights migrate forward as a
starting point and are flagged for re-generation.
- **Favorites** (when migrated to hash-keyed) follow the path /
user intent.
The interesting case is the operator who keeps an unedited copy in
the archive library and edits the local copy: post-detection, the
archive copy stays on the original hash, the local copy branches to
the new hash, and the two histories cleanly split. Apollo's
`derived.db` cache will need an invalidation hook for the changed
hash — design it alongside Branch D.
### File Processing Pipeline ### File Processing Pipeline
**Thumbnail Generation:** **Thumbnail Generation:**
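Review bot: the stale-content pass proposed in the "Known gap" paragraph keys only on `image_exif.last_modified` / `size_bytes` versus `fs::metadata`, so an in-place edit that keeps both (a tool that restores mtime, or a same-size rewrite with a preserved timestamp) still leaves `content_hash` pointing at the old bytes. Suggest stating that limit here and saying whether the full scan will periodically re-hash as a backstop. It would also help to spell out ordering against orphan GC: once the row branches to the new hash, the old hash may have no remaining path, so the text should confirm tags and insights are migrated before the batch delete for orphaned hashes can fire.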
@@ -128,6 +367,60 @@ Runs in background thread with two-tier strategy:
- Batch queries EXIF DB to detect new files - Batch queries EXIF DB to detect new files
- Configurable via `WATCH_QUICK_INTERVAL_SECONDS` and `WATCH_FULL_INTERVAL_SECONDS` - Configurable via `WATCH_QUICK_INTERVAL_SECONDS` and `WATCH_FULL_INTERVAL_SECONDS`
**Canonical date_taken pipeline (`src/date_resolver.rs`).** Every row's
`image_exif.date_taken` is populated at ingest by a four-step waterfall;
which step won is recorded in `image_exif.date_taken_source` so the
per-tick drain can re-resolve weak entries when better tools become
available, and so the UI/debug surface can answer "why did this photo
land on this date?". Order:
1. **`exif`** — kamadak-exif `DateTime` / `DateTimeOriginal`. Fast,
in-process, image-only.
2. **`exiftool`** — shell-out fallback for tags kamadak can't reach:
QuickTime/MP4 (`MediaCreateDate`, `TrackCreateDate`, `CreateDate`),
Apple's `ContentCreateDate`, MakerNote sub-IFDs. Required for
videos to land a real date. Single-file at ingest; the per-tick
drain feeds the whole batch through one `exiftool -@ -` subprocess.
Degrades silently when `exiftool` isn't on PATH (resolver caches the
"available" check via `OnceLock`).
3. **`filename`** — `extract_date_from_filename` in `memories.rs`
matches screenshot, chat-export, and timestamp-named patterns.
4. **`fs_time`** — `earliest_fs_time(metadata)` (earlier of created /
modified). Last resort.
Notable behavior change vs. the pre-2026-05 request-time logic:
**EXIF beats filename when both are present.** A photo named
`Screenshot_2014-06-01.png` whose EXIF `DateTime` is 2021 now appears
under 2021, not 2014 — on the theory that EXIF is more reliable than
import-named filenames. The reverse case (no EXIF, filename has a
date) is unchanged.
The `backfill_missing_date_taken` drain (`src/backfill.rs`) runs every
watcher tick alongside `backfill_unhashed_backlog` (also `src/backfill.rs`). It loads up to
`DATE_BACKFILL_MAX_PER_TICK` rows (default 500) where
`date_taken IS NULL` (backed by the `idx_image_exif_date_backfill`
partial index), runs the waterfall batch via `resolve_dates_batch`,
and writes results via the `backfill_date_taken` DAO method (touches
only `date_taken` + `date_taken_source` so EXIF / hash / perceptual
columns are preserved). Resolved rows — including the ones the
waterfall could only resolve via `fs_time` — are not re-eligible:
the resolver is deterministic on file bytes + filename + fs metadata,
so re-running on the same inputs lands on the same source every time.
An earlier version included `date_taken_source = 'fs_time'` in the
eligibility predicate, but with `ORDER BY id ASC LIMIT 500` it spun on
the same lowest-id rows in perpetuity and held the SQLite write lock
long enough to starve face-PATCH writers (5s busy_timeout → 500). If
a stronger tool comes online (exiftool install, new filename regex),
re-resolve out-of-band rather than re-introducing the steady-state
eligibility.
`/memories` is a single SQL query against this column
(`get_memories_in_window` in `src/database/mod.rs`), using
`strftime('%m-%d' | '%W' | '%m', date_taken, 'unixepoch', tz)` for
calendar matching with the client's timezone offset. The pre-rewrite
version stat'd every row and walked the entire library tree — at
~14k photos this took 1015 s; the rewrite is single-digit ms.
**EXIF Extraction:** **EXIF Extraction:**
- Uses `kamadak-exif` crate - Uses `kamadak-exif` crate
- Supports: JPEG, TIFF, RAW (NEF, CR2, CR3), HEIF/HEIC, PNG, WebP - Supports: JPEG, TIFF, RAW (NEF, CR2, CR3), HEIF/HEIC, PNG, WebP
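Review bot: step 2 of the waterfall runs whatever `exiftool` resolves on PATH and, in the drain, passes file names through `exiftool -@ -`, where each stdin line is parsed as one argument. The section should say how paths are written to that argfile: a library file whose name starts with `-` or contains a newline would otherwise be read as an option or split into two arguments. Suggest documenting that absolute paths are always used and that names containing newlines are skipped, adding an env var for an absolute `exiftool` path, and logging once at startup when the tool is missing rather than degrading silently, since without it videos fall through to `filename` / `fs_time`.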
@@ -180,10 +473,16 @@ GET /memories?path=...&recursive=true
POST /insights/generate (non-agentic single-shot) POST /insights/generate (non-agentic single-shot)
POST /insights/generate/agentic (tool-calling loop; body: { file_path, backend?, model?, ... }) POST /insights/generate/agentic (tool-calling loop; body: { file_path, backend?, model?, ... })
GET /insights?path=...&library=... GET /insights?path=...&library=...
GET /insights/models (local Ollama models + capabilities) GET /insights/models (local-backend models + capabilities; Ollama OR llama-swap based on LLM_BACKEND)
GET /insights/openrouter/models (curated OpenRouter allowlist) GET /insights/openrouter/models (curated OpenRouter allowlist)
POST /insights/rate (thumbs up/down for training data) POST /insights/rate (thumbs up/down for training data)
// Text-to-Speech (Chatterbox via llama-swap; needs LLAMA_SWAP_URL)
POST /tts/speech (read-aloud: { text, voice?, ... } -> { audio_base64, format })
GET /tts/voices (Chatterbox voice library)
POST /tts/voices/upload (clone a voice from an uploaded clip; multipart)
POST /tts/voices/from-library (clone a voice from a library audio/video file)
// Insight Chat Continuation // Insight Chat Continuation
POST /insights/chat (single-turn reply, non-streaming) POST /insights/chat (single-turn reply, non-streaming)
POST /insights/chat/stream (SSE: text / tool_call / tool_result / truncated / done) POST /insights/chat/stream (SSE: text / tool_call / tool_result / truncated / done)
@@ -219,11 +518,11 @@ ImageApi owns the face data; Apollo (sibling repo) hosts the insightface inferen
- `persons(id, name UNIQUE COLLATE NOCASE, cover_face_id, entity_id, created_from_tag, notes, ...)` — operator-managed, name is the user-visible identity. - `persons(id, name UNIQUE COLLATE NOCASE, cover_face_id, entity_id, created_from_tag, notes, ...)` — operator-managed, name is the user-visible identity.
- `face_detections(id, library_id, content_hash, rel_path, bbox_*, embedding BLOB, confidence, source, person_id, status, model_version, ...)` — keyed on `content_hash` so a photo duplicated across libraries is detected once. Marker rows for `status IN ('no_faces','failed')` carry NULL bbox/embedding (CHECK constraint enforces this). - `face_detections(id, library_id, content_hash, rel_path, bbox_*, embedding BLOB, confidence, source, person_id, status, model_version, ...)` — keyed on `content_hash` so a photo duplicated across libraries is detected once. Marker rows for `status IN ('no_faces','failed')` carry NULL bbox/embedding (CHECK constraint enforces this).
**Why content_hash and not (library_id, rel_path):** ties face data to the bytes, not the path. A backup mount that copies files from the primary library naturally inherits the existing detections without re-running inference. **Why content_hash and not (library_id, rel_path):** ties face data to the bytes, not the path. A backup mount that copies files from the primary library naturally inherits the existing detections without re-running inference. This is the reference implementation of the multi-library data model — see "Multi-library data model" above.
**File-watch hook** (`src/main.rs::process_new_files`): for each photo with a populated `content_hash`, check `FaceDao::already_scanned(hash)`; if not, send bytes (or embedded JPEG preview for RAW via `exif::extract_embedded_jpeg_preview`) to Apollo's `/api/internal/faces/detect`. K=`FACE_DETECT_CONCURRENCY` (default 8) parallel calls per scan tick; Apollo serializes them via its single-worker GPU pool. `face_watch.rs` is the Tokio orchestration layer. **File-watch hook** (`src/watcher.rs::process_new_files`): for each photo with a populated `content_hash`, check `FaceDao::already_scanned(hash)`; if not, send bytes (or embedded JPEG preview for RAW via `exif::extract_embedded_jpeg_preview`) to Apollo's `/api/internal/faces/detect`. K=`FACE_DETECT_CONCURRENCY` (default 8) parallel calls per scan tick; Apollo serializes them via its single-worker GPU pool. `face_watch.rs` is the Tokio orchestration layer.
**Per-tick backlog drain** (also `src/main.rs`): two passes that run on every watcher tick regardless of quick-vs-full scan: **Per-tick backlog drain** (`src/backfill.rs`): two passes that run on every watcher tick regardless of quick-vs-full scan:
- `backfill_unhashed_backlog` — populates `image_exif.content_hash` for photos that arrived before the hash field was retroactive. Capped by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000); errors don't burn the cap. - `backfill_unhashed_backlog` — populates `image_exif.content_hash` for photos that arrived before the hash field was retroactive. Capped by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000); errors don't burn the cap.
- `process_face_backlog` — runs detection on photos that have a hash but no `face_detections` row. Capped by `FACE_BACKLOG_MAX_PER_TICK` (default 64). Selected via a SQL anti-join (`FaceDao::list_unscanned_candidates`); videos and EXCLUDED_DIRS paths filtered out client-side via `face_watch::filter_excluded` so they never reach Apollo. - `process_face_backlog` — runs detection on photos that have a hash but no `face_detections` row. Capped by `FACE_BACKLOG_MAX_PER_TICK` (default 64). Selected via a SQL anti-join (`FaceDao::list_unscanned_candidates`); videos and EXCLUDED_DIRS paths filtered out client-side via `face_watch::filter_excluded` so they never reach Apollo.
@@ -233,9 +532,13 @@ ImageApi owns the face data; Apollo (sibling repo) hosts the insightface inferen
**Rerun preserves manual rows** (`POST /image/faces/{id}/rerun`): only `source='auto'` rows are deleted before re-running detection. `already_scanned` returns true on ANY row, so a photo whose only faces are manually drawn never auto-redetects. **Rerun preserves manual rows** (`POST /image/faces/{id}/rerun`): only `source='auto'` rows are deleted before re-running detection. `already_scanned` returns true on ANY row, so a photo whose only faces are manually drawn never auto-redetects.
**Stats domain — content_hash, not file rows** (`FaceDao::stats` in `src/faces.rs`): `total_photos` counts `DISTINCT content_hash` over `image_exif` (filtered to image extensions, `content_hash IS NOT NULL`), and so do `scanned` / `with_faces` / `no_faces` / `failed` over `face_detections`. Numerator and denominator must live in the same domain — `face_detections` is keyed on content_hash, so the same JPEG present at two rel_paths or in two libraries scans once. Counting `image_exif` rows in the denominator inflated total by one per duplicate file and produced a permanent gap (e.g. 1101/1103 with nothing actually pending). Hash-less rows are excluded from total_photos while they sit in the `backfill_unhashed_backlog` queue; otherwise the bar pins below 100% for the duration of that backfill even though those rows aren't pending detection yet — they're pending hashing.
Module map: Module map:
- `src/faces.rs``FaceDao` trait + `SqliteFaceDao` impl, route handlers for `/faces/*`, `/image/faces/*`, `/persons/*`. Mirror of `tags.rs` layout. - `src/faces.rs``FaceDao` trait + `SqliteFaceDao` impl, route handlers for `/faces/*`, `/image/faces/*`, `/persons/*`. Mirror of `tags.rs` layout.
- `src/face_watch.rs` — Tokio orchestration for the file-watch detect pass; `filter_excluded` (PathExcluder + image-extension filter), `read_image_bytes_for_detect` (RAW preview fallback). - `src/face_watch.rs` — Tokio orchestration for the file-watch detect pass; `filter_excluded` (PathExcluder + image-extension filter), `read_image_bytes_for_detect` (RAW preview fallback).
- `src/backfill.rs` — per-tick drains (unhashed-hash, date_taken, face-backlog, etc.) called from `watcher::watch_files` and `watcher::process_new_files`.
- `src/watcher.rs` — the watcher loop itself and `process_new_files` (file walk → EXIF write → face-candidate build).
- `src/ai/face_client.rs` — HTTP client for Apollo's inference. Configured by `APOLLO_FACE_API_BASE_URL`, falls back to `APOLLO_API_BASE_URL`. Both unset → feature disabled, file-watch hook is a no-op. - `src/ai/face_client.rs` — HTTP client for Apollo's inference. Configured by `APOLLO_FACE_API_BASE_URL`, falls back to `APOLLO_API_BASE_URL`. Both unset → feature disabled, file-watch hook is a no-op.
- `migrations/2026-04-29-000000_add_faces/` — schema. - `migrations/2026-04-29-000000_add_faces/` — schema.
@@ -296,6 +599,7 @@ Optional:
```bash ```bash
WATCH_QUICK_INTERVAL_SECONDS=60 # Quick scan interval WATCH_QUICK_INTERVAL_SECONDS=60 # Quick scan interval
WATCH_FULL_INTERVAL_SECONDS=3600 # Full scan interval WATCH_FULL_INTERVAL_SECONDS=3600 # Full scan interval
DATE_BACKFILL_MAX_PER_TICK=500 # Cap on canonical-date drain per watcher tick
OTLP_OTLS_ENDPOINT=http://... # OpenTelemetry collector (release builds) OTLP_OTLS_ENDPOINT=http://... # OpenTelemetry collector (release builds)
# AI Insights Configuration # AI Insights Configuration
@@ -333,8 +637,55 @@ OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small # Optional, embeddings
OPENROUTER_HTTP_REFERER=https://your-site.example # Optional attribution header OPENROUTER_HTTP_REFERER=https://your-site.example # Optional attribution header
OPENROUTER_APP_TITLE=ImageApi # Optional attribution header OPENROUTER_APP_TITLE=ImageApi # Optional attribution header
# Local LLM backend switch. `ollama` (default) keeps the OLLAMA_* settings
# above; `llamacpp` swaps the entire local stack (chat + vision describe +
# embeddings) over to llama-swap. The switch is global and applies to
# `backend=local` requests and to `backend=hybrid`'s describe pass (hybrid
# chat still goes to OpenRouter). Don't flip mid-deploy without
# re-embedding — mixed vector spaces break similarity search.
LLM_BACKEND=ollama
# Embedding model contract. Corpus and queries must be embedded by the same
# model with matching prefixes — after changing the embed model or any of
# these, run `cargo run --bin reembed_embeddings` (all tables) or search is
# garbage. Prefix values may contain a literal \n (expanded to a newline).
EMBEDDING_DIM=768 # 768 = nomic-embed-text v1.5; 1024 = Qwen3-Embedding-0.6B
EMBED_QUERY_PREFIX= # nomic: "search_query: " | Qwen3: "Instruct: <task>\nQuery: "
EMBED_DOCUMENT_PREFIX= # nomic: "search_document: " | Qwen3: leave empty
# llama.cpp / llama-swap (used when LLM_BACKEND=llamacpp). OpenAI-compatible
# proxy hosting one or more llama-server processes. Chat models receive
# images directly via content-parts (all models assumed vision-capable).
LLAMA_SWAP_URL=http://localhost:9292/v1 # Required when LLM_BACKEND=llamacpp
LLAMA_SWAP_PRIMARY_MODEL=chat # Chat slot id (matches config.yaml)
LLAMA_SWAP_VISION_MODEL= # Dedicated vision slot for describe_image / describe_photo
# tool. Defaults to PRIMARY_MODEL when unset.
LLAMA_SWAP_EMBEDDING_MODEL=embed # Embedding slot id
LLAMA_SWAP_ALLOWED_MODELS=chat,coder # Curated allowlist surfaced by GET /insights/models
# when LLM_BACKEND=llamacpp. All report has_vision=true.
# Empty = picker shows only the configured primary model.
LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180 # Per-request timeout; bump for slow CPU offload
# Text-to-speech (Chatterbox served behind llama-swap). Only needs
# LLAMA_SWAP_URL — independent of LLM_BACKEND. Powers /tts/speech (read-aloud)
# and /tts/voices* (voice cloning). Reference audio is ffmpeg-normalized to WAV
# server-side, so any source format works.
LLAMA_SWAP_TTS_MODEL=chatterbox # TTS model id in config.yaml (default: chatterbox)
LLAMA_SWAP_TTS_VOICE=m # Default voice when /tts/speech omits one (optional)
LLAMA_SWAP_TTS_REF_SECONDS=30 # Max voice-clone reference clip length, seconds
# (Chatterbox is zero-shot; ~10-20s clean ref is ideal)
LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600 # Per-request synth timeout (long chunked insights take
# minutes); overrides the shared client timeout for /tts/speech
TTS_PRONUNCIATIONS_PATH=tts_pronunciations.json # JSON map of pronunciation overrides applied before synth
# (see tts_pronunciations.example.json); hot-reloaded on change
# Insight Chat Continuation # Insight Chat Continuation
AGENTIC_CHAT_MAX_ITERATIONS=6 # Cap on tool-calling iterations per chat turn (default 6) AGENTIC_CHAT_MAX_ITERATIONS=6 # Cap on tool-calling iterations per chat turn (default 6)
AGENTIC_CHAT_DEFAULT_NUM_CTX=32768 # Assumed context window for the history-truncation budget
# when a chat request omits num_ctx (default 32768). Size to
# the smallest context among the chat models actually served;
# too small silently guts replayed history every turn (and
# destroys llama.cpp KV-cache prefix reuse).
``` ```
**AI Insights Fallback Behavior:** **AI Insights Fallback Behavior:**
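Review bot: the `LLM_BACKEND` and `EMBEDDING_DIM` comments leave vector-space consistency to operator discipline ("run `cargo run --bin reembed_embeddings` ... or search is garbage") with no check described. Suggest recording the embedding model id, dimension and prefixes alongside the stored vectors and refusing to start, or at least logging loudly, when they differ from the configured values; a silent mismatch is hard to diagnose from search results alone. Next to `TTS_PRONUNCIATIONS_PATH`, note that the file is hot-reloaded and rewrites text before synthesis, so it should be writable only by the service operator.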
@@ -352,10 +703,50 @@ The `OllamaClient` provides methods to query available models:
This allows runtime verification of model availability before generating insights. This allows runtime verification of model availability before generating insights.
**Local backend switch (`LLM_BACKEND`):**
One env var decides which "local" stack the server runs against — `ollama`
(default) or `llamacpp`. It's global on purpose: chat, vision, and
embeddings all route through the same backend, so the embedding-vector
column in SQLite stays in one vector space. Don't flip mid-deploy without
re-embedding the affected rows — similarity search will collapse.
- `LLM_BACKEND=ollama`: chat, vision, and embeddings use Ollama. Vision
capability is probed per-model via `/api/show`.
- `LLM_BACKEND=llamacpp`: chat models receive images directly via OpenAI
content-parts (all models assumed vision-capable). Embeddings hit the
`embed` slot. A dedicated `LLAMA_SWAP_VISION_MODEL` slot (defaults to
the chat model) handles `describe_image` for the `describe_photo` tool.
Requires `LLAMA_SWAP_URL`.
The per-request `backend=hybrid` override is orthogonal: it always sends
chat to OpenRouter (text-only, images are pre-described and inlined), but
the describe + embed passes still route through whichever `LLM_BACKEND`
is configured.
**Backend dispatch (`ResolvedBackend`):**
`InsightGenerator::resolve_backend(kind, overrides)` is the single entry
point that builds clients for a request. Returns a `ResolvedBackend` with
two roles: `.chat()` (the agentic/chat client) and `.local()` (local-only
utility calls: rerank, describe_image, embeddings). `BackendKind` is an
enum (`Local` | `Hybrid`) replacing the stringly-typed `"local"` /
`"hybrid"` labels. `SamplingOverrides` groups model/ctx/temp/top_p/top_k/
min_p per-request overrides. All downstream code (`execute_tool`,
`run_streaming_agentic_loop`, etc.) takes `&ResolvedBackend` rather than
individual client references.
`GET /insights/models` returns the local-backend models with capabilities
in the same envelope shape regardless of `LLM_BACKEND`: Ollama servers
when `ollama`, llama-swap slots (from `LLAMA_SWAP_ALLOWED_MODELS`) when
`llamacpp`. No `/insights/llamacpp/models` — the picker reads a single
endpoint.
**Hybrid Backend (OpenRouter):** **Hybrid Backend (OpenRouter):**
- Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`. - Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`.
- Local Ollama still describes the image (vision); the description is inlined - Vision describe happens before the agentic loop; the description is inlined
into the chat prompt and the agentic loop runs on OpenRouter. into the chat prompt and the agentic loop runs on OpenRouter. Vision
routes through whichever `LLM_BACKEND` is configured.
- `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that - `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that
call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`. call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`.
- No live capability precheck — the operator-curated allowlist is trusted. - No live capability precheck — the operator-curated allowlist is trusted.
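Review bot: `LLAMA_SWAP_ALLOWED_MODELS` is described only as what `GET /insights/models` surfaces to the picker; nothing under "Backend dispatch" says `resolve_backend` rejects a `SamplingOverrides` model that is not on the list. If the allowlist is enforced server-side, say so here; if it is not, a client can name any slot llama-swap serves, so add the check in `resolve_backend` and document the error returned. The same question applies to `request.model` against `OPENROUTER_ALLOWED_MODELS` in the hybrid bullets.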
@@ -363,6 +754,15 @@ This allows runtime verification of model availability before generating insight
- `GET /insights/openrouter/models` returns `{ models, default_model, configured }` - `GET /insights/openrouter/models` returns `{ models, default_model, configured }`
for client picker UIs. for client picker UIs.
**Cross-replay matrix (chat continuation):**
- `local → local` allowed (whether served by Ollama or llama-swap; that's
a deploy-time decision, not a request-time one).
- `hybrid → hybrid` allowed.
- `hybrid → local` allowed (the inlined description replays as text).
- `local → hybrid` rejected — the stored transcript has raw images in the
first user message and OpenRouter providers don't accept that shape
consistently. Regenerate the insight in hybrid mode instead.
**Insight Chat Continuation:** **Insight Chat Continuation:**
After an agentic insight is generated, the full `Vec<ChatMessage>` transcript is After an agentic insight is generated, the full `Vec<ChatMessage>` transcript is
@@ -372,7 +772,12 @@ clients whether chat is available for a given insight.
- `POST /insights/chat` runs one turn of the agentic loop against the replayed - `POST /insights/chat` runs one turn of the agentic loop against the replayed
history. Body: `{ file_path, library?, user_message, model?, backend?, num_ctx?, history. Body: `{ file_path, library?, user_message, model?, backend?, num_ctx?,
temperature?, top_p?, top_k?, min_p?, max_iterations?, amend? }`. temperature?, top_p?, top_k?, min_p?, max_iterations?, system_prompt?, amend? }`.
`system_prompt` is a per-turn override: in append mode (default) it's applied
ephemerally — the original system message is restored before persistence so
the stored transcript keeps its baked persona. In amend mode the override
stays in place and becomes the new insight row's system message. Mirrors the
internal `annotate_system_with_budget` swap-and-restore pattern.
- `POST /insights/chat/stream` is the SSE variant — same request body, response - `POST /insights/chat/stream` is the SSE variant — same request body, response
is `text/event-stream` with events: `iteration_start`, `text` (delta), `tool_call`, is `text/event-stream` with events: `iteration_start`, `text` (delta), `tool_call`,
`tool_result`, `truncated`, `done`, plus a server-emitted `error_message` on `tool_result`, `truncated`, `done`, plus a server-emitted `error_message` on
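Review bot: the new `system_prompt` field lets the caller replace the system message, and in amend mode that replacement is persisted as the new insight row's system message, but the paragraph states no length limit and does not say whether the override counts toward the truncation budget. Suggest documenting a maximum size, that the override is included in the `num_ctx` budget, and that the append-mode restore also runs on the error path so a failed turn cannot persist the ephemeral prompt.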
@@ -404,14 +809,17 @@ Per-`(library_id, file_path)` async mutex (`AppState.insight_chat.chat_locks`)
serialises concurrent turns on the same insight so the JSON blob doesn't race. serialises concurrent turns on the same insight so the JSON blob doesn't race.
Context management is a soft bound: if the serialized history exceeds Context management is a soft bound: if the serialized history exceeds
`num_ctx - 2048` tokens (cheap 4-byte/token heuristic), the oldest `num_ctx - 2048` tokens (cheap 4-byte/token heuristic; `num_ctx` defaults
assistant-tool_call + tool_result pairs are dropped until under budget. The to `AGENTIC_CHAT_DEFAULT_NUM_CTX`, 32768, when the request omits it), the
oldest assistant-tool_call + tool_result pairs are dropped until under budget. The
initial user message (with any images) and system prompt are always preserved. initial user message (with any images) and system prompt are always preserved.
The `truncated` event / flag is surfaced to the client when a drop occurred. The `truncated` event / flag is surfaced to the client when a drop occurred.
Configurable env: Configurable env:
- `AGENTIC_CHAT_MAX_ITERATIONS` — cap on tool-calling iterations per turn - `AGENTIC_CHAT_MAX_ITERATIONS` — cap on tool-calling iterations per turn
(default 6). Per-request `max_iterations` is clamped to this cap. (default 6). Per-request `max_iterations` is clamped to this cap.
- `AGENTIC_CHAT_DEFAULT_NUM_CTX` — assumed context window for the truncation
budget when the request omits `num_ctx` (default 32768).
**Apollo Places integration (optional):** **Apollo Places integration (optional):**
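Review bot: `max_iterations` is documented as clamped to `AGENTIC_CHAT_MAX_ITERATIONS`, but the per-request `num_ctx` that drives the `num_ctx - 2048` truncation budget has no stated upper bound. A client-supplied value larger than the served model's window would skip truncation and push the overflow to the backend. Suggest clamping `num_ctx` to a configured maximum and documenting it alongside `AGENTIC_CHAT_DEFAULT_NUM_CTX`.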
Generated
+1101 -861
File diff suppressed because it is too large
+13 -2
@@ -1,6 +1,6 @@
[package] [package]
name = "image-api" name = "image-api"
version = "1.1.0" version = "1.4.0"
authors = ["Cameron Cordes <cameronc.dev@gmail.com>"] authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
edition = "2024" edition = "2024"
@@ -9,6 +9,9 @@ edition = "2024"
[profile.release] [profile.release]
lto = "thin" lto = "thin"
[profile.dev]
debug = "line-tables-only"
[dependencies] [dependencies]
actix = "0.13.1" actix = "0.13.1"
actix-web = "4" actix-web = "4"
@@ -23,7 +26,7 @@ jsonwebtoken = "9.3.0"
serde = "1" serde = "1"
serde_json = "1" serde_json = "1"
diesel = { version = "2.2.10", features = ["sqlite"] } diesel = { version = "2.2.10", features = ["sqlite"] }
libsqlite3-sys = { version = "0.35", features = ["bundled"] } libsqlite3-sys = "0.35"
diesel_migrations = "2.2.0" diesel_migrations = "2.2.0"
chrono = "0.4" chrono = "0.4"
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
@@ -59,5 +62,13 @@ ical = "0.11"
scraper = "0.20" scraper = "0.20"
base64 = "0.22" base64 = "0.22"
blake3 = "1.5" blake3 = "1.5"
image_hasher = "3.0"
bk-tree = "0.5"
async-trait = "0.1" async-trait = "0.1"
indicatif = "0.17" indicatif = "0.17"
uuid = { version = "1.10", features = ["v4", "serde"] }
# Windows lacks system sqlite3, so re-enable the bundled C build there.
# Linux/macOS use the system library (faster builds, smaller binary).
[target.'cfg(windows)'.dependencies]
libsqlite3-sys = { version = "0.35", features = ["bundled"] }
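Review bot: dropping `bundled` from `libsqlite3-sys` on Linux/macOS means the SQLite version is now whatever the host ships, while the down migrations in this change use `ALTER TABLE ... DROP COLUMN`, which one of them notes requires SQLite 3.35+. Suggest a startup check on `sqlite_version()` that fails with a clear message below the minimum, and a README line stating the minimum version and that SQLite security updates now arrive through the OS package rather than a crate bump.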
+50
@@ -147,6 +147,56 @@ so you can rewrite the saved summary from within chat.
- `AGENTIC_CHAT_MAX_ITERATIONS` - Cap on tool-calling iterations per chat turn [default: `6`] - `AGENTIC_CHAT_MAX_ITERATIONS` - Cap on tool-calling iterations per chat turn [default: `6`]
- Per-request `max_iterations` (when sent by the client) is clamped to this cap - Per-request `max_iterations` (when sent by the client) is clamped to this cap
#### Text-to-Speech (Optional)
Reads insights aloud and manages cloned voices via a Chatterbox model served
behind the same llama-swap proxy. Only requires `LLAMA_SWAP_URL` (the TTS client
is built whenever that's set — independent of `LLM_BACKEND`). Endpoints:
- `POST /tts/speech` — body `{ text, voice?, format?, exaggeration?, cfg_weight?,
temperature? }`; returns `{ audio_base64, format }`. Input is cleaned
server-side (markdown + emoji stripped, then pronunciation overrides applied —
see below) and the generation knobs are clamped
to Chatterbox's ranges. Synthesis is serialized (one at a time — the upstream
has no GPU lock of its own); a concurrent request gets a fast `429`.
- `POST /tts/speech/jobs` — durable variant for long syntheses: same body as
`/tts/speech`, returns `202 { job_id, status }` immediately. Jobs queue on the
GPU permit instead of fast-failing `429`.
- `GET /tts/speech/jobs/{id}` — poll a job: `{ job_id, status, format,
audio_base64?, error? }` with status `queued|running|done|error|cancelled`.
Results are kept in memory ~10 min after completion, then the job 404s.
- `DELETE /tts/speech/jobs/{id}` — cancel a queued/running job.
- `GET /tts/voices` — list the voice library. Served from an in-memory cache
(so the listing doesn't make llama-swap spin up the TTS model and evict the
resident LLM); pass `?refresh=1` to force an upstream re-query. The cache is
invalidated by voice create/delete.
- `POST /tts/voices/upload` — multipart `voice_name` + `voice_file`; clone a
voice from an uploaded clip (≤25 MB).
- `POST /tts/voices/from-library` — body `{ voice_name, path, library? }`; clone
from a library file (audio forwarded as-is; video has its audio extracted via
ffmpeg).
- `DELETE /tts/voices/{name}` — remove a cloned voice from the library.
Created voice names are tagged with the ref-clip cap in effect (e.g.
`grandma-30s`) so the library shows which reference length produced each clone.
Words the model mispronounces (place names, initialisms) can be rewritten
before synthesis via a JSON map — copy `tts_pronunciations.example.json` to
`tts_pronunciations.json` and edit; changes apply without a restart. Full
matching rules are documented in `src/ai/pronunciation.rs`.
Env:
- `TTS_PRONUNCIATIONS_PATH` - pronunciation-override JSON file
[default: `tts_pronunciations.json` in the working directory]
- `LLAMA_SWAP_TTS_MODEL` - TTS model id in llama-swap's `config.yaml` [default: `chatterbox`]
- `LLAMA_SWAP_TTS_VOICE` - default voice used when a `/tts/speech` request omits `voice` (optional)
- `LLAMA_SWAP_TTS_REF_SECONDS` - max voice-clone reference clip length in seconds
[default: `30`]. Reference audio is ffmpeg-normalized to mono 24 kHz WAV (so any
source format works); Chatterbox is zero-shot, so a clean ~1020s sample is the
sweet spot — more rarely helps.
- `LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS` - per-request synthesis timeout in
seconds [default: `600`]. Long insights are chunked + synthesized server-side
and can take minutes; this is separate from (and overrides, for `/tts/speech`)
the shared `LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS`.
#### Fallback Behavior #### Fallback Behavior
- Primary server is tried first with 5-second connection timeout - Primary server is tried first with 5-second connection timeout
- On failure, automatically falls back to secondary server (if configured) - On failure, automatically falls back to secondary server (if configured)
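Review bot: `POST /tts/speech/jobs` is described as queueing on the GPU permit instead of returning `429`, with results held in memory for about 10 minutes, but no cap is given on queued jobs, on `text` length, or on how many finished results are retained. Suggest documenting (or adding) a queue limit that still returns `429` when full. For `POST /tts/voices/from-library`, state that `path` is resolved inside the named library root and that paths escaping it are rejected, and say which characters `voice_name` may contain, since it is reused in `DELETE /tts/voices/{name}`.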
@@ -0,0 +1 @@
DROP INDEX IF EXISTS idx_tags_name_nocase;
@@ -0,0 +1,28 @@
-- Tags only enforced uniqueness in application code (the add_tag handler
-- looks up by name before inserting). The schema itself accepted dupes,
-- so a divergent code path could land two tags with the same name. Now
-- that we expose a rename endpoint we want a hard guarantee: case-
-- insensitive UNIQUE on tags.name.
-- Pre-flight: collapse exact-name duplicates (case-insensitive) onto the
-- lowest-id row before adding the constraint, otherwise the index
-- creation fails on any DB that ever produced dupes. On a clean DB this
-- is a no-op.
UPDATE tagged_photo
SET tag_id = (
SELECT MIN(t2.id) FROM tags t2
WHERE LOWER(t2.name) = LOWER((SELECT name FROM tags WHERE id = tagged_photo.tag_id))
)
WHERE tag_id IN (
SELECT t.id FROM tags t
WHERE t.id <> (
SELECT MIN(t2.id) FROM tags t2 WHERE LOWER(t2.name) = LOWER(t.name)
)
);
DELETE FROM tags
WHERE id <> (
SELECT MIN(t2.id) FROM tags t2 WHERE LOWER(t2.name) = LOWER(tags.name)
);
CREATE UNIQUE INDEX idx_tags_name_nocase ON tags (name COLLATE NOCASE);
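Review bot: the pre-flight `UPDATE tagged_photo SET tag_id = ...` can leave one photo with two rows for the same tag when it carried both case variants (for example "Beach" and "beach"). If `tagged_photo` has a uniqueness constraint on the tag/photo pair the migration fails at that statement; if it does not, duplicate rows remain. Suggest deleting the rows that would collide before the UPDATE, and noting in the down migration that the merge is not reversed.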
@@ -0,0 +1,5 @@
DROP INDEX IF EXISTS idx_photo_insights_content_hash;
ALTER TABLE photo_insights DROP COLUMN content_hash;
DROP INDEX IF EXISTS idx_tagged_photo_content_hash;
ALTER TABLE tagged_photo DROP COLUMN content_hash;
@@ -0,0 +1,64 @@
-- Phase B of the multi-library data-model rollout: add a nullable
-- `content_hash` column to derived/user-intent tables that should follow
-- the bytes rather than the path. Reads will prefer hash-key joins and
-- fall back to rel_path while the column is null. A separate
-- reconciliation pass collapses duplicates as the column populates.
--
-- See CLAUDE.md → "Multi-library data model" for the policy. The
-- reference implementation is `face_detections`, which has been
-- hash-keyed since it was introduced.
--
-- Tables in this migration:
-- * tagged_photo — user-intent (tags follow the bytes)
-- * photo_insights — intrinsic to bytes (LLM-generated description)
--
-- favorites is the natural third candidate but its DAO is barely used in
-- v1 and the row count is tiny; deferring lets this migration stay
-- focused on the high-volume tables that drive cross-library overhead.
-- ---------------------------------------------------------------------------
-- tagged_photo
-- ---------------------------------------------------------------------------
ALTER TABLE tagged_photo ADD COLUMN content_hash TEXT;
-- Backfill: for each tagged_photo row, find the content_hash for its
-- rel_path. tagged_photo doesn't carry a library_id, so a rel_path that
-- exists under multiple libraries with different content is genuinely
-- ambiguous — we take the first matching image_exif row. The
-- reconciliation pass at runtime cleans up any rows that resolve
-- differently once a hash is known per library.
UPDATE tagged_photo
SET content_hash = (
SELECT content_hash FROM image_exif
WHERE image_exif.rel_path = tagged_photo.rel_path
AND image_exif.content_hash IS NOT NULL
LIMIT 1
)
WHERE content_hash IS NULL;
-- Hash-key index. Partial (only non-null rows) to keep the index small
-- during the transitional window where most rows are still null.
CREATE INDEX idx_tagged_photo_content_hash
ON tagged_photo (content_hash)
WHERE content_hash IS NOT NULL;
-- ---------------------------------------------------------------------------
-- photo_insights
-- ---------------------------------------------------------------------------
ALTER TABLE photo_insights ADD COLUMN content_hash TEXT;
-- Backfill keyed on (library_id, rel_path) — photo_insights already
-- carries library_id, so the resolution is unambiguous.
UPDATE photo_insights
SET content_hash = (
SELECT content_hash FROM image_exif
WHERE image_exif.library_id = photo_insights.library_id
AND image_exif.rel_path = photo_insights.rel_path
AND image_exif.content_hash IS NOT NULL
LIMIT 1
)
WHERE content_hash IS NULL;
CREATE INDEX idx_photo_insights_content_hash
ON photo_insights (content_hash)
WHERE content_hash IS NOT NULL;
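Review bot: both backfill subqueries use `LIMIT 1` without an `ORDER BY`, so "the first matching image_exif row" is whichever row the query plan happens to return, and for `tagged_photo` that decides which bytes a user's tags follow. Where a rel_path maps to more than one distinct `content_hash`, consider leaving `content_hash` NULL (the read path already falls back to rel_path) rather than picking one, or at least add a stable ordering such as `ORDER BY image_exif.library_id, image_exif.id` so reruns and restores produce the same result. The same ordering would be harmless in the `photo_insights` backfill, where the comment says the match is already unambiguous.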
@@ -0,0 +1,2 @@
-- Requires SQLite 3.35+ for ALTER TABLE DROP COLUMN.
ALTER TABLE libraries DROP COLUMN enabled;
@@ -0,0 +1,14 @@
-- Operator-controlled kill switch for a library. When `enabled = 0` the
-- watcher tick skips that library entirely — before the availability
-- probe, before ingest, before any maintenance pass — and the orphan-GC
-- all-online check treats it as out-of-scope rather than as a blocker.
--
-- The intended workflow is staging a new mount: insert with enabled=0,
-- verify the row appears in /libraries with enabled=false, then UPDATE
-- to 1 to start ingest. Same toggle works as a maintenance kill switch
-- after the fact ("don't keep probing this NAS while I'm rebooting it").
--
-- Default 1 so every existing library stays running on upgrade — no
-- behavior change without an explicit flip.
ALTER TABLE libraries ADD COLUMN enabled BOOLEAN NOT NULL DEFAULT 1;
@@ -0,0 +1,2 @@
-- Requires SQLite 3.35+ for ALTER TABLE DROP COLUMN.
ALTER TABLE libraries DROP COLUMN excluded_dirs;
@@ -0,0 +1,14 @@
-- Per-library excluded directories.
--
-- The global EXCLUDED_DIRS env var is the right knob for excludes that
-- every library shares (Synology @eaDir, .thumbnails, etc.). It's a
-- poor fit for "exclude this subtree from THIS library only", which
-- the natural use case for is mounting a parent directory while
-- another library already covers a child subtree underneath.
--
-- This column is parsed comma-separated, same shape as the env var,
-- and the watcher / memories / thumbnail walks each apply
-- (env_globals library.excluded_dirs) when scanning the library.
-- NULL = no extra excludes; the global env var still applies.
ALTER TABLE libraries ADD COLUMN excluded_dirs TEXT;
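Review bot: the header comment does not define the entry format for `excluded_dirs`, and the expression `(env_globals library.excluded_dirs)` has no operator, so it is unclear whether the two lists are unioned or one replaces the other. Please state whether entries are directory names or paths relative to the library root, whether surrounding whitespace is trimmed, and that a directory name containing a comma cannot be expressed in a comma-separated column. This matters for the stated use case: if an exclude silently fails to match, the child subtree is ingested under both libraries. The sentence "which the natural use case for is mounting a parent directory" also needs rewording.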
@@ -0,0 +1,8 @@
DROP INDEX IF EXISTS idx_image_exif_duplicate_of_hash;
DROP INDEX IF EXISTS idx_image_exif_dhash;
DROP INDEX IF EXISTS idx_image_exif_phash;
ALTER TABLE image_exif DROP COLUMN duplicate_decided_at;
ALTER TABLE image_exif DROP COLUMN duplicate_of_hash;
ALTER TABLE image_exif DROP COLUMN dhash_64;
ALTER TABLE image_exif DROP COLUMN phash_64;
@@ -0,0 +1,41 @@
-- Adds perceptual-hash signals + soft-mark resolution state to image_exif so
-- the duplicates surface in Apollo can group near-duplicates (re-encoded,
-- resized, format-converted copies) and let the user demote losers without
-- touching the file on disk. Image-only for v1: phash_64/dhash_64 are NULL
-- on videos and on images that fail to decode. See Apollo CLAUDE.md →
-- Duplicate detection / Caching layer for the policy.
--
-- Soft-mark columns are media-type-agnostic — when video perceptual hashing
-- arrives, it lives in a separate hash-keyed companion table and reuses the
-- same duplicate_of_hash / duplicate_decided_at machinery.
-- pHash (DCT, 64-bit) packed as i64 for fast XOR + popcount Hamming.
ALTER TABLE image_exif ADD COLUMN phash_64 BIGINT;
-- dHash (gradient, 64-bit). Cheap, robust to compression/resize. Stored
-- alongside pHash so the query layer can fall back if either is null.
ALTER TABLE image_exif ADD COLUMN dhash_64 BIGINT;
-- When non-null, this row is a soft-marked duplicate of the row whose
-- content_hash matches. The duplicate file stays on disk; the default
-- /photos listing filters it out. /photos?include_duplicates=true opts
-- back in (the Apollo duplicates modal uses this).
ALTER TABLE image_exif ADD COLUMN duplicate_of_hash TEXT;
-- Unix seconds of the resolve. Distinguishes "never reviewed" from
-- "reviewed and resolved" for the Apollo include_resolved toggle.
ALTER TABLE image_exif ADD COLUMN duplicate_decided_at BIGINT;
-- Partial indexes — the columns are NULL for the vast majority of rows
-- during the transitional window and forever for videos / decode failures.
CREATE INDEX idx_image_exif_phash
ON image_exif (phash_64)
WHERE phash_64 IS NOT NULL;
CREATE INDEX idx_image_exif_dhash
ON image_exif (dhash_64)
WHERE dhash_64 IS NOT NULL;
CREATE INDEX idx_image_exif_duplicate_of_hash
ON image_exif (duplicate_of_hash)
WHERE duplicate_of_hash IS NOT NULL;
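Review bot: `idx_image_exif_phash` and `idx_image_exif_dhash` are ordinary B-tree indexes, which answer equality and range lookups but not the "XOR + popcount Hamming" comparison the column comment describes, so they will not speed up near-duplicate grouping. Please add a line saying which query each index serves (exact-match lookup, or enumerating only the rows that have a hash), or drop them until a query needs them. Separately, nothing in the schema stops `duplicate_of_hash` from being set to the row's own `content_hash`; if the resolve handler does not reject that, a row could hide itself from the default `/photos` listing.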
@@ -0,0 +1,2 @@
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
ALTER TABLE image_exif DROP COLUMN date_taken_source;
@@ -0,0 +1,24 @@
-- Tracks where a row's `date_taken` was sourced so the canonical-date
-- waterfall (kamadak-exif → exiftool → filename → earliest_fs_time) is
-- visible to debugging and to the per-tick backfill drain that re-runs
-- weak sources once stronger ones become available (e.g. exiftool gets
-- installed on a deploy that didn't have it). See CLAUDE.md → Memories
-- canonical-date pipeline.
--
-- Values:
-- 'exif' — kamadak-exif read DateTime/DateTimeOriginal directly
-- 'exiftool' — exiftool fallback caught a video / MakerNote / QuickTime tag
-- 'filename' — extract_date_from_filename matched a known pattern
-- 'fs_time' — fell through to earliest_fs_time(metadata)
--
-- NULL when `date_taken` itself is NULL (no source resolved the date).
ALTER TABLE image_exif ADD COLUMN date_taken_source TEXT;
-- Partial index for the per-tick backfill drain: targets rows that need
-- re-resolution (no date yet, or only the weakest source resolved it).
-- Filename-sourced rows are intentionally excluded — the regex is
-- authoritative when it matches and re-running exiftool wouldn't change
-- the answer.
CREATE INDEX idx_image_exif_date_backfill
ON image_exif (library_id, id)
WHERE date_taken IS NULL OR date_taken_source = 'fs_time';
@@ -0,0 +1,9 @@
-- Reverting this migration is a no-op: the labels we wrote in `up.sql`
-- are correct under any state of the schema (every dated row was indeed
-- exif-sourced before the resolver landed), and there's no signal that
-- distinguishes "labelled by this migration" from "labelled by the
-- ingest path post-resolver". Clearing them would break the drain's
-- eligibility filter again.
--
-- The companion migration `2026-05-06-000000_add_date_taken_source` is
-- the one to revert if you need to remove the column entirely.
@@ -0,0 +1,20 @@
-- Backfill `date_taken_source` for rows that pre-date the canonical-date
-- pipeline. Before the resolver landed, `image_exif.date_taken` could
-- only be populated via `exif::extract_exif_from_path` (kamadak-exif)
-- on the file-watcher, upload, or GPS-write paths. The resolver column
-- migration added `date_taken_source` defaulting to NULL, so every
-- historical row with a date is currently unlabelled — and the
-- per-tick drain skips them because its eligibility predicate is
-- `date_taken IS NULL OR date_taken_source = 'fs_time'`.
--
-- Label them `'exif'` once and let the drain take over from here. Safe
-- because every code path that wrote `date_taken` prior to the
-- resolver was a kamadak-exif read — there was no other source.
--
-- Idempotent: re-running this migration on a DB that has already been
-- backfilled is a no-op (the WHERE clause matches nothing the second
-- time around).
UPDATE image_exif
SET date_taken_source = 'exif'
WHERE date_taken IS NOT NULL
AND date_taken_source IS NULL;
@@ -0,0 +1,2 @@
ALTER TABLE image_exif DROP COLUMN original_date_taken_source;
ALTER TABLE image_exif DROP COLUMN original_date_taken;
@@ -0,0 +1,15 @@
-- Manual date_taken override: when an operator overrides a row's date via
-- POST /image/exif/date, the prior `(date_taken, date_taken_source)` is
-- snapshotted into these columns and the live columns hold the new value
-- with `date_taken_source = 'manual'`. POST /image/exif/date/clear restores
-- the pair and nulls the originals.
--
-- The waterfall source-name set is now:
-- 'exif' | 'exiftool' | 'filename' | 'fs_time' | 'manual'
--
-- The `idx_image_exif_date_backfill` partial index already filters to
-- `date_taken IS NULL OR date_taken_source = 'fs_time'`, so 'manual' rows
-- are naturally excluded from the per-tick backfill drain — no index
-- change needed.
ALTER TABLE image_exif ADD COLUMN original_date_taken BIGINT;
ALTER TABLE image_exif ADD COLUMN original_date_taken_source TEXT;
@@ -0,0 +1,43 @@
-- Drop the persona-scoping column on entity_facts via the table-rebuild
-- dance for SQLite-version portability (matches the pattern in
-- 2026-04-20-000000_add_backend_to_insights/down.sql).
DROP INDEX IF EXISTS idx_entity_facts_persona;
CREATE TABLE entity_facts_backup AS
SELECT id, subject_entity_id, predicate, object_entity_id, object_value,
source_photo, source_insight_id, confidence, status, created_at
FROM entity_facts;
DROP TABLE entity_facts;
CREATE TABLE entity_facts (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
subject_entity_id INTEGER NOT NULL,
predicate TEXT NOT NULL,
object_entity_id INTEGER,
object_value TEXT,
source_photo TEXT,
source_insight_id INTEGER,
confidence REAL NOT NULL DEFAULT 0.6,
status TEXT NOT NULL DEFAULT 'active',
created_at BIGINT NOT NULL,
CONSTRAINT fk_ef_subject FOREIGN KEY (subject_entity_id) REFERENCES entities(id) ON DELETE CASCADE,
CONSTRAINT fk_ef_object FOREIGN KEY (object_entity_id) REFERENCES entities(id) ON DELETE SET NULL,
CONSTRAINT fk_ef_insight FOREIGN KEY (source_insight_id) REFERENCES photo_insights(id) ON DELETE SET NULL,
CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL)
);
INSERT INTO entity_facts
SELECT id, subject_entity_id, predicate, object_entity_id, object_value,
source_photo, source_insight_id, confidence, status, created_at
FROM entity_facts_backup;
DROP TABLE entity_facts_backup;
CREATE INDEX idx_entity_facts_subject ON entity_facts(subject_entity_id);
CREATE INDEX idx_entity_facts_predicate ON entity_facts(predicate);
CREATE INDEX idx_entity_facts_status ON entity_facts(status);
CREATE INDEX idx_entity_facts_source_photo ON entity_facts(source_photo);
DROP INDEX IF EXISTS idx_personas_user;
DROP TABLE IF EXISTS personas;
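Review bot: `INSERT INTO entity_facts SELECT id, subject_entity_id, ...` in the rebuild has no target column list, so it depends on the SELECT order matching the `CREATE TABLE` order exactly. The later rebuilds in this series name the target columns explicitly; doing the same here keeps the down migration safe if the table definition is ever reordered or extended.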
@@ -0,0 +1,64 @@
-- Personas live server-side now (mobile previously stored them in
-- AsyncStorage only). Each user gets the three built-ins seeded; custom
-- personas land here too via POST /personas or POST /personas/migrate.
--
-- `entity_facts` gains a persona_id so each persona accumulates its own
-- voice over a shared entity graph (entities themselves stay unscoped).
-- Existing rows backfill to 'default' via the column DEFAULT — that
-- becomes the historical baseline. The `include_all_memories` flag on
-- personas lets any persona opt back into reading the full pool.
CREATE TABLE personas (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
user_id INTEGER NOT NULL,
persona_id TEXT NOT NULL,
name TEXT NOT NULL,
system_prompt TEXT NOT NULL,
is_built_in BOOLEAN NOT NULL DEFAULT FALSE,
include_all_memories BOOLEAN NOT NULL DEFAULT FALSE,
created_at BIGINT NOT NULL,
updated_at BIGINT NOT NULL,
UNIQUE(user_id, persona_id),
CONSTRAINT fk_personas_user FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
);
CREATE INDEX idx_personas_user ON personas(user_id);
-- Seed built-ins for every existing user. System prompts copied verbatim
-- from FileViewer-React/hooks/usePersonas.tsx so server and client agree
-- on the canonical voice for each built-in.
INSERT INTO personas (user_id, persona_id, name, system_prompt, is_built_in, created_at, updated_at)
SELECT
u.id,
'default',
'Default Assistant',
'You are my long-term memory assistant. Use only the information provided. Do not invent details. Respond in 36 sentences in third person, leading with the most concrete moment from the photo and the surrounding context. Plain prose, no headings.',
TRUE,
strftime('%s', 'now') * 1000,
strftime('%s', 'now') * 1000
FROM users u
UNION ALL
SELECT
u.id,
'journal',
'Personal Journal',
'You are a personal journal writer. Write in first person, present tense, with warmth and reflection — focusing on emotions and meaningful moments. Use only the information provided; do not invent details. Aim for 48 sentences in a single flowing paragraph, no headings.',
TRUE,
strftime('%s', 'now') * 1000,
strftime('%s', 'now') * 1000
FROM users u
UNION ALL
SELECT
u.id,
'factual',
'Factual Reporter',
'You are a factual memory recorder. Be precise, objective, and concise. Lead with the date and place, then list what / when / who in 24 short sentences. Use only the information provided; if a detail is unknown, say so rather than guessing.',
TRUE,
strftime('%s', 'now') * 1000,
strftime('%s', 'now') * 1000
FROM users u;
-- Persona scoping on facts only. Entities and entity_photo_links stay
-- shared (real-world referents and shared photo ↔ entity associations).
ALTER TABLE entity_facts ADD COLUMN persona_id TEXT NOT NULL DEFAULT 'default';
CREATE INDEX idx_entity_facts_persona ON entity_facts(persona_id);
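Review bot: the three seeded `system_prompt` literals read "Respond in 36 sentences", "Aim for 48 sentences" and "in 24 short sentences" as shown here. If these were ranges in `FileViewer-React/hooks/usePersonas.tsx`, the separator has been lost and every existing user will be seeded with the wrong length instruction; please compare the literals byte for byte against the client source, since the comment says they are copied verbatim. Also note that `strftime('%s', 'now') * 1000` stores milliseconds in `created_at` and `updated_at`, while other columns in this series are documented as unix seconds; a comment on the unit would prevent mixed comparisons later.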
@@ -0,0 +1,47 @@
-- Reverse 2026-05-10-000000_entity_facts_persona_fk: drop the
-- composite FK and the user_id column via the same rebuild pattern.
DROP INDEX IF EXISTS idx_entity_facts_user_persona;
DROP INDEX IF EXISTS idx_entity_facts_persona;
DROP INDEX IF EXISTS idx_entity_facts_source_photo;
DROP INDEX IF EXISTS idx_entity_facts_status;
DROP INDEX IF EXISTS idx_entity_facts_predicate;
DROP INDEX IF EXISTS idx_entity_facts_subject;
ALTER TABLE entity_facts RENAME TO entity_facts_old;
CREATE TABLE entity_facts (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
subject_entity_id INTEGER NOT NULL,
predicate TEXT NOT NULL,
object_entity_id INTEGER,
object_value TEXT,
source_photo TEXT,
source_insight_id INTEGER,
confidence REAL NOT NULL DEFAULT 0.6,
status TEXT NOT NULL DEFAULT 'active',
created_at BIGINT NOT NULL,
persona_id TEXT NOT NULL DEFAULT 'default',
CONSTRAINT fk_ef_subject FOREIGN KEY (subject_entity_id) REFERENCES entities(id) ON DELETE CASCADE,
CONSTRAINT fk_ef_object FOREIGN KEY (object_entity_id) REFERENCES entities(id) ON DELETE SET NULL,
CONSTRAINT fk_ef_insight FOREIGN KEY (source_insight_id) REFERENCES photo_insights(id) ON DELETE SET NULL,
CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL)
);
INSERT INTO entity_facts
(id, subject_entity_id, predicate, object_entity_id, object_value,
source_photo, source_insight_id, confidence, status, created_at,
persona_id)
SELECT
id, subject_entity_id, predicate, object_entity_id, object_value,
source_photo, source_insight_id, confidence, status, created_at,
persona_id
FROM entity_facts_old;
DROP TABLE entity_facts_old;
CREATE INDEX idx_entity_facts_subject ON entity_facts(subject_entity_id);
CREATE INDEX idx_entity_facts_predicate ON entity_facts(predicate);
CREATE INDEX idx_entity_facts_status ON entity_facts(status);
CREATE INDEX idx_entity_facts_source_photo ON entity_facts(source_photo);
CREATE INDEX idx_entity_facts_persona ON entity_facts(persona_id);
@@ -0,0 +1,82 @@
-- Add a real foreign key from entity_facts to personas. Until now,
-- entity_facts.persona_id was a free-form string with no integrity
-- guarantee — deleting a persona orphaned its facts, which then sat
-- forever in the readable-only-via-PersonaFilter::All hive-mind view.
--
-- personas is keyed (user_id, persona_id) so the FK has to be
-- composite. That requires entity_facts to carry user_id too, which
-- has the side benefit of fixing multi-user fact leakage on the read
-- path (without it, two users with the same 'default' persona would
-- see each other's default-scoped facts).
--
-- SQLite can't ALTER TABLE to add an FK; the table-rebuild dance is
-- the only way. Pattern matches 2026-05-09's down.sql and the older
-- 2026-04-20-000000 migration.
DROP INDEX IF EXISTS idx_entity_facts_subject;
DROP INDEX IF EXISTS idx_entity_facts_predicate;
DROP INDEX IF EXISTS idx_entity_facts_status;
DROP INDEX IF EXISTS idx_entity_facts_source_photo;
DROP INDEX IF EXISTS idx_entity_facts_persona;
ALTER TABLE entity_facts RENAME TO entity_facts_old;
CREATE TABLE entity_facts (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
subject_entity_id INTEGER NOT NULL,
predicate TEXT NOT NULL,
object_entity_id INTEGER,
object_value TEXT,
source_photo TEXT,
source_insight_id INTEGER,
confidence REAL NOT NULL DEFAULT 0.6,
status TEXT NOT NULL DEFAULT 'active',
created_at BIGINT NOT NULL,
persona_id TEXT NOT NULL DEFAULT 'default',
user_id INTEGER NOT NULL DEFAULT 1,
CONSTRAINT fk_ef_subject FOREIGN KEY (subject_entity_id) REFERENCES entities(id) ON DELETE CASCADE,
CONSTRAINT fk_ef_object FOREIGN KEY (object_entity_id) REFERENCES entities(id) ON DELETE SET NULL,
CONSTRAINT fk_ef_insight FOREIGN KEY (source_insight_id) REFERENCES photo_insights(id) ON DELETE SET NULL,
CONSTRAINT fk_ef_persona FOREIGN KEY (user_id, persona_id) REFERENCES personas(user_id, persona_id) ON DELETE CASCADE,
CHECK (object_entity_id IS NOT NULL OR object_value IS NOT NULL)
);
-- Backfill: assign each legacy fact to the user that owns the matching
-- persona. Built-ins are seeded per-user with the same persona_id
-- string for everyone, so MIN(user_id) deterministically picks the
-- earliest registered user (typically user 1, the operator). Custom
-- persona_ids exist for at most one user, so MIN is also unique.
-- Falls back to user_id=1 when no matching persona row exists; in that
-- case the FK below would still fail, but legacy rows shouldn't be in
-- that state because 2026-05-09 ADD COLUMN defaulted persona_id to
-- 'default', which is seeded for every user.
INSERT INTO entity_facts
(id, subject_entity_id, predicate, object_entity_id, object_value,
source_photo, source_insight_id, confidence, status, created_at,
persona_id, user_id)
SELECT
old.id,
old.subject_entity_id,
old.predicate,
old.object_entity_id,
old.object_value,
old.source_photo,
old.source_insight_id,
old.confidence,
old.status,
old.created_at,
old.persona_id,
COALESCE(
(SELECT MIN(p.user_id) FROM personas p WHERE p.persona_id = old.persona_id),
1
)
FROM entity_facts_old old;
DROP TABLE entity_facts_old;
CREATE INDEX idx_entity_facts_subject ON entity_facts(subject_entity_id);
CREATE INDEX idx_entity_facts_predicate ON entity_facts(predicate);
CREATE INDEX idx_entity_facts_status ON entity_facts(status);
CREATE INDEX idx_entity_facts_source_photo ON entity_facts(source_photo);
CREATE INDEX idx_entity_facts_persona ON entity_facts(persona_id);
CREATE INDEX idx_entity_facts_user_persona ON entity_facts(user_id, persona_id);
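Review bot: `user_id INTEGER NOT NULL DEFAULT 1` keeps its default on the rebuilt table, so any insert path that forgets to set `user_id` silently attributes the fact to user 1, which works against the leakage fix described in the header comment. The default is only needed for the backfill, and the backfill already supplies a value through `COALESCE(..., 1)`, so the column can be declared `NOT NULL` with no default and omissions will fail loudly. Two smaller points: the `MIN(p.user_id)` backfill reassigns every legacy fact under a built-in persona to the earliest user, which on a multi-user install moves other users' facts to that account and should be called out as a known limitation; and whether the fallback row "would still fail" the FK depends on foreign key enforcement being on for the migration connection, so it is worth stating that assumption.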
@@ -0,0 +1,5 @@
-- SQLite can drop columns since 3.35 (March 2021); embedded
-- libsqlite3-sys is well past that. Drop in reverse insert order so
-- a partial down still leaves the schema valid.
ALTER TABLE entity_facts DROP COLUMN valid_until;
ALTER TABLE entity_facts DROP COLUMN valid_from;
@@ -0,0 +1,25 @@
-- Add valid-time columns to entity_facts.
--
-- entity_facts already has created_at — *transaction time*, the
-- moment WE recorded the fact. That's not the same as the real-world
-- period the fact was true. "Cameron is_in_relationship_with X" was
-- only true during a window; recording it in 2026 doesn't make it
-- true today. Without the distinction, every former relationship,
-- former job, former address reads as currently-true.
--
-- Adding two BIGINT NULL columns: valid_from / valid_until (unix
-- seconds). NULL means "unbounded on that side" — `valid_from IS
-- NULL` reads as "always-true-back-to-the-beginning",
-- `valid_until IS NULL` as "still-true-now-or-unknown". Both NULL =
-- temporal validity unknown (current state of all legacy rows).
--
-- Conflict detection refines accordingly: same-predicate facts with
-- different objects stop flagging when their intervals are disjoint
-- ("lives_in NYC 2018-2020" and "lives_in SF 2020-present" are both
-- valid, just at different times).
ALTER TABLE entity_facts ADD COLUMN valid_from BIGINT;
ALTER TABLE entity_facts ADD COLUMN valid_until BIGINT;
-- Optional partial index for time-bounded scans. Skipped for now —
-- conflict detection runs per-entity (small N) and doesn't need it.
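Review bot: the comment's own example ("lives_in NYC 2018-2020" and "lives_in SF 2020-present") has two intervals that share an endpoint, so the disjointness rule needs an explicit boundary convention. Please document that intervals are half-open, with `valid_until` exclusive, or conflict detection will flag every cleanly superseded pair. Nothing here prevents `valid_from > valid_until` either; a CHECK or a DAO-level validation would keep inverted intervals out of the table.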
@@ -0,0 +1,2 @@
DROP INDEX IF EXISTS idx_entity_facts_superseded_by;
ALTER TABLE entity_facts DROP COLUMN superseded_by;
@@ -0,0 +1,31 @@
-- Add a supersession pointer to entity_facts.
--
-- Status alone is a one-way trapdoor: 'rejected' loses the link
-- between the rejected fact and the one that replaced it. For
-- evolving facts (Cameron's relationship, employer, address) the
-- curator wants to *replace* a stale fact with a new one and keep
-- the history readable: "from 2018 until 2022 this was true, then
-- it became this other thing".
--
-- A nullable INTEGER column pointing at another entity_facts.id —
-- no FK constraint because SQLite can't ALTER ADD COLUMN with REFs;
-- the DAO's delete_fact clears dangling pointers in the same
-- transaction as the parent delete to keep the column honest.
--
-- A status of 'superseded' on the old fact (alongside the existing
-- active / reviewed / rejected) signals "replaced by a newer
-- claim". Read paths already filter 'rejected' out of the active
-- view; the curation UI will treat 'superseded' the same way for
-- conflict detection so they don't keep flagging.
--
-- Pairs with the valid-time columns from 2026-05-10-000100: the
-- supersede action auto-stamps the old fact's `valid_until` from
-- the new fact's `valid_from`, closing the interval cleanly.
ALTER TABLE entity_facts ADD COLUMN superseded_by INTEGER;
-- Helpful index for "show me what superseded this fact" walks
-- (rare today; cheap to add now while the table is small).
CREATE INDEX idx_entity_facts_superseded_by
ON entity_facts(superseded_by)
WHERE superseded_by IS NOT NULL;
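Review bot: the reason given for omitting the foreign key ("SQLite can't ALTER ADD COLUMN with REFs") is not accurate. SQLite accepts a REFERENCES clause in `ADD COLUMN` as long as the new column's default is NULL, which is the case for `superseded_by INTEGER`. Declaring it as `superseded_by INTEGER REFERENCES entity_facts(id) ON DELETE SET NULL` would have the database clear dangling pointers instead of relying solely on `delete_fact`, provided foreign key enforcement is enabled on the connection. If the FK is left out on purpose, please correct the comment so the next reader does not take the limitation as fact.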
@@ -0,0 +1,4 @@
DROP INDEX IF EXISTS idx_entity_facts_created_by_backend;
DROP INDEX IF EXISTS idx_entity_facts_created_by_model;
ALTER TABLE entity_facts DROP COLUMN created_by_backend;
ALTER TABLE entity_facts DROP COLUMN created_by_model;
@@ -0,0 +1,30 @@
-- Track which model + backend generated each fact so the curator
-- can audit which configurations produce trustworthy knowledge.
--
-- photo_insights already carries `model_version` + `backend`, and
-- entity_facts.source_insight_id links to it — but:
-- 1. source_insight_id is only set after an insight is stored
-- (post-loop), so chat-continuation facts and facts whose insight
-- was regenerated lose the link.
-- 2. JOINing for every read is more friction than just embedding the
-- provenance on the fact row itself.
-- 3. Manual facts (POST /knowledge/facts) have no insight at all and
-- need to record "manual" as their provenance.
--
-- Two nullable TEXT columns are enough for the audit use case: model
-- (e.g. "qwen2.5:7b", "anthropic/claude-sonnet-4") and backend
-- ("local", "hybrid", "manual"). Pre-existing rows leave both NULL —
-- legacy facts predate this tracking and can't be back-filled
-- reliably from training_messages without burning compute.
ALTER TABLE entity_facts ADD COLUMN created_by_model TEXT;
ALTER TABLE entity_facts ADD COLUMN created_by_backend TEXT;
-- Indexes are cheap and useful for "show me all facts from model X"
-- audit queries — partial so the legacy NULL rows don't bloat them.
CREATE INDEX idx_entity_facts_created_by_model
ON entity_facts(created_by_model)
WHERE created_by_model IS NOT NULL;
CREATE INDEX idx_entity_facts_created_by_backend
ON entity_facts(created_by_backend)
WHERE created_by_backend IS NOT NULL;
@@ -0,0 +1 @@
ALTER TABLE personas DROP COLUMN reviewed_only_facts;
@@ -0,0 +1,16 @@
-- Per-persona toggle: when true, agent reads only see facts whose
-- status is exactly 'reviewed' (human-verified). When false (the
-- default), agent reads see 'active' OR 'reviewed' — everything not
-- rejected or superseded.
--
-- The mobile app surfaces this as "Strict mode" on the persona
-- editor: useful when you want a persona's chat to be grounded
-- exclusively on the curated subset, e.g. for tasks where
-- hallucinated agent claims are particularly costly.
--
-- Note: this is separate from `include_all_memories` (which unions
-- across personas for hive-mind reads). Reviewed-only operates on
-- the status axis; include_all_memories operates on the persona-
-- scope axis. They compose freely.
ALTER TABLE personas ADD COLUMN reviewed_only_facts BOOLEAN NOT NULL DEFAULT 0;
@@ -0,0 +1,5 @@
ALTER TABLE personas DROP COLUMN allow_agent_corrections;
DROP INDEX IF EXISTS idx_entity_facts_last_modified_at;
ALTER TABLE entity_facts DROP COLUMN last_modified_at;
ALTER TABLE entity_facts DROP COLUMN last_modified_by_backend;
ALTER TABLE entity_facts DROP COLUMN last_modified_by_model;
@@ -0,0 +1,30 @@
-- Three coupled changes for agent self-correction safety:
--
-- 1. `entity_facts.last_modified_by_*` + `last_modified_at` track who
-- most recently mutated each fact. `created_by_*` from migration
-- 2026-05-10-000300 records who first wrote the row; this records
-- who last *changed* it. Separate columns so the create vs update
-- audit is independently grep-able ("show me every fact gpt-5
-- altered last week" stays a single index scan).
--
-- 2. `personas.allow_agent_corrections` is the gate for the new
-- agent-side `update_fact` / `supersede_fact` tools. Default OFF —
-- a fresh persona's agent can create but can't alter or replace.
-- Operator opts in per-persona after the model has earned trust,
-- typically via the strict-mode flow (curate, then ratchet up
-- agent autonomy as confidence rises). Parallel in shape to
-- `reviewed_only_facts` from 2026-05-10-000400; they compose.
--
-- 3. Index on `last_modified_at` (partial, NOT NULL) for the
-- audit-feed reads in the curation UI ("show recent agent edits
-- sorted newest first").
ALTER TABLE entity_facts ADD COLUMN last_modified_by_model TEXT;
ALTER TABLE entity_facts ADD COLUMN last_modified_by_backend TEXT;
ALTER TABLE entity_facts ADD COLUMN last_modified_at BIGINT;
CREATE INDEX idx_entity_facts_last_modified_at
ON entity_facts(last_modified_at)
WHERE last_modified_at IS NOT NULL;
ALTER TABLE personas ADD COLUMN allow_agent_corrections BOOLEAN NOT NULL DEFAULT 0;
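Review bot: the `last_modified_by_model`, `last_modified_by_backend` and `last_modified_at` columns hold only the most recent modifier, so the audit query promised in the comment ("show me every fact gpt-5 altered last week") returns a fact only if nothing else has touched it since. If the goal is to review agent edits after enabling `allow_agent_corrections`, an append-only history table keyed by fact id would preserve each change along with the prior value; otherwise please narrow the comment to say these columns record the latest edit only. It would also help to state where the `allow_agent_corrections` gate is enforced, since the column by itself does not restrict writes.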
@@ -0,0 +1,6 @@
-- Irreversible: we collapsed multiple raw entity_type strings to
-- canonical forms and don't have a per-row record of the original.
-- The down migration is intentionally a no-op (the rewritten values
-- are still semantically correct), and the up migration is safe to
-- re-run because every UPDATE is conditional on `!= canonical`.
SELECT 1;
@@ -0,0 +1,43 @@
-- Canonicalize `entities.entity_type` so legacy rows from before
-- `normalize_entity_type` landed in upsert_entity stop polluting
-- client-side filters. Mirrors the synonym map in
-- `src/database/knowledge_dao.rs::normalize_entity_type`:
-- person ← person | people | human | individual | contact
-- place ← place | location | venue | site | area | landmark
-- event ← event | occasion | activity | celebration
-- thing ← thing | object | item | product
-- Types outside the synonym set (e.g. "friend", "family") are not
-- recognized as canonical and get a lowercase+trim pass instead, so
-- at minimum case variants collapse.
--
-- `UPDATE OR IGNORE` skips rows that would violate UNIQUE(name,
-- entity_type) after the rewrite. Two rows like ("Sarah", "person")
-- + ("Sarah", "Person") would otherwise collide — the duplicate
-- survives unchanged so the curator can merge it via the curation
-- UI rather than have the migration silently delete data.
UPDATE OR IGNORE entities
SET entity_type = 'person'
WHERE LOWER(TRIM(entity_type)) IN ('person', 'people', 'human', 'individual', 'contact')
AND entity_type != 'person';
UPDATE OR IGNORE entities
SET entity_type = 'place'
WHERE LOWER(TRIM(entity_type)) IN ('place', 'location', 'venue', 'site', 'area', 'landmark')
AND entity_type != 'place';
UPDATE OR IGNORE entities
SET entity_type = 'event'
WHERE LOWER(TRIM(entity_type)) IN ('event', 'occasion', 'activity', 'celebration')
AND entity_type != 'event';
UPDATE OR IGNORE entities
SET entity_type = 'thing'
WHERE LOWER(TRIM(entity_type)) IN ('thing', 'object', 'item', 'product')
AND entity_type != 'thing';
-- Anything left ("Friend" vs "friend") gets a lowercase+trim sweep
-- so at least case variants of the same custom type collapse.
UPDATE OR IGNORE entities
SET entity_type = LOWER(TRIM(entity_type))
WHERE entity_type != LOWER(TRIM(entity_type));
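Review bot: every statement here uses `UPDATE OR IGNORE`, so rows that collide on UNIQUE(name, entity_type) are skipped without any record, and after the migration the table can still contain non-canonical types such as "Person". The comment leaves the merge to the curator, but nothing tells the curator which rows were skipped. Please document a follow-up query that lists rows where `entity_type != LOWER(TRIM(entity_type))` or where the lowercased type is in one of the synonym lists but is not the canonical value, so the leftovers can be found and merged.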
@@ -0,0 +1,5 @@
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
CREATE INDEX idx_image_exif_date_backfill
ON image_exif (library_id, id)
WHERE date_taken IS NULL OR date_taken_source = 'fs_time';
@@ -0,0 +1,18 @@
-- Narrow the date-backfill partial index to NULL-only rows.
--
-- The original index (2026-05-06-000000_add_date_taken_source) also matched
-- `date_taken_source = 'fs_time'` so the drain could "re-resolve weak
-- entries when better tools become available." In practice the resolver
-- is deterministic on file bytes + filename + fs metadata: a row that
-- landed on fs_time once will land on fs_time again on every subsequent
-- tick. With `ORDER BY id ASC LIMIT 500`, the drain spun on the same
-- lowest-id fs_time rows in perpetuity, never advancing, while hammering
-- the SQLite write lock once per row and starving other writers (face
-- PATCHes were hitting busy_timeout and returning 500). Drop fs_time
-- from the eligibility set; if exiftool / a new filename pattern ever
-- comes online, a one-shot operator command can re-resolve.
DROP INDEX IF EXISTS idx_image_exif_date_backfill;
CREATE INDEX idx_image_exif_date_backfill
ON image_exif (library_id, id)
WHERE date_taken IS NULL;
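Review bot: this hunk changes only the partial index predicate, while the comment says fs_time is dropped "from the eligibility set". If the drain's SELECT still filters on `date_taken IS NULL OR date_taken_source = 'fs_time'`, it will keep picking the same rows and will now do so without index support. Please confirm the query predicate is narrowed in the same change. The spin described in the comment can also recur for rows whose `date_taken` stays NULL after a resolve attempt, since `ORDER BY id ASC LIMIT 500` would return them on every tick; a per-row attempt marker or an id cursor would bound that.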
@@ -0,0 +1,3 @@
DROP INDEX IF EXISTS idx_image_exif_clip_backfill;
ALTER TABLE image_exif DROP COLUMN clip_model_version;
ALTER TABLE image_exif DROP COLUMN clip_embedding;
@@ -0,0 +1,27 @@
-- CLIP semantic photo search: store a per-photo image embedding so
-- text queries can rerank against the live library via cosine
-- similarity. Apollo encodes the bytes via its CLIP service; ImageApi
-- writes the resulting blob here.
--
-- `clip_embedding` is the raw little-endian float32 buffer of an
-- L2-normalized vector (dim depends on the model — 768 bytes×4 for
-- ViT-L/14, 512 bytes×4 for ViT-B/32). Apollo always returns the
-- normalized form so the search-time dot product reduces to a plain
-- cosine similarity.
--
-- `clip_model_version` echoes the upstream `APOLLO_CLIP_MODEL` (e.g.
-- "ViT-L/14"). A model swap shouldn't silently mix geometries — the
-- backfill drain will re-eligibilize rows whose stored model_version
-- differs from the live engine's, and the search route refuses to
-- mix rows from two model_versions in the same response.
ALTER TABLE image_exif ADD COLUMN clip_embedding BLOB;
ALTER TABLE image_exif ADD COLUMN clip_model_version TEXT;
-- Partial index for the backfill drain. Mirrors the shape of
-- `idx_image_exif_date_backfill`: candidate rows are those with a
-- known content_hash (so we don't race the unhashed backlog) but no
-- embedding yet. SELECT cost stays O(missing rows) instead of full
-- table scan once the column is mostly populated.
CREATE INDEX IF NOT EXISTS idx_image_exif_clip_backfill
ON image_exif (id)
WHERE clip_embedding IS NULL AND content_hash IS NOT NULL;
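Review bot: the partial index predicate `clip_embedding IS NULL AND content_hash IS NOT NULL` does not cover the second eligibility case described in the comment, rows whose stored `clip_model_version` differs from the live model. Those rows already have an embedding, so finding them after a model swap needs a full scan unless the drain first nulls `clip_embedding` for stale rows or a second index on `clip_model_version` is added. The size note also has its units swapped: "768 bytes×4" and "512 bytes×4" should read as 768 and 512 float32 values at 4 bytes each. Since the blob length depends on the model, the reader should validate it against the expected dimension before computing a dot product.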
@@ -0,0 +1,3 @@
DROP INDEX IF EXISTS idx_insight_gen_jobs_status_cleanup;
DROP INDEX IF EXISTS idx_insight_gen_jobs_file;
DROP TABLE IF EXISTS insight_generation_jobs;
@@ -0,0 +1,23 @@
-- Track async insight generation jobs so the client can poll for
-- completion after the server returns 202 Accepted. Each generation
-- creates a new row; the application layer cancels prior running
-- jobs before inserting.
CREATE TABLE insight_generation_jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
library_id INTEGER NOT NULL DEFAULT 1,
file_path TEXT NOT NULL,
generation_type TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'running',
started_at INTEGER NOT NULL,
completed_at INTEGER,
result_insight_id INTEGER,
error_message TEXT
);
-- For the status endpoint: fast lookup by (library_id, file_path)
CREATE INDEX idx_insight_gen_jobs_file
ON insight_generation_jobs(library_id, file_path);
-- For startup cleanup (future): prune old completed/failed jobs
CREATE INDEX idx_insight_gen_jobs_status_cleanup
ON insight_generation_jobs(status, started_at);
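Review bot: `status TEXT NOT NULL DEFAULT 'running'` accepts any string, so a misspelled state written by the application layer produces a row that neither the status endpoint nor a cleanup query over `idx_insight_gen_jobs_status_cleanup` will match. Add a `CHECK (status IN (...))` listing the states the application writes, and document those states next to the column.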
@@ -0,0 +1,28 @@
-- Restore UNIQUE constraint
CREATE TABLE insight_generation_jobs_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
library_id INTEGER NOT NULL DEFAULT 1,
file_path TEXT NOT NULL,
generation_type TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'running',
started_at INTEGER NOT NULL,
completed_at INTEGER,
result_insight_id INTEGER,
error_message TEXT,
UNIQUE(library_id, file_path, generation_type)
);
INSERT INTO insight_generation_jobs_new
SELECT id, library_id, file_path, generation_type, status, started_at, completed_at, result_insight_id, error_message
FROM insight_generation_jobs;
DROP TABLE insight_generation_jobs;
ALTER TABLE insight_generation_jobs_new RENAME TO insight_generation_jobs;
CREATE INDEX idx_insight_gen_jobs_file
ON insight_generation_jobs(library_id, file_path);
CREATE INDEX idx_insight_gen_jobs_status_cleanup
ON insight_generation_jobs(status, started_at);
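Review bot: the `INSERT INTO insight_generation_jobs_new SELECT ...` in this rollback aborts with a UNIQUE violation as soon as any file has more than one job row for the same `generation_type`, which is exactly the state the up migration exists to allow. Deduplicate before copying (for example, keep the highest `id` per `(library_id, file_path, generation_type)`) and note in the comment which rows the rollback discards.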
@@ -0,0 +1,30 @@
-- Remove UNIQUE(library_id, file_path, generation_type) constraint to allow
-- multiple job rows per file. This enables proper cancel/regenerate semantics:
-- a new job is always inserted on regenerate, and the old job is cancelled
-- independently. The application layer prevents concurrent running jobs.
CREATE TABLE insight_generation_jobs_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
library_id INTEGER NOT NULL DEFAULT 1,
file_path TEXT NOT NULL,
generation_type TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'running',
started_at INTEGER NOT NULL,
completed_at INTEGER,
result_insight_id INTEGER,
error_message TEXT
);
INSERT INTO insight_generation_jobs_new
SELECT id, library_id, file_path, generation_type, status, started_at, completed_at, result_insight_id, error_message
FROM insight_generation_jobs;
DROP TABLE insight_generation_jobs;
ALTER TABLE insight_generation_jobs_new RENAME TO insight_generation_jobs;
CREATE INDEX idx_insight_gen_jobs_file
ON insight_generation_jobs(library_id, file_path);
CREATE INDEX idx_insight_gen_jobs_status_cleanup
ON insight_generation_jobs(status, started_at);
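Review bot: with the UNIQUE constraint gone, "The application layer prevents concurrent running jobs" is the only guard, and two requests racing between the cancel and the insert can both leave a `running` row. A partial unique index on `(library_id, file_path, generation_type) WHERE status = 'running'` would still allow multiple historical rows per file while letting the database enforce the single-running-job invariant.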
@@ -0,0 +1,11 @@
-- SQLite doesn't support DROP COLUMN before 3.35.0; recreate the table
-- without the new columns. This is only needed for rollback.
CREATE TABLE photo_insights_old AS
SELECT id, library_id, rel_path, title, summary, generated_at,
model_version, is_current, training_messages, approved,
backend, fewshot_source_ids, content_hash
FROM photo_insights;
DROP TABLE photo_insights;
ALTER TABLE photo_insights_old RENAME TO photo_insights;
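Review bot: `CREATE TABLE photo_insights_old AS SELECT ...` copies only the data, so the rebuilt `photo_insights` carries none of the original's PRIMARY KEY, NOT NULL or DEFAULT declarations, and any indexes on the original vanish with `DROP TABLE photo_insights`. Create the replacement with an explicit `CREATE TABLE` that repeats the original column definitions, `INSERT ... SELECT` into it, and recreate the indexes, the way the `insight_generation_jobs` rebuild in this change does.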
@@ -0,0 +1,8 @@
-- Persist generation parameters on each insight row for auditing.
ALTER TABLE photo_insights ADD COLUMN num_ctx INTEGER;
ALTER TABLE photo_insights ADD COLUMN temperature REAL;
ALTER TABLE photo_insights ADD COLUMN top_p REAL;
ALTER TABLE photo_insights ADD COLUMN top_k INTEGER;
ALTER TABLE photo_insights ADD COLUMN min_p REAL;
ALTER TABLE photo_insights ADD COLUMN system_prompt TEXT;
ALTER TABLE photo_insights ADD COLUMN persona_id TEXT;
@@ -0,0 +1,13 @@
-- SQLite doesn't support DROP COLUMN before 3.35.0; recreate the table
-- without the token-count columns. This is only needed for rollback.
CREATE TABLE photo_insights_old AS
SELECT id, library_id, rel_path, title, summary, generated_at,
model_version, is_current, training_messages, approved,
backend, fewshot_source_ids, content_hash,
num_ctx, temperature, top_p, top_k, min_p,
system_prompt, persona_id
FROM photo_insights;
DROP TABLE photo_insights;
ALTER TABLE photo_insights_old RENAME TO photo_insights;
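Review bot: this rollback rebuilds `photo_insights` through `CREATE TABLE ... AS SELECT`, which leaves `id` as an ordinary column rather than a key, so rows inserted after the rollback without an explicit `id` get NULL there. Use an explicit `CREATE TABLE` with the full column definitions followed by `INSERT ... SELECT`, and recreate any indexes the table had.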
@@ -0,0 +1,6 @@
-- Persist token usage on each insight row. Split from
-- 2026-05-27-000002_add_insight_generation_params because that
-- migration was already applied on some environments before these
-- columns were added.
ALTER TABLE photo_insights ADD COLUMN prompt_eval_count INTEGER;
ALTER TABLE photo_insights ADD COLUMN eval_count INTEGER;
@@ -0,0 +1,2 @@
DROP INDEX IF EXISTS idx_precomputed_reels_span_library;
DROP TABLE IF EXISTS precomputed_reels;
@@ -0,0 +1,14 @@
CREATE TABLE precomputed_reels (
id INTEGER PRIMARY KEY AUTOINCREMENT,
span TEXT NOT NULL,
library_key TEXT NOT NULL,
cache_key TEXT NOT NULL,
output_path TEXT NOT NULL,
title TEXT NOT NULL,
media_count INT NOT NULL,
render_version INT NOT NULL DEFAULT 1,
tz_offset_minutes INT NOT NULL,
voice TEXT,
generated_at BIGINT NOT NULL
);
CREATE INDEX idx_precomputed_reels_span_library ON precomputed_reels(span, library_key, generated_at DESC);
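Review bot: `cache_key` is declared NOT NULL but is neither unique nor indexed, so nothing prevents two rows for the same render and any lookup by key scans the table. If `cache_key` identifies a rendered reel, add a UNIQUE constraint (or a unique index together with `library_key`); if it does not, a comment saying what it keys would help. The column types here also mix `INT` / `BIGINT` with the `INTEGER` used by the other migrations in this change.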
@@ -0,0 +1 @@
DROP TABLE IF EXISTS user_ai_prefs;
@@ -0,0 +1,7 @@
CREATE TABLE user_ai_prefs (
id INTEGER PRIMARY KEY CHECK(id=1),
voice TEXT,
tz_offset_minutes INTEGER,
library TEXT,
updated_at BIGINT NOT NULL
);
+392
View File
@@ -0,0 +1,392 @@
# Insight Chat improvements — design
**Date:** 2026-05-07
**Branch:** `feature/insight-chat-improvements` (in both `ImageApi/` and `FileViewer-React/`)
**Scope:** ImageApi photo-anchored insight + chat surface, plus the
FileViewer-React client. Apollo's free/visit chat is **not** in this cycle.
## Problem
Three concrete gaps in today's insight + chat surface:
1. **Tool drift.** ImageApi exposes 13 tools to the LLM. Some are gated on
`apollo_enabled` / `has_vision`, but several optional ones
(`search_rag`, `get_calendar_events`, `get_location_history`) are
registered unconditionally even when their backing tables are empty.
Descriptions vary in quality and a couple have outright bugs.
2. **Inconsistent / incomplete tool descriptions.** Tools like
`search_messages` describe their selection rules but omit useful
examples; `store_fact` doesn't show the `object_entity_id` vs
`object_value` choice; `get_sms_messages` accepts a `days_radius`
parameter that the backing client silently ignores. The LLM is being
instructed against a slightly wrong reality.
3. **System prompt fights the persona.** Today's generation prompt
prepends the user's `custom_system_prompt` and then immediately asserts
`"You are a personal photo memory assistant..."`. The user message
demands `"a detailed insight with a title and summary"`. Both
contradict whatever voice / shape / POV the persona just established.
On chat continuation the persona is baked into the stored transcript at
generation time and can't be changed without regenerating.
## Goals
- Tool catalog is **representative** — every tool registered for a turn is
backed by data the user actually has.
- Tool descriptions are **concise but complete**, with examples for any
tool whose param choice has multiple modes or non-obvious interactions.
- Persona / system prompt is **authoritative** for voice, length, and
shape — both at generation and during chat continuation.
- Per-turn system prompt overrides on chat work without surprising
side-effects on the stored transcript outside `amend` mode.
## Non-goals
- Apollo backend / frontend changes. Separate cycle.
- Refactoring the `generate_photo_title` post-hoc title flow. Already
takes `custom_system_prompt`.
- Tool consolidation (e.g. merging `search_messages` + `get_sms_messages`).
Considered and deferred — keeps blast radius small.
- Removing knowledge-memory tools (`recall_*` / `store_*`). Audit
confirmed they have a live read path via `knowledge.rs` HTTP routes.
- Persisting persona changes to the stored transcript outside `amend`
mode. Deliberate — re-opens use the persona currently active in the
client, not a sticky historical setting.
---
## Design
### A. System prompt — generation
Today (`insight_generator.rs:33053326`):
```
[custom_system_prompt if any] +
"You are a personal photo memory assistant helping to reconstruct..." +
{owner_id_note} +
{fewshot_block} +
"IMPORTANT INSTRUCTIONS:
1. You MUST call multiple tools...
2. When calling get_sms_messages and search_rag...
3. Use recall_facts_for_photo...
...
8. You have a hard budget of {max_iterations} iterations..."
```
The first concatenation is the bug: `custom` claims one identity, the
next line asserts another.
**New structure** — two named blocks, in order:
```
[Identity / voice / format block] ← persona-controlled (or neutral default)
[Procedural block] ← always identity-free
```
**Identity block:**
- When `custom_system_prompt` is supplied: use that string verbatim, no
pre/append.
- When not: a neutral default that doesn't fight a future persona.
Working text: `"You are reconstructing a memory from a photo. Use the
gathered context to write a thoughtful summary; you decide voice,
length, and shape."`
**Procedural block** — identity-free, always emitted:
```
Tool-use guidance:
- You have a budget of {max_iterations} tool-calling iterations.
- Call tools to gather context BEFORE writing your final answer; don't
answer after one or two calls.
- When calling get_sms_messages or search_rag, make at least one call
WITHOUT a contact filter — surrounding events matter even when a
contact is known.
- Use recall_facts_for_photo + recall_entities to load any prior
knowledge about subjects in the photo.
- When you identify people / places / events / things, use store_entity
+ store_fact to grow the persistent memory.
- A tool returning no results is informative; continue with the others.
{owner_id_note if applicable}
{fewshot_block if applicable}
```
Differences from today's "IMPORTANT INSTRUCTIONS" block: removed the
"you are a personal photo memory assistant" framing and the explicit
"at least 5 tool calls" floor (replaced with the softer "don't answer
after one or two"). Few-shot stays — it's pattern-of-tool-use, not
identity.
### B. User message — generation
Today (line 3357):
```
{visual_block}Please analyze this photo and gather any relevant context
from the surrounding weeks.
Photo file path: {file_path}
Date taken: {date}
{contact_info}
{gps_info}
{tags_info}
Use the available tools to gather more context about this moment
(messages, calendar events, location history, etc.), then write a
detailed insight with a title and summary.
```
Problems: the trailing line bakes in output shape ("title and
summary"), and the title from the resulting response is **discarded
anyway** — `generate_photo_title` (line 3494) regenerates the title
post-hoc from the summary. So the prompt is constraining voice for no
data-model benefit.
**New payload** — context-only, no output prescription:
```
{visual_block}Photo file path: {file_path}
Date taken: {date}
{contact_info}
{gps_info}
{tags_info}
Gather context with the available tools, then respond.
```
The persona owns shape. If a user wants "title-then-paragraph" output,
their persona prompt says so.
### C. System prompt — chat continuation
Add `system_prompt: Option<String>` to `ChatTurnRequest` (and to its
HTTP wrapper `ChatTurnHttpRequest`). It carries through both the
non-streaming `chat_turn` and the streaming `chat_turn_stream`.
**Append mode (default, `amend=false`)** — ephemeral
swap-and-restore, mirroring the existing `annotate_system_with_budget`
pattern:
1. Load stored transcript.
2. If `system_prompt` is `Some(s)`:
- If first message is a `system` role: stash original content,
replace with `s`.
- Else: prepend a synthetic ephemeral system message with `s` (note
it's synthetic so the restore step pops it rather than rewriting).
3. Run `annotate_system_with_budget` on top (existing per-turn budget
note appends to whatever's there now).
4. Run the agentic loop.
5. **Before persistence**, restore the original system content (or pop
the synthetic one). Run `restore_system_content` for the budget
annotation as today.
6. Save.
Result: the model sees the override; the stored transcript is
unchanged outside the model's actual reply.
**Amend mode (`amend=true`)**:
- If `system_prompt` is supplied: the override stays in place during
the serialization for the new insight row. The new row's
`training_messages` system message is the override. `is_current=false`
flips on prior rows as today.
- If not supplied: behaves as today (stored transcript's system message
carries forward unchanged).
### D. FileViewer-React — client wiring
`hooks/useInsightChat.tsx`:
- `SendTurnOptions` gains `systemPromptOverride?: string | null`.
- Inside `sendTurn`, before issuing the streaming POST:
1. Read the active persona's `systemPrompt` from AsyncStorage
(already loaded for generation flows — reuse the same accessor).
2. If a one-shot `systemPromptOverride` is set, append as a suffix
(`${persona}\n\n${override}`) so persona voice survives + override
tweaks the turn.
3. Include the resulting string as `system_prompt` on the request body.
- No history-load change. The history endpoint still returns the stored
transcript.
`components/InsightChatModal.tsx`:
- Add a small "Style note" composer affordance — a one-shot text input
that, when filled, becomes the `systemPromptOverride` for the next
send. Cleared after send.
- The existing persona chip continues to open `PersonaManagerModal`.
`hooks/usePersonas.tsx` and the bundled defaults:
- Built-in `assistant` and `journal` prompts get audited and rewritten
to **explicitly state voice / shape / length** — since the framework
no longer guarantees a default shape, the persona must.
### E. Tool catalog — gating
Widen `build_tool_definitions` from `(has_vision: bool, apollo_enabled:
bool)` to a single `ToolGateOpts` struct:
```rust
pub struct ToolGateOpts {
pub has_vision: bool,
pub apollo_enabled: bool,
pub daily_summaries_present: bool,
pub calendar_present: bool,
pub location_history_present: bool,
}
```
The chat / generation services compute the three new fields lazily per
turn via `SELECT 1 FROM <table> LIMIT 1` (cheap; cached for the turn's
duration). Lazy because operators import data after launch and we don't
want to require a restart for the LLM to discover its new capabilities.
Per-tool gating:
| Tool | Existing gate | New gate |
|---|---|---|
| `describe_photo` | `has_vision` | unchanged |
| `get_personal_place_at` | `apollo_enabled` | unchanged |
| `get_calendar_events` | none | `calendar_present` |
| `get_location_history` | none | `location_history_present` |
| `search_rag` | none | `daily_summaries_present` |
All other tools always-on. (`get_sms_messages` and `search_messages`
fail informatively if SMS-API is unreachable; not worth a startup probe
since intermittent failures are the same shape.)
### F. Tool descriptions — convention
Every description follows:
1. One sentence: **what** + **when to call**.
2. Param semantics worth knowing (units, ranges, mode behavior,
precedence).
3. **Example invocation** for tools with multiple modes, optional bands,
or non-obvious parameter interactions.
4. Cross-references when relevant: `prefer X when both apply`.
Banned: all-caps section headers inside descriptions
(`"CONTENT search"`, `"TIME-BASED fetch"`); persona-prescriptive language
(`"you are a..."`); behavioral references to other tools by description
rather than name.
Tools getting examples: `search_messages`, `search_rag`, `store_fact`,
`get_sms_messages`. Trivial tools (`get_current_datetime`,
`reverse_geocode`, `get_file_tags`) skip the example.
Sample (`search_messages`):
> Search SMS/MMS message bodies. Modes: `fts5` (keyword + phrase + prefix
> + AND/OR/NOT + NEAR proximity), `semantic` (embedding similarity,
> requires generated embeddings), `hybrid` (RRF merge, recommended;
> degrades to `fts5` when embeddings absent). Optional `start_ts` /
> `end_ts` (real-UTC unix seconds) and `contact_id` filters. For pure
> date / contact browsing without keywords, prefer `get_sms_messages`.
>
> Examples:
> - `{query: "trader joe's"}` — phrase across all time.
> - `{query: "dinner", contact_id: 42, start_ts: 1700000000, end_ts: 1700604800}`
> — keyword within a contact and a week.
> - `{query: "NEAR(meeting work, 5)"}` — proximity search.
### G. SMS tool fixes
#### `get_sms_messages` — honor `days_radius`
Today: `sms_client::fetch_messages_for_contact(contact, center_ts)`
hardcodes `Duration::days(4)` (lines 3137). The tool accepts
`days_radius` and silently ignores it.
**Fix:** widen the signature to
`fetch_messages_for_contact(contact, center_ts, days_radius)`. Tool
plumbs through. Default 4 retained for back-compat.
#### `search_messages` — add date and contact_id filters
Today: ImageApi's `search_messages` only forwards `query`, `mode`,
`limit` to SMS-API.
**Fix:** add `start_ts`, `end_ts`, `contact_id` parameters.
- `contact_id` forwards directly to SMS-API
(`/api/messages/search/?contact_id=`).
- `start_ts` / `end_ts` are not natively accepted by SMS-API's search
endpoint. Apply client-side post-filter on the response (Apollo's
pattern: `chat_tools.py:670680`). Bump the SMS-API `limit` to a
larger fetch pool when a date filter is supplied so in-window matches
aren't lost to out-of-window FTS rank.
---
## Implementation sequencing
Each step is independently mergeable.
### ImageApi PRs
1. **Split system-prompt assembly + neutralize user message.** Two
named blocks; user message context-only. Default identity string
added. Tests: golden snapshots of the resulting `system_content`
with and without `custom_system_prompt`.
2. **`system_prompt` field on chat request + swap/restore + amend
persistence.** Mirrors `annotate_system_with_budget` pattern. Tests:
round-trip system content unchanged in append mode; persisted in
amend mode.
3. **`fetch_messages_for_contact` honors `days_radius`.** Tool wires
the param through. Tests: window math at the client level.
4. **`ToolGateOpts` + per-tool description rewrites.** Description
text changes are the bulk of the diff but no behavior change beyond
gating.
### FileViewer-React PR
5. **Chat hook sends `system_prompt`; modal gets style-note input;
built-in personas updated to specify shape.** The
`useInsightChat.sendTurn` call site picks up the persona and
includes it on every chat turn body. Style-note input is a one-shot
suffix.
## Testing & verification
**Automated:**
- Unit (Rust): swap-and-restore round-trip preserves stored transcript.
- Unit (Rust): amend mode persists override into new insight row.
- Unit (Rust): `fetch_messages_for_contact(days_radius=N)` produces a
window of `2N` days centered on `center_ts`.
- Unit (Rust): `build_tool_definitions(opts)` excludes gated tools when
the corresponding flag is false.
**Manual:**
- Run a chat turn against an existing insight without `system_prompt`
output unchanged from baseline.
- Same insight, with override → output reflects new voice.
- Re-open chat → original baked persona still authoritative (override
was ephemeral).
- Regenerate an insight with the journal persona → model's voice
matches journal style; no "memory assistant" framing leaks through.
- Toggle data presence (delete a row from `calendar_events`) → tool
drops from the catalog on the next turn.
## Risks
- **Default identity wording matters.** A too-neutral default ("Use the
gathered context to write a summary") might produce flatter output
than today's "personal photo memory assistant" framing for users
who never set a persona. Mitigation: tune the default with a small
set of test photos before merging.
- **Persona-suffix style notes can contradict persona voice.** A user
who picks `journal` (first person, warm) and adds the style note
"respond in bullet points" will get a tonal collision. Acceptable —
the user expressed a per-turn intent and we honor it. Document the
composition rule in the persona-manager UI.
- **Lazy data-presence probes add a per-turn `SELECT 1`.** Negligible
on SQLite (sub-millisecond) but adds up across many turns. Cache the
result for the turn's duration; don't re-probe per-tool.
## Open questions
None blocking. Items deferred to a possible follow-up cycle:
- Apollo parity for the same per-turn override pattern (already
present; just needs RN client wiring on the photo path which is
already proxy).
- Tool consolidation (`search_messages` + `get_sms_messages`
single `search_messages` with optional date filter, Apollo-style).
Considered and deferred — separate spec.
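Review bot: section C replaces the first `system` message with the request's `system_prompt`, but section A defines the system content as an identity block followed by an always-emitted procedural block, so as written a chat-turn override also discards the tool-use guidance, `owner_id_note` and the few-shot block. Specify that the override swaps only the identity block and that the procedural block is re-emitted after it. Because `system_prompt` arrives from the client and in amend mode is persisted into `training_messages`, the spec should also set a maximum length and say how an empty string is treated.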
+146
View File
@@ -0,0 +1,146 @@
use anyhow::{Result, anyhow};
use crate::ai::llm_client::LlmClient;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendKind {
Local,
Hybrid,
}
impl BackendKind {
pub fn parse(s: &str) -> Result<Self> {
match s.trim().to_lowercase().as_str() {
"local" | "" => Ok(Self::Local),
"hybrid" => Ok(Self::Hybrid),
other => Err(anyhow!(
"unknown backend '{}'; expected 'local' or 'hybrid'",
other
)),
}
}
pub fn as_str(&self) -> &'static str {
match self {
Self::Local => "local",
Self::Hybrid => "hybrid",
}
}
}
impl std::fmt::Display for BackendKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
pub struct SamplingOverrides {
pub model: Option<String>,
pub num_ctx: Option<i32>,
pub temperature: Option<f32>,
pub top_p: Option<f32>,
pub top_k: Option<i32>,
pub min_p: Option<f32>,
/// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as
/// `chat_template_kwargs.enable_thinking`); other backends ignore it.
/// `None` leaves the model/template default in place.
pub enable_thinking: Option<bool>,
}
impl SamplingOverrides {
pub fn has_sampling(&self) -> bool {
self.temperature.is_some()
|| self.top_p.is_some()
|| self.top_k.is_some()
|| self.min_p.is_some()
}
}
pub struct ResolvedBackend {
chat: Box<dyn LlmClient>,
local: Box<dyn LlmClient>,
pub kind: BackendKind,
/// `true` when the chat model receives images directly (Ollama with
/// vision, or llamacpp). `false` for hybrid where we describe-then-inline.
pub images_inline: bool,
}
impl ResolvedBackend {
pub fn new(
chat: Box<dyn LlmClient>,
local: Box<dyn LlmClient>,
kind: BackendKind,
images_inline: bool,
) -> Self {
Self {
chat,
local,
kind,
images_inline,
}
}
pub fn chat(&self) -> &dyn LlmClient {
self.chat.as_ref()
}
pub fn local(&self) -> &dyn LlmClient {
self.local.as_ref()
}
pub fn model(&self) -> &str {
self.chat.primary_model()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_backend_kind() {
assert_eq!(BackendKind::parse("local").unwrap(), BackendKind::Local);
assert_eq!(BackendKind::parse("hybrid").unwrap(), BackendKind::Hybrid);
assert_eq!(BackendKind::parse(" Local ").unwrap(), BackendKind::Local);
assert_eq!(BackendKind::parse("HYBRID").unwrap(), BackendKind::Hybrid);
assert_eq!(BackendKind::parse("").unwrap(), BackendKind::Local);
assert!(BackendKind::parse("vllm").is_err());
}
#[test]
fn backend_kind_as_str_roundtrips() {
assert_eq!(
BackendKind::parse(BackendKind::Local.as_str()).unwrap(),
BackendKind::Local
);
assert_eq!(
BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(),
BackendKind::Hybrid
);
}
#[test]
fn sampling_overrides_has_sampling() {
let empty = SamplingOverrides {
model: None,
num_ctx: None,
temperature: None,
top_p: None,
top_k: None,
min_p: None,
enable_thinking: None,
};
assert!(!empty.has_sampling());
let with_temp = SamplingOverrides {
model: None,
num_ctx: Some(4096),
temperature: Some(0.7),
top_p: None,
top_k: None,
min_p: None,
enable_thinking: None,
};
assert!(with_temp.has_sampling());
}
}
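Review bot: `has_sampling` checks `temperature`, `top_p`, `top_k` and `min_p` but not `enable_thinking` (nor `num_ctx` / `model`), so a `SamplingOverrides` that sets only `enable_thinking: Some(false)` reports no overrides, and any caller that gates building the request body on `has_sampling()` would silently drop the toggle. Either document on the method that it covers the four sampler knobs only, or add a test pinning the `enable_thinking`-only case. Deriving `Default` on the struct would also let the tests use `..Default::default()` instead of spelling out every field.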
+395
View File
@@ -0,0 +1,395 @@
//! Thin async HTTP client for Apollo's `/api/internal/clip/*` endpoints.
//!
//! Apollo hosts the OpenAI CLIP inference service (ViT-L/14 by default,
//! configurable via `APOLLO_CLIP_MODEL`). This client is the ImageApi side
//! of the contract: shove image bytes through `/encode_image` to populate
//! `image_exif.clip_embedding` during backfill, and call `/encode_text` to
//! encode a user's natural-language query at search time. The actual
//! cosine-similarity rerank runs locally in ImageApi.
//!
//! Mirrors `face_client.rs` / `tag_client.rs` shape: optional base URL
//! (None = disabled — feature off, drain and search no-op), reqwest
//! client with a generous timeout because GPU inference under a backlog
//! can queue server-side (Apollo's threadpool is bounded to 1 worker on
//! CUDA).
//!
//! Configured via `APOLLO_CLIP_API_BASE_URL`, falling back to
//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo
//! deploys are the common case).
//!
//! Wire format:
//! - `/encode_image`: multipart/form-data with `file=<bytes>` and
//! `meta=<json>` (content_hash / library_id / rel_path for logging).
//! - `/encode_text`: JSON `{"text": "<query>"}`.
//!
//! Both return `{model_version, embedding_dim, duration_ms, embedding}`
//! where `embedding` is base64 of `dim×4` little-endian float32 bytes,
//! L2-normalized so the rerank reduces to a plain dot product.
//!
//! Error mapping (reflected in [`ClipError`]):
//! - 422 `decode_failed` / `empty_text` → permanent: ImageApi marks the
//! row failed or surfaces the empty-query error to the search caller.
//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker.
//! - Any other 5xx / network error → defer.
use anyhow::{Context, Result};
use base64::Engine;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
#[derive(Debug, Clone, Serialize)]
pub struct EncodeImageMeta {
pub content_hash: String,
pub library_id: i32,
pub rel_path: String,
}
#[derive(Debug, Clone, Deserialize)]
#[allow(dead_code)] // duration_ms logged by the backfill drain
pub struct EncodeResponse {
pub model_version: String,
pub embedding_dim: i32,
pub duration_ms: i64,
/// base64 of `embedding_dim * 4` bytes (LE float32). ImageApi stores
/// the decoded bytes verbatim as a BLOB.
pub embedding: String,
}
impl EncodeResponse {
/// Decode the wire-format embedding back into raw bytes for storage.
/// Validates the buffer is `embedding_dim * 4` bytes long so a
/// malformed response surfaces here rather than as a downstream
/// silent length mismatch.
pub fn decode_embedding(&self) -> Result<Vec<u8>> {
let bytes = base64::engine::general_purpose::STANDARD
.decode(self.embedding.as_bytes())
.context("clip embedding base64 decode")?;
let expected = (self.embedding_dim as usize) * 4;
if bytes.len() != expected {
anyhow::bail!(
"clip embedding wrong size: got {} bytes, expected {} ({} * 4)",
bytes.len(),
expected,
self.embedding_dim
);
}
Ok(bytes)
}
}
#[derive(Debug, Clone, Deserialize)]
#[allow(dead_code)] // load_error consumed by future health probe
pub struct ClipHealth {
pub loaded: bool,
pub device: String,
pub model_version: String,
pub embedding_dim: i32,
#[serde(default)]
pub load_error: Option<String>,
}
#[derive(Debug)]
pub enum ClipError {
/// Apollo refused for a reason that won't change on retry (decode
/// failure on /encode_image, empty text on /encode_text).
Permanent(anyhow::Error),
/// Apollo couldn't process this turn but might next time (CUDA OOM,
/// engine not loaded, network hiccup).
Transient(anyhow::Error),
/// Feature is disabled (no `APOLLO_CLIP_API_BASE_URL` /
/// `APOLLO_API_BASE_URL`).
Disabled,
}
impl std::fmt::Display for ClipError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ClipError::Permanent(e) => write!(f, "permanent: {e}"),
ClipError::Transient(e) => write!(f, "transient: {e}"),
ClipError::Disabled => write!(f, "clip client disabled"),
}
}
}
impl std::error::Error for ClipError {}
#[derive(Clone)]
pub struct ClipClient {
client: Client,
base_url: Option<String>,
}
impl ClipClient {
pub fn new(base_url: Option<String>) -> Self {
let timeout_secs = std::env::var("CLIP_REQUEST_TIMEOUT_SEC")
.ok()
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(60);
let client = Client::builder()
.timeout(Duration::from_secs(timeout_secs))
.build()
.expect("reqwest client build");
Self {
client,
base_url: base_url.map(|u| u.trim_end_matches('/').to_string()),
}
}
/// Read both standard env vars. `APOLLO_CLIP_API_BASE_URL` wins;
/// fallback to `APOLLO_API_BASE_URL`. Both unset → disabled.
pub fn from_env() -> Self {
let base = std::env::var("APOLLO_CLIP_API_BASE_URL")
.ok()
.filter(|s| !s.trim().is_empty())
.or_else(|| {
std::env::var("APOLLO_API_BASE_URL")
.ok()
.filter(|s| !s.trim().is_empty())
});
Self::new(base)
}
pub fn is_enabled(&self) -> bool {
self.base_url.is_some()
}
/// Encode an image to a 768-d (ViT-L/14) or 512-d (ViT-B/32)
/// L2-normalized embedding. Used by the backfill drain.
pub async fn encode_image(
&self,
bytes: Vec<u8>,
meta: EncodeImageMeta,
) -> std::result::Result<EncodeResponse, ClipError> {
let Some(base) = self.base_url.as_deref() else {
return Err(ClipError::Disabled);
};
let url = format!("{}/api/internal/clip/encode_image", base);
let meta_json = serde_json::to_string(&meta)
.map_err(|e| ClipError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?;
let form = reqwest::multipart::Form::new()
.text("meta", meta_json)
.part(
"file",
reqwest::multipart::Part::bytes(bytes)
.file_name(meta.rel_path.clone())
.mime_str("application/octet-stream")
.unwrap_or_else(|_| reqwest::multipart::Part::bytes(Vec::new())),
);
self.send_multipart(&url, form).await
}
/// Encode a natural-language query to an embedding. Used by the
/// search route to rank stored image embeddings by cosine sim.
pub async fn encode_text(&self, text: &str) -> std::result::Result<EncodeResponse, ClipError> {
let Some(base) = self.base_url.as_deref() else {
return Err(ClipError::Disabled);
};
let url = format!("{}/api/internal/clip/encode_text", base);
let body = serde_json::json!({ "text": text });
let resp = match self.client.post(&url).json(&body).send().await {
Ok(r) => r,
Err(e) if e.is_timeout() || e.is_connect() => {
log::warn!("clip encode_text network error to {url}: {e}");
return Err(ClipError::Transient(anyhow::anyhow!(
"clip client network: {e}"
)));
}
Err(e) => {
log::warn!("clip encode_text request error to {url}: {e}");
return Err(ClipError::Transient(anyhow::anyhow!(
"clip client request: {e}"
)));
}
};
let status = resp.status();
if status.is_success() {
let body: EncodeResponse = resp
.json()
.await
.map_err(|e| ClipError::Transient(anyhow::anyhow!("clip response decode: {e}")))?;
return Ok(body);
}
let body_text = resp.text().await.unwrap_or_default();
log::warn!("clip encode_text HTTP {status} from {url}: {body_text}");
Err(classify_error_response(status.as_u16(), &body_text))
}
/// Engine reachability + device/model report. Used as a startup
/// sanity check from the probe binary and (later) the backlog drain.
#[allow(dead_code)] // consumed by probe + drain
pub async fn health(&self) -> Result<ClipHealth> {
let base = self.base_url.as_deref().context("clip client disabled")?;
let url = format!("{}/api/internal/clip/health", base);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let body: ClipHealth = resp.json().await?;
Ok(body)
}
async fn send_multipart(
&self,
url: &str,
form: reqwest::multipart::Form,
) -> std::result::Result<EncodeResponse, ClipError> {
let resp = match self.client.post(url).multipart(form).send().await {
Ok(r) => r,
Err(e) if e.is_timeout() || e.is_connect() => {
return Err(ClipError::Transient(anyhow::anyhow!(
"clip client network: {e}"
)));
}
Err(e) => {
return Err(ClipError::Transient(anyhow::anyhow!(
"clip client request: {e}"
)));
}
};
let status = resp.status();
if status.is_success() {
let body: EncodeResponse = resp
.json()
.await
.map_err(|e| ClipError::Transient(anyhow::anyhow!("clip response decode: {e}")))?;
return Ok(body);
}
let body_text = resp.text().await.unwrap_or_default();
Err(classify_error_response(status.as_u16(), &body_text))
}
}
/// Pulled out as a pure function so the marker-row contract is unit-
/// testable without spinning up an HTTP server. Matches the shape used
/// by face_client::classify_error_response so future retry policies
/// can share code.
fn classify_error_response(status: u16, body_text: &str) -> ClipError {
let detail_code = serde_json::from_str::<serde_json::Value>(body_text)
.ok()
.and_then(|v| {
v.get("detail")
.and_then(|d| d.as_str().map(str::to_string))
.or_else(|| {
v.get("detail")
.and_then(|d| d.get("code"))
.and_then(|c| c.as_str())
.map(str::to_string)
})
})
.unwrap_or_default();
if status == 422 {
return ClipError::Permanent(anyhow::anyhow!(
"clip {} {}: {}",
status,
detail_code,
body_text
));
}
if status == 503 {
return ClipError::Transient(anyhow::anyhow!(
"clip {} {}: {}",
status,
detail_code,
body_text
));
}
// 408 / 413 / 429 are operator-fixable infra issues; defer.
if matches!(status, 408 | 413 | 429) {
return ClipError::Transient(anyhow::anyhow!(
"clip {} {}: {}",
status,
detail_code,
body_text
));
}
if (400..500).contains(&status) {
ClipError::Permanent(anyhow::anyhow!(
"clip {} {}: {}",
status,
detail_code,
body_text
))
} else {
ClipError::Transient(anyhow::anyhow!(
"clip {} {}: {}",
status,
detail_code,
body_text
))
}
}
#[cfg(test)]
mod tests {
use super::*;
fn is_permanent(e: &ClipError) -> bool {
matches!(e, ClipError::Permanent(_))
}
fn is_transient(e: &ClipError) -> bool {
matches!(e, ClipError::Transient(_))
}
#[test]
fn classify_422_decode_failed_is_permanent() {
assert!(is_permanent(&classify_error_response(
422,
r#"{"detail":"decode_failed: bad bytes"}"#
)));
}
#[test]
fn classify_422_empty_text_is_permanent() {
assert!(is_permanent(&classify_error_response(
422,
r#"{"detail":"empty_text"}"#
)));
}
#[test]
fn classify_503_cuda_oom_is_transient() {
assert!(is_transient(&classify_error_response(
503,
r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#,
)));
}
#[test]
fn classify_5xx_is_transient_other_4xx_is_permanent() {
assert!(is_transient(&classify_error_response(500, "")));
assert!(is_permanent(&classify_error_response(404, "{}")));
}
#[test]
fn classify_infra_4xx_is_transient() {
assert!(is_transient(&classify_error_response(408, "")));
assert!(is_transient(&classify_error_response(413, "<html>")));
assert!(is_transient(&classify_error_response(429, "{}")));
}
#[test]
fn decode_embedding_size_mismatch_errors() {
// dim=4 says we expect 16 bytes (4 floats × 4 bytes). Encode 8.
use base64::Engine;
let resp = EncodeResponse {
model_version: "ViT-L/14".into(),
embedding_dim: 4,
duration_ms: 0,
embedding: base64::engine::general_purpose::STANDARD.encode([0u8; 8]),
};
assert!(resp.decode_embedding().is_err());
}
#[test]
fn decode_embedding_round_trip() {
use base64::Engine;
let bytes: Vec<u8> = (0..16).collect();
let resp = EncodeResponse {
model_version: "ViT-L/14".into(),
embedding_dim: 4,
duration_ms: 0,
embedding: base64::engine::general_purpose::STANDARD.encode(&bytes),
};
assert_eq!(resp.decode_embedding().unwrap(), bytes);
}
}
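Review bot: In `encode_text`, the failure path logs the upstream response body verbatim (`log::warn!("clip encode_text HTTP {status} from {url}: {body_text}")`), and `classify_error_response` copies the whole `body_text` into every `ClipError` message, with no length cap or escaping. A proxy error page (the tests themselves feed `"<html>"` for a 413) or a body containing newlines lands unbounded in the log and in whatever surfaces the error. Truncating the body to a fixed number of bytes and formatting it with `{body_text:?}` keeps each entry bounded and on a single line. Separately, `send_multipart` returns the same three error classes without any `log::warn!`, so failures on that path still cannot be diagnosed from the server log; the same log lines belong there.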
+4 -1
@@ -383,7 +383,10 @@ mod tests {
// body cap and rejected normal-size photos before they reached // body cap and rejected normal-size photos before they reached
// the backend. // the backend.
assert!(is_transient(&classify_error_response(408, ""))); assert!(is_transient(&classify_error_response(408, "")));
assert!(is_transient(&classify_error_response(413, "<html>nginx</html>"))); assert!(is_transient(&classify_error_response(
413,
"<html>nginx</html>"
)));
assert!(is_transient(&classify_error_response(429, "{}"))); assert!(is_transient(&classify_error_response(429, "{}")));
} }
+88
@@ -0,0 +1,88 @@
// GPU lease — in-process coordination for llama-swap model contention.
//
// llama-swap runs the heavyweight models (chat / vision / Chatterbox TTS) as
// a mutually-exclusive set on one GPU (matrix DSL `(q27 | … | tts) & e`): a
// request for a non-resident model is HELD by llama-swap until the resident
// model's in-flight requests drain, then the models swap. That hold counts
// against the *holder's* reqwest timeout — measured live: a queued TTS burned
// 77s of its budget behind a single LLM turn, and an LLM request behind a
// running synthesis waited the entire remaining synth. Uncoordinated
// cross-model traffic therefore times out instead of queueing.
//
// The lease moves that wait into this process, BEFORE the HTTP request is
// sent and before its timeout starts:
// - chat/vision requests (the LLM-side slots) share the READ lease;
// - TTS synthesis and voice-library ops (anything that spins Chatterbox up
// and evicts the LLM) take the WRITE lease;
// - embeddings take NO lease: the `embed` slot is in llama-swap's
// always-resident group (the `& e` term) and never participates in a swap,
// so leasing it would only stall searches behind a queued synthesis.
//
// tokio's RwLock is fair (FIFO, write-preferring): a queued TTS gets the GPU
// right after the current LLM request drains, and later LLM requests queue
// behind it — bounded waits in both directions, no starvation, no timeout
// budget burned while waiting.
//
// RULES: hold a lease for exactly one HTTP request (for streaming, the
// stream's lifetime) and NEVER acquire one while already holding one — once a
// writer is queued, new read acquisitions block, so nested acquisition can
// deadlock.
use std::sync::LazyLock;
use std::time::Instant;
use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
static GPU_LEASE: LazyLock<RwLock<()>> = LazyLock::new(|| RwLock::new(()));
/// Waits longer than this are logged — they mean a cross-model swap was
/// avoided and quantify what the request *would* have burned of its timeout.
const SLOW_WAIT_LOG_SECS: f64 = 2.0;
/// Shared lease for LLM-side requests (chat / vision slots).
pub async fn llm_lease() -> RwLockReadGuard<'static, ()> {
let started = Instant::now();
let guard = GPU_LEASE.read().await;
log_slow_wait("llm", started);
guard
}
/// Exclusive lease for TTS-side requests (speech synthesis + voice-library
/// ops that spin up Chatterbox).
pub async fn tts_lease() -> RwLockWriteGuard<'static, ()> {
let started = Instant::now();
let guard = GPU_LEASE.write().await;
log_slow_wait("tts", started);
guard
}
fn log_slow_wait(kind: &str, started: Instant) {
let waited = started.elapsed().as_secs_f64();
if waited > SLOW_WAIT_LOG_SECS {
log::info!("GPU lease ({kind}): waited {waited:.1}s for the other model class to drain");
}
}
#[cfg(test)]
mod tests {
use super::*;
// One sequential test, not several: the lease is a single global, so
// parallel tests interleaving reads and writes on it can hit the very
// nested-acquisition deadlock the module comment warns about.
#[tokio::test]
async fn write_lease_excludes_readers_then_reads_share() {
let w = tts_lease().await;
// A reader must not acquire while the writer is held.
let pending = tokio::spawn(async { drop(llm_lease().await) });
tokio::task::yield_now().await;
assert!(!pending.is_finished());
drop(w);
pending.await.expect("reader acquires after writer drops");
// With no writer queued, read leases are shared.
let a = llm_lease().await;
let b = llm_lease().await;
drop(a);
drop(b);
}
}
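Review bot: `llm_lease()` and `tts_lease()` await `GPU_LEASE.read()` / `GPU_LEASE.write()` with no upper bound, and the module comment allows a lease to be held for a stream's whole lifetime, so a single stalled holder blocks every later request of the other class indefinitely, while `log_slow_wait` only reports the wait after the lease has already been granted. Consider wrapping the acquisition in `tokio::time::timeout` and returning an error the caller can map to a retryable response, or at least logging while the wait is still in progress. The no-nesting rule is enforced by comment only; a debug-only task-local flag asserted inside both functions would catch a nested acquisition in tests rather than as a deadlock at runtime. Minor: the log text says the wait was for "the other model class", but a `tts_lease()` caller also queues behind another `tts_lease()` holder.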
+1260 -164
File diff suppressed because it is too large
+1883 -274
File diff suppressed because it is too large
+1911 -547
File diff suppressed because it is too large
+1444
File diff suppressed because it is too large
+52
@@ -170,3 +170,55 @@ pub struct ModelCapabilities {
pub has_vision: bool, pub has_vision: bool,
pub has_tool_calling: bool, pub has_tool_calling: bool,
} }
/// Strip a leading `<think>…</think>` reasoning block from model output.
///
/// Thinking models sometimes emit chain-of-thought inside think tags before
/// the real answer. Everything after the first `</think>` is the answer;
/// when no tag is present — or the text after it is empty — the trimmed
/// input is returned unchanged. Mirrors the behavior Ollama's
/// `extract_final_answer` has applied to single-shot generation; shared here
/// so the tool-calling final-content paths (agentic generation + chat) can
/// apply the identical cleanup before parsing / persisting.
pub fn strip_think_blocks(response: &str) -> String {
let response = response.trim();
if let Some(pos) = response.find("</think>") {
let answer = response[pos + "</think>".len()..].trim();
if !answer.is_empty() {
return answer.to_string();
}
}
response.to_string()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn strip_think_blocks_removes_leading_think_block() {
let raw = "<think>\nLet me reason about this.\n</think>\n\nTitle: A Day Out\n\nThe body.";
assert_eq!(strip_think_blocks(raw), "Title: A Day Out\n\nThe body.");
}
#[test]
fn strip_think_blocks_passes_through_plain_content() {
assert_eq!(strip_think_blocks(" just an answer "), "just an answer");
}
#[test]
fn strip_think_blocks_keeps_content_when_answer_after_tag_is_empty() {
// A think block with nothing after it: better to return the trimmed
// original than an empty string (matches Ollama's fallback).
let raw = "<think>only thoughts</think>";
assert_eq!(strip_think_blocks(raw), raw);
}
#[test]
fn strip_think_blocks_handles_unclosed_tag() {
let raw = "<think>thinking forever";
assert_eq!(strip_think_blocks(raw), raw);
}
}
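Review bot: `strip_think_blocks` cuts at the first `</think>` found anywhere in the response (`response.find("</think>")`) without checking that the text starts with `<think>`, so an answer that merely mentions the closing tag loses everything before it, while the doc comment promises to strip only a leading block. If tolerating a missing opening tag is deliberate, say so in the doc comment and add a test for it; otherwise guard with `response.starts_with("<think>")`. The two fallback tests also pin that a think-only or unclosed response is returned with its reasoning intact, so the callers that parse or persist the result should treat a returned string that still begins with `<think>` as a failed generation rather than storing it.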
+88
@@ -0,0 +1,88 @@
//! Bundle of the local LLM pair (Ollama + optional llama-swap) with the
//! `LLM_BACKEND` dispatch baked in.
//!
//! Exists because passing the pair around as loose values invited the same
//! bug three times: import/backfill tooling embedded corpora via
//! `OllamaClient` directly while the query side dispatched through
//! `embed_one`, so flipping `LLM_BACKEND=llamacpp` silently split queries
//! and corpus into different vector spaces. Anything that writes or reads
//! embeddings should go through this type (or `embed_one`/`embed_many`),
//! never a concrete client.
//!
//! Deliberately knows nothing about chat policy — hybrid/OpenRouter routing
//! is request-scoped and stays in `ResolvedBackend`. This is only the
//! local stack: embeddings and offline single-shot generation.
// Constructed by binaries, not the server — dead code from main.rs's view.
#![allow(dead_code)]
use std::sync::Arc;
use anyhow::Result;
use super::llamacpp::LlamaCppClient;
use super::llm_client::LlmClient;
use super::ollama::{EMBEDDING_MODEL, OllamaClient};
#[derive(Clone)]
pub struct LocalLlm {
ollama: OllamaClient,
llamacpp: Option<Arc<LlamaCppClient>>,
}
impl LocalLlm {
pub fn new(ollama: OllamaClient, llamacpp: Option<Arc<LlamaCppClient>>) -> Self {
Self { ollama, llamacpp }
}
/// Construct from the canonical env wiring shared with `AppState`.
pub fn from_env() -> Self {
Self::new(
crate::state::build_ollama_from_env(),
crate::state::build_llamacpp_from_env(),
)
}
/// Embed a search query (applies `EMBED_QUERY_PREFIX`). Callers must
/// pick query vs document — retrieval models treat the two sides
/// differently and an unmarked embed invites prefix-mismatch bugs.
pub async fn embed_query(&self, text: &str) -> Result<Vec<f32>> {
super::embed_query(&self.ollama, self.llamacpp.as_deref(), text).await
}
/// Embed corpus text (applies `EMBED_DOCUMENT_PREFIX`).
pub async fn embed_document(&self, text: &str) -> Result<Vec<f32>> {
super::embed_document(&self.ollama, self.llamacpp.as_deref(), text).await
}
/// Single-shot local text generation via the `LLM_BACKEND`-selected
/// client (offline tooling; chat turns belong to `ResolvedBackend`).
pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
if super::local_backend_is_llamacpp() {
if let Some(lc) = self.llamacpp.as_deref() {
return <LlamaCppClient as LlmClient>::generate(lc, prompt, system, None).await;
}
anyhow::bail!(
"LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — \
set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
);
}
self.ollama.generate(prompt, system).await
}
/// Label identifying which backend + model produces embeddings right
/// now. Store it alongside vectors (`model_version` columns) so a
/// backend flip is detectable in the data, not just in env history.
pub fn embedding_model_version(&self) -> String {
if super::local_backend_is_llamacpp() {
let slot = self
.llamacpp
.as_deref()
.map(|c| c.embedding_model.as_str())
.unwrap_or("embed");
format!("llama-swap:{}", slot)
} else {
EMBEDDING_MODEL.to_string()
}
}
}
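Review bot: `embedding_model_version()` falls back to the literal slot name with `.unwrap_or("embed")` when `LLM_BACKEND=llamacpp` but `llamacpp` is `None`, which is the same configuration in which `generate()` bails with "LlamaCppClient is unconfigured". The label therefore names a backend that cannot produce embeddings in that configuration; returning `Result<String>` with the same bail (or an `Option`) keeps the label and the dispatch in agreement. The label also carries nothing about the prefixes that `embed_query` and `embed_document` apply, so a change to `EMBED_QUERY_PREFIX` or `EMBED_DOCUMENT_PREFIX` is not detectable in stored `model_version` values the way a backend flip is.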
+174 -4
@@ -1,13 +1,22 @@
pub mod apollo_client; pub mod apollo_client;
pub mod backend;
pub mod clip_client;
pub mod daily_summary_job; pub mod daily_summary_job;
pub mod face_client; pub mod face_client;
pub mod gpu;
pub mod handlers; pub mod handlers;
pub mod insight_chat; pub mod insight_chat;
pub mod insight_generator; pub mod insight_generator;
pub mod llamacpp;
pub mod llm_client; pub mod llm_client;
pub mod local_llm;
pub mod nl_query;
pub mod ollama; pub mod ollama;
pub mod openrouter; pub mod openrouter;
pub mod pronunciation;
pub mod sms_client; pub mod sms_client;
pub mod tts;
pub mod turn_registry;
// strip_summary_boilerplate is used by binaries (test_daily_summary), not the library // strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
#[allow(unused_imports)] #[allow(unused_imports)]
@@ -16,18 +25,29 @@ pub use daily_summary_job::{
generate_daily_summaries, strip_summary_boilerplate, generate_daily_summaries, strip_summary_boilerplate,
}; };
pub use handlers::{ pub use handlers::{
chat_history_handler, chat_rewind_handler, chat_stream_handler, chat_turn_handler, cancel_generation_handler, cancel_turn_handler, chat_history_handler, chat_rewind_handler,
delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler, chat_stream_handler, chat_turn_handler, delete_insight_handler, export_training_data_handler,
generate_insight_handler, get_all_insights_handler, get_available_models_handler, generate_agentic_insight_handler, generate_insight_handler, generation_status_handler,
get_insight_handler, get_openrouter_models_handler, rate_insight_handler, get_all_insights_handler, get_available_models_handler, get_insight_handler,
get_insight_history_handler, get_openrouter_models_handler, rate_insight_handler,
turn_async_handler, turn_replay_handler,
}; };
pub use insight_generator::InsightGenerator; pub use insight_generator::InsightGenerator;
pub use llamacpp::LlamaCppClient;
#[allow(unused_imports)] #[allow(unused_imports)]
pub use llm_client::{ pub use llm_client::{
ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction, ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
}; };
// LocalLlm is constructed by binaries (reembed_embeddings, importers), not the server
#[allow(unused_imports)]
pub use local_llm::LocalLlm;
pub use ollama::{EMBEDDING_MODEL, OllamaClient}; pub use ollama::{EMBEDDING_MODEL, OllamaClient};
pub use sms_client::{SmsApiClient, SmsMessage}; pub use sms_client::{SmsApiClient, SmsMessage};
pub use tts::{
cancel_speech_job_handler, create_speech_job_handler, create_voice_from_library_handler,
create_voice_upload_handler, delete_voice_handler, list_voices_handler,
speech_job_status_handler, tts_speech_handler,
};
/// Display name used for the user in message transcripts and first-person /// Display name used for the user in message transcripts and first-person
/// prompt text. Reads the `USER_NAME` env var; defaults to `"Me"`. Models /// prompt text. Reads the `USER_NAME` env var; defaults to `"Me"`. Models
@@ -37,3 +57,153 @@ pub use sms_client::{SmsApiClient, SmsMessage};
pub fn user_display_name() -> String { pub fn user_display_name() -> String {
std::env::var("USER_NAME").unwrap_or_else(|_| "Me".to_string()) std::env::var("USER_NAME").unwrap_or_else(|_| "Me".to_string())
} }
/// One switch for the "local" LLM stack: when `LLM_BACKEND=llamacpp` is
/// set, chat / vision describe / embeddings all route through llama-swap
/// instead of Ollama. Any other value (including unset, the default) is
/// Ollama. This is intentionally global — embeddings must be drawn from
/// a single source or similarity search across the index breaks (mixed
/// vector spaces, possibly mixed dims). The `backend=hybrid` per-request
/// override remains orthogonal: it always sends chat to OpenRouter, and
/// uses `LLM_BACKEND` for the describe-then-inline vision pass.
pub fn local_backend_is_llamacpp() -> bool {
matches!(
std::env::var("LLM_BACKEND")
.ok()
.as_deref()
.map(|s| s.trim().to_lowercase())
.as_deref(),
Some("llamacpp")
)
}
/// Expected embedding dimensionality, env-overridable via `EMBEDDING_DIM`
/// (default 768, nomic-embed-text). Every store/query dim check reads this —
/// swapping to a different-dim model (e.g. Qwen3-Embedding-0.6B at 1024) is
/// then a config flip plus a `reembed_embeddings` run, not a code change.
/// Cached for the process lifetime; a flip requires a restart anyway since
/// the corpus must be re-embedded with it.
pub fn embedding_dim() -> usize {
static DIM: std::sync::OnceLock<usize> = std::sync::OnceLock::new();
*DIM.get_or_init(|| {
std::env::var("EMBEDDING_DIM")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(768)
})
}
/// Read an embedding prefix from the environment. `.env` values can't hold
/// real newlines, so a literal `\n` in the value is expanded — Qwen3-style
/// query instructions need one ("Instruct: ...\nQuery: ").
fn embed_prefix(key: &str) -> String {
std::env::var(key)
.map(|v| v.replace("\\n", "\n"))
.unwrap_or_default()
}
/// Embed a search query. Applies `EMBED_QUERY_PREFIX` (default empty) —
/// retrieval models distinguish query-side from document-side text:
/// nomic v1.5 wants `search_query: `, Qwen3-Embedding wants
/// `Instruct: <task>\nQuery: `. Must pair with the document prefix the
/// corpus was embedded with or similarity degrades.
pub async fn embed_query(
ollama: &OllamaClient,
llamacpp: Option<&LlamaCppClient>,
text: &str,
) -> anyhow::Result<Vec<f32>> {
let prefixed = format!("{}{}", embed_prefix("EMBED_QUERY_PREFIX"), text);
embed_one(ollama, llamacpp, &prefixed).await
}
/// Embed corpus text (the stored side of retrieval). Applies
/// `EMBED_DOCUMENT_PREFIX` (default empty; nomic v1.5 wants
/// `search_document: `, Qwen3-Embedding wants none).
pub async fn embed_document(
ollama: &OllamaClient,
llamacpp: Option<&LlamaCppClient>,
text: &str,
) -> anyhow::Result<Vec<f32>> {
let prefixed = format!("{}{}", embed_prefix("EMBED_DOCUMENT_PREFIX"), text);
embed_one(ollama, llamacpp, &prefixed).await
}
/// Embed a batch of strings via the configured local backend. Routes
/// through llama-swap when `LLM_BACKEND=llamacpp` (and a client is
/// configured), else Ollama. See [`local_backend_is_llamacpp`] for the
/// rationale on consistency.
pub async fn embed_many(
ollama: &OllamaClient,
llamacpp: Option<&LlamaCppClient>,
texts: &[&str],
) -> anyhow::Result<Vec<Vec<f32>>> {
if local_backend_is_llamacpp() {
if let Some(lc) = llamacpp {
return <LlamaCppClient as LlmClient>::generate_embeddings(lc, texts).await;
}
anyhow::bail!(
"LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — \
set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
);
}
ollama.generate_embeddings(texts).await
}
/// Embed one string via the configured local backend. Single-text
/// convenience over [`embed_many`].
pub async fn embed_one(
ollama: &OllamaClient,
llamacpp: Option<&LlamaCppClient>,
text: &str,
) -> anyhow::Result<Vec<f32>> {
let mut vecs = embed_many(ollama, llamacpp, &[text]).await?;
vecs.pop()
.ok_or_else(|| anyhow::anyhow!("embedding backend returned no embeddings"))
}
#[cfg(test)]
mod env_dispatch_tests {
use super::*;
/// Env vars are process-global, and the test harness runs in parallel —
/// without this lock the `LLM_BACKEND` tests race each other and flake.
static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
fn with_env<F: FnOnce()>(key: &str, val: Option<&str>, f: F) {
let _guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let prev = std::env::var(key).ok();
match val {
Some(v) => unsafe { std::env::set_var(key, v) },
None => unsafe { std::env::remove_var(key) },
}
f();
match prev {
Some(v) => unsafe { std::env::set_var(key, v) },
None => unsafe { std::env::remove_var(key) },
}
}
#[test]
fn llm_backend_defaults_to_ollama() {
with_env("LLM_BACKEND", None, || {
assert!(!local_backend_is_llamacpp());
});
}
#[test]
fn llm_backend_llamacpp_case_insensitive() {
with_env("LLM_BACKEND", Some("LlamaCpp"), || {
assert!(local_backend_is_llamacpp());
});
with_env("LLM_BACKEND", Some(" llamacpp "), || {
assert!(local_backend_is_llamacpp());
});
}
#[test]
fn llm_backend_unknown_value_is_ollama() {
with_env("LLM_BACKEND", Some("vllm"), || {
assert!(!local_backend_is_llamacpp());
});
}
}
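Review bot: `embedding_dim()` swallows a malformed `EMBEDDING_DIM` (`.and_then(|v| v.parse().ok())` followed by `.unwrap_or(768)`) and `local_backend_is_llamacpp()` treats every unrecognized `LLM_BACKEND` value as Ollama, so a typo in either variable silently selects the default instead of failing, which leads straight to the mixed-vector-space breakage the doc comment on `local_backend_is_llamacpp` warns about. Log a warning, or refuse to start, when the variable is set but does not parse or is not a known value; the test `llm_backend_unknown_value_is_ollama` currently pins the silent behaviour. `embedding_dim()` also accepts `0`. In the tests, `with_env` restores the previous value only if `f()` returns, so a failing assertion leaves `LLM_BACKEND` modified for later tests; restoring from a `Drop` guard avoids that. `ENV_LOCK` serializes only the tests in this module, not other tests that read the same variables.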
+408
@@ -0,0 +1,408 @@
//! Natural-language → structured-query translation for unified photo search.
//!
//! The unified search endpoint (`/photos/search/unified`, Phase 2) needs to
//! turn a free-text query like *"sunset photos in Italy from last summer"*
//! into the structured filter the existing `/photos` engine understands plus
//! a semantic term for CLIP ranking. That translation is a single grounded
//! LLM call, isolated here so it can be unit-tested without a network or the
//! full `InsightGenerator`.
//!
//! Two-stage design:
//! 1. The LLM emits a [`RawNlQuery`] — references are by *name* (tags) and
//! dates as ISO strings, never numeric ids it could hallucinate.
//! 2. [`resolve_raw_query`] maps names against the real tag vocabulary and
//! converts ISO dates to unix seconds, producing a [`StructuredQuery`].
//! A tag the model invents that isn't in the vocab is surfaced in
//! `unmatched_tags` (the caller folds it back into the semantic term)
//! rather than silently dropped — this is the anti-noise guard.
//!
//! Geocoding of `place` and person filtering are intentionally *not* handled
//! here: `place` stays as text for the caller to forward-geocode (async, see
//! `geo::forward_geocode`), and person filtering is deferred until a
//! person→photos resolver exists.
use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks};
use anyhow::{Result, anyhow};
use serde::{Deserialize, Serialize};
/// Raw query object as emitted by the LLM. Tag references are by name
/// (resolved against the real vocab in Rust); dates are ISO `YYYY-MM-DD`.
/// Every field is optional so a partial / minimal model response still
/// deserializes.
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
pub struct RawNlQuery {
/// Visual/scene description handed to CLIP for ranking. The descriptive
/// remainder after structured filters are peeled off.
#[serde(default)]
pub semantic: Option<String>,
/// Tag names the photos must have. Matched case-insensitively against
/// the supplied vocabulary; non-matches land in `unmatched_tags`.
#[serde(default)]
pub tags: Vec<String>,
/// Tag names the photos must NOT have.
#[serde(default)]
pub exclude_tags: Vec<String>,
#[serde(default)]
pub camera_make: Option<String>,
#[serde(default)]
pub camera_model: Option<String>,
#[serde(default)]
pub lens_model: Option<String>,
/// Free-text place/location name to forward-geocode (e.g. "Italy").
#[serde(default)]
pub place: Option<String>,
/// Inclusive start date, ISO `YYYY-MM-DD`.
#[serde(default)]
pub date_from: Option<String>,
/// Inclusive end date, ISO `YYYY-MM-DD`.
#[serde(default)]
pub date_to: Option<String>,
/// "photo" | "video" — normalized in [`resolve_raw_query`].
#[serde(default)]
pub media_type: Option<String>,
}
/// Resolved structured query: tag names mapped to ids against the real
/// vocab, ISO dates converted to unix seconds. `place` stays as text for the
/// caller to forward-geocode into a gps circle. Serializable so the endpoint
/// can echo it back to the client as "this is how I read your query"
/// (editable filter chips).
#[derive(Debug, Clone, Default, PartialEq, Serialize)]
pub struct StructuredQuery {
pub semantic: Option<String>,
pub tag_ids: Vec<i32>,
pub exclude_tag_ids: Vec<i32>,
/// Tag names the model produced that don't exist in the vocabulary.
/// The caller folds these back into the semantic term so the concept
/// isn't lost — and surfacing them keeps a hallucinated tag from
/// silently filtering the whole library to nothing.
pub unmatched_tags: Vec<String>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub lens_model: Option<String>,
/// Raw place name awaiting forward-geocoding by the caller.
pub place: Option<String>,
pub date_from: Option<i64>,
pub date_to: Option<i64>,
/// Normalized to "photo" | "video"; `None` means no media-type filter.
pub media_type: Option<String>,
}
/// Convert an ISO `YYYY-MM-DD` date to a unix timestamp (seconds). With
/// `end_of_day`, returns 23:59:59 of that day so a `date_to` filter is
/// inclusive of the whole day; otherwise 00:00:00. Returns `None` for any
/// unparseable input (the filter is simply omitted rather than erroring).
pub fn iso_to_unix(date: &str, end_of_day: bool) -> Option<i64> {
let d = chrono::NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()?;
let time = if end_of_day {
chrono::NaiveTime::from_hms_opt(23, 59, 59)?
} else {
chrono::NaiveTime::from_hms_opt(0, 0, 0)?
};
Some(d.and_time(time).and_utc().timestamp())
}
/// Normalize a free-form media-type string to the engine's vocabulary.
/// Anything that isn't clearly photo or video (including "all") yields
/// `None` — no filter.
fn normalize_media_type(raw: &str) -> Option<String> {
match raw.trim().to_lowercase().as_str() {
"photo" | "photos" | "image" | "images" | "picture" | "pictures" => {
Some("photo".to_string())
}
"video" | "videos" | "movie" | "movies" | "clip" | "clips" => Some("video".to_string()),
_ => None,
}
}
/// Resolve a raw LLM query against the real tag vocabulary, producing the
/// structured filter. Pure — no network, no LLM — so it carries the
/// correctness-critical mapping logic under unit test.
///
/// `tag_vocab` is `(tag_id, tag_name)` pairs (the shape `TagDao::get_all_tags`
/// yields once the count is dropped). Matching is case-insensitive and exact
/// on the trimmed name.
pub fn resolve_raw_query(raw: RawNlQuery, tag_vocab: &[(i32, String)]) -> StructuredQuery {
// Case-insensitive name → id lookup. Built once per call.
let lookup: std::collections::HashMap<String, i32> = tag_vocab
.iter()
.map(|(id, name)| (name.trim().to_lowercase(), *id))
.collect();
let resolve_names = |names: &[String], ids: &mut Vec<i32>, unmatched: &mut Vec<String>| {
for name in names {
let key = name.trim().to_lowercase();
if key.is_empty() {
continue;
}
match lookup.get(&key) {
Some(id) if !ids.contains(id) => ids.push(*id),
Some(_) => {} // duplicate, already collected
None => {
if !unmatched.iter().any(|u| u.eq_ignore_ascii_case(name)) {
unmatched.push(name.trim().to_string());
}
}
}
}
};
let mut tag_ids = Vec::new();
let mut unmatched_tags = Vec::new();
resolve_names(&raw.tags, &mut tag_ids, &mut unmatched_tags);
// Excluded tags that don't match a real tag are simply ignored — you
// can't exclude a tag that doesn't exist, and folding them into
// `semantic` would make no sense.
let mut exclude_tag_ids = Vec::new();
let mut exclude_unmatched = Vec::new();
resolve_names(
&raw.exclude_tags,
&mut exclude_tag_ids,
&mut exclude_unmatched,
);
let clean = |s: Option<String>| s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty());
StructuredQuery {
semantic: clean(raw.semantic),
tag_ids,
exclude_tag_ids,
unmatched_tags,
camera_make: clean(raw.camera_make),
camera_model: clean(raw.camera_model),
lens_model: clean(raw.lens_model),
place: clean(raw.place),
date_from: raw.date_from.as_deref().and_then(|d| iso_to_unix(d, false)),
date_to: raw.date_to.as_deref().and_then(|d| iso_to_unix(d, true)),
media_type: raw.media_type.as_deref().and_then(normalize_media_type),
}
}
/// Build the grounded system prompt. The model is told the current date (so
/// "last summer" resolves) and the exact tag vocabulary (so it uses real
/// tags or routes the concept to `semantic` instead of inventing one).
fn build_system_prompt(tag_vocab: &[(i32, String)], today: chrono::NaiveDate) -> String {
// Cap the vocab dump so a huge library doesn't blow the context window;
// the most-used tags are the ones a query is likely to reference.
const MAX_TAGS: usize = 400;
let mut names: Vec<&str> = tag_vocab.iter().map(|(_, n)| n.as_str()).collect();
names.sort_unstable();
names.dedup();
let shown = names.len().min(MAX_TAGS);
let vocab = names[..shown].join(", ");
let truncation = if names.len() > MAX_TAGS {
format!(" (showing {MAX_TAGS} of {} tags)", names.len())
} else {
String::new()
};
format!(
"You translate a user's natural-language photo-search request into a JSON \
filter. Today's date is {today}. Respond with ONLY a JSON object, no prose, no \
code fences.\n\n\
Schema (all fields optional):\n\
{{\n \
\"semantic\": string|null, // visual scene/subject for image similarity search\n \
\"tags\": string[], // ONLY names from the tag list below\n \
\"exclude_tags\": string[], // ONLY names from the tag list below\n \
\"camera_make\": string|null,\n \
\"camera_model\": string|null,\n \
\"lens_model\": string|null,\n \
\"place\": string|null, // a location name to look up (city, country, landmark)\n \
\"date_from\": \"YYYY-MM-DD\"|null, // inclusive\n \
\"date_to\": \"YYYY-MM-DD\"|null, // inclusive\n \
\"media_type\": \"photo\"|\"video\"|null\n\
}}\n\n\
Rules:\n\
- Put descriptive/visual concepts (\"sunset\", \"crowded beach\", \"red car\") in \"semantic\".\n\
- Only use \"tags\"/\"exclude_tags\" values that appear EXACTLY in the tag list. If a \
concept isn't a listed tag, put it in \"semantic\" instead — never invent a tag.\n\
- Resolve relative dates against today's date (\"last summer\", \"2023\", \"last month\").\n\
- Put place/location names in \"place\" (not \"semantic\").\n\
- Omit (use null / empty array) anything the request doesn't mention.\n\n\
Available tags{truncation}: {vocab}"
)
}
/// Extract the JSON object from a model response that may include a leading
/// `<think>` block, code fences, or trailing prose. Strips the think block
/// first (so reasoning that mentions braces can't fool the scan), then
/// returns the substring from the first `{` to the last `}` inclusive — or
/// the trimmed text if no braces are found (which then fails to parse with a
/// clear error).
fn extract_json(raw: &str) -> String {
let s = strip_think_blocks(raw);
let start = s.find('{');
let end = s.rfind('}');
match (start, end) {
(Some(a), Some(b)) if b >= a => s[a..=b].to_string(),
_ => s.trim().to_string(),
}
}
/// Parse a model response string into a [`StructuredQuery`], resolving names
/// against the vocab. Separated from the LLM call so it's unit-testable.
pub fn parse_response(response: &str, tag_vocab: &[(i32, String)]) -> Result<StructuredQuery> {
let json = extract_json(response);
let raw: RawNlQuery = serde_json::from_str(&json)
.map_err(|e| anyhow!("failed to parse NL query JSON: {e}; raw response: {response:?}"))?;
Ok(resolve_raw_query(raw, tag_vocab))
}
/// Translate a natural-language query into a [`StructuredQuery`] via one
/// grounded LLM call. The `client` is any configured backend (the unified
/// endpoint passes the resolved chat backend); `tag_vocab` grounds the tag
/// mapping; `today` anchors relative-date resolution.
pub async fn translate_nl_query(
client: &dyn LlmClient,
nl: &str,
tag_vocab: &[(i32, String)],
today: chrono::NaiveDate,
) -> Result<StructuredQuery> {
let system = build_system_prompt(tag_vocab, today);
let messages = vec![ChatMessage::system(system), ChatMessage::user(nl)];
let (msg, _, _) = client.chat_with_tools(messages, Vec::<Tool>::new()).await?;
parse_response(&msg.content, tag_vocab)
}
#[cfg(test)]
mod tests {
use super::*;
fn vocab() -> Vec<(i32, String)> {
vec![
(1, "beach".to_string()),
(2, "Sunset".to_string()), // mixed case to exercise case-insensitivity
(3, "family".to_string()),
]
}
#[test]
fn iso_to_unix_start_and_end_of_day() {
// 2023-01-01 UTC midnight = 1672531200.
assert_eq!(iso_to_unix("2023-01-01", false), Some(1_672_531_200));
// End of that day is 86399 seconds later.
assert_eq!(
iso_to_unix("2023-01-01", true),
Some(1_672_531_200 + 86_399)
);
}
#[test]
fn iso_to_unix_rejects_garbage() {
assert_eq!(iso_to_unix("last summer", false), None);
assert_eq!(iso_to_unix("2023-13-99", false), None);
assert_eq!(iso_to_unix("", false), None);
}
#[test]
fn resolve_matches_tags_case_insensitively() {
let raw = RawNlQuery {
tags: vec!["BEACH".to_string(), "sunset".to_string()],
..Default::default()
};
let q = resolve_raw_query(raw, &vocab());
assert_eq!(q.tag_ids, vec![1, 2]);
assert!(q.unmatched_tags.is_empty());
}
#[test]
fn resolve_surfaces_unmatched_tags_not_silently_dropped() {
// A hallucinated / non-vocab tag must be surfaced so the caller can
// fold it into semantic — never silently used as a hard filter.
let raw = RawNlQuery {
tags: vec!["beach".to_string(), "golden hour".to_string()],
..Default::default()
};
let q = resolve_raw_query(raw, &vocab());
assert_eq!(q.tag_ids, vec![1]);
assert_eq!(q.unmatched_tags, vec!["golden hour".to_string()]);
}
#[test]
fn resolve_dedups_repeated_tags() {
let raw = RawNlQuery {
tags: vec![
"beach".to_string(),
"Beach".to_string(),
"beach".to_string(),
],
..Default::default()
};
let q = resolve_raw_query(raw, &vocab());
assert_eq!(q.tag_ids, vec![1]);
}
#[test]
fn resolve_normalizes_media_type_and_dates() {
let raw = RawNlQuery {
media_type: Some("Videos".to_string()),
date_from: Some("2023-06-01".to_string()),
date_to: Some("2023-06-30".to_string()),
..Default::default()
};
let q = resolve_raw_query(raw, &vocab());
assert_eq!(q.media_type.as_deref(), Some("video"));
assert_eq!(q.date_from, iso_to_unix("2023-06-01", false));
assert_eq!(q.date_to, iso_to_unix("2023-06-30", true));
}
#[test]
fn resolve_media_type_all_is_no_filter() {
let raw = RawNlQuery {
media_type: Some("all".to_string()),
..Default::default()
};
assert_eq!(resolve_raw_query(raw, &vocab()).media_type, None);
}
#[test]
fn resolve_trims_and_empties_to_none() {
let raw = RawNlQuery {
semantic: Some(" ".to_string()),
camera_make: Some(" Fujifilm ".to_string()),
place: Some("".to_string()),
..Default::default()
};
let q = resolve_raw_query(raw, &vocab());
assert_eq!(q.semantic, None);
assert_eq!(q.camera_make.as_deref(), Some("Fujifilm"));
assert_eq!(q.place, None);
}
#[test]
fn parse_response_handles_code_fences_and_prose() {
let resp = "Here is the filter:\n```json\n{\"semantic\":\"sunset\",\"tags\":[\"beach\"]}\n```\nDone.";
let q = parse_response(resp, &vocab()).expect("parse");
assert_eq!(q.semantic.as_deref(), Some("sunset"));
assert_eq!(q.tag_ids, vec![1]);
}
#[test]
fn parse_response_handles_think_block_then_json() {
let resp = "<think>user wants beach sunsets</think>{\"tags\":[\"beach\",\"sunset\"]}";
let q = parse_response(resp, &vocab()).expect("parse");
assert_eq!(q.tag_ids, vec![1, 2]);
}
#[test]
fn parse_response_errors_on_non_json() {
assert!(parse_response("I cannot help with that.", &vocab()).is_err());
}
#[test]
fn build_system_prompt_includes_date_and_vocab() {
let today = chrono::NaiveDate::from_ymd_opt(2026, 6, 14).unwrap();
let prompt = build_system_prompt(&vocab(), today);
assert!(
prompt.contains("2026-06-14"),
"prompt should state today's date"
);
assert!(prompt.contains("beach"), "prompt should list the vocab");
assert!(
prompt.contains("never invent a tag"),
"prompt should warn against inventing tags"
);
}
}
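Review bot: the tests in this file never cover the case the `extract_json` doc comment promises, a `<think>` block that itself contains braces, and nothing covers trailing prose that contains a `}` after the JSON object. Because the scan takes `s.rfind('}')`, any closing brace in the trailing text extends the slice past the real object and the parse fails. Please add a test for each. Consider deserializing from the first `{` with a streaming deserializer that stops at the end of the first complete value instead of slicing to the last brace. Separately, the error built in `parse_response` embeds the entire model output via `raw response: {response:?}` with no length bound. If that error is logged or returned to a client, it carries whatever the model produced, so truncate it to a fixed prefix.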
+68 -23
@@ -360,18 +360,7 @@ impl OllamaClient {
/// Extract final answer from thinking model output /// Extract final answer from thinking model output
/// Handles <think>...</think> tags and takes everything after /// Handles <think>...</think> tags and takes everything after
fn extract_final_answer(&self, response: &str) -> String { fn extract_final_answer(&self, response: &str) -> String {
let response = response.trim(); crate::ai::llm_client::strip_think_blocks(response)
// Look for </think> tag and take everything after it
if let Some(pos) = response.find("</think>") {
let answer = response[pos + 8..].trim();
if !answer.is_empty() {
return answer.to_string();
}
}
// Fallback: return the whole response trimmed
response.to_string()
} }
async fn try_generate( async fn try_generate(
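Review bot: the removed body of `extract_final_answer` fell back to the whole trimmed response when nothing followed `</think>`, and this hunk does not show whether `strip_think_blocks` keeps that fallback or returns an empty string in that case. Please add a test that pins the behaviour for a response ending in `</think>` and for one with an unclosed `<think>`. The doc comment still says the function "takes everything after" the tag, which should be updated to describe what `strip_think_blocks` actually does. `&self` is now unused, so this could become an associated function or the call sites could call the helper directly.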
@@ -424,10 +413,7 @@ impl OllamaClient {
self.generate_with_images(prompt, system, None).await self.generate_with_images(prompt, system, None).await
} }
/// Variant of `generate` that sets Ollama's top-level `think: false`. #[allow(dead_code)]
/// Used by latency-sensitive callers like the rerank pass, where the
/// task has nothing to reason about and chain-of-thought tokens are
/// wasted wall time. Server-side no-op on non-reasoning models.
pub async fn generate_no_think(&self, prompt: &str, system: Option<&str>) -> Result<String> { pub async fn generate_no_think(&self, prompt: &str, system: Option<&str>) -> Result<String> {
self.generate_with_options(prompt, system, None, Some(false)) self.generate_with_options(prompt, system, None, Some(false))
.await .await
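Review bot: on `generate_no_think`, the doc comment that explained the `think: false` behaviour and why latency-sensitive callers use it is deleted and replaced by `#[allow(dead_code)]`. If the function no longer has callers, remove it rather than silencing the lint. If it is being kept for later use, keep the documentation and add a short note on why it is currently unused.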
@@ -562,7 +548,16 @@ Capture the key moment or theme. Return ONLY the title, nothing else."#,
let title = self let title = self
.generate_with_images(&prompt, Some(system), None) .generate_with_images(&prompt, Some(system), None)
.await?; .await?;
Ok(title.trim().trim_matches('"').to_string()) // Models decorate despite "Return ONLY the title": quotes, bold
// markers, sometimes a "Title:" label.
use crate::ai::insight_generator::strip_title_markdown;
let cleaned = strip_title_markdown(title.trim());
let cleaned = cleaned
.strip_prefix("Title:")
.or_else(|| cleaned.strip_prefix("title:"))
.map(strip_title_markdown)
.unwrap_or(cleaned);
Ok(cleaned.to_string())
} }
/// Generate a summary for a single photo based on its context /// Generate a summary for a single photo based on its context
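Review bot: the label stripping at `.strip_prefix("Title:")` / `.strip_prefix("title:")` handles only those two casings, so `TITLE:` or `Title :` pass through unchanged, and a response that consists only of the label yields an empty title that is returned as `Ok`. Compare the prefix case-insensitively and fall back to an error or the untrimmed text when the cleaned result is empty. The hunk also does not show whether `strip_title_markdown` trims the space left after the label, so a test with input like `**Title:** "Beach Day"` would document the expected output. The function-local `use` would read better at the top of the module.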
@@ -849,11 +844,14 @@ Analyze the image and use specific details from both the visual content and the
if !chunk.message.role.is_empty() { if !chunk.message.role.is_empty() {
role = chunk.message.role; role = chunk.message.role;
} }
// Ollama only attaches tool_calls on the final chunk. // Ollama ≥0.8 can stream tool_calls incrementally
// across chunks (older servers attach them all to
// one chunk) — append rather than overwrite so
// calls from earlier chunks survive.
if let Some(tcs) = chunk.message.tool_calls if let Some(tcs) = chunk.message.tool_calls
&& !tcs.is_empty() && !tcs.is_empty()
{ {
tool_calls = Some(tcs); append_streamed_tool_calls(&mut tool_calls, tcs);
} }
if chunk.done { if chunk.done {
prompt_eval_count = chunk.prompt_eval_count; prompt_eval_count = chunk.prompt_eval_count;
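Review bot: `append_streamed_tool_calls(&mut tool_calls, tcs)` assumes every chunk carries only calls that have not been seen before. If a server repeats earlier calls in a later chunk (for example a cumulative list on the final chunk), the accumulator now holds duplicates, and each duplicate would be dispatched as a separate tool invocation. Since `ToolCall` has an `id` field, consider skipping calls whose `id` is already present when it is `Some`, and state in the comment what happens when ids are absent.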
@@ -1057,13 +1055,14 @@ Analyze the image and use specific details from both the visual content and the
} }
}; };
// Validate embedding dimensions (should be 768 for nomic-embed-text:v1.5) // Validate embedding dimensions (EMBEDDING_DIM; 768 for nomic-embed-text:v1.5)
for (i, embedding) in embeddings.iter().enumerate() { for (i, embedding) in embeddings.iter().enumerate() {
if embedding.len() != 768 { if embedding.len() != crate::ai::embedding_dim() {
log::warn!( log::warn!(
"Unexpected embedding dimensions for item {}: {} (expected 768)", "Unexpected embedding dimensions for item {}: {} (expected {})",
i, i,
embedding.len() embedding.len(),
crate::ai::embedding_dim()
); );
} }
} }
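Review bot: the dimension check at `embedding.len() != crate::ai::embedding_dim()` still only logs a warning, and in the lines shown the mismatched vector is not rejected. A vector of the wrong length that reaches similarity search or storage produces wrong rankings or a later failure that is harder to trace, so consider returning an error here. Also hoist `crate::ai::embedding_dim()` into a local before the loop, since it is now called twice per iteration on the warning path.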
@@ -1332,8 +1331,20 @@ struct OllamaEmbedResponse {
embeddings: Vec<Vec<f32>>, embeddings: Vec<Vec<f32>>,
} }
/// Accumulate tool calls streamed across NDJSON chunks. Ollama ≥0.8 may
/// emit each tool call on its own chunk; replacing the accumulator on every
/// chunk would keep only the last call, so extend instead.
fn append_streamed_tool_calls(
acc: &mut Option<Vec<crate::ai::llm_client::ToolCall>>,
new: Vec<crate::ai::llm_client::ToolCall>,
) {
acc.get_or_insert_with(Vec::new).extend(new);
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::append_streamed_tool_calls;
use crate::ai::llm_client::{ToolCall, ToolCallFunction};
#[test] #[test]
fn generate_photo_description_prompt_is_concise() { fn generate_photo_description_prompt_is_concise() {
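Review bot: `append_streamed_tool_calls` turns a `None` accumulator into `Some(vec![])` when `new` is empty, because `get_or_insert_with(Vec::new)` runs before `extend`. The current call site guards with `!tcs.is_empty()`, but the helper is the part under test and a later caller may not guard. Return early on an empty `new` so `None` keeps meaning "no tool calls", and add a test for that case.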
@@ -1344,4 +1355,38 @@ mod tests {
Focus on the people, location, and activity."; Focus on the people, location, and activity.";
assert!(prompt.len() < 200, "Prompt should be concise"); assert!(prompt.len() < 200, "Prompt should be concise");
} }
fn call(name: &str) -> ToolCall {
ToolCall {
id: None,
function: ToolCallFunction {
name: name.to_string(),
arguments: serde_json::json!({}),
},
}
}
#[test]
fn streamed_tool_calls_across_chunks_accumulate() {
// Two tool calls arriving in two separate stream chunks must BOTH
// survive assembly — the old `tool_calls = Some(tcs)` kept only the
// last chunk's calls.
let mut acc: Option<Vec<ToolCall>> = None;
append_streamed_tool_calls(&mut acc, vec![call("get_sms_messages")]);
append_streamed_tool_calls(&mut acc, vec![call("reverse_geocode")]);
let calls = acc.expect("tool calls accumulated");
assert_eq!(calls.len(), 2);
assert_eq!(calls[0].function.name, "get_sms_messages");
assert_eq!(calls[1].function.name, "reverse_geocode");
}
#[test]
fn streamed_tool_calls_single_chunk_batch_kept_intact() {
// Older Ollama servers attach all calls to one chunk — unchanged.
let mut acc: Option<Vec<ToolCall>> = None;
append_streamed_tool_calls(&mut acc, vec![call("a"), call("b")]);
let calls = acc.expect("tool calls accumulated");
assert_eq!(calls.len(), 2);
}
} }
+282
@@ -0,0 +1,282 @@
// User-configurable pronunciation overrides for TTS. Chatterbox mispronounces
// place names ("Worcester"), initialisms ("WSL"), and clipped abbreviations
// ("blvd"), so we rewrite them to phonetic spellings before synthesis.
//
// The map lives in a JSON file on the server — a flat object of
// `"written form": "spoken form"` pairs, e.g.:
//
// {
// "Worcester": "Wuster",
// "WSL": "W S L",
// "blvd": "boulevard",
// "Dr.": "Doctor"
// }
//
// Path comes from `TTS_PRONUNCIATIONS_PATH` (default `tts_pronunciations.json`
// in the working directory). A missing file simply disables the feature. The
// file is re-read whenever its mtime changes, so edits apply to the next
// synthesis without a restart; a malformed edit keeps the last good map and
// logs the parse error instead of silently dropping all overrides.
//
// Matching rules:
// - Whole words only — `cat` never rewrites `category`. (Boundaries are only
// asserted next to word characters, so keys like `Dr.` still work.)
// - Smartcase: an all-lowercase key matches case-insensitively; a key with
// any uppercase matches exactly. That lets `worcester` catch every casing
// while `US` (the country) leaves the pronoun `us` alone.
// - Longer keys win over shorter ones (`New York Times` before `New York`).
use regex::Regex;
use std::collections::HashMap;
use std::path::Path;
use std::sync::{Arc, LazyLock, Mutex as StdMutex};
use std::time::SystemTime;
/// A compiled pronunciation map: one alternation regex over every key plus
/// the lookup tables the replacement closure resolves matches against.
#[derive(Default)]
struct CompiledMap {
/// `None` when the map is empty — apply() is then a no-op.
regex: Option<Regex>,
/// Case-sensitive entries, keyed verbatim.
exact: HashMap<String, String>,
/// Case-insensitive entries, keyed lowercased.
folded: HashMap<String, String>,
}
impl CompiledMap {
fn from_entries(entries: &HashMap<String, String>) -> Self {
let mut keys: Vec<&str> = entries
.keys()
.map(|k| k.as_str())
.filter(|k| !k.trim().is_empty())
.collect();
if keys.is_empty() {
return Self::default();
}
// Longest key first so overlapping entries prefer the more specific
// one (regex alternation is first-match-wins, not longest-match).
keys.sort_by(|a, b| b.len().cmp(&a.len()).then(a.cmp(b)));
let mut exact = HashMap::new();
let mut folded = HashMap::new();
let alternatives: Vec<String> = keys
.iter()
.map(|key| {
let escaped = regex::escape(key);
// Only assert a word boundary where the key edge is a word
// character — `\b` adjacent to punctuation (e.g. the dot in
// `Dr.`) would otherwise never match.
let lead = if key
.chars()
.next()
.is_some_and(|c| c.is_alphanumeric() || c == '_')
{
r"\b"
} else {
""
};
let trail = if key
.chars()
.last()
.is_some_and(|c| c.is_alphanumeric() || c == '_')
{
r"\b"
} else {
""
};
let case_sensitive = key.chars().any(|c| c.is_uppercase());
if case_sensitive {
exact.insert(key.to_string(), entries[*key].clone());
format!("{lead}{escaped}{trail}")
} else {
folded.insert(key.to_lowercase(), entries[*key].clone());
format!("{lead}(?i:{escaped}){trail}")
}
})
.collect();
// Escaped fixed strings can't produce an invalid pattern; if one ever
// does, treat the whole map as empty rather than panicking a handler.
let pattern = alternatives.join("|");
let regex = match Regex::new(&pattern) {
Ok(r) => Some(r),
Err(e) => {
log::error!("pronunciation map failed to compile: {e}");
None
}
};
Self {
regex,
exact,
folded,
}
}
fn apply(&self, text: &str) -> String {
let Some(re) = &self.regex else {
return text.to_string();
};
re.replace_all(text, |caps: &regex::Captures| {
let m = &caps[0];
self.exact
.get(m)
.or_else(|| self.folded.get(&m.to_lowercase()))
.cloned()
// Unreachable in practice — every alternative came from one
// of the two maps — but never drop the user's text.
.unwrap_or_else(|| m.to_string())
})
.into_owned()
}
}
struct CacheEntry {
mtime: Option<SystemTime>,
compiled: Arc<CompiledMap>,
}
static CACHE: LazyLock<StdMutex<Option<CacheEntry>>> = LazyLock::new(|| StdMutex::new(None));
fn config_path() -> String {
std::env::var("TTS_PRONUNCIATIONS_PATH")
.ok()
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.unwrap_or_else(|| "tts_pronunciations.json".to_string())
}
/// Load the compiled map, re-reading the file only when its mtime changed
/// since the last call (or it appeared/disappeared). Synthesis is serialized
/// on a single GPU permit, so a stat per call is noise.
fn current_map() -> Arc<CompiledMap> {
let path_s = config_path();
let path = Path::new(&path_s);
let mtime = std::fs::metadata(path).and_then(|m| m.modified()).ok();
let mut cache = CACHE.lock().unwrap();
if let Some(entry) = cache.as_ref()
&& entry.mtime == mtime
{
return entry.compiled.clone();
}
let compiled = match mtime {
None => Arc::new(CompiledMap::default()), // no file → no overrides
Some(_) => match std::fs::read_to_string(path)
.map_err(anyhow::Error::from)
.and_then(|s| Ok(serde_json::from_str::<HashMap<String, String>>(&s)?))
{
Ok(entries) => {
log::info!(
"loaded {} pronunciation override(s) from {path_s}",
entries.len()
);
Arc::new(CompiledMap::from_entries(&entries))
}
Err(e) => {
log::error!("failed to load pronunciation map {path_s}: {e}");
// Keep serving the previous map rather than regressing to
// none mid-edit; still record the new mtime so the error
// logs once per bad save, not once per synthesis.
cache
.as_ref()
.map(|c| c.compiled.clone())
.unwrap_or_default()
}
},
};
*cache = Some(CacheEntry {
mtime,
compiled: compiled.clone(),
});
compiled
}
/// Rewrite configured words/abbreviations to their phonetic spellings.
/// Call on cleaned (post-markdown-strip) text, right before synthesis.
pub fn apply_pronunciations(text: &str) -> String {
current_map().apply(text)
}
#[cfg(test)]
mod tests {
use super::*;
fn compile(pairs: &[(&str, &str)]) -> CompiledMap {
let entries = pairs
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
CompiledMap::from_entries(&entries)
}
#[test]
fn empty_map_is_a_noop() {
let m = compile(&[]);
assert_eq!(m.apply("nothing changes"), "nothing changes");
}
#[test]
fn replaces_whole_words_only() {
let m = compile(&[("cat", "kitty")]);
assert_eq!(m.apply("the cat sat"), "the kitty sat");
// No substring rewrites.
assert_eq!(m.apply("the category"), "the category");
assert_eq!(m.apply("concatenate"), "concatenate");
}
#[test]
fn lowercase_keys_match_any_casing() {
let m = compile(&[("worcester", "Wuster")]);
assert_eq!(m.apply("Worcester is nice"), "Wuster is nice");
assert_eq!(m.apply("in WORCESTER today"), "in Wuster today");
assert_eq!(m.apply("worcester sauce"), "Wuster sauce");
}
#[test]
fn uppercase_keys_match_case_sensitively() {
let m = compile(&[("US", "U S")]);
assert_eq!(m.apply("the US economy"), "the U S economy");
// The pronoun survives.
assert_eq!(m.apply("join us today"), "join us today");
}
#[test]
fn keys_with_punctuation_work() {
// `\b` is only asserted next to word characters, so the trailing dot
// doesn't break matching.
let m = compile(&[("Dr.", "Doctor"), ("blvd", "boulevard")]);
assert_eq!(
m.apply("Dr. Smith on Sunset blvd"),
"Doctor Smith on Sunset boulevard"
);
}
#[test]
fn longer_keys_win_over_shorter() {
let m = compile(&[("new york", "Noo York"), ("new york times", "the Times")]);
assert_eq!(m.apply("read the new york times"), "read the the Times");
assert_eq!(m.apply("visit new york soon"), "visit Noo York soon");
}
#[test]
fn multiple_occurrences_all_rewrite() {
let m = compile(&[("wsl", "W S L")]);
assert_eq!(m.apply("WSL and wsl and Wsl"), "W S L and W S L and W S L");
}
#[test]
fn replacement_text_is_verbatim() {
// Replacements aren't re-scanned — a value containing another key
// doesn't cascade.
let m = compile(&[("a1", "b2"), ("b2", "c3")]);
assert_eq!(m.apply("a1"), "b2");
}
#[test]
fn blank_keys_are_ignored() {
let m = compile(&[("", "x"), (" ", "y"), ("ok", "fine")]);
assert_eq!(m.apply("ok then"), "fine then");
}
}
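Review bot: the file header promises that a bad edit "keeps the last good map", but that only holds for read and JSON errors in `current_map`. A regex build failure inside `CompiledMap::from_entries` returns a map with `regex: None`, which `current_map` then caches in place of the previous good one, silently disabling every override. The comment "Escaped fixed strings can't produce an invalid pattern" overlooks the regex crate's compiled-size limit, which a sufficiently large map can exceed. Have `from_entries` return a `Result` so the `Err` arm that preserves the previous map covers this case too, and consider bounding the file size and entry count before compiling. Two smaller points: `CACHE.lock().unwrap()` will panic every later synthesis call if the mutex is ever poisoned, so recover the inner value instead. The mtime comparison can miss an edit that lands within the filesystem's timestamp granularity, which is worth a sentence in the header.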
+103 -19
@@ -20,31 +20,36 @@ impl SmsApiClient {
} }
} }
/// Fetch messages for a specific contact within ±4 days of the given timestamp /// Compute a `[start, end]` unix-second window of `2 * radius_days`
/// Falls back to all contacts if no messages found for the specific contact /// centered on `center_ts`. `radius_days < 1` is clamped to 1 to avoid
/// Messages are sorted by proximity to the center timestamp /// degenerate zero-width windows.
pub(crate) fn window_for_radius(center_ts: i64, radius_days: i64) -> (i64, i64) {
let r = radius_days.max(1);
let span = r * 86400;
(center_ts - span, center_ts + span)
}
/// Fetch messages for a specific contact within ±`radius_days` of the
/// given timestamp. Falls back to all contacts when no messages found
/// for the named contact. Sorted by proximity to the center timestamp.
pub async fn fetch_messages_for_contact( pub async fn fetch_messages_for_contact(
&self, &self,
contact: Option<&str>, contact: Option<&str>,
center_timestamp: i64, center_timestamp: i64,
radius_days: i64,
) -> Result<Vec<SmsMessage>> { ) -> Result<Vec<SmsMessage>> {
use chrono::Duration; let effective_radius = radius_days.max(1);
let (start_ts, end_ts) = Self::window_for_radius(center_timestamp, radius_days);
// Calculate ±4 days range around the center timestamp
let center_dt = chrono::DateTime::from_timestamp(center_timestamp, 0) let center_dt = chrono::DateTime::from_timestamp(center_timestamp, 0)
.ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?; .ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?;
let start_dt = center_dt - Duration::days(4);
let end_dt = center_dt + Duration::days(4);
let start_ts = start_dt.timestamp();
let end_ts = end_dt.timestamp();
// If contact specified, try fetching for that contact first // If contact specified, try fetching for that contact first
if let Some(contact_name) = contact { if let Some(contact_name) = contact {
log::info!( log::info!(
"Fetching SMS for contact: {} (±4 days from {})", "Fetching SMS for contact: {} (±{} days from {})",
contact_name, contact_name,
effective_radius,
center_dt.format("%Y-%m-%d %H:%M:%S") center_dt.format("%Y-%m-%d %H:%M:%S")
); );
let messages = self let messages = self
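Review bot: `window_for_radius` clamps `radius_days` only from below, so `r * 86400` and `center_ts - span` / `center_ts + span` are unchecked `i64` arithmetic on a caller-supplied value. A very large radius panics in debug builds and wraps in release, producing an inverted or nonsensical window. Add an upper clamp (a documented maximum number of days) and use `saturating_mul` / `saturating_sub` / `saturating_add`. The doc comment says "window of `2 * radius_days`" without the unit. In `fetch_messages_for_contact`, `effective_radius` repeats the `.max(1)` clamp that `window_for_radius` already applies, so the logged value and the computed window can drift if one of them changes. Return the effective radius from the helper or clamp once and pass it in.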
@@ -68,7 +73,8 @@ impl SmsApiClient {
// Fallback to all contacts // Fallback to all contacts
log::info!( log::info!(
"Fetching all SMS messages (±4 days from {})", "Fetching all SMS messages (±{} days from {})",
effective_radius,
center_dt.format("%Y-%m-%d %H:%M:%S") center_dt.format("%Y-%m-%d %H:%M:%S")
); );
self.fetch_messages(start_ts, end_ts, None, Some(center_timestamp)) self.fetch_messages(start_ts, end_ts, None, Some(center_timestamp))
@@ -251,23 +257,48 @@ impl SmsApiClient {
} }
/// Search message bodies via the Django side's FTS5 / semantic / hybrid /// Search message bodies via the Django side's FTS5 / semantic / hybrid
/// endpoint. `mode` selects the ranking strategy: /// endpoint. `params.mode` selects the ranking strategy:
/// - "fts5" keyword-only, supports phrase / prefix / boolean / NEAR /// - "fts5" keyword-only, supports phrase / prefix / boolean / NEAR
/// - "semantic" embedding similarity /// - "semantic" embedding similarity
/// - "hybrid" both merged via reciprocal rank fusion (recommended) /// - "hybrid" both merged via reciprocal rank fusion (recommended)
///
/// All of `contact_id`, `date_from` / `date_to` (unix seconds), `is_mms`,
/// `has_media`, and `offset` are pushed to SMS-API server-side so the
/// filtered+paginated result set is exact rather than a client-side
/// over-fetch.
pub async fn search_messages( pub async fn search_messages(
&self, &self,
query: &str, query: &str,
mode: &str, params: &SmsSearchParams<'_>,
limit: usize,
) -> Result<Vec<SmsSearchHit>> { ) -> Result<Vec<SmsSearchHit>> {
let url = format!( let mut url = format!(
"{}/api/messages/search/?q={}&mode={}&limit={}", "{}/api/messages/search/?q={}&mode={}&limit={}",
self.base_url, self.base_url,
urlencoding::encode(query), urlencoding::encode(query),
urlencoding::encode(mode), urlencoding::encode(params.mode),
limit params.limit,
); );
if let Some(cid) = params.contact_id {
url.push_str(&format!("&contact_id={}", cid));
}
if let Some(ref c) = params.contact {
url.push_str(&format!("&contact={}", urlencoding::encode(c)));
}
if let Some(off) = params.offset {
url.push_str(&format!("&offset={}", off));
}
if let Some(from) = params.date_from {
url.push_str(&format!("&date_from={}", from));
}
if let Some(to) = params.date_to {
url.push_str(&format!("&date_to={}", to));
}
if let Some(is_mms) = params.is_mms {
url.push_str(&format!("&is_mms={}", is_mms));
}
if let Some(has_media) = params.has_media {
url.push_str(&format!("&has_media={}", has_media));
}
let mut request = self.client.get(&url); let mut request = self.client.get(&url);
if let Some(token) = &self.token { if let Some(token) = &self.token {
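Review bot: the `contact` parameter is appended even when `contact_id` is also set, while the `SmsSearchParams` doc says the name is resolved only when `contact_id` is not set. Sending both leaves the precedence to the server, so either skip `contact` when `contact_id` is `Some` or document that both are sent. There is also no check that `date_from <= date_to` before the request goes out. Building the query with repeated `push_str(&format!(...))` means each new string parameter has to remember `urlencoding::encode`. Passing the pairs through a query serializer would make encoding the default for every value.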
@@ -370,6 +401,33 @@ pub struct SmsSearchHit {
/// Present for semantic / hybrid modes; absent for fts5. /// Present for semantic / hybrid modes; absent for fts5.
#[serde(default)] #[serde(default)]
pub similarity_score: Option<f32>, pub similarity_score: Option<f32>,
/// SMS-API-generated excerpt around the match, wrapped in `<mark>` tags.
/// For MMS messages that only matched via attachment text / filename
/// (empty `body`), the snippet is the only meaningful preview.
#[serde(default)]
pub snippet: Option<String>,
}
/// Optional filter / paging knobs for [`SmsApiClient::search_messages`].
/// All fields except `mode` and `limit` map 1:1 to the same-named SMS-API
/// query params (added in the 2026-05 search-enhancements release).
#[derive(Debug, Clone)]
pub struct SmsSearchParams<'a> {
pub mode: &'a str,
pub limit: usize,
pub contact_id: Option<i64>,
/// Contact name (case-insensitive). Resolved to a numeric ID by the
/// SMS-API server when `contact_id` is not set.
pub contact: Option<String>,
/// Unix-seconds inclusive lower bound on `date`.
pub date_from: Option<i64>,
/// Unix-seconds inclusive upper bound on `date`.
pub date_to: Option<i64>,
/// `Some(true)` = MMS only, `Some(false)` = SMS only, `None` = both.
pub is_mms: Option<bool>,
/// `Some(true)` = only messages with image/video/audio attachments.
pub has_media: Option<bool>,
pub offset: Option<usize>,
} }
#[derive(Deserialize)] #[derive(Deserialize)]
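Review bot: the new `snippet` field is documented as an excerpt "wrapped in `<mark>` tags", which means it is a string that mixes markup with text taken from message bodies, attachment text and filenames. The doc comment should state whether SMS-API HTML-escapes the surrounding text before inserting `<mark>`, and that consumers must not render the field as raw HTML unless that is guaranteed. If it is not, a message body containing markup would be rendered in whatever client displays the search results. Consider stripping everything except `<mark>` on this side, or exposing match offsets instead of markup.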
@@ -379,3 +437,29 @@ struct SmsSearchResponse {
#[serde(default)] #[serde(default)]
search_method: String, search_method: String,
} }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn window_for_radius_produces_2n_day_span() {
let center: i64 = 1_700_000_000;
let (start, end) = SmsApiClient::window_for_radius(center, 7);
assert_eq!(end - start, 14 * 86400);
assert_eq!(start + 7 * 86400, center);
assert_eq!(end - 7 * 86400, center);
}
#[test]
fn window_for_radius_clamps_zero_to_one() {
let (start, end) = SmsApiClient::window_for_radius(100_000, 0);
assert_eq!(end - start, 2 * 86400);
}
#[test]
fn window_for_radius_clamps_negative_to_one() {
let (start, end) = SmsApiClient::window_for_radius(100_000, -7);
assert_eq!(end - start, 2 * 86400);
}
}
+1313
File diff suppressed because it is too large
+748
@@ -0,0 +1,748 @@
use crate::ai::insight_chat::ChatStreamEvent;
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::Mutex as StdMutex;
use std::sync::atomic::{AtomicU32, Ordering};
use std::time::Instant;
use tokio::sync::{Mutex, Notify};
use tokio::task::AbortHandle;
/// Maximum number of events buffered per turn. Agentic turns typically
/// produce ~120 events; 500 provides 4× headroom. When exceeded, oldest
/// events are evicted from the front.
const MAX_BUFFERED_EVENTS: usize = 500;
/// Turn status codes used by `TurnEntry::status`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TurnStatus {
Running = 0,
Done = 1,
Error = 2,
Cancelled = 3,
}
impl From<u32> for TurnStatus {
fn from(v: u32) -> Self {
match v {
0 => TurnStatus::Running,
1 => TurnStatus::Done,
2 => TurnStatus::Error,
3 => TurnStatus::Cancelled,
_ => TurnStatus::Running,
}
}
}
impl TurnStatus {
pub fn as_str(&self) -> &'static str {
match self {
TurnStatus::Running => "running",
TurnStatus::Done => "done",
TurnStatus::Error => "error",
TurnStatus::Cancelled => "cancelled",
}
}
}
/// Shared metadata about a turn, read by the SSE replay handler to emit
/// the initial `turn_info` event and to decide whether to wait for new
/// events or close immediately.
#[derive(Debug, Clone)]
pub struct TurnInfo {
pub turn_id: String,
pub file_path: String,
pub library_id: i32,
pub status: TurnStatus,
pub total_events_pushed: u32,
pub buffered_count: u32,
}
/// Result of reading events at or after an absolute `skip_before` index.
#[derive(Debug)]
pub enum ReplayOutcome {
/// New events are available. `next_skip` is the absolute index to pass
/// on the next read (i.e. one past the last event returned).
Events {
events: Vec<ChatStreamEvent>,
next_skip: u32,
},
/// The reader is caught up to the live edge — no events past `skip_before`
/// yet. `next_skip` is the current high-water mark.
CaughtUp { next_skip: u32 },
/// `skip_before` points below the buffer's base index: the requested
/// events were evicted. Maps to HTTP 410 Gone.
Gone,
}
/// Per-turn state shared between the agentic loop (writer) and all SSE
/// replay connections (readers).
pub struct TurnEntry {
pub turn_id: String,
pub file_path: String,
pub library_id: i32,
/// Shared event buffer — multiple SSE connections can read independently.
/// Each connection tracks its own `skip_before` offset.
events: Mutex<Vec<ChatStreamEvent>>,
/// Monotonic counter: total events pushed (may exceed events.len()
/// due to eviction). Used for skip_before indexing.
total_events_pushed: AtomicU32,
/// The event index that this entry started with. Adjusts on eviction
/// so that `skip_before` stays absolute across connections.
base_index: AtomicU32,
pub status: AtomicU32,
/// Abort handle for the spawned agentic task, set once after spawn.
/// Behind a std `Mutex` because the entry is shared via `Arc` and the
/// handle is installed after the entry is already in the registry.
abort_handle: StdMutex<Option<AbortHandle>>,
pub created_at: Instant,
notify: Arc<Notify>,
}
impl TurnEntry {
pub fn new(turn_id: String, file_path: String, library_id: i32) -> Self {
Self {
turn_id,
file_path,
library_id,
events: Mutex::new(Vec::new()),
total_events_pushed: AtomicU32::new(0),
base_index: AtomicU32::new(0),
status: AtomicU32::new(TurnStatus::Running as u32),
abort_handle: StdMutex::new(None),
created_at: Instant::now(),
notify: Arc::new(Notify::new()),
}
}
/// Install the abort handle for the spawned agentic task. Called once,
/// right after the task is spawned.
pub fn set_abort_handle(&self, handle: AbortHandle) {
*self.abort_handle.lock().expect("abort_handle poisoned") = Some(handle);
}
/// Abort the spawned agentic task, if a handle was installed. Returns
/// `true` if a task was aborted.
pub fn abort(&self) -> bool {
if let Some(handle) = self
.abort_handle
.lock()
.expect("abort_handle poisoned")
.take()
{
handle.abort();
true
} else {
false
}
}
/// Push an event into the buffer. Evicts oldest events if the buffer
/// exceeds `MAX_BUFFERED_EVENTS`. Notifies all waiting SSE connections.
pub async fn push_event(&self, event: ChatStreamEvent) {
{
let mut events = self.events.lock().await;
// Evict oldest events if we've hit the cap.
if events.len() >= MAX_BUFFERED_EVENTS {
// Drop the oldest event to make room and advance the base
// index so skip_before stays absolute across connections.
events.remove(0);
self.base_index.fetch_add(1, Ordering::Relaxed);
}
events.push(event);
// Increment while holding the buffer lock so the counter stays in
// lock-step with the buffer even if multiple writers ever exist.
self.total_events_pushed.fetch_add(1, Ordering::Relaxed);
}
self.notify.notify_waiters();
}
/// Get a snapshot of turn metadata for the `turn_info` SSE event.
pub async fn info(&self) -> TurnInfo {
let events = self.events.lock().await;
let buffered = events.len() as u32;
let total = self.total_events_pushed.load(Ordering::Relaxed);
drop(events);
TurnInfo {
turn_id: self.turn_id.clone(),
file_path: self.file_path.clone(),
library_id: self.library_id,
status: self.status.load(Ordering::Relaxed).into(),
total_events_pushed: total,
buffered_count: buffered,
}
}
/// Set the terminal status and notify all waiters.
pub fn set_terminal_status(&self, status: TurnStatus) {
self.status.store(status as u32, Ordering::Relaxed);
self.notify.notify_waiters();
}
/// Read buffered events at or after absolute index `skip_before` without
/// waiting. Distinguishes "evicted" (Gone) from "caught up" (no new
/// events yet) — the previous boolean/`Option` API conflated the two.
pub async fn replay_from(&self, skip_before: u32) -> ReplayOutcome {
let events = self.events.lock().await;
let base = self.base_index.load(Ordering::Relaxed);
// The buffer holds absolute indices [base, base + len). A request
// below `base` asked for events that have been evicted.
if skip_before < base {
return ReplayOutcome::Gone;
}
let offset = (skip_before - base) as usize;
let next_skip = base + events.len() as u32;
if offset >= events.len() {
// Caught up to (or past) the live edge — nothing new yet.
return ReplayOutcome::CaughtUp { next_skip };
}
ReplayOutcome::Events {
events: events[offset..].to_vec(),
next_skip,
}
}
/// Wait for the next batch of events past `skip_before`, the turn to
/// finish, or eviction. Returns:
/// - `Events` when new events are available (drained before any terminal
/// signal so the final `Done`/`Error` is never dropped),
/// - `CaughtUp` only when the turn has reached a terminal status and the
/// reader is fully drained (the caller should close the stream),
/// - `Gone` when `skip_before` points into evicted territory.
pub async fn next_batch(&self, skip_before: u32) -> ReplayOutcome {
loop {
// Register interest BEFORE inspecting state so a push/terminal that
// races between our read and our await can't be lost (Notify's
// `notify_waiters` does not store a permit).
let notified = self.notify.notified();
tokio::pin!(notified);
notified.as_mut().enable();
match self.replay_from(skip_before).await {
ReplayOutcome::CaughtUp { next_skip } => {
// No new events. If the turn is finished, every event
// (including the terminal one) has already been drained
// above on a prior call, so signal the caller to close.
if !self.is_running() {
return ReplayOutcome::CaughtUp { next_skip };
}
// Still running — wait for the next push or terminal.
}
other => return other, // Events or Gone
}
notified.await;
}
}
/// Check if this turn is still running.
pub fn is_running(&self) -> bool {
self.status.load(Ordering::Relaxed) == TurnStatus::Running as u32
}
}
/// In-memory registry of all active chat turns. Injected into `AppState`
/// and shared across all handlers.
pub struct TurnRegistry {
entries: Mutex<HashMap<String, Arc<TurnEntry>>>,
timeout_secs: u64,
}
impl TurnRegistry {
pub fn new(timeout_secs: u64) -> Self {
Self {
entries: Mutex::new(HashMap::new()),
timeout_secs,
}
}
/// Returns the cleanup timeout in seconds.
pub fn timeout_secs(&self) -> u64 {
self.timeout_secs
}
/// Insert a new turn entry. Returns the turn_id.
pub async fn insert(&self, entry: Arc<TurnEntry>) -> String {
let turn_id = entry.turn_id.clone();
let mut entries = self.entries.lock().await;
entries.insert(turn_id.clone(), entry);
turn_id
}
/// Look up a turn by id. Returns None if not found or expired.
pub async fn get(&self, turn_id: &str) -> Option<Arc<TurnEntry>> {
let entries = self.entries.lock().await;
entries.get(turn_id).cloned()
}
/// Clean up stale entries older than the timeout. Returns the count of
/// entries removed.
pub async fn cleanup_stale(&self) -> usize {
let mut entries = self.entries.lock().await;
let _now = Instant::now();
let stale: Vec<String> = entries
.iter()
.filter(|(_, entry)| entry.created_at.elapsed().as_secs() > self.timeout_secs)
.map(|(id, _)| id.clone())
.collect();
for id in &stale {
entries.remove(id);
}
if !stale.is_empty() {
log::info!(
"TurnRegistry: cleaned up {} stale entries (timeout={}s)",
stale.len(),
self.timeout_secs
);
}
stale.len()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ai::insight_chat::ChatStreamEvent;
use std::time::Duration;
/// Unwrap the events from a `ReplayOutcome::Events`, panicking otherwise.
fn events_of(outcome: ReplayOutcome) -> Vec<ChatStreamEvent> {
match outcome {
ReplayOutcome::Events { events, .. } => events,
other => panic!("expected Events, got {other:?}"),
}
}
// ── TurnStatus ──────────────────────────────────────────────────
#[test]
fn turn_status_from_u32_valid_values() {
assert_eq!(TurnStatus::from(0), TurnStatus::Running);
assert_eq!(TurnStatus::from(1), TurnStatus::Done);
assert_eq!(TurnStatus::from(2), TurnStatus::Error);
assert_eq!(TurnStatus::from(3), TurnStatus::Cancelled);
}
#[test]
fn turn_status_from_u32_unknown_defaults_to_running() {
assert_eq!(TurnStatus::from(4), TurnStatus::Running);
assert_eq!(TurnStatus::from(u32::MAX), TurnStatus::Running);
}
#[test]
fn turn_status_as_str() {
assert_eq!(TurnStatus::Running.as_str(), "running");
assert_eq!(TurnStatus::Done.as_str(), "done");
assert_eq!(TurnStatus::Error.as_str(), "error");
assert_eq!(TurnStatus::Cancelled.as_str(), "cancelled");
}
// ── TurnEntry ───────────────────────────────────────────────────
#[tokio::test]
async fn turn_entry_push_and_replay() {
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
1,
));
entry
.push_event(ChatStreamEvent::TextDelta("hello".to_string()))
.await;
entry
.push_event(ChatStreamEvent::TextDelta(" world".to_string()))
.await;
let events = events_of(entry.replay_from(0).await);
assert_eq!(events.len(), 2);
}
#[tokio::test]
async fn turn_entry_replay_with_skip() {
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
1,
));
for i in 0..5 {
entry
.push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
.await;
}
// skip_before=0 → all 5 events
let all = events_of(entry.replay_from(0).await);
assert_eq!(all.len(), 5);
// skip_before=2 → events 2,3,4 (3 events)
let skipped = events_of(entry.replay_from(2).await);
assert_eq!(skipped.len(), 3);
// skip_before=5 → caught up to the live edge (not Gone).
assert!(matches!(
entry.replay_from(5).await,
ReplayOutcome::CaughtUp { next_skip: 5 }
));
}
#[tokio::test]
async fn turn_entry_replay_empty_by_default() {
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
1,
));
// Empty buffer with skip_before=0 → caught up (nothing to replay yet).
assert!(matches!(
entry.replay_from(0).await,
ReplayOutcome::CaughtUp { next_skip: 0 }
));
}
#[tokio::test]
async fn turn_entry_is_running_initially() {
let entry = TurnEntry::new("t1".to_string(), "/photo.jpg".to_string(), 1);
assert!(entry.is_running());
}
#[tokio::test]
async fn turn_entry_set_terminal_status() {
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
1,
));
assert!(entry.is_running());
entry.set_terminal_status(TurnStatus::Done);
assert!(!entry.is_running());
}
#[tokio::test]
async fn turn_entry_info() {
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
42,
));
entry
.push_event(ChatStreamEvent::TextDelta("x".to_string()))
.await;
entry.set_terminal_status(TurnStatus::Done);
let info = entry.info().await;
assert_eq!(info.turn_id, "t1");
assert_eq!(info.file_path, "/photo.jpg");
assert_eq!(info.library_id, 42);
assert_eq!(info.status, TurnStatus::Done);
assert_eq!(info.total_events_pushed, 1);
assert_eq!(info.buffered_count, 1);
}
#[tokio::test]
async fn turn_entry_eviction_caps_buffer() {
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
1,
));
// Push MAX_BUFFERED_EVENTS + 10 events.
for i in 0..(MAX_BUFFERED_EVENTS + 10) {
entry
.push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
.await;
}
// Asking from absolute 0 after eviction is Gone (0-9 were dropped).
assert!(matches!(entry.replay_from(0).await, ReplayOutcome::Gone));
// Reading from the new base (10) returns the full capped buffer.
let events = events_of(entry.replay_from(10).await);
assert_eq!(events.len(), MAX_BUFFERED_EVENTS);
// First event should be at index 10 (0-9 were evicted).
if let ChatStreamEvent::TextDelta(s) = &events[0] {
assert_eq!(s, "e10");
} else {
panic!("expected TextDelta");
}
// Last event should be at index MAX_BUFFERED_EVENTS + 9.
if let ChatStreamEvent::TextDelta(s) = &events[events.len() - 1] {
assert_eq!(s, &format!("e{}", MAX_BUFFERED_EVENTS + 9));
} else {
panic!("expected TextDelta");
}
}
#[tokio::test]
async fn turn_entry_replay_evicted_index_is_gone() {
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
1,
));
// Push one past the cap so exactly one event (index 0) is evicted.
for i in 0..=MAX_BUFFERED_EVENTS {
entry
.push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
.await;
}
// Base is now 1; asking from absolute 0 is evicted territory → Gone.
assert!(matches!(entry.replay_from(0).await, ReplayOutcome::Gone));
// skip_before = MAX_BUFFERED_EVENTS → last event only (index valid).
let last = events_of(entry.replay_from(MAX_BUFFERED_EVENTS as u32).await);
assert_eq!(last.len(), 1);
// skip_before = MAX_BUFFERED_EVENTS + 1 → caught up to the live edge.
assert!(matches!(
entry.replay_from((MAX_BUFFERED_EVENTS + 1) as u32).await,
ReplayOutcome::CaughtUp { .. }
));
}
// ── TurnRegistry ────────────────────────────────────────────────
#[tokio::test]
async fn turn_registry_insert_and_get() {
let registry = TurnRegistry::new(300);
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
1,
));
let id = registry.insert(entry).await;
assert_eq!(id, "t1");
let retrieved = registry.get("t1").await;
assert!(retrieved.is_some());
assert_eq!(retrieved.unwrap().turn_id, "t1");
}
#[tokio::test]
async fn turn_registry_get_nonexistent_returns_none() {
let registry = TurnRegistry::new(300);
assert!(registry.get("nonexistent").await.is_none());
}
#[tokio::test]
async fn turn_registry_cleanup_stale_removes_old_entries() {
let registry = TurnRegistry::new(0);
let mut entry = TurnEntry::new("t1".to_string(), "/photo.jpg".to_string(), 1);
entry.created_at = Instant::now() - Duration::from_secs(1);
registry.insert(Arc::new(entry)).await;
let cleaned = registry.cleanup_stale().await;
assert_eq!(cleaned, 1);
assert!(registry.get("t1").await.is_none());
}
#[tokio::test]
async fn turn_registry_cleanup_stale_preserves_recent() {
let registry = TurnRegistry::new(3600); // 1 hour
let entry = Arc::new(TurnEntry::new(
"t1".to_string(),
"/photo.jpg".to_string(),
1,
));
registry.insert(entry).await;
let cleaned = registry.cleanup_stale().await;
assert_eq!(cleaned, 0);
assert!(registry.get("t1").await.is_some());
}
#[tokio::test]
async fn turn_registry_cleanup_stale_multiple() {
let registry = TurnRegistry::new(0);
for i in 0..5 {
let mut entry = TurnEntry::new(format!("t{i}"), "/photo.jpg".to_string(), 1);
entry.created_at = Instant::now() - Duration::from_secs(1);
registry.insert(Arc::new(entry)).await;
}
let cleaned = registry.cleanup_stale().await;
assert_eq!(cleaned, 5);
}
#[tokio::test]
async fn turn_registry_timeout_secs() {
let registry = TurnRegistry::new(600);
assert_eq!(registry.timeout_secs(), 600);
}
// ── next_batch / live replay ────────────────────────────────────
/// Drain a turn the way the SSE replay handler does: pull batches via
/// `next_batch` until the turn is finished and fully drained.
async fn drain_to_end(entry: Arc<TurnEntry>) -> Vec<ChatStreamEvent> {
let mut out = Vec::new();
let mut skip = 0u32;
while let ReplayOutcome::Events { events, next_skip } = entry.next_batch(skip).await {
out.extend(events);
skip = next_skip;
}
out
}
fn is_terminal(ev: &ChatStreamEvent) -> bool {
matches!(ev, ChatStreamEvent::Done { .. } | ChatStreamEvent::Error(_))
}
/// The core guarantee behind the replay rewrite: a reader waiting on
/// `next_batch` always receives the terminal event, even though the
/// writer flips status to terminal immediately after pushing it.
#[tokio::test]
async fn next_batch_always_delivers_terminal_event() {
for _ in 0..50 {
let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
let writer = entry.clone();
let w = tokio::spawn(async move {
writer
.push_event(ChatStreamEvent::IterationStart { n: 1, max: 6 })
.await;
writer
.push_event(ChatStreamEvent::TextDelta("hi".into()))
.await;
// Push terminal then flip status with no await between — the
// race that previously dropped the Done on the reader side.
writer
.push_event(ChatStreamEvent::Done {
tool_calls_made: 0,
iterations_used: 1,
truncated: false,
prompt_tokens: None,
eval_tokens: None,
num_ctx: None,
amended_insight_id: None,
backend_used: "local".into(),
model_used: "m".into(),
cancelled: false,
})
.await;
writer.set_terminal_status(TurnStatus::Done);
});
let events = drain_to_end(entry).await;
w.await.unwrap();
assert!(
events.last().is_some_and(is_terminal),
"terminal event missing; got {} events",
events.len()
);
assert_eq!(events.len(), 3, "expected IterationStart, TextDelta, Done");
}
}
/// A reader that connects before any event is pushed blocks in
/// `next_batch` and then receives events as the writer produces them.
#[tokio::test]
async fn next_batch_waits_for_late_events() {
let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
let writer = entry.clone();
tokio::spawn(async move {
tokio::task::yield_now().await;
writer
.push_event(ChatStreamEvent::TextDelta("late".into()))
.await;
writer.set_terminal_status(TurnStatus::Done);
});
// First call blocks until the writer pushes, rather than returning
// CaughtUp on the empty buffer of a running turn.
match entry.next_batch(0).await {
ReplayOutcome::Events { events, next_skip } => {
assert_eq!(events.len(), 1);
assert_eq!(next_skip, 1);
}
other => panic!("expected Events, got {other:?}"),
}
}
#[tokio::test]
async fn next_batch_closes_on_terminal_when_caught_up() {
let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
entry
.push_event(ChatStreamEvent::TextDelta("x".into()))
.await;
entry.set_terminal_status(TurnStatus::Done);
// Caught up (skip past the one buffered event) on a finished turn →
// CaughtUp so the handler closes the stream rather than hanging.
assert!(matches!(
entry.next_batch(1).await,
ReplayOutcome::CaughtUp { .. }
));
}
#[tokio::test]
async fn next_batch_reports_gone_for_evicted_index() {
let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
for i in 0..=MAX_BUFFERED_EVENTS {
entry
.push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
.await;
}
// Index 0 was evicted (base advanced to 1).
assert!(matches!(entry.next_batch(0).await, ReplayOutcome::Gone));
}
// ── abort handle (#1 cancellation) ──────────────────────────────
#[tokio::test]
async fn abort_handle_aborts_task_once() {
let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
// No handle installed yet → abort is a no-op.
assert!(!entry.abort());
let handle = tokio::spawn(async {
// Long-lived task that only ends via abort.
futures::future::pending::<()>().await;
});
entry.set_abort_handle(handle.abort_handle());
assert!(entry.abort(), "first abort should fire");
assert!(!entry.abort(), "handle is taken; second abort is a no-op");
// The aborted task resolves to a cancellation JoinError.
let join = handle.await;
assert!(join.unwrap_err().is_cancelled());
}
#[tokio::test]
async fn base_index_tracks_eviction() {
let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
for i in 0..(MAX_BUFFERED_EVENTS + 5) {
entry
.push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
.await;
}
let info = entry.info().await;
// 5 events evicted; total keeps climbing, buffer stays capped.
assert_eq!(info.total_events_pushed, (MAX_BUFFERED_EVENTS + 5) as u32);
assert_eq!(info.buffered_count, MAX_BUFFERED_EVENTS as u32);
// First live index is 5: reading from there yields the full buffer.
let from_base = events_of(entry.replay_from(5).await);
assert_eq!(from_base.len(), MAX_BUFFERED_EVENTS);
}
}
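Review bot: In `next_batch`, the `is_running()` check runs only after `replay_from` has returned and released the events lock, so a writer on another worker thread can push the terminal event and call `set_terminal_status` in that window. The reader then holds a stale `CaughtUp`, sees a non-running status, and returns `CaughtUp` without ever delivering the final `Done`/`Error`, which is the drop the doc comment says cannot happen. Suggest sampling the status before calling `replay_from` (for example `let finished = !self.is_running();` at the top of the loop body) and closing only when that earlier read was terminal. Because the writer pushes before it flips status, a terminal status observed first means the replay that follows already contains the last event. That load should also use Acquire ordering paired with a Release store rather than the `Relaxed` in `is_running`. `next_batch_always_delivers_terminal_event` runs under plain `#[tokio::test]`, which is a current-thread runtime, so it does not exercise this interleaving; a `flavor = "multi_thread"` variant would. Separately, `cleanup_stale` filters on `created_at` alone, so a turn still running past `timeout_secs` is dropped from the registry without its abort handle being fired. The `let _now = Instant::now();` binding is unused, and the doc on `get` promises `None` for expired entries although it performs no expiry check.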
+796
@@ -0,0 +1,796 @@
//! Per-tick drains the watcher runs alongside ingest.
//!
//! These passes were previously inlined in `main.rs`; they exist because
//! a quick scan only walks recently-modified files, so any backlog of
//! rows missing a `content_hash` / `date_taken` / face detection
//! wouldn't otherwise drain except during the once-an-hour full scan.
//! Each function is bounded per call by a `*_PER_TICK` env-var cap.
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use log::{debug, info, warn};
use crate::content_hash;
use crate::database::ExifDao;
use crate::date_resolver;
use crate::face_watch;
use crate::faces;
use crate::file_types;
use crate::libraries;
use crate::tags;
/// Compute and persist content_hash for image_exif rows where it's NULL.
///
/// Bounded per call by `FACE_HASH_BACKFILL_MAX_PER_TICK` (default 2000)
/// so a watcher tick on a large legacy library doesn't block for hours
/// blake3-ing every photo at once. Subsequent scans pick up the rest.
/// For 50k+ libraries the dedicated `cargo run --bin backfill_hashes`
/// is still faster (it doesn't fight a watcher loop for the DAO mutex).
///
/// Drains unhashed image_exif rows by querying them directly, independent
/// of the filesystem walk. Quick scans only walk recently-modified files,
/// so a backlog of pre-existing unhashed rows never enters
/// `process_new_files`'s candidate set — left alone, it would only drain
/// on full scans (default once an hour). Calling this every tick keeps
/// the face-detection backlog moving regardless.
///
/// Returns the number of rows successfully backfilled this pass.
pub fn backfill_unhashed_backlog(
context: &opentelemetry::Context,
library: &libraries::Library,
exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
) -> usize {
let cap: i64 = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK")
.ok()
.and_then(|s| s.parse().ok())
.filter(|n: &i64| *n > 0)
.unwrap_or(2000);
// Fetch up to cap+1 rows so we can tell "more remain" without a
// separate count query. Across libraries — there's no per-library
// filter on get_rows_missing_hash today — but we only ever update
// rows whose library_id matches the caller's library, so other
// libraries' rows just get skipped here and picked up on the next
// library's tick. Negligible cost given the cap.
let rows: Vec<(i32, String)> = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
dao.get_rows_missing_hash(context, cap + 1)
.unwrap_or_default()
};
if rows.is_empty() {
return 0;
}
let more_than_cap = rows.len() as i64 > cap;
let base_path = std::path::Path::new(&library.root_path);
let mut backfilled = 0usize;
let mut errors = 0usize;
let mut skipped_other_lib = 0usize;
for (lib_id, rel_path) in rows.iter().take(cap as usize) {
if *lib_id != library.id {
skipped_other_lib += 1;
continue;
}
let abs = base_path.join(rel_path);
if !abs.exists() {
// File walked away — the watcher's reconciliation pass will
// remove the orphan exif row eventually.
continue;
}
match content_hash::compute(&abs) {
Ok(id) => {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
if let Err(e) = dao.backfill_content_hash(
context,
library.id,
rel_path,
&id.content_hash,
id.size_bytes,
) {
warn!(
"face_watch: backfill_content_hash failed for {}: {:?}",
rel_path, e
);
errors += 1;
} else {
backfilled += 1;
}
}
Err(e) => {
debug!(
"face_watch: hash compute failed for {} ({:?})",
abs.display(),
e
);
errors += 1;
}
}
}
if backfilled > 0 || errors > 0 || more_than_cap {
info!(
"face_watch: backfill pass for library '{}': hashed {} ({} error(s), {} skipped to other libraries; {} cap, more_remain={})",
library.name, backfilled, errors, skipped_other_lib, cap, more_than_cap
);
}
backfilled
}
/// Drain image_exif rows whose `date_taken` was never resolved or was
/// resolved by the weakest fallback (`fs_time`). Runs the canonical-date
/// waterfall — exiftool batch (one subprocess for the whole tick's
/// rows) → filename regex → earliest_fs_time — and persists each
/// resolution with its source tag. Capped per tick by
/// `DATE_BACKFILL_MAX_PER_TICK` (default 500) so a 14k-row library
/// drains over a few quick-scan ticks without blocking the watcher.
///
/// kamadak-exif is intentionally skipped here: the row already has a
/// NULL date_taken because the ingest path's kamadak-exif call returned
/// nothing, and re-running it would just produce the same answer.
/// exiftool is the meaningful new attempt — it handles videos and
/// MakerNote-hosted dates kamadak can't reach.
pub fn backfill_missing_date_taken(
context: &opentelemetry::Context,
library: &libraries::Library,
exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
) -> usize {
let cap: i64 = dotenv::var("DATE_BACKFILL_MAX_PER_TICK")
.ok()
.and_then(|s| s.parse().ok())
.filter(|n: &i64| *n > 0)
.unwrap_or(500);
let rows: Vec<(i32, String)> = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
dao.get_rows_needing_date_backfill(context, library.id, cap + 1)
.unwrap_or_default()
};
if rows.is_empty() {
return 0;
}
let more_than_cap = rows.len() as i64 > cap;
let base_path = std::path::Path::new(&library.root_path);
// Build absolute paths and drop rows whose files no longer exist —
// the missing-file scan in library_maintenance retires deleted rows
// separately. Without this filter, NULL-date rows for missing files
// would loop through the drain forever (no source can resolve them).
let mut existing: Vec<(String, PathBuf)> = Vec::with_capacity(rows.len());
for (_, rel_path) in rows.iter().take(cap as usize) {
let abs = base_path.join(rel_path);
if abs.exists() {
existing.push((rel_path.clone(), abs));
}
}
if existing.is_empty() {
return 0;
}
// One exiftool subprocess for the whole batch; the resolver falls
// through to filename / fs_time per file when exiftool can't supply
// a date (or isn't installed at all).
let paths: Vec<PathBuf> = existing.iter().map(|(_, p)| p.clone()).collect();
let resolved = date_resolver::resolve_dates_batch(&paths, &HashMap::new());
let mut backfilled = 0usize;
let mut unresolved = 0usize;
let mut by_source: HashMap<&'static str, usize> = HashMap::new();
{
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
for (rel_path, abs) in &existing {
let Some(rd) = resolved.get(abs).copied() else {
unresolved += 1;
continue;
};
match dao.backfill_date_taken(
context,
library.id,
rel_path,
rd.timestamp,
rd.source.as_str(),
) {
Ok(()) => {
backfilled += 1;
*by_source.entry(rd.source.as_str()).or_insert(0) += 1;
}
Err(e) => {
warn!(
"date_backfill: update failed for lib {} {}: {:?}",
library.id, rel_path, e
);
}
}
}
}
if backfilled > 0 || unresolved > 0 || more_than_cap {
info!(
"date_backfill: library '{}': resolved {} ({:?}), {} unresolved, cap={}, more_remain={}",
library.name, backfilled, by_source, unresolved, cap, more_than_cap
);
}
backfilled
}
/// Per-tick face-detection drain. Pulls a capped batch of hashed-but-
/// unscanned image_exif rows directly via the FaceDao anti-join and
/// hands them to the existing detection pass. Runs on every tick (not
/// just full scans) so the backlog moves at quick-scan cadence.
/// Per-tick CLIP encoding drain. Mirrors `process_face_backlog`: pull
/// up to `CLIP_BACKLOG_MAX_PER_TICK` candidates with a known
/// `content_hash` but no `clip_embedding`, hand them to
/// `clip_watch::run_clip_encoding_pass` for parallel fan-out, and let
/// that module write the result back via `backfill_clip_embedding`.
///
/// Idempotent — a row stays in the candidate set until its embedding
/// lands, so a transient failure (Apollo unreachable, CUDA OOM) just
/// defers to the next tick. Permanent failures (un-decodable bytes)
/// retry every tick at this point; future Branch may add a status
/// column like face_detections has.
pub fn process_clip_backlog(
context: &opentelemetry::Context,
library: &libraries::Library,
clip_client: &crate::ai::clip_client::ClipClient,
exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
excluded_dirs: &[String],
) {
if !clip_client.is_enabled() {
return;
}
let cap: i64 = dotenv::var("CLIP_BACKLOG_MAX_PER_TICK")
.ok()
.and_then(|s| s.parse().ok())
.filter(|n: &i64| *n > 0)
.unwrap_or(32);
let rows: Vec<(String, String)> = {
let mut dao = exif_dao.lock().expect("exif dao");
match dao.list_clip_unencoded_candidates(context, library.id, cap) {
Ok(r) => r,
Err(e) => {
warn!(
"clip_watch: list_clip_unencoded_candidates failed for library '{}': {:?}",
library.name, e
);
return;
}
}
};
if rows.is_empty() {
return;
}
info!(
"clip_watch: backlog drain — encoding {} candidate(s) for library '{}' (cap={})",
rows.len(),
library.name,
cap
);
let candidates: Vec<crate::clip_watch::ClipCandidate> = rows
.into_iter()
.map(
|(rel_path, content_hash)| crate::clip_watch::ClipCandidate {
rel_path,
content_hash,
},
)
.collect();
crate::clip_watch::run_clip_encoding_pass(
library,
excluded_dirs,
clip_client,
Arc::clone(exif_dao),
candidates,
);
}
pub fn process_face_backlog(
context: &opentelemetry::Context,
library: &libraries::Library,
face_client: &crate::ai::face_client::FaceClient,
face_dao: &Arc<Mutex<Box<dyn faces::FaceDao>>>,
tag_dao: &Arc<Mutex<Box<dyn tags::TagDao>>>,
excluded_dirs: &[String],
) {
let cap: i64 = dotenv::var("FACE_BACKLOG_MAX_PER_TICK")
.ok()
.and_then(|s| s.parse().ok())
.filter(|n: &i64| *n > 0)
.unwrap_or(64);
let rows: Vec<(String, String)> = {
let mut dao = face_dao.lock().expect("face dao");
match dao.list_unscanned_candidates(context, library.id, cap) {
Ok(r) => r,
Err(e) => {
warn!(
"face_watch: list_unscanned_candidates failed for library '{}': {:?}",
library.name, e
);
return;
}
}
};
if rows.is_empty() {
return;
}
info!(
"face_watch: backlog drain — running detection on {} candidate(s) for library '{}' (cap={})",
rows.len(),
library.name,
cap
);
let candidates: Vec<face_watch::FaceCandidate> = rows
.into_iter()
.map(|(rel_path, content_hash)| face_watch::FaceCandidate {
rel_path,
content_hash,
})
.collect();
face_watch::run_face_detection_pass(
library,
excluded_dirs,
face_client,
Arc::clone(face_dao),
Arc::clone(tag_dao),
candidates,
);
}
/// Compute content_hash for any image rows the walker just touched
/// whose stored EXIF row is still hash-less. Called from
/// `process_new_files` so freshly-ingested files don't have to wait for
/// the next standalone `backfill_unhashed_backlog` tick before face
/// detection can key on their bytes.
///
/// Cap is on **successes only**. An earlier version counted errors too,
/// so a pocket of chronically-unhashable files at the front of the
/// table (vanished mid-scan, permission denied, etc.) burned the budget
/// every tick and the rest of the backlog never advanced.
pub fn backfill_missing_content_hashes(
context: &opentelemetry::Context,
files: &[(PathBuf, String)],
library: &libraries::Library,
exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
) {
let image_paths: Vec<String> = files
.iter()
.filter(|(p, _)| !file_types::is_video_file(p))
.map(|(_, rel)| rel.clone())
.collect();
if image_paths.is_empty() {
return;
}
let exif_records = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
dao.get_exif_batch(context, Some(library.id), &image_paths)
.unwrap_or_default()
};
// Cheap lookup back from rel_path → absolute file_path so
// content_hash::compute can read the bytes.
let path_by_rel: HashMap<String, &PathBuf> =
files.iter().map(|(p, rel)| (rel.clone(), p)).collect();
let cap: usize = dotenv::var("FACE_HASH_BACKFILL_MAX_PER_TICK")
.ok()
.and_then(|s| s.parse().ok())
.filter(|n: &usize| *n > 0)
.unwrap_or(2000);
// Count the unhashed backlog up front so we can surface "still needs
// backfill: N" in the log — without it, a face-scan that's stuck at
// 44% looks stalled when really it's chipping through hashes.
let unhashed_total = exif_records
.iter()
.filter(|r| r.content_hash.is_none())
.count();
let mut backfilled = 0usize;
let mut errors = 0usize;
for record in &exif_records {
if backfilled >= cap {
break;
}
if record.content_hash.is_some() {
continue;
}
let Some(file_path) = path_by_rel.get(&record.file_path) else {
// Walked file went missing between the directory scan and now;
// next tick will retry naturally.
continue;
};
match content_hash::compute(file_path) {
Ok(id) => {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
if let Err(e) = dao.backfill_content_hash(
context,
library.id,
&record.file_path,
&id.content_hash,
id.size_bytes,
) {
warn!(
"face_watch: backfill_content_hash failed for {}: {:?}",
record.file_path, e
);
errors += 1;
} else {
backfilled += 1;
}
}
Err(e) => {
debug!(
"face_watch: hash compute failed for {} ({:?})",
file_path.display(),
e
);
errors += 1;
}
}
}
// Always log when there's an unhashed backlog so an operator
// looking at "scan stuck at 44%" can see backfill is running and
// how much remains. Quiet only when there's nothing to do.
if unhashed_total > 0 || backfilled > 0 || errors > 0 {
let remaining = unhashed_total.saturating_sub(backfilled);
info!(
"face_watch: backfilled {}/{} content_hash for library '{}' ({} error(s); {} still need backfill; cap={})",
backfilled, unhashed_total, library.name, errors, remaining, cap
);
}
}
/// Build the face-detection candidate list for a scan tick.
///
/// Returns `(rel_path, content_hash)` for every image file that has a
/// content_hash recorded in image_exif but no row in face_detections
/// yet. Re-querying image_exif here picks up rows the EXIF write loop
/// just inserted alongside any pre-existing rows the watcher walked
/// over — covers both new uploads and the initial backlog scan.
pub fn build_face_candidates(
context: &opentelemetry::Context,
library: &libraries::Library,
files: &[(PathBuf, String)],
exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
face_dao: &Arc<Mutex<Box<dyn faces::FaceDao>>>,
) -> Vec<face_watch::FaceCandidate> {
// Restrict to image files; videos aren't face-scanned in v1 (kamadak
// doesn't even register them in image_exif).
let image_paths: Vec<String> = files
.iter()
.filter(|(p, _)| !file_types::is_video_file(p))
.map(|(_, rel)| rel.clone())
.collect();
if image_paths.is_empty() {
return Vec::new();
}
let exif_records = {
let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
dao.get_exif_batch(context, Some(library.id), &image_paths)
.unwrap_or_default()
};
// rel_path → content_hash (only rows with a hash; without one we have
// nothing to key face data against).
let mut hash_by_path: HashMap<String, String> = HashMap::with_capacity(exif_records.len());
for record in exif_records {
if let Some(h) = record.content_hash {
hash_by_path.insert(record.file_path, h);
}
}
let mut candidates = Vec::new();
let mut dao = face_dao.lock().expect("face dao");
for rel_path in image_paths {
let Some(hash) = hash_by_path.get(&rel_path) else {
continue;
};
match dao.already_scanned(context, hash) {
Ok(true) => continue,
Ok(false) => candidates.push(face_watch::FaceCandidate {
rel_path,
content_hash: hash.clone(),
}),
Err(e) => {
warn!("face_watch: already_scanned errored for {}: {:?}", hash, e);
}
}
}
candidates
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use std::sync::{Arc, Mutex};
use diesel::prelude::*;
use tempfile::TempDir;
use crate::database::models::{InsertImageExif, InsertLibrary};
use crate::database::test::in_memory_db_connection;
use crate::database::{ExifDao, SqliteExifDao, schema};
use crate::faces::{FaceDao, SqliteFaceDao};
use crate::libraries::Library;
fn ctx() -> opentelemetry::Context {
opentelemetry::Context::new()
}
/// Everything `setup` hands back to a test: tempdir, library, shared
/// connection, and the two DAOs. Aliased to keep clippy's
/// type-complexity lint satisfied.
type SetupFixture = (
TempDir,
Library,
Arc<Mutex<diesel::SqliteConnection>>,
Arc<Mutex<Box<dyn ExifDao>>>,
Arc<Mutex<Box<dyn FaceDao>>>,
);
/// Build a tempdir-backed library + DAOs sharing a single in-memory
/// SQLite connection (so cross-table joins like
/// `list_unscanned_candidates` see consistent state).
fn setup() -> SetupFixture {
let tmp = TempDir::new().expect("tempdir");
let mut conn = in_memory_db_connection();
// Migration seeds library id=1 with a placeholder root; rewrite it
// to point at the tempdir so `<root>/<rel_path>` resolves to real
// files this test creates.
diesel::update(schema::libraries::table.filter(schema::libraries::id.eq(1)))
.set(schema::libraries::root_path.eq(tmp.path().to_string_lossy().to_string()))
.execute(&mut conn)
.expect("rewrite library 1 root");
// Add a second library so cross-library skip cases have somewhere
// to put their rows.
diesel::insert_into(schema::libraries::table)
.values(InsertLibrary {
name: "other",
root_path: "/tmp/other-test-lib",
created_at: 0,
enabled: true,
excluded_dirs: None,
})
.execute(&mut conn)
.expect("seed second library");
let library = Library {
id: 1,
name: "main".to_string(),
root_path: tmp.path().to_string_lossy().to_string(),
enabled: true,
excluded_dirs: Vec::new(),
};
let shared = Arc::new(Mutex::new(conn));
let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(
SqliteExifDao::from_shared(Arc::clone(&shared)),
)));
let face_dao: Arc<Mutex<Box<dyn FaceDao>>> = Arc::new(Mutex::new(Box::new(
SqliteFaceDao::from_connection(Arc::clone(&shared)),
)));
(tmp, library, shared, exif_dao, face_dao)
}
fn insert_exif(
exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
lib_id: i32,
rel: &str,
content_hash: Option<&str>,
) {
let mut dao = exif_dao.lock().unwrap();
dao.store_exif(
&ctx(),
InsertImageExif {
library_id: lib_id,
file_path: rel.to_string(),
camera_make: None,
camera_model: None,
lens_model: None,
width: None,
height: None,
orientation: None,
gps_latitude: None,
gps_longitude: None,
gps_altitude: None,
focal_length: None,
aperture: None,
shutter_speed: None,
iso: None,
date_taken: None,
created_time: 0,
last_modified: 0,
content_hash: content_hash.map(|s| s.to_string()),
size_bytes: None,
phash_64: None,
dhash_64: None,
date_taken_source: None,
},
)
.expect("insert");
}
fn write_image(root: &std::path::Path, rel: &str, bytes: &[u8]) {
let abs = root.join(rel);
if let Some(parent) = abs.parent() {
fs::create_dir_all(parent).expect("mkdir");
}
fs::write(abs, bytes).expect("write file");
}
#[test]
fn backfill_unhashed_backlog_hashes_missing_rows_in_this_library() {
let (tmp, library, _conn, exif_dao, _face_dao) = setup();
write_image(tmp.path(), "a.jpg", b"alpha-bytes");
write_image(tmp.path(), "b.jpg", b"bravo-bytes");
insert_exif(&exif_dao, 1, "a.jpg", None);
insert_exif(&exif_dao, 1, "b.jpg", None);
let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
assert_eq!(backfilled, 2);
let mut dao = exif_dao.lock().unwrap();
let rows = dao
.get_exif_batch(&ctx(), Some(1), &["a.jpg".to_string(), "b.jpg".to_string()])
.unwrap();
assert_eq!(rows.len(), 2);
for r in rows {
assert!(
r.content_hash.is_some(),
"row {} should have a hash",
r.file_path
);
}
}
#[test]
fn backfill_unhashed_backlog_skips_other_libraries_and_missing_files() {
let (tmp, library, _conn, exif_dao, _face_dao) = setup();
write_image(tmp.path(), "exists.jpg", b"hello");
// Row for this library whose file is missing on disk:
insert_exif(&exif_dao, 1, "ghost.jpg", None);
insert_exif(&exif_dao, 1, "exists.jpg", None);
// Row in the other library — must be skipped (different lib_id).
insert_exif(&exif_dao, 2, "other.jpg", None);
let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
assert_eq!(backfilled, 1, "only the existing in-library file hashes");
let mut dao = exif_dao.lock().unwrap();
let other = dao
.get_exif_batch(&ctx(), Some(2), &["other.jpg".to_string()])
.unwrap();
assert_eq!(other.len(), 1);
assert!(
other[0].content_hash.is_none(),
"other-library row must remain unhashed"
);
let ghost = dao
.get_exif_batch(&ctx(), Some(1), &["ghost.jpg".to_string()])
.unwrap();
assert_eq!(ghost.len(), 1);
assert!(
ghost[0].content_hash.is_none(),
"missing-on-disk row stays unhashed (reconciliation removes it later)"
);
}
#[test]
fn backfill_unhashed_backlog_respects_per_tick_cap() {
// Env-var-driven cap; the function reads it on every call, so we
// can set it just for this test and unset before returning.
// Serial guard: tests in the same binary may share env, but each
// backfill call re-reads — and we only care that the cap shape
// (success count <= cap, more_remain logged) holds.
unsafe {
std::env::set_var("FACE_HASH_BACKFILL_MAX_PER_TICK", "2");
}
let (tmp, library, _conn, exif_dao, _face_dao) = setup();
for i in 0..5 {
let rel = format!("img_{}.jpg", i);
write_image(tmp.path(), &rel, format!("bytes-{}", i).as_bytes());
insert_exif(&exif_dao, 1, &rel, None);
}
let backfilled = backfill_unhashed_backlog(&ctx(), &library, &exif_dao);
assert_eq!(backfilled, 2, "cap=2 must bound the per-tick successes");
unsafe {
std::env::remove_var("FACE_HASH_BACKFILL_MAX_PER_TICK");
}
}
#[test]
fn backfill_missing_content_hashes_skips_videos_and_hashed_rows() {
let (tmp, library, _conn, exif_dao, _face_dao) = setup();
// Two image rows (one already hashed, one not), one video.
write_image(tmp.path(), "fresh.jpg", b"fresh-pixels");
write_image(tmp.path(), "already.jpg", b"already-pixels");
write_image(tmp.path(), "clip.mp4", b"video-bytes");
insert_exif(&exif_dao, 1, "fresh.jpg", None);
insert_exif(&exif_dao, 1, "already.jpg", Some("pre-existing-hash"));
insert_exif(&exif_dao, 1, "clip.mp4", None);
let files: Vec<(PathBuf, String)> = vec![
(tmp.path().join("fresh.jpg"), "fresh.jpg".to_string()),
(tmp.path().join("already.jpg"), "already.jpg".to_string()),
(tmp.path().join("clip.mp4"), "clip.mp4".to_string()),
];
backfill_missing_content_hashes(&ctx(), &files, &library, &exif_dao);
let mut dao = exif_dao.lock().unwrap();
let rows = dao
.get_exif_batch(
&ctx(),
Some(1),
&[
"fresh.jpg".to_string(),
"already.jpg".to_string(),
"clip.mp4".to_string(),
],
)
.unwrap();
let by_path: HashMap<String, Option<String>> = rows
.into_iter()
.map(|r| (r.file_path, r.content_hash))
.collect();
assert!(
by_path["fresh.jpg"].is_some(),
"fresh image must get a hash"
);
assert_eq!(
by_path["already.jpg"].as_deref(),
Some("pre-existing-hash"),
"already-hashed image left untouched"
);
assert!(
by_path["clip.mp4"].is_none(),
"video skipped (not face-scanned, no hash needed via this path)"
);
}
#[test]
fn build_face_candidates_filters_videos_unhashed_and_already_scanned() {
let (tmp, library, _conn, exif_dao, face_dao) = setup();
// Seed image_exif with: hashed unscanned, hashed scanned, unhashed,
// and a video. Files don't need to exist on disk — the function
// doesn't read them, only the DB rows.
insert_exif(&exif_dao, 1, "fresh.jpg", Some("hash-fresh"));
insert_exif(&exif_dao, 1, "scanned.jpg", Some("hash-scanned"));
insert_exif(&exif_dao, 1, "unhashed.jpg", None);
insert_exif(&exif_dao, 1, "clip.mp4", Some("hash-video"));
// Mark `scanned.jpg`'s hash as already detected.
{
let mut dao = face_dao.lock().unwrap();
dao.mark_status(&ctx(), 1, "hash-scanned", "scanned.jpg", "no_faces", "test")
.expect("mark scanned");
}
let files: Vec<(PathBuf, String)> = vec![
(tmp.path().join("fresh.jpg"), "fresh.jpg".to_string()),
(tmp.path().join("scanned.jpg"), "scanned.jpg".to_string()),
(tmp.path().join("unhashed.jpg"), "unhashed.jpg".to_string()),
(tmp.path().join("clip.mp4"), "clip.mp4".to_string()),
];
let candidates = build_face_candidates(&ctx(), &library, &files, &exif_dao, &face_dao);
assert_eq!(
candidates.len(),
1,
"exactly fresh.jpg should be a candidate"
);
assert_eq!(candidates[0].rel_path, "fresh.jpg");
assert_eq!(candidates[0].content_hash, "hash-fresh");
}
}
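Review bot: In `backfill_unhashed_backlog_respects_per_tick_cap`, the comment mentions a serial guard but the test takes no lock around `std::env::set_var("FACE_HASH_BACKFILL_MAX_PER_TICK", "2")`, while the two other `backfill_unhashed_backlog` tests in this module read the same process-wide variable and can run in parallel with it. Their row counts happen to fit under a cap of 2, so the race is hidden rather than absent, and if `assert_eq!(backfilled, 2, ...)` fails the `remove_var` call is never reached. Hold a shared test mutex or a drop guard that restores the variable around the mutation, or pass the cap into the function as a parameter so the test needs no `unsafe` block. The comment also says `more_remain` is logged, which nothing here asserts.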
+243
@@ -0,0 +1,243 @@
//! Backfill `image_exif.phash_64` + `dhash_64` for image rows that
//! were ingested before perceptual hashing was wired into the watcher.
//!
//! The watcher computes perceptual hashes for new images as they're
//! ingested, so this binary is a one-shot for the historical backlog.
//! Idempotent — only rows with a non-null content_hash and a null
//! phash are processed, so re-runs are safe and pick up where they
//! left off (e.g. after a crash or interrupt).
//!
//! Image-only by design: `get_rows_missing_perceptual_hash` filters by
//! file extension at the DB layer so videos and other non-decodable
//! media are skipped without round-tripping `image_hasher`. Files that
//! can't be opened (missing on disk, permission errors) are quietly
//! left as null and counted as "missing"; on next run, if the file is
//! restored, the row will surface again.
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use clap::Parser;
use log::{error, warn};
use rayon::prelude::*;
use image_api::bin_progress;
use image_api::database::{ExifDao, SqliteExifDao, connect};
use image_api::libraries::{self, Library};
use image_api::perceptual_hash;
#[derive(Parser, Debug)]
#[command(name = "backfill_perceptual_hash")]
#[command(about = "Compute pHash + dHash for image_exif rows missing one")]
struct Args {
/// Max rows to hash per batch. The process loops until no rows remain.
#[arg(long, default_value_t = 256)]
batch_size: i64,
/// Rayon parallelism override. 0 uses the default thread pool size.
#[arg(long, default_value_t = 0)]
parallelism: usize,
/// Dry-run: log what would be hashed without writing to the DB.
#[arg(long)]
dry_run: bool,
}
fn main() -> anyhow::Result<()> {
env_logger::init();
dotenv::dotenv().ok();
let args = Args::parse();
if args.parallelism > 0 {
rayon::ThreadPoolBuilder::new()
.num_threads(args.parallelism)
.build_global()
.expect("Unable to configure rayon thread pool");
}
let base_path = dotenv::var("BASE_PATH").ok();
let mut seed_conn = connect();
if let Some(base) = base_path.as_deref() {
libraries::seed_or_patch_from_env(&mut seed_conn, base);
}
let libs = libraries::load_all(&mut seed_conn);
drop(seed_conn);
if libs.is_empty() {
anyhow::bail!("No libraries configured; cannot backfill perceptual hashes");
}
let libs_by_id: std::collections::HashMap<i32, Library> =
libs.into_iter().map(|lib| (lib.id, lib)).collect();
println!(
"Configured libraries: {}",
libs_by_id
.values()
.map(|l| format!("{} -> {}", l.name, l.root_path))
.collect::<Vec<_>>()
.join(", ")
);
let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
let ctx = opentelemetry::Context::new();
let mut total_hashed = 0u64;
let mut total_missing = 0u64;
let mut total_decode_failures = 0u64;
let mut total_errors = 0u64;
let start = Instant::now();
let pb = bin_progress::spinner("perceptual-hashing");
loop {
let rows = {
let mut guard = dao.lock().expect("Unable to lock ExifDao");
guard
.get_rows_missing_perceptual_hash(&ctx, args.batch_size)
.map_err(|e| anyhow::anyhow!("DB error: {:?}", e))?
};
if rows.is_empty() {
break;
}
let batch_size = rows.len();
pb.set_message(format!(
"batch of {} (hashed={} decode_fail={} missing={} errors={})",
batch_size, total_hashed, total_decode_failures, total_missing, total_errors
));
// Compute perceptual hashes in parallel — CPU-bound, decoder
// releases the GIL-equivalent. rayon's default thread pool
// matches the host's logical-core count which is the right
// ceiling for image_hasher's DCT pass.
let results: Vec<(i32, String, FilePerceptualResult)> = rows
.into_par_iter()
.map(|(library_id, rel_path)| {
let abs = libs_by_id
.get(&library_id)
.map(|lib| Path::new(&lib.root_path).join(&rel_path));
match abs {
Some(abs_path) if abs_path.exists() => {
match perceptual_hash::compute(&abs_path) {
Some(id) => (library_id, rel_path, FilePerceptualResult::Ok(id)),
None => (library_id, rel_path, FilePerceptualResult::DecodeFailed),
}
}
Some(_) => (library_id, rel_path, FilePerceptualResult::MissingOnDisk),
None => {
warn!("Row refers to unknown library_id {}", library_id);
(library_id, rel_path, FilePerceptualResult::MissingOnDisk)
}
}
})
.collect();
// Persist sequentially — SQLite writes serialize anyway.
if !args.dry_run {
let mut guard = dao.lock().expect("Unable to lock ExifDao");
for (library_id, rel_path, result) in &results {
match result {
FilePerceptualResult::Ok(id) => {
match guard.backfill_perceptual_hash(
&ctx,
*library_id,
rel_path,
Some(id.phash_64),
Some(id.dhash_64),
) {
Ok(_) => {
total_hashed += 1;
pb.inc(1);
}
Err(e) => {
pb.println(format!("persist error for {}: {:?}", rel_path, e));
total_errors += 1;
}
}
}
FilePerceptualResult::DecodeFailed => {
// Persist phash_64=0/dhash_64=0 as a "tried,
// unhashable" sentinel so this row leaves the
// `phash_64 IS NULL` candidate set and the
// backfill doesn't infinite-loop on a queue of
// unbreakable formats (HEIC, RAW, CMYK JPEGs,
// truncated bytes). The all-zero hash is
// explicitly excluded from clustering by
// is_informative_hash in duplicates.rs, so it
// won't pollute group output — it just becomes
// invisible to the duplicate finder.
log::debug!(
"perceptual decode failed for {} (lib {}); marking unhashable",
rel_path,
library_id
);
match guard.backfill_perceptual_hash(
&ctx,
*library_id,
rel_path,
Some(0),
Some(0),
) {
Ok(_) => {
total_decode_failures += 1;
}
Err(e) => {
pb.println(format!(
"persist error (decode-fail sentinel) for {}: {:?}",
rel_path, e
));
total_errors += 1;
}
}
}
FilePerceptualResult::MissingOnDisk => {
total_missing += 1;
}
}
}
} else {
for (_, rel_path, result) in &results {
match result {
FilePerceptualResult::Ok(id) => {
pb.println(format!(
"[dry-run] {} -> phash={:016x} dhash={:016x}",
rel_path, id.phash_64, id.dhash_64
));
total_hashed += 1;
pb.inc(1);
}
FilePerceptualResult::DecodeFailed => {
total_decode_failures += 1;
}
FilePerceptualResult::MissingOnDisk => {
total_missing += 1;
}
}
}
pb.println(format!(
"[dry-run] processed one batch of {}. Stopping — a real run would continue \
until no NULL phash_64 image rows remain.",
results.len()
));
break;
}
}
pb.finish_and_clear();
println!(
"Done. hashed={}, decode_failed={}, skipped (missing on disk)={}, errors={}, elapsed={:.1}s",
total_hashed,
total_decode_failures,
total_missing,
total_errors,
start.elapsed().as_secs_f64()
);
if total_errors > 0 {
error!("Backfill completed with {} persist errors", total_errors);
}
Ok(())
}
enum FilePerceptualResult {
Ok(perceptual_hash::PerceptualIdentity),
DecodeFailed,
MissingOnDisk,
}
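Review bot: The main `loop` has no exit for rows that stay NULL. `get_rows_missing_perceptual_hash(&ctx, args.batch_size)` is called with no offset or cursor, the `MissingOnDisk` arm only increments `total_missing`, and a failed `backfill_perceptual_hash` only increments `total_errors`, so unless the DAO query filters them out (not visible here) those rows come back on every iteration and `rows.is_empty()` is never reached. The `DecodeFailed` arm already writes a 0/0 sentinel for exactly this reason; the missing-file and persist-error paths need an equivalent exit, for example breaking when a batch produces no successful writes, or paging past rows already seen in this run. Separately, an unknown `library_id` is folded into `MissingOnDisk`, which hides a configuration problem inside the "skipped (missing on disk)" total, and `env_logger::init()` runs before `dotenv::dotenv()`, so a `RUST_LOG` set in `.env` is not applied.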
+7 -19
@@ -1,7 +1,7 @@
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use chrono::Utc; use chrono::Utc;
use clap::Parser; use clap::Parser;
use image_api::ai::ollama::OllamaClient; use image_api::ai::LocalLlm;
use image_api::bin_progress; use image_api::bin_progress;
use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao}; use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao};
use image_api::parsers::ical_parser::parse_ics_file; use image_api::parsers::ical_parser::parse_ics_file;
@@ -44,22 +44,10 @@ async fn main() -> Result<()> {
let context = opentelemetry::Context::current(); let context = opentelemetry::Context::current();
let ollama = if args.generate_embeddings { // LocalLlm dispatches per LLM_BACKEND, so embeddings written here land
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL") // in the same vector space the query side searches.
.or_else(|_| dotenv::var("OLLAMA_URL")) let llm = if args.generate_embeddings {
.unwrap_or_else(|_| "http://localhost:11434".to_string()); Some(LocalLlm::from_env())
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
Some(OllamaClient::new(
primary_url,
fallback_url,
primary_model,
fallback_model,
))
} else { } else {
None None
}; };
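Review bot: The removed block honoured `OLLAMA_PRIMARY_URL`/`OLLAMA_URL`, `OLLAMA_FALLBACK_URL`, `OLLAMA_FALLBACK_MODEL` and a default model of `nomic-embed-text:v1.5`; after this hunk all of that is delegated to `LocalLlm::from_env()`, and nothing in the diff shows whether the fallback URL and model are still read. Please confirm that in the description, or note the migration for existing `.env` files. The new comment is also only true for rows written from now on: events imported before this change keep their old vectors until they are re-embedded, so it is worth pointing at the re-embed binary from here.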
@@ -90,7 +78,7 @@ async fn main() -> Result<()> {
} }
// Generate embedding if requested (blocking call) // Generate embedding if requested (blocking call)
let embedding = if let Some(ref ollama_client) = ollama { let embedding = if let Some(ref llm) = llm {
let text = format!( let text = format!(
"{} {} {}", "{} {} {}",
event.summary, event.summary,
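Review bot: The `format!("{} {} {}", event.summary, ...)` text built here is now a contract with the re-embed binary added in this commit, whose module docs say it re-embeds calendar rows "exactly as `import_calendar` does". Two hand-written copies of that format will drift; consider a shared helper that both binaries call, so a re-embed always feeds the embedder the same text as the import.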
@@ -100,7 +88,7 @@ async fn main() -> Result<()> {
match tokio::task::block_in_place(|| { match tokio::task::block_in_place(|| {
tokio::runtime::Handle::current() tokio::runtime::Handle::current()
.block_on(async { ollama_client.generate_embedding(&text).await }) .block_on(async { llm.embed_document(&text).await })
}) { }) {
Ok(emb) => Some(emb), Ok(emb) => Some(emb),
Err(e) => { Err(e) => {
+6 -14
@@ -1,7 +1,7 @@
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use chrono::Utc; use chrono::Utc;
use clap::Parser; use clap::Parser;
use image_api::ai::ollama::OllamaClient; use image_api::ai::LocalLlm;
use image_api::bin_progress; use image_api::bin_progress;
use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao}; use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
use image_api::parsers::search_html_parser::parse_search_html; use image_api::parsers::search_html_parser::parse_search_html;
@@ -38,16 +38,9 @@ async fn main() -> Result<()> {
info!("Found {} search records", searches.len()); info!("Found {} search records", searches.len());
let primary_url = dotenv::var("OLLAMA_PRIMARY_URL") // LocalLlm dispatches per LLM_BACKEND, so embeddings written here land
.or_else(|_| dotenv::var("OLLAMA_URL")) // in the same vector space the query side searches.
.unwrap_or_else(|_| "http://localhost:11434".to_string()); let llm = LocalLlm::from_env();
let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
.or_else(|_| dotenv::var("OLLAMA_MODEL"))
.unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
let context = opentelemetry::Context::current(); let context = opentelemetry::Context::current();
let mut inserted_count = 0usize; let mut inserted_count = 0usize;
@@ -67,12 +60,11 @@ async fn main() -> Result<()> {
let pb_for_warn = pb.clone(); let pb_for_warn = pb.clone();
let embeddings_result = tokio::task::spawn({ let embeddings_result = tokio::task::spawn({
let ollama_client = ollama.clone(); let llm = llm.clone();
async move { async move {
// Generate embeddings in parallel for the batch
let mut embeddings = Vec::new(); let mut embeddings = Vec::new();
for query in &queries { for query in &queries {
match ollama_client.generate_embedding(query).await { match llm.embed_document(query).await {
Ok(emb) => embeddings.push(Some(emb)), Ok(emb) => embeddings.push(Some(emb)),
Err(e) => { Err(e) => {
pb_for_warn.println(format!("embedding failed for '{}': {}", query, e)); pb_for_warn.println(format!("embedding failed for '{}': {}", query, e));
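Review bot: In the spawned task, an `Err` from `llm.embed_document(query)` is only printed through `pb_for_warn.println`, and `LocalLlm::from_env()` earlier in this file is used without any check, so a wrong `LLM_BACKEND` or an unreachable server shows up as one warning per query rather than a failed import. A single probe embed before the batch loop would fail fast. The warning also prints the raw search query (`embedding failed for '{}': {}`), which puts personal search history into terminal output and any captured logs; a row index or a truncated form is enough for debugging.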
+12
@@ -14,6 +14,7 @@ use image_api::database::{
SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao, SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
connect, connect,
}; };
use image_api::faces::{FaceDao, SqliteFaceDao};
use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS}; use image_api::file_types::{IMAGE_EXTENSIONS, VIDEO_EXTENSIONS};
use image_api::libraries::{self, Library}; use image_api::libraries::{self, Library};
use image_api::tags::{SqliteTagDao, TagDao}; use image_api::tags::{SqliteTagDao, TagDao};
@@ -182,6 +183,11 @@ async fn main() -> anyhow::Result<()> {
Arc::new(Mutex::new(Box::new(SqliteTagDao::default()))); Arc::new(Mutex::new(Box::new(SqliteTagDao::default())));
let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> = let knowledge_dao: Arc<Mutex<Box<dyn KnowledgeDao>>> =
Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new()))); Arc::new(Mutex::new(Box::new(SqliteKnowledgeDao::new())));
let face_dao: Arc<Mutex<Box<dyn FaceDao>>> =
Arc::new(Mutex::new(Box::new(SqliteFaceDao::new())));
let persona_dao: Arc<Mutex<Box<dyn image_api::database::PersonaDao>>> = Arc::new(Mutex::new(
Box::new(image_api::database::SqlitePersonaDao::new()),
));
// Pass the full library set so `resolve_full_path` probes every root, // Pass the full library set so `resolve_full_path` probes every root,
// even when --library restricts the walk. A rel_path shared across // even when --library restricts the walk. A rel_path shared across
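Review bot: `persona_dao` spells out `image_api::database::PersonaDao` and `image_api::database::SqlitePersonaDao` inline, while `face_dao` directly above uses the imported `FaceDao`/`SqliteFaceDao`. Adding both persona types to the existing `use image_api::database::{...}` list keeps the DAO construction block uniform and the declaration readable.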
@@ -189,6 +195,7 @@ async fn main() -> anyhow::Result<()> {
let generator = InsightGenerator::new( let generator = InsightGenerator::new(
ollama, ollama,
None, None,
None,
sms_client, sms_client,
apollo_client, apollo_client,
insight_dao.clone(), insight_dao.clone(),
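Review bot: `InsightGenerator::new(ollama, None, None, ...)` now has two adjacent positional `None` arguments with nothing to say which optional dependency each one disables, and if the two parameter types match, swapping them would still compile. Add a trailing comment per argument, as the later hunk does for `enable_thinking`, or move the constructor to a builder or named-field struct now that it takes this many parameters.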
@@ -198,7 +205,9 @@ async fn main() -> anyhow::Result<()> {
location_dao, location_dao,
search_dao, search_dao,
tag_dao, tag_dao,
face_dao,
knowledge_dao, knowledge_dao,
persona_dao,
all_libs.clone(), all_libs.clone(),
); );
@@ -327,10 +336,13 @@ async fn main() -> anyhow::Result<()> {
args.top_p, args.top_p,
args.top_k, args.top_k,
args.min_p, args.min_p,
None, // enable_thinking: leave model/template default
args.max_iterations, args.max_iterations,
None, None,
Vec::new(), Vec::new(),
Vec::new(), Vec::new(),
1, // operator user_id — populate_knowledge is single-user offline tool
"default".to_string(),
) )
.await .await
{ {
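Review bot: The trailing `1` and `"default".to_string()` arguments are hard-coded identities. The comment explains why a fixed operator `user_id` is acceptable for this offline tool, but it assumes user id 1 exists and is the operator, and the `"default"` string has no comment at all. Name both as constants (or CLI flags with these defaults) and state what the string selects, so records written by this tool are attributed deliberately.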
+273
@@ -0,0 +1,273 @@
//! Probe binary for CLIP semantic search.
//!
//! No DB writes. Walks a library's `image_exif` rows, encodes a sample
//! via Apollo's `/encode_image`, encodes the user's --query via
//! `/encode_text`, and prints the top-K most similar photos by cosine
//! similarity so the operator can eyeball quality before committing to
//! the persistence phase (column populated by backlog drain, search
//! endpoint, UI).
//!
//! Usage:
//! cargo run --release --bin probe_clip_search -- \
//! --library 1 --limit 200 --query "a beach at sunset" --top 10
//!
//! Env: standard ImageApi `.env`. Requires either
//! `APOLLO_CLIP_API_BASE_URL` or `APOLLO_API_BASE_URL` to be set.
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::Instant;
use clap::Parser;
use log::{info, warn};
use image_api::ai::clip_client::{ClipClient, ClipError, EncodeImageMeta};
use image_api::database::{ExifDao, SqliteExifDao, connect};
use image_api::exif;
use image_api::file_types;
use image_api::libraries::{self, Library};
#[derive(Parser, Debug)]
#[command(name = "probe_clip_search")]
#[command(about = "Top-K CLIP semantic search over a sample of image_exif rows")]
struct Args {
/// Library id to sample from.
#[arg(long)]
library: i32,
/// Max files to encode. CPU inference is slow (~1-3 s per photo at
/// ViT-L/14); start small and grow once GPU is sorted.
#[arg(long, default_value_t = 50)]
limit: usize,
/// Natural-language query. Empty triggers an error from Apollo.
#[arg(long)]
query: String,
/// How many top results to print.
#[arg(long, default_value_t = 10)]
top: usize,
/// Offset into the library's rel_path listing.
#[arg(long, default_value_t = 0)]
offset: i64,
/// How many DB rows to scan before giving up on hitting the limit.
#[arg(long, default_value_t = 5000)]
max_scan: i64,
}
/// Same as `face_watch::read_image_bytes_for_detect` (which is pub(crate)).
/// Inlined for the throwaway probe.
fn read_image_bytes(path: &Path) -> std::io::Result<Vec<u8>> {
if file_types::needs_ffmpeg_thumbnail(path)
&& let Some(preview) = exif::extract_embedded_jpeg_preview(path)
{
return Ok(preview);
}
std::fs::read(path)
}
/// Decode a base64'd LE float32 vector to a `Vec<f32>`.
fn decode_f32_vec(b64: &str) -> anyhow::Result<Vec<f32>> {
use base64::Engine;
let bytes = base64::engine::general_purpose::STANDARD.decode(b64.as_bytes())?;
if bytes.len() % 4 != 0 {
anyhow::bail!("embedding byte length {} not divisible by 4", bytes.len());
}
let mut out = Vec::with_capacity(bytes.len() / 4);
for chunk in bytes.chunks_exact(4) {
out.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
}
Ok(out)
}
/// Plain dot product. Apollo L2-normalizes both sides, so this is cosine sim.
fn dot(a: &[f32], b: &[f32]) -> f32 {
a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
env_logger::init();
dotenv::dotenv().ok();
let args = Args::parse();
if args.query.trim().is_empty() {
anyhow::bail!("--query must not be empty");
}
let client = ClipClient::from_env();
if !client.is_enabled() {
anyhow::bail!(
"ClipClient disabled: set APOLLO_CLIP_API_BASE_URL or APOLLO_API_BASE_URL in .env"
);
}
match client.health().await {
Ok(h) => info!(
"clip engine: loaded={} device={} model={} dim={}",
h.loaded, h.device, h.model_version, h.embedding_dim
),
Err(e) => warn!("health probe failed (continuing): {e}"),
}
let mut seed_conn = connect();
if let Some(base) = dotenv::var("BASE_PATH").ok().as_deref() {
libraries::seed_or_patch_from_env(&mut seed_conn, base);
}
let libs = libraries::load_all(&mut seed_conn);
drop(seed_conn);
let lib: Library = libs
.into_iter()
.find(|l| l.id == args.library)
.ok_or_else(|| anyhow::anyhow!("library id {} not found", args.library))?;
info!(
"probing library #{} ({}) at {}",
lib.id, lib.name, lib.root_path
);
let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
let ctx = opentelemetry::Context::new();
// Encode the query up-front so the long image-encode loop doesn't
// race a slow query encode. Fails fast on a misspelled query.
let query_resp = client
.encode_text(&args.query)
.await
.map_err(|e| anyhow::anyhow!("encode_text: {e}"))?;
let query_vec = decode_f32_vec(&query_resp.embedding)?;
info!(
"query encoded ({}d, {}ms): {:?}",
query_resp.embedding_dim, query_resp.duration_ms, args.query
);
// Page through (id, rel_path), filter to images on disk, encode up
// to `limit`. Each encoded photo gets scored against the query and
// kept in a top-K heap.
const PAGE: i64 = 500;
let mut offset = args.offset;
let mut scanned: i64 = 0;
let mut encoded = 0usize;
let mut perm_fail = 0usize;
let mut transient_fail = 0usize;
let root = PathBuf::from(&lib.root_path);
let started = Instant::now();
// (similarity, rel_path) — we keep all scored results and sort at
// the end. With limit≤few-hundred this is trivial.
let mut scores: Vec<(f32, String)> = Vec::with_capacity(args.limit);
'outer: loop {
if scanned >= args.max_scan {
warn!(
"scan cap ({}) reached before hitting limit ({}); bump --max-scan to scan deeper",
args.max_scan, args.limit
);
break;
}
let rows = {
let mut guard = dao.lock().expect("dao lock");
guard
.list_rel_paths_for_library_page(&ctx, lib.id, PAGE, offset)
.map_err(|e| anyhow::anyhow!("list rel_paths: {:?}", e))?
};
if rows.is_empty() {
info!("no more rows after offset {}", offset);
break;
}
offset += rows.len() as i64;
scanned += rows.len() as i64;
for (_id, rel_path) in rows {
if encoded >= args.limit {
break 'outer;
}
let abs = root.join(&rel_path);
if !file_types::is_image_file(&abs) || !abs.exists() {
continue;
}
let bytes = match read_image_bytes(&abs) {
Ok(b) => b,
Err(e) => {
warn!("read {rel_path}: {e}");
continue;
}
};
let meta = EncodeImageMeta {
content_hash: String::new(),
library_id: lib.id,
rel_path: rel_path.clone(),
};
let call_start = Instant::now();
match client.encode_image(bytes, meta).await {
Ok(resp) => {
encoded += 1;
let vec = match decode_f32_vec(&resp.embedding) {
Ok(v) => v,
Err(e) => {
warn!("decode {rel_path}: {e}");
continue;
}
};
if vec.len() != query_vec.len() {
warn!(
"dim mismatch for {rel_path}: image={} query={}",
vec.len(),
query_vec.len()
);
continue;
}
let sim = dot(&vec, &query_vec);
scores.push((sim, rel_path.clone()));
if encoded.is_multiple_of(10) {
info!(
"progress: {} encoded, {:.1}s elapsed",
encoded,
started.elapsed().as_secs_f32()
);
}
let _ = call_start;
}
Err(ClipError::Permanent(e)) => {
perm_fail += 1;
warn!("permanent encode failure for {rel_path}: {e}");
}
Err(ClipError::Transient(e)) => {
transient_fail += 1;
warn!("transient encode failure for {rel_path}: {e}");
}
Err(ClipError::Disabled) => {
anyhow::bail!("clip client became disabled mid-run; impossible");
}
}
}
}
scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
let elapsed = started.elapsed();
println!();
println!(
"── top {} for query: {:?} ──",
args.top.min(scores.len()),
args.query
);
for (i, (sim, path)) in scores.iter().take(args.top).enumerate() {
println!("[{:>2}] sim={:.3} {}", i + 1, sim, path);
}
println!();
println!("── summary ─────────────────────────────────────");
println!("query : {:?}", args.query);
println!("scanned rows : {scanned}");
println!("encoded photos : {encoded}");
println!("permanent failures : {perm_fail}");
println!("transient failures : {transient_fail}");
println!("elapsed : {:.1}s", elapsed.as_secs_f32());
if encoded > 0 {
println!(
"throughput : {:.2} photos/s ({:.0}ms/photo avg)",
encoded as f32 / elapsed.as_secs_f32().max(0.001),
elapsed.as_millis() as f32 / encoded as f32
);
}
Ok(())
}
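Review bot: In the `Ok(resp)` arm, `encoded += 1` runs before `decode_f32_vec` and the dimension check, so a response that fails either one still counts toward `--limit` and is reported under "encoded photos" even though it was never scored. Increment after the push into `scores`, or report a separate scored count. In the same arm `call_start` is created and then discarded with `let _ = call_start;`, so the per-call timing is dead code; either log it or drop it. The comment above the loop describes a top-K heap while the code collects every score and sorts, and the sort uses `partial_cmp(...).unwrap_or(Equal)`, which leaves the order unspecified if a malformed embedding yields a NaN similarity; `f32::total_cmp` gives a defined order.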
+465
@@ -0,0 +1,465 @@
//! Re-embed stored corpora through `LocalLlm`, i.e. the same
//! `LLM_BACKEND` dispatch the query side uses. The original import /
//! backfill tools always embedded via Ollama, so a deploy running
//! `LLM_BACKEND=llamacpp` queries vector spaces the corpora may not live
//! in. Three tables share the problem and are all covered here:
//!
//! - `daily_conversation_summaries` — re-embeds
//! `strip_summary_boilerplate(summary)` (what the original job fed the
//! embedder); also rewrites `model_version`.
//! - `calendar_events` — re-embeds "summary description location" exactly
//! as `import_calendar` does; rows without an embedding are skipped (the
//! import only embeds under `--generate-embeddings`).
//! - `search_history` — re-embeds the raw query text.
//! - `entities` (knowledge graph) — re-embeds "name description" exactly as
//! `tool_store_entity` does; embedding-less rows are skipped (embedding
//! is best-effort at store time).
//!
//! Source text is untouched — only vectors are rewritten. The old↔new
//! cosine report doubles as a diagnostic: ~1.0 means both backends already
//! shared a space (re-embedding was a no-op); low values confirm the
//! mismatch this tool exists to fix.
use anyhow::{Context, Result};
use clap::Parser;
use diesel::prelude::*;
use diesel::sql_query;
use diesel::sqlite::SqliteConnection;
use image_api::ai::{LocalLlm, strip_summary_boilerplate};
use image_api::bin_progress;
use std::env;
#[derive(Parser, Debug)]
#[command(author, version, about = "Re-embed stored corpora via the configured LLM_BACKEND", long_about = None)]
struct Args {
/// Comma-separated tables to process: summaries, calendar, search, entities
#[arg(long, default_value = "summaries,calendar,search,entities")]
tables: String,
/// Only process the first N rows per table (smoke test)
#[arg(long)]
limit: Option<usize>,
/// Compute embeddings and report old↔new similarity without writing
#[arg(long, default_value_t = false)]
dry_run: bool,
}
#[derive(QueryableByName)]
struct SummaryRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Text)]
summary: String,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
#[diesel(sql_type = diesel::sql_types::Text)]
model_version: String,
}
#[derive(QueryableByName)]
struct CalendarRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Text)]
summary: String,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
description: Option<String>,
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
location: Option<String>,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
}
#[derive(QueryableByName)]
struct SearchRow {
#[diesel(sql_type = diesel::sql_types::BigInt)]
id: i64,
#[diesel(sql_type = diesel::sql_types::Text)]
query: String,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
}
#[derive(QueryableByName)]
struct EntityRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Text)]
name: String,
#[diesel(sql_type = diesel::sql_types::Text)]
description: String,
#[diesel(sql_type = diesel::sql_types::Binary)]
embedding: Vec<u8>,
}
/// One unit of re-embed work, normalized across tables.
struct WorkItem {
/// Row key, as i64 so both i32 ids and rowids fit.
id: i64,
/// Text fed to the embedder — must match what the original writer used.
text: String,
/// Existing vector bytes, for the old↔new similarity report.
old_embedding: Vec<u8>,
}
fn deserialize_vector(bytes: &[u8]) -> Option<Vec<f32>> {
if !bytes.len().is_multiple_of(4) {
return None;
}
Some(
bytes
.chunks_exact(4)
.map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
.collect(),
)
}
fn serialize_vector(vec: &[f32]) -> Vec<u8> {
vec.iter().flat_map(|f| f.to_le_bytes()).collect()
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() {
return 0.0;
}
let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
let mag_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let mag_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if mag_a == 0.0 || mag_b == 0.0 {
return 0.0;
}
dot / (mag_a * mag_b)
}
/// Embed `text`, halving it on "input too large" errors until it fits the
/// server's physical batch (`--ubatch-size`). Mirrors the silent truncation
/// Ollama applied when these corpora were first embedded — llama-server
/// returns a 500 instead — except here it's surfaced via the returned flag.
/// Returns `(embedding, truncated)`.
async fn embed_with_truncation(llm: &LocalLlm, text: &str) -> Result<(Vec<f32>, bool)> {
let mut text = text.to_string();
let mut truncated = false;
loop {
match llm.embed_document(&text).await {
Ok(emb) => return Ok((emb, truncated)),
Err(e)
if e.to_string().contains("too large to process") && text.chars().count() > 64 =>
{
let keep = text.chars().count() / 2;
text = text.chars().take(keep).collect();
truncated = true;
}
Err(e) => return Err(e),
}
}
}
/// Re-embed `items`, writing each new vector via `update`. Returns the
/// old↔new cosines for the similarity report.
async fn reembed_table(
conn: &mut SqliteConnection,
llm: &LocalLlm,
label: &str,
items: Vec<WorkItem>,
dry_run: bool,
update: impl Fn(&mut SqliteConnection, i64, Vec<u8>) -> Result<()>,
) -> Result<Vec<f32>> {
println!("\n[{}] re-embedding {} rows...", label, items.len());
let pb = bin_progress::determinate(items.len() as u64, format!("re-embedding {}", label));
let mut sims: Vec<f32> = Vec::with_capacity(items.len());
let mut updated = 0usize;
let mut failed = 0usize;
let mut truncated_count = 0usize;
for item in &items {
let new_emb = match embed_with_truncation(llm, &item.text).await {
Ok((e, truncated)) => {
if truncated {
truncated_count += 1;
pb.println(format!(
"⚠ {} id={}: input exceeded the embed server's batch size, \
truncated before embedding",
label, item.id
));
}
e
}
Err(e) => {
pb.inc(1);
failed += 1;
eprintln!("{} id={}: {}", label, item.id, e);
continue;
}
};
// The whole pipeline (DAO checks, stored corpora) assumes
// EMBEDDING_DIM dims. A mismatch means the active embed slot is not
// serving the configured model — stop rather than corrupt the table.
anyhow::ensure!(
new_emb.len() == image_api::ai::embedding_dim(),
"backend returned {}-dim embedding (expected {}) — '{}' does not \
match the configured EMBEDDING_DIM",
new_emb.len(),
image_api::ai::embedding_dim(),
llm.embedding_model_version()
);
if let Some(old_emb) = deserialize_vector(&item.old_embedding) {
sims.push(cosine_similarity(&old_emb, &new_emb));
}
if !dry_run {
update(conn, item.id, serialize_vector(&new_emb))
.with_context(|| format!("updating {} id={}", label, item.id))?;
}
updated += 1;
pb.inc(1);
}
pb.finish_and_clear();
println!(
"[{}] {} re-embedded ({} truncated), {} failed",
label, updated, truncated_count, failed
);
Ok(sims)
}
fn report_similarity(label: &str, mut sims: Vec<f32>) {
if sims.is_empty() {
println!("[{}] no old↔new pairs to compare", label);
return;
}
sims.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mean: f32 = sims.iter().sum::<f32>() / sims.len() as f32;
let median = sims[sims.len() / 2];
println!(
"[{}] old↔new cosine over identical text: min={:.3} median={:.3} mean={:.3} max={:.3}",
label,
sims.first().unwrap(),
median,
mean,
sims.last().unwrap()
);
if median > 0.98 {
println!(
"[{}] → old and new backends agree (~same vector space); poor search \
results are coming from something else (prefixes, thresholds, corpus).",
label
);
} else if median > 0.9 {
println!(
"[{}] → same model family but measurably different vectors \
(quantization / runtime drift); re-embedding was worthwhile.",
label
);
} else {
println!(
"[{}] → vector-space mismatch confirmed — queries were searching a \
different space than the corpus. This re-embed should fix it.",
label
);
}
}
#[tokio::main]
async fn main() -> Result<()> {
dotenv::dotenv().ok();
env_logger::init();
let args = Args::parse();
let tables: Vec<&str> = args.tables.split(',').map(|t| t.trim()).collect();
for t in &tables {
anyhow::ensure!(
matches!(*t, "summaries" | "calendar" | "search" | "entities"),
"unknown table '{}' — expected summaries, calendar, search, entities",
t
);
}
let database_url = env::var("DATABASE_URL").unwrap_or_else(|_| "auth.db".to_string());
println!("Database: {}", database_url);
let mut conn = SqliteConnection::establish(&database_url)
.with_context(|| format!("connecting to {}", database_url))?;
let llm = LocalLlm::from_env();
let model_version = llm.embedding_model_version();
println!("Embedding via '{}'", model_version);
if args.dry_run {
println!("DRY RUN — no rows will be written");
}
if tables.contains(&"summaries") {
let mut rows: Vec<SummaryRow> = sql_query(
"SELECT id, summary, embedding, model_version
FROM daily_conversation_summaries ORDER BY date",
)
.load(&mut conn)
.context("loading daily summaries")?;
if let Some(limit) = args.limit {
rows.truncate(limit);
}
if let Some(first) = rows.first() {
println!(
"\n[summaries] previous model_version '{}' → '{}'",
first.model_version, model_version
);
}
let items = rows
.into_iter()
.map(|r| WorkItem {
id: r.id as i64,
text: strip_summary_boilerplate(&r.summary),
old_embedding: r.embedding,
})
.collect();
let mv = model_version.clone();
let sims = reembed_table(
&mut conn,
&llm,
"summaries",
items,
args.dry_run,
move |conn, id, emb| {
sql_query(
"UPDATE daily_conversation_summaries
SET embedding = ?1, model_version = ?2 WHERE id = ?3",
)
.bind::<diesel::sql_types::Binary, _>(emb)
.bind::<diesel::sql_types::Text, _>(&mv)
.bind::<diesel::sql_types::Integer, _>(id as i32)
.execute(conn)?;
Ok(())
},
)
.await?;
report_similarity("summaries", sims);
}
if tables.contains(&"calendar") {
let mut rows: Vec<CalendarRow> = sql_query(
"SELECT id, summary, description, location, embedding
FROM calendar_events WHERE embedding IS NOT NULL ORDER BY id",
)
.load(&mut conn)
.context("loading calendar events")?;
if let Some(limit) = args.limit {
rows.truncate(limit);
}
let items = rows
.into_iter()
.map(|r| WorkItem {
id: r.id as i64,
// Same text construction as import_calendar.
text: format!(
"{} {} {}",
r.summary,
r.description.as_deref().unwrap_or(""),
r.location.as_deref().unwrap_or("")
),
old_embedding: r.embedding,
})
.collect();
let sims = reembed_table(
&mut conn,
&llm,
"calendar",
items,
args.dry_run,
|conn, id, emb| {
sql_query("UPDATE calendar_events SET embedding = ?1 WHERE id = ?2")
.bind::<diesel::sql_types::Binary, _>(emb)
.bind::<diesel::sql_types::Integer, _>(id as i32)
.execute(conn)?;
Ok(())
},
)
.await?;
report_similarity("calendar", sims);
}
if tables.contains(&"search") {
let mut rows: Vec<SearchRow> = sql_query(
"SELECT rowid AS id, query, embedding
FROM search_history ORDER BY rowid",
)
.load(&mut conn)
.context("loading search history")?;
if let Some(limit) = args.limit {
rows.truncate(limit);
}
let items = rows
.into_iter()
.map(|r| WorkItem {
id: r.id,
text: r.query,
old_embedding: r.embedding,
})
.collect();
let sims = reembed_table(
&mut conn,
&llm,
"search",
items,
args.dry_run,
|conn, id, emb| {
sql_query("UPDATE search_history SET embedding = ?1 WHERE rowid = ?2")
.bind::<diesel::sql_types::Binary, _>(emb)
.bind::<diesel::sql_types::BigInt, _>(id)
.execute(conn)?;
Ok(())
},
)
.await?;
report_similarity("search", sims);
}
if tables.contains(&"entities") {
let mut rows: Vec<EntityRow> = sql_query(
"SELECT id, name, description, embedding
FROM entities WHERE embedding IS NOT NULL ORDER BY id",
)
.load(&mut conn)
.context("loading knowledge entities")?;
if let Some(limit) = args.limit {
rows.truncate(limit);
}
let items = rows
.into_iter()
.map(|r| WorkItem {
id: r.id as i64,
// Same text construction as tool_store_entity.
text: format!("{} {}", r.name, r.description),
old_embedding: r.embedding,
})
.collect();
let sims = reembed_table(
&mut conn,
&llm,
"entities",
items,
args.dry_run,
|conn, id, emb| {
sql_query("UPDATE entities SET embedding = ?1 WHERE id = ?2")
.bind::<diesel::sql_types::Binary, _>(emb)
.bind::<diesel::sql_types::Integer, _>(id as i32)
.execute(conn)?;
Ok(())
},
)
.await?;
report_similarity("entities", sims);
}
println!(
"\n{}",
if args.dry_run {
"Dry run complete"
} else {
"Done"
}
);
Ok(())
}
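Review bot: In `reembed_table`, rows are rewritten one at a time with no transaction, and a row whose embed call fails hits `continue` and keeps its old vector, so an aborted or partly failed run leaves old-space and new-space vectors side by side. Only the `summaries` UPDATE also writes `model_version`; the `calendar_events`, `search_history` and `entities` UPDATEs set `embedding` alone, so nothing in those tables marks which rows were re-embedded. Consider wrapping each table's writes in a transaction (or recording the model version per row there too) and returning a non-zero exit when `failed > 0`. Separately, `embed_with_truncation` detects the oversize case with `e.to_string().contains("too large to process")`; a change in the server's wording would turn the retry into a hard failure, so a typed error or status check would be sturdier.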
+382
@@ -0,0 +1,382 @@
//! `/photos/search?q=<text>` — CLIP semantic photo search.
//!
//! The route lives outside `files.rs` to keep that 1500+ line module
//! focused on EXIF / tag listing. The flow is:
//!
//! 1. Parse query params (`q`, `limit`, `threshold`, optional `library`).
//! 2. Call Apollo's `/api/internal/clip/encode_text` to get the query
//! vector (L2-normalized 768-d f32 for ViT-L/14).
//! 3. Load every `(content_hash, clip_embedding)` for the scope from
//! `image_exif` via `ExifDao::list_clip_index`. ~2843 MB for a 14k
//! library at ViT-L/14; loaded fresh per request — fast enough for
//! v1, optimize via an AppState cache later if needed.
//! 4. Dot product (= cosine since both sides are L2-normalized), filter
//! above `threshold`, top-K by score.
//! 5. Resolve each surviving hash back to a `(library_id, rel_path)` so
//! the frontend can render the photo / hand off to the carousel.
//!
//! Response shape is intentionally minimal — paths + score — so the
//! frontend can reuse existing PhotoGrid rendering by joining against
//! `/api/photos/match` (or calling `/image/metadata` lazily). Don't
//! bake camera/EXIF metadata into this route; it would force a fan-out
//! per result and balloon the response.
use crate::AppState;
use crate::ai::clip_client::ClipError;
use crate::database::ExifDao;
use actix_web::{HttpResponse, Result as ActixResult, web};
use base64::Engine;
use serde::{Deserialize, Serialize};
use std::sync::Mutex;
#[derive(Debug, Deserialize)]
pub struct SearchQuery {
/// Natural-language query. Required; empty triggers 400.
pub q: String,
/// Max results to return in this page. Capped to 200 server-side.
/// Defaults to 20. Pair with `offset` for pagination.
#[serde(default = "default_limit")]
pub limit: usize,
/// Zero-based offset into the sorted-and-filtered result set. The
/// scoring loop still runs over the full embedding matrix on every
/// page (cheap at personal-library scale — sub-100ms — and avoids
/// stateful pagination cursors). Defaults to 0.
#[serde(default)]
pub offset: usize,
/// Cosine-similarity floor below which results are dropped.
/// 0.20 is the rough "this is plausibly relevant" line for OpenAI
/// CLIP; tunable per call when sweeping. Defaults to 0.20.
#[serde(default = "default_threshold")]
pub threshold: f32,
/// Optional single-library scope. Legacy param — new clients pass
/// `library_ids` instead so multi-select scopes (Apollo's HUD library
/// chips, FileViewer-React's library picker) actually filter. Kept
/// for back-compat; `library_ids` wins when both are supplied.
pub library: Option<i32>,
/// Optional multi-library scope, comma-separated id list
/// (`?library_ids=1,3`). Empty / omitted = every enabled library
/// (the historical default). Apollo and FileViewer-React both send
/// this when 2+ libraries are selected; the single-library case
/// works through either param interchangeably.
pub library_ids: Option<String>,
/// Optional model-version filter. Defaults to the live engine's
/// version (queried lazily). Forces a strict join so mid-flight
/// model swaps can't mix geometries in a single response.
#[serde(default)]
pub model_version: Option<String>,
}
fn default_limit() -> usize {
20
}
fn default_threshold() -> f32 {
0.20
}
#[derive(Debug, Serialize)]
pub struct SearchHit {
pub library_id: i32,
pub rel_path: String,
pub content_hash: String,
/// Cosine similarity in [-1, 1]. In practice OpenAI CLIP returns
/// 0.100.40 for the typical photo library.
pub score: f32,
}
#[derive(Debug, Serialize)]
pub struct SearchResponse {
pub query: String,
pub model_version: String,
pub threshold: f32,
/// Total embeddings scored (= every photo in scope with a stored
/// embedding). Same value across pages of the same query.
pub considered: usize,
/// Count of results above threshold, before pagination. Lets the
/// client decide whether a "Load more" button is meaningful and
/// stop fetching when ``offset + results.len() >= total_matching``.
pub total_matching: usize,
pub offset: usize,
pub results: Vec<SearchHit>,
}
#[derive(Debug, Serialize)]
struct SearchError {
error: String,
}
/// Decode a stored `clip_embedding` BLOB back into a `Vec<f32>`. Returns
/// `None` on malformed bytes — those rows get skipped rather than
/// failing the whole query.
fn decode_embedding(bytes: &[u8]) -> Option<Vec<f32>> {
if bytes.is_empty() || !bytes.len().is_multiple_of(4) {
return None;
}
let mut out = Vec::with_capacity(bytes.len() / 4);
for chunk in bytes.chunks_exact(4) {
out.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
}
Some(out)
}
#[inline]
fn dot(a: &[f32], b: &[f32]) -> f32 {
a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
}
/// Failure modes of [`score_photos`]. Carries enough to let each caller pick
/// an appropriate HTTP status (the CLIP service being down is a 502, a
/// disabled feature is a 503, a rejected query is a 400, a DB failure 500).
pub enum ScoreError {
/// CLIP search isn't configured at all (no Apollo endpoint).
Disabled,
/// The query was rejected by the encoder (client error).
Rejected(String),
/// The CLIP service is transiently unavailable (upstream error).
Unavailable(String),
/// The encoder returned an embedding we couldn't decode.
MalformedEmbedding,
/// A database / index load failure.
Internal(String),
}
/// Result of scoring the whole library against a query embedding: the
/// resolved model version, how many embeddings were considered, and every
/// `(score, content_hash)` above threshold, sorted by descending score.
/// Pagination and path resolution are the caller's job (see [`resolve_hits`])
/// so this core can be reused for both the plain search endpoint and the
/// unified endpoint (which filters by hash before paginating).
pub struct ScoredPhotos {
pub model_version: String,
pub considered: usize,
/// `(cosine_score, content_hash)` pairs, descending by score.
pub hits: Vec<(f32, String)>,
}
/// Encode `q_text` via CLIP and score it against every stored embedding in
/// the given library scope. Returns all matches above `threshold`, sorted by
/// descending similarity. Pure of HTTP concerns so it's shared by
/// `search_photos` and the unified search endpoint.
pub async fn score_photos(
state: &AppState,
exif_dao: &Mutex<Box<dyn ExifDao>>,
q_text: &str,
library_ids: &[i32],
threshold: f32,
model_version: Option<&str>,
) -> Result<ScoredPhotos, ScoreError> {
if !state.clip_client.is_enabled() {
return Err(ScoreError::Disabled);
}
// 1. Encode the query text. Fast — Apollo's text encoder is ~50ms on CPU.
let query_resp = match state.clip_client.encode_text(q_text).await {
Ok(r) => r,
Err(ClipError::Permanent(e)) => return Err(ScoreError::Rejected(e.to_string())),
Err(ClipError::Transient(e)) => return Err(ScoreError::Unavailable(e.to_string())),
Err(ClipError::Disabled) => return Err(ScoreError::Disabled),
};
// decode_embedding works on raw bytes; the wire format is b64.
let query_bytes = base64::engine::general_purpose::STANDARD
.decode(query_resp.embedding.as_bytes())
.unwrap_or_default();
let query_vec = decode_embedding(&query_bytes).ok_or(ScoreError::MalformedEmbedding)?;
// 2. Pull the (hash, embedding) matrix under the dao lock, release
// before scoring. The caller-supplied `model_version` (or the live
// engine's) forces a strict join so a mid-flight model swap can't mix
// geometries.
let ctx = opentelemetry::Context::current();
let rows: Vec<(String, Vec<u8>)> = {
let mut dao = exif_dao.lock().expect("exif dao");
dao.list_clip_index(
&ctx,
library_ids,
model_version.or(Some(&query_resp.model_version)),
)
.map_err(|e| {
log::warn!("clip_search: list_clip_index failed: {:?}", e);
ScoreError::Internal("failed to load search index".into())
})?
};
let considered = rows.len();
// 3. Score. Keep all matches and sort at the end (~microseconds at 14k).
let mut hits: Vec<(f32, String)> = Vec::with_capacity(considered);
for (hash, blob) in rows {
let Some(emb) = decode_embedding(&blob) else {
continue;
};
if emb.len() != query_vec.len() {
continue;
}
let sim = dot(&emb, &query_vec);
if sim < threshold {
continue;
}
hits.push((sim, hash));
}
hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
Ok(ScoredPhotos {
model_version: query_resp.model_version,
considered,
hits,
})
}
/// Resolve a page of `(score, content_hash)` pairs back to [`SearchHit`]s
/// (each carrying `library_id` + `rel_path`). Hashes that no longer resolve
/// to a row are skipped. Shared by both endpoints.
pub fn resolve_hits(
exif_dao: &Mutex<Box<dyn ExifDao>>,
scored: &[(f32, String)],
) -> Vec<SearchHit> {
if scored.is_empty() {
return Vec::new();
}
let ctx = opentelemetry::Context::current();
let hashes: Vec<String> = scored.iter().map(|(_, h)| h.clone()).collect();
let mut dao = exif_dao.lock().expect("exif dao");
let path_map = dao
.get_rel_paths_for_hashes(&ctx, &hashes)
.unwrap_or_else(|e| {
log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
std::collections::HashMap::new()
});
let mut results = Vec::with_capacity(scored.len());
for (score, hash) in scored {
let row = match dao.find_by_content_hash(&ctx, hash) {
Ok(Some(r)) => r,
Ok(None) => continue,
Err(e) => {
log::warn!("clip_search: find_by_content_hash failed for {hash}: {e:?}");
continue;
}
};
// Prefer get_rel_paths_for_hashes's first entry (shares image_exif's
// natural order), falling back to the ImageExif row.
let rel_path = path_map
.get(hash)
.and_then(|paths| paths.first().cloned())
.unwrap_or(row.file_path);
results.push(SearchHit {
library_id: row.library_id,
rel_path,
content_hash: hash.clone(),
score: *score,
});
}
results
}
/// Parse the `library_ids` (multi) / `library` (single) scope params into a
/// deduped id list. Empty = "every enabled library". Shared so the unified
/// endpoint scopes CLIP identically.
pub fn parse_library_scope(
library_ids: Option<&str>,
library: Option<i32>,
) -> Result<Vec<i32>, String> {
if let Some(raw) = library_ids {
let mut out: Vec<i32> = Vec::new();
for piece in raw.split(',') {
let trimmed = piece.trim();
if trimmed.is_empty() {
continue;
}
match trimmed.parse::<i32>() {
Ok(id) => {
if !out.contains(&id) {
out.push(id);
}
}
Err(_) => return Err(format!("invalid library_ids entry: {trimmed:?}")),
}
}
Ok(out)
} else if let Some(id) = library {
Ok(vec![id])
} else {
Ok(Vec::new())
}
}
pub async fn search_photos(
state: web::Data<AppState>,
exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
query: web::Query<SearchQuery>,
) -> ActixResult<HttpResponse> {
let q_text = query.q.trim().to_string();
if q_text.is_empty() {
return Ok(HttpResponse::BadRequest().json(SearchError {
error: "query parameter `q` is required".into(),
}));
}
let limit = query.limit.clamp(1, 200);
let offset = query.offset;
let threshold = query.threshold.clamp(-1.0, 1.0);
let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
Ok(ids) => ids,
Err(msg) => return Ok(HttpResponse::BadRequest().json(SearchError { error: msg })),
};
let scored = match score_photos(
&state,
&exif_dao,
&q_text,
&library_ids,
threshold,
query.model_version.as_deref(),
)
.await
{
Ok(s) => s,
Err(e) => return Ok(score_error_response(e)),
};
let total_matching = scored.hits.len();
// Pagination — slice the sorted list at `[offset, offset+limit)`. Offsets
// past the end produce empty pages so "load more" stops naturally.
let page: Vec<(f32, String)> = if offset >= total_matching {
Vec::new()
} else {
let end = (offset + limit).min(total_matching);
scored.hits[offset..end].to_vec()
};
let results = resolve_hits(&exif_dao, &page);
Ok(HttpResponse::Ok().json(SearchResponse {
query: q_text,
model_version: scored.model_version,
threshold,
considered: scored.considered,
total_matching,
offset,
results,
}))
}
/// Map a [`ScoreError`] to the HTTP response `search_photos` historically
/// returned for each failure mode. Reused by the unified endpoint.
pub fn score_error_response(e: ScoreError) -> HttpResponse {
match e {
ScoreError::Disabled => HttpResponse::ServiceUnavailable().json(SearchError {
error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
}),
ScoreError::Rejected(msg) => HttpResponse::BadRequest().json(SearchError {
error: format!("query rejected: {msg}"),
}),
ScoreError::Unavailable(msg) => HttpResponse::BadGateway().json(SearchError {
error: format!("CLIP service unavailable: {msg}"),
}),
ScoreError::MalformedEmbedding => HttpResponse::BadGateway().json(SearchError {
error: "CLIP service returned a malformed query embedding".into(),
}),
ScoreError::Internal(msg) => {
HttpResponse::InternalServerError().json(SearchError { error: msg })
}
}
}
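Review bot: `score_photos` passes the encoder's error text straight to the client: `ClipError::Permanent(e)` and `ClipError::Transient(e)` become `ScoreError::Rejected(e.to_string())` and `ScoreError::Unavailable(e.to_string())`, and `score_error_response` formats that string into the 400 and 502 JSON bodies. If the client error's `Display` output includes the upstream URL or response body, those internal details reach any caller of the route. The `list_clip_index` failure path already does the safer thing (logs `{:?}` server-side, returns the fixed string "failed to load search index"); the same split would fit here. Also in `search_photos`, `query.threshold.clamp(-1.0, 1.0)` leaves a NaN unchanged, and `sim < threshold` is then false for every row, so a NaN threshold (if the query deserializer accepts one) returns the whole index as matches; rejecting non-finite values with a 400 closes that.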
+246
@@ -0,0 +1,246 @@
//! CLIP-encoding pass for the file watcher.
//!
//! `process_clip_backlog` in `backfill.rs` calls [`run_clip_encoding_pass`]
//! with the page of candidates returned by
//! `ExifDao::list_clip_unencoded_candidates`. We walk those, fan out K
//! parallel encode calls to Apollo, and persist the resulting embeddings
//! into `image_exif.clip_embedding` / `clip_model_version`.
//!
//! Unlike the face pipeline, CLIP has no marker rows — a permanent
//! failure (un-decodable bytes) leaves the row's `clip_embedding` NULL
//! and the drain will retry on the next tick. For personal-library
//! scale this is fine; the per-tick cap bounds the wasted work, and
//! `file_types::is_image_file` filters out videos / non-media client-
//! side so most permanent failures are decoded-but-corrupt files (rare).
//!
//! The watcher thread isn't in any pre-existing async context, so we
//! build a short-lived tokio runtime per pass and `block_on` the join
//! of K encode futures. Concurrency knob: `CLIP_ENCODE_CONCURRENCY`
//! (default 4 — lower than faces because Apollo's CLIP path doesn't
//! release the GIL between preprocess and forward as cleanly).
use crate::ai::clip_client::{ClipClient, ClipError, EncodeImageMeta};
use crate::database::ExifDao;
use crate::exif;
use crate::file_types;
use crate::libraries::Library;
use crate::memories::PathExcluder;
use log::{debug, info, warn};
use std::path::Path;
use std::sync::{Arc, Mutex};
use tokio::sync::Semaphore;
/// One file the watcher would like to CLIP-encode. Built from the DAO
/// `list_clip_unencoded_candidates` result — needs the `content_hash`
/// for traceability in Apollo's log lines, even though the embedding
/// itself is keyed on `(library_id, rel_path)` for the back-write.
#[derive(Debug, Clone)]
pub struct ClipCandidate {
pub rel_path: String,
pub content_hash: String,
}
/// Synchronous entry point. Returns once every candidate has been
/// processed (or definitively skipped). No-op when the client is
/// disabled so the caller can call unconditionally.
pub fn run_clip_encoding_pass(
library: &Library,
excluded_dirs: &[String],
clip_client: &ClipClient,
exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
candidates: Vec<ClipCandidate>,
) {
if !clip_client.is_enabled() {
return;
}
if candidates.is_empty() {
return;
}
let base = Path::new(&library.root_path);
let filtered = filter_excluded(base, excluded_dirs, candidates, Some(&library.name));
if filtered.is_empty() {
return;
}
let concurrency: usize = std::env::var("CLIP_ENCODE_CONCURRENCY")
.ok()
.and_then(|s| s.parse().ok())
.filter(|n: &usize| *n > 0)
.unwrap_or(4);
info!(
"clip_watch: encoding {} candidate(s) for library '{}' (concurrency {})",
filtered.len(),
library.name,
concurrency
);
let rt = match tokio::runtime::Builder::new_multi_thread()
.worker_threads(2)
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
warn!("clip_watch: failed to build tokio runtime: {e}");
return;
}
};
let library_id = library.id;
let library_root = library.root_path.clone();
rt.block_on(async move {
let sem = Arc::new(Semaphore::new(concurrency));
let mut handles = Vec::with_capacity(filtered.len());
for cand in filtered {
let permit_sem = sem.clone();
let clip_client = clip_client.clone();
let exif_dao = exif_dao.clone();
let library_root = library_root.clone();
handles.push(tokio::spawn(async move {
let _permit = permit_sem.acquire().await.expect("clip semaphore");
process_one(library_id, &library_root, cand, &clip_client, exif_dao).await;
}));
}
for h in handles {
let _ = h.await;
}
});
}
async fn process_one(
library_id: i32,
library_root: &str,
cand: ClipCandidate,
clip_client: &ClipClient,
exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
) {
let abs = Path::new(library_root).join(&cand.rel_path);
let bytes = match read_image_bytes_for_encode(&abs) {
Ok(b) => b,
Err(e) => {
// Same rationale as face_watch: don't mark — the file may
// have been moved/renamed mid-scan; let the next pass retry.
warn!(
"clip_watch: read failed for {} (lib {}): {}",
cand.rel_path, library_id, e
);
return;
}
};
let meta = EncodeImageMeta {
content_hash: cand.content_hash.clone(),
library_id,
rel_path: cand.rel_path.clone(),
};
let ctx = opentelemetry::Context::current();
match clip_client.encode_image(bytes, meta).await {
Ok(resp) => {
let emb_bytes = match resp.decode_embedding() {
Ok(b) => b,
Err(e) => {
warn!("clip_watch: bad embedding for {}: {:?}", cand.rel_path, e);
return;
}
};
let mut dao = exif_dao.lock().expect("exif dao");
if let Err(e) = dao.backfill_clip_embedding(
&ctx,
library_id,
&cand.rel_path,
&emb_bytes,
&resp.model_version,
) {
warn!(
"clip_watch: backfill_clip_embedding failed for {}: {:?}",
cand.rel_path, e
);
return;
}
debug!(
"clip_watch: {} → dim={} ({}ms, {})",
cand.rel_path, resp.embedding_dim, resp.duration_ms, resp.model_version
);
}
Err(ClipError::Permanent(e)) => {
// No marker — the row sits with NULL embedding and the drain
// retries next pass. For personal-library scale the cost of
// re-attempting permanently-broken files is bounded by the
// per-tick cap. If this becomes a recurring noise source,
// add a `clip_status` column with `failed` semantics like
// face_detections has.
warn!(
"clip_watch: permanent failure on {} (will retry next pass): {}",
cand.rel_path, e
);
}
Err(ClipError::Transient(e)) => {
debug!(
"clip_watch: transient on {}: {} (will retry next pass)",
cand.rel_path, e
);
}
Err(ClipError::Disabled) => {
// Defensive — the entry-point already checked is_enabled().
}
}
}
/// Drop candidates whose paths land in an excluded dir or whose
/// extension isn't an image. Mirrors `face_watch::filter_excluded` so
/// the two backlogs stay shape-consistent. Library name is passed
/// purely for the log line that surfaces an exclusion hit.
pub fn filter_excluded(
base: &Path,
excluded_dirs: &[String],
candidates: Vec<ClipCandidate>,
library_name: Option<&str>,
) -> Vec<ClipCandidate> {
let excluder = if excluded_dirs.is_empty() {
None
} else {
Some(PathExcluder::new(base, excluded_dirs))
};
candidates
.into_iter()
.filter(|c| {
let abs = base.join(&c.rel_path);
if !file_types::is_image_file(&abs) {
debug!(
"clip_watch: skipping non-image '{}' (lib {})",
c.rel_path,
library_name.unwrap_or("<unknown>")
);
return false;
}
if let Some(ex) = excluder.as_ref()
&& ex.is_excluded(&abs)
{
debug!(
"clip_watch: skipping excluded '{}' (lib {})",
c.rel_path,
library_name.unwrap_or("<unknown>")
);
return false;
}
true
})
.collect()
}
/// Read image bytes for CLIP encoding. Same logic as
/// `face_watch::read_image_bytes_for_detect` — RAW / HEIC files don't
/// decode in Apollo's PIL pipeline, so we pull the embedded JPEG
/// preview the thumbnail pipeline already extracts. Plain JPEG / PNG /
/// WebP go through a direct read.
pub fn read_image_bytes_for_encode(path: &Path) -> std::io::Result<Vec<u8>> {
if file_types::needs_ffmpeg_thumbnail(path)
&& let Some(preview) = exif::extract_embedded_jpeg_preview(path)
{
return Ok(preview);
}
std::fs::read(path)
}
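Review bot: In `run_clip_encoding_pass`, `let _ = h.await;` discards the `JoinError`, so a panic inside `process_one` (for example the `.expect("exif dao")` on a poisoned mutex) leaves no log line and the pass returns as if it had completed. Log the error at `warn!` at least. `process_one` also calls `read_image_bytes_for_encode` (`std::fs::read` plus preview extraction) and takes the `std::sync::Mutex` directly on the async workers; with `worker_threads(2)` and a default concurrency of 4, two slow reads stall every other task, so `tokio::task::spawn_blocking` for the read and the DAO write would be a better fit. The read has no size limit either, so up to `concurrency` whole files sit in memory at once.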
+58 -2
@@ -50,15 +50,55 @@ pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
thumbs_dir.join(shard).join(format!("{}.jpg", hash)) thumbs_dir.join(shard).join(format!("{}.jpg", hash))
} }
/// Hash-keyed large-preview path: `<thumbs_dir>/_large/<hash[..2]>/<hash>.jpg`.
/// Kept under the same root as 200px thumbs so deployments don't need a
/// second env var, but namespaced under `_large/` so the existing 200px
/// shards don't collide with the larger derivative.
pub fn large_preview_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
let shard = shard_prefix(hash);
thumbs_dir
.join("_large")
.join(shard)
.join(format!("{}.jpg", hash))
}
/// Hash-keyed xlarge-preview path: `<thumbs_dir>/_xlarge/<hash[..2]>/<hash>.jpg`.
pub fn xlarge_preview_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
let shard = shard_prefix(hash);
thumbs_dir
.join("_xlarge")
.join(shard)
.join(format!("{}.jpg", hash))
}
/// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`. /// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`.
/// The playlist lives at `playlist.m3u8` inside this directory and its /// The playlist lives at `playlist.m3u8` inside this directory and its
/// segments are co-located so HLS relative references Just Work. /// segments are co-located so HLS relative references Just Work. See
#[allow(dead_code)] /// [`crate::video::hls_paths`] for the filename constants and the
/// per-file helpers built on this dir.
pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf { pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf {
let shard = shard_prefix(hash); let shard = shard_prefix(hash);
video_dir.join(shard).join(hash) video_dir.join(shard).join(hash)
} }
/// Library-scoped legacy mirrored path:
/// `<derivative_dir>/<library_id>/<rel_path>`. Used as the fallback when
/// `content_hash` isn't available — the library prefix prevents the
/// "lib1 wrote `vacation/IMG.jpg` first, lib2 sees thumb_path.exists()
/// and serves the wrong image" failure mode.
///
/// Existing single-library deployments may already have thumbnails at the
/// bare-legacy `<derivative_dir>/<rel_path>` shape; serving code is
/// expected to check both this scoped path and the bare-legacy path so
/// nothing 404s during the transition.
pub fn library_scoped_legacy_path(
derivative_dir: &Path,
library_id: i32,
rel_path: impl AsRef<Path>,
) -> PathBuf {
derivative_dir.join(library_id.to_string()).join(rel_path)
}
fn shard_prefix(hash: &str) -> &str { fn shard_prefix(hash: &str) -> &str {
let end = hash let end = hash
.char_indices() .char_indices()
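Review bot: `library_scoped_legacy_path` joins `rel_path` without constraining it: `derivative_dir.join(library_id.to_string()).join(rel_path)` returns `rel_path` itself when it is absolute, and `..` components are kept, so the result can point outside `derivative_dir`. If any caller derives `rel_path` from request input, reject absolute paths and parent-directory components here, or state in the doc comment that callers must have validated it. A smaller point: the `_large/` and `_xlarge/` previews are namespaced so they cannot collide with the `<hash[..2]>` shards, but the `<library_id>` directory is not, so a two-digit library id such as `12` shares a directory with the shard for hashes starting `12`; a fixed prefix in the style of `_large/` would keep the trees apart.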
@@ -101,8 +141,24 @@ mod tests {
let p = thumbnail_path(thumbs, "abcdef0123"); let p = thumbnail_path(thumbs, "abcdef0123");
assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg")); assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg"));
let l = large_preview_path(thumbs, "abcdef0123");
assert_eq!(l, PathBuf::from("/tmp/thumbs/_large/ab/abcdef0123.jpg"));
let video = Path::new("/tmp/video"); let video = Path::new("/tmp/video");
let d = hls_dir(video, "1234deadbeef"); let d = hls_dir(video, "1234deadbeef");
assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef")); assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef"));
} }
#[test]
fn library_scoped_legacy_path_prefixes_with_library_id() {
let thumbs = Path::new("/tmp/thumbs");
let p = library_scoped_legacy_path(thumbs, 7, "vacation/IMG.jpg");
assert_eq!(p, PathBuf::from("/tmp/thumbs/7/vacation/IMG.jpg"));
// Same rel_path, different library — different output. This is
// the whole point: lib 1 and lib 2 don't clobber each other.
let p1 = library_scoped_legacy_path(thumbs, 1, "vacation/IMG.jpg");
let p2 = library_scoped_legacy_path(thumbs, 2, "vacation/IMG.jpg");
assert_ne!(p1, p2);
}
} }
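Review bot: The updated test asserts `large_preview_path` but never calls `xlarge_preview_path`, which was added in the same change, so a wrong directory name there would pass unnoticed. Add the matching assertion for `_xlarge`, and consider a case in `library_scoped_legacy_path_prefixes_with_library_id` that feeds an absolute or `..`-bearing `rel_path` to pin down the intended behaviour.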
+24
@@ -165,6 +165,15 @@ pub struct FilesRequest {
/// Optional library filter. Accepts a library id (e.g. "1") or name /// Optional library filter. Accepts a library id (e.g. "1") or name
/// (e.g. "main"). When omitted, results span all libraries. /// (e.g. "main"). When omitted, results span all libraries.
pub library: Option<String>, pub library: Option<String>,
/// When true, include rows soft-marked as duplicates of another file
/// (i.e. `image_exif.duplicate_of_hash IS NOT NULL`). Default false —
/// the standard /photos listing hides demoted siblings so the grid
/// silently shrinks after a resolve. The Apollo duplicates modal
/// passes `true` so it can show both survivors and demoted members
/// inside a group.
#[serde(default)]
pub include_duplicates: Option<bool>,
} }
#[derive(Copy, Clone, Deserialize, PartialEq, Debug)] #[derive(Copy, Clone, Deserialize, PartialEq, Debug)]
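Review bot: `include_duplicates` is declared `Option<bool>` with `#[serde(default)]`, but the doc comment describes a plain default of false; the `Option` adds a third state that every reader has to collapse with `unwrap_or(false)`, and `#[serde(default)]` is redundant on an `Option`. Declaring it `bool` with `#[serde(default)]` would put the documented default in the type.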
@@ -185,6 +194,8 @@ pub enum MediaType {
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum PhotoSize { pub enum PhotoSize {
Full, Full,
XLarge,
Large,
Thumb, Thumb,
} }
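Review bot: The new `XLarge` and `Large` variants have no doc comment giving their target dimensions, and with `rename_all = "lowercase"` the wire values are `xlarge` and `large`; a one-line comment per variant stating the size and the query value would save clients from reading the resize code.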
@@ -277,6 +288,16 @@ pub struct ExifMetadata {
pub gps: Option<GpsCoordinates>, pub gps: Option<GpsCoordinates>,
pub capture_settings: Option<CaptureSettings>, pub capture_settings: Option<CaptureSettings>,
pub date_taken: Option<i64>, pub date_taken: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`:
/// `"exif" | "exiftool" | "filename" | "fs_time" | "manual"`. NULL when
/// `date_taken` itself is NULL.
pub date_taken_source: Option<String>,
/// When `date_taken_source = "manual"`, the prior `date_taken` snapshot.
/// Used by the UI's revert affordance and to label "manually overridden;
/// originally X" in the details modal.
pub original_date_taken: Option<i64>,
/// When `date_taken_source = "manual"`, the prior source.
pub original_date_taken_source: Option<String>,
} }
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
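Review bot: `date_taken_source` and `original_date_taken_source` are `Option<String>` although the doc comment lists a closed set (`"exif" | "exiftool" | "filename" | "fs_time" | "manual"`); a serde enum would make an unexpected value a type error rather than a string the UI has to guess at. If the string type is kept to match the column, say so in the comment.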
@@ -361,6 +382,9 @@ impl From<ImageExif> for ExifMetadata {
None None
}, },
date_taken: exif.date_taken, date_taken: exif.date_taken,
date_taken_source: exif.date_taken_source,
original_date_taken: exif.original_date_taken,
original_date_taken_source: exif.original_date_taken_source,
} }
} }
} }
+20 -17
@@ -222,11 +222,12 @@ impl CalendarEventDao for SqliteCalendarEventDao {
// Validate embedding dimensions if provided // Validate embedding dimensions if provided
if let Some(ref emb) = event.embedding if let Some(ref emb) = event.embedding
&& emb.len() != 768 && emb.len() != crate::ai::embedding_dim()
{ {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)", "Invalid embedding dimensions: {} (expected {})",
emb.len() emb.len(),
crate::ai::embedding_dim()
)); ));
} }
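Review bot: `crate::ai::embedding_dim()` is called twice in this check, once for the comparison and once for the message, and the same pair repeats in every dimension check in this file. Binding it once (`let expected = crate::ai::embedding_dim();`) keeps the reported value identical to the one compared and shortens each site. A comment on what happens to rows written under the old fixed 768 if the configured dimension changes would also help.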
@@ -274,7 +275,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
source_file: event.source_file, source_file: event.source_file,
}) })
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
} }
fn store_events_batch( fn store_events_batch(
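Review bot: Confirm that `DbError::log(DbErrorKind::InsertError, e)` in the `.map_err` here only writes `e` to the server log and still returns a `DbError` carrying just the kind; its definition is not in this diff, and the same call replaces `|_| DbError::new(..)` across the file. If the error text were attached to the returned value, SQL or constraint detail could reach an HTTP response.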
@@ -293,7 +294,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
for event in events { for event in events {
// Validate embedding if provided // Validate embedding if provided
if let Some(ref emb) = event.embedding if let Some(ref emb) = event.embedding
&& emb.len() != 768 && emb.len() != crate::ai::embedding_dim()
{ {
log::warn!( log::warn!(
"Skipping event with invalid embedding dimensions: {}", "Skipping event with invalid embedding dimensions: {}",
@@ -348,7 +349,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
Ok(inserted) Ok(inserted)
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
} }
fn find_events_in_range( fn find_events_in_range(
@@ -373,7 +374,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
.map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect()) .map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e)) .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn find_similar_events( fn find_similar_events(
@@ -385,10 +386,11 @@ impl CalendarEventDao for SqliteCalendarEventDao {
trace_db_call(context, "query", "find_similar_events", |_span| { trace_db_call(context, "query", "find_similar_events", |_span| {
let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao"); let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");
if query_embedding.len() != 768 { if query_embedding.len() != crate::ai::embedding_dim() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)", "Invalid query embedding dimensions: {} (expected {})",
query_embedding.len() query_embedding.len(),
crate::ai::embedding_dim()
)); ));
} }
@@ -429,7 +431,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect()) Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn find_relevant_events_hybrid( fn find_relevant_events_hybrid(
@@ -461,10 +463,11 @@ impl CalendarEventDao for SqliteCalendarEventDao {
// Step 2: If query embedding provided, rank by semantic similarity // Step 2: If query embedding provided, rank by semantic similarity
if let Some(query_emb) = query_embedding { if let Some(query_emb) = query_embedding {
if query_emb.len() != 768 { if query_emb.len() != crate::ai::embedding_dim() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)", "Invalid query embedding dimensions: {} (expected {})",
query_emb.len() query_emb.len(),
crate::ai::embedding_dim()
)); ));
} }
@@ -500,7 +503,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect()) Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect())
} }
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn event_exists( fn event_exists(
@@ -528,7 +531,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
Ok(result.count > 0) Ok(result.count > 0)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> { fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -551,6 +554,6 @@ impl CalendarEventDao for SqliteCalendarEventDao {
Ok(result.count) Ok(result.count)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
} }
+46 -14
@@ -75,6 +75,11 @@ pub trait DailySummaryDao: Sync + Send {
context: &opentelemetry::Context, context: &opentelemetry::Context,
contact: &str, contact: &str,
) -> Result<i64, DbError>; ) -> Result<i64, DbError>;
/// Cheap presence check — returns true iff at least one daily summary row
/// exists. Used by gating logic that only needs "is the table empty?",
/// avoiding a `COUNT(*)` full scan on large corpora.
fn has_any_summaries(&mut self, context: &opentelemetry::Context) -> Result<bool, DbError>;
} }
pub struct SqliteDailySummaryDao { pub struct SqliteDailySummaryDao {
@@ -145,10 +150,11 @@ impl DailySummaryDao for SqliteDailySummaryDao {
.expect("Unable to get DailySummaryDao"); .expect("Unable to get DailySummaryDao");
// Validate embedding dimensions // Validate embedding dimensions
if summary.embedding.len() != 768 { if summary.embedding.len() != crate::ai::embedding_dim() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)", "Invalid embedding dimensions: {} (expected {})",
summary.embedding.len() summary.embedding.len(),
crate::ai::embedding_dim()
)); ));
} }
@@ -185,7 +191,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
model_version: summary.model_version, model_version: summary.model_version,
}) })
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
} }
fn find_similar_summaries( fn find_similar_summaries(
@@ -197,10 +203,11 @@ impl DailySummaryDao for SqliteDailySummaryDao {
trace_db_call(context, "query", "find_similar_summaries", |_span| { trace_db_call(context, "query", "find_similar_summaries", |_span| {
let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao"); let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");
if query_embedding.len() != 768 { if query_embedding.len() != crate::ai::embedding_dim() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)", "Invalid query embedding dimensions: {} (expected {})",
query_embedding.len() query_embedding.len(),
crate::ai::embedding_dim()
)); ));
} }
@@ -281,7 +288,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
Ok(top_results) Ok(top_results)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn find_similar_summaries_with_time_weight( fn find_similar_summaries_with_time_weight(
@@ -294,10 +301,11 @@ impl DailySummaryDao for SqliteDailySummaryDao {
trace_db_call(context, "query", "find_similar_summaries_with_time_weight", |_span| { trace_db_call(context, "query", "find_similar_summaries_with_time_weight", |_span| {
let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao"); let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");
if query_embedding.len() != 768 { if query_embedding.len() != crate::ai::embedding_dim() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)", "Invalid query embedding dimensions: {} (expected {})",
query_embedding.len() query_embedding.len(),
crate::ai::embedding_dim()
)); ));
} }
@@ -403,7 +411,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
Ok(top_results) Ok(top_results)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn summary_exists( fn summary_exists(
@@ -430,7 +438,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
Ok(count > 0) Ok(count > 0)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_summary_count( fn get_summary_count(
@@ -452,7 +460,31 @@ impl DailySummaryDao for SqliteDailySummaryDao {
.map(|r| r.count) .map(|r| r.count)
.map_err(|e| anyhow::anyhow!("Count query error: {:?}", e)) .map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn has_any_summaries(&mut self, context: &opentelemetry::Context) -> Result<bool, DbError> {
trace_db_call(context, "query", "has_any_summaries", |_span| {
let mut conn = self
.connection
.lock()
.expect("Unable to get DailySummaryDao");
#[derive(QueryableByName)]
struct ProbeResult {
#[diesel(sql_type = diesel::sql_types::Integer)]
#[allow(dead_code)]
one: i32,
}
let rows: Vec<ProbeResult> =
diesel::sql_query("SELECT 1 as one FROM daily_conversation_summaries LIMIT 1")
.load(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to probe daily summaries: {}", e))?;
Ok(!rows.is_empty())
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
} }
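Review bot: In `has_any_summaries`, the probe loads a `Vec<ProbeResult>` and needs `#[allow(dead_code)]` on `one` only so it can test `rows.is_empty()`. Selecting `EXISTS(SELECT 1 FROM daily_conversation_summaries)` into a single row whose field is actually read would express the intent directly and drop the allow attribute. The cost argument in the trait doc (no `COUNT(*)` scan) holds either way.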
+681
@@ -0,0 +1,681 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::models::{
InsertInsightGenerationJob, InsightGenerationJob, InsightGenerationType, InsightJobStatus,
};
use crate::database::schema;
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Tracks async insight generation jobs. Each call to `create_job` inserts
/// a new row; the application layer prevents concurrent running jobs by
/// cancelling the old one before creating a new one.
pub trait InsightGenerationJobDao: Sync + Send {
/// Insert a new running job. Always creates a new row (no upsert).
/// Cleans up terminal-state rows for the same key first.
fn create_job(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
file_path: &str,
generation_type: InsightGenerationType,
) -> Result<i32, DbError>;
/// Mark a job as completed with the resulting insight id. Only updates
/// if the job is still in "running" status (prevents overwriting a
/// cancelled job with a late-completing task).
fn complete_job(
&mut self,
context: &opentelemetry::Context,
job_id: i32,
insight_id: i32,
) -> Result<(), DbError>;
/// Mark a job as failed with an error message. Only updates if the job
/// is still in "running" status.
fn fail_job(
&mut self,
context: &opentelemetry::Context,
job_id: i32,
error_message: &str,
) -> Result<(), DbError>;
/// Cancel a specific job by id. Only updates if the job is still
/// in "running" status. Returns true if a row was updated.
fn cancel_job(
&mut self,
context: &opentelemetry::Context,
job_id: i32,
) -> Result<bool, DbError>;
/// Cancel all running jobs for a given file. Returns the number of
/// jobs cancelled.
fn cancel_active_jobs(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
file_path: &str,
) -> Result<usize, DbError>;
/// Find the latest running job for a given file. Returns None if no
/// running job exists.
fn get_active_job(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
file_path: &str,
) -> Result<Option<InsightGenerationJob>, DbError>;
/// Find any job by id regardless of status.
fn get_job_by_id(
&mut self,
context: &opentelemetry::Context,
job_id: i32,
) -> Result<Option<InsightGenerationJob>, DbError>;
/// Mark all jobs still in "running" status as "failed" with a recovery
/// error message. Returns the number of jobs recovered.
fn recover_orphaned_jobs(&mut self, context: &opentelemetry::Context)
-> Result<usize, DbError>;
}
pub struct SqliteInsightGenerationJobDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteInsightGenerationJobDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteInsightGenerationJobDao {
pub fn new() -> Self {
Self {
connection: Arc::new(Mutex::new(connect())),
}
}
#[cfg(test)]
pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
Self { connection: conn }
}
}
impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
fn create_job(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
file_path: &str,
generation_type: InsightGenerationType,
) -> Result<i32, DbError> {
trace_db_call(context, "insert", "create_job", |_span| {
use schema::insight_generation_jobs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock InsightGenerationJobDao");
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("Time went backwards")
.as_secs() as i64;
let new_job = InsertInsightGenerationJob {
library_id,
path: file_path.to_string(),
gen_type: generation_type.to_string(),
status: InsightJobStatus::Running.to_string(),
started_at: now,
};
diesel::insert_into(dsl::insight_generation_jobs)
.values(&new_job)
.execute(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to insert job: {}", e))?;
dsl::insight_generation_jobs
.filter(
dsl::library_id
.eq(library_id)
.and(dsl::file_path.eq(file_path))
.and(dsl::generation_type.eq(generation_type.as_str()))
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
)
.select(dsl::id)
.order(dsl::id.desc())
.first::<i32>(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to get job id: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn complete_job(
&mut self,
context: &opentelemetry::Context,
job_id: i32,
insight_id: i32,
) -> Result<(), DbError> {
trace_db_call(context, "update", "complete_job", |_span| {
use schema::insight_generation_jobs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock InsightGenerationJobDao");
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("Time went backwards")
.as_secs() as i64;
// Only update if still running — prevents cancelled job from
// being overwritten by a late-completing task.
diesel::update(
dsl::insight_generation_jobs.filter(
dsl::id
.eq(job_id)
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
),
)
.set((
dsl::status.eq(InsightJobStatus::Completed.as_str()),
dsl::completed_at.eq(Some(now)),
dsl::result_insight_id.eq(Some(insight_id)),
))
.execute(connection.deref_mut())
.map(|_| ())
.map_err(|e| anyhow::anyhow!("Failed to complete job: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
fn fail_job(
&mut self,
context: &opentelemetry::Context,
job_id: i32,
error_message: &str,
) -> Result<(), DbError> {
trace_db_call(context, "update", "fail_job", |_span| {
use schema::insight_generation_jobs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock InsightGenerationJobDao");
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("Time went backwards")
.as_secs() as i64;
// Only update if still running.
diesel::update(
dsl::insight_generation_jobs.filter(
dsl::id
.eq(job_id)
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
),
)
.set((
dsl::status.eq(InsightJobStatus::Failed.as_str()),
dsl::completed_at.eq(Some(now)),
dsl::error_message.eq(Some(error_message.to_string())),
))
.execute(connection.deref_mut())
.map(|_| ())
.map_err(|e| anyhow::anyhow!("Failed to fail job: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
fn cancel_job(
&mut self,
context: &opentelemetry::Context,
job_id: i32,
) -> Result<bool, DbError> {
trace_db_call(context, "update", "cancel_job", |_span| {
use schema::insight_generation_jobs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock InsightGenerationJobDao");
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("Time went backwards")
.as_secs() as i64;
let rows = diesel::update(
dsl::insight_generation_jobs.filter(
dsl::id
.eq(job_id)
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
),
)
.set((
dsl::status.eq(InsightJobStatus::Cancelled.as_str()),
dsl::completed_at.eq(Some(now)),
dsl::error_message.eq(Some("cancelled by user".to_string())),
))
.execute(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to cancel job: {}", e))?;
Ok(rows > 0)
})
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
fn cancel_active_jobs(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
file_path: &str,
) -> Result<usize, DbError> {
trace_db_call(context, "update", "cancel_active_jobs", |_span| {
use schema::insight_generation_jobs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock InsightGenerationJobDao");
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("Time went backwards")
.as_secs() as i64;
let rows = diesel::update(
dsl::insight_generation_jobs.filter(
dsl::library_id
.eq(library_id)
.and(dsl::file_path.eq(file_path))
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
),
)
.set((
dsl::status.eq(InsightJobStatus::Cancelled.as_str()),
dsl::completed_at.eq(Some(now)),
dsl::error_message.eq(Some("cancelled by newer request".to_string())),
))
.execute(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to cancel active jobs: {}", e))?;
Ok(rows)
})
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
fn get_active_job(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
file_path: &str,
) -> Result<Option<InsightGenerationJob>, DbError> {
trace_db_call(context, "query", "get_active_job", |_span| {
use schema::insight_generation_jobs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock InsightGenerationJobDao");
dsl::insight_generation_jobs
.filter(
dsl::library_id
.eq(library_id)
.and(dsl::file_path.eq(file_path))
.and(dsl::status.eq(InsightJobStatus::Running.as_str())),
)
.order(dsl::id.desc())
.first::<InsightGenerationJob>(connection.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Failed to get active job: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn get_job_by_id(
&mut self,
context: &opentelemetry::Context,
job_id: i32,
) -> Result<Option<InsightGenerationJob>, DbError> {
trace_db_call(context, "query", "get_job_by_id", |_span| {
use schema::insight_generation_jobs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock InsightGenerationJobDao");
dsl::insight_generation_jobs
.filter(dsl::id.eq(job_id))
.first::<InsightGenerationJob>(connection.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Failed to get job: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn recover_orphaned_jobs(
&mut self,
context: &opentelemetry::Context,
) -> Result<usize, DbError> {
trace_db_call(context, "update", "recover_orphaned_jobs", |_span| {
use schema::insight_generation_jobs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock InsightGenerationJobDao");
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("Time went backwards")
.as_secs() as i64;
let rows = diesel::update(
dsl::insight_generation_jobs
.filter(dsl::status.eq(InsightJobStatus::Running.as_str())),
)
.set((
dsl::status.eq(InsightJobStatus::Failed.as_str()),
dsl::completed_at.eq(Some(now)),
dsl::error_message.eq(Some("server crashed while running".to_string())),
))
.execute(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to recover orphaned jobs: {}", e))?;
Ok(rows)
})
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
}
#[cfg(test)]
mod tests {
use super::*;
use diesel::Connection;
use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
fn setup_dao() -> SqliteInsightGenerationJobDao {
let mut conn = SqliteConnection::establish(":memory:")
.expect("Unable to create in-memory db connection");
conn.run_pending_migrations(DB_MIGRATIONS)
.expect("Failure running DB migrations");
SqliteInsightGenerationJobDao::from_connection(Arc::new(Mutex::new(conn)))
}
fn ctx() -> opentelemetry::Context {
opentelemetry::Context::new()
}
#[test]
fn create_job_inserts_new_row() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id_1 = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
let job_id_2 = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
assert_ne!(job_id_1, job_id_2, "each create_job call inserts a new row");
}
#[test]
fn complete_job_sets_result() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
dao.complete_job(&ctx, job_id, 42).unwrap();
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
assert_eq!(job.status, InsightJobStatus::Completed.as_str());
assert_eq!(job.result_insight_id, Some(42));
assert!(job.completed_at.is_some());
}
#[test]
fn fail_job_sets_error() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
.unwrap();
dao.fail_job(&ctx, job_id, "model timeout").unwrap();
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
assert_eq!(job.status, InsightJobStatus::Failed.as_str());
assert_eq!(job.error_message.as_deref(), Some("model timeout"));
assert!(job.completed_at.is_some());
}
#[test]
fn get_active_job_returns_none_when_completed() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
// Job is running
let active = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
assert!(active.is_some());
assert_eq!(active.unwrap().id, job_id);
// Complete it
dao.complete_job(&ctx, job_id, 1).unwrap();
// No longer active
let active = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
assert!(active.is_none());
}
#[test]
fn cancel_active_jobs() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
let cancelled = dao.cancel_active_jobs(&ctx, 1, "photos/test.jpg").unwrap();
assert_eq!(cancelled, 1, "should cancel 1 running job");
// Job is no longer active
let active = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
assert!(active.is_none());
// Job exists with cancelled status
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
assert_eq!(job.status, InsightJobStatus::Cancelled.as_str());
// Cancelling again returns 0 (nothing to cancel)
let cancelled2 = dao.cancel_active_jobs(&ctx, 1, "photos/test.jpg").unwrap();
assert_eq!(cancelled2, 0, "should return 0 when no running job");
}
#[test]
fn get_active_job_scoped_by_library() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id_1 = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
let job_id_2 = dao
.create_job(&ctx, 2, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
assert_ne!(
job_id_1, job_id_2,
"different libraries should have separate jobs"
);
// Complete lib1's job
dao.complete_job(&ctx, job_id_1, 1).unwrap();
// lib1 has no active job
let active1 = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
assert!(active1.is_none());
// lib2 still has active job
let active2 = dao.get_active_job(&ctx, 2, "photos/test.jpg").unwrap();
assert!(active2.is_some());
assert_eq!(active2.unwrap().id, job_id_2);
}
#[test]
fn get_job_by_id_finds_any_status() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
// Find while running
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
assert_eq!(job.status, InsightJobStatus::Running.as_str());
// Complete it
dao.complete_job(&ctx, job_id, 99).unwrap();
// Still findable
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
assert_eq!(job.status, InsightJobStatus::Completed.as_str());
assert_eq!(job.result_insight_id, Some(99));
}
#[test]
fn recover_orphaned_jobs() {
let mut dao = setup_dao();
let ctx = ctx();
// Create two running jobs
let job_id_1 = dao
.create_job(&ctx, 1, "photos/a.jpg", InsightGenerationType::Standard)
.unwrap();
let job_id_2 = dao
.create_job(&ctx, 1, "photos/b.jpg", InsightGenerationType::Agentic)
.unwrap();
// Complete one
dao.complete_job(&ctx, job_id_1, 1).unwrap();
// Recover should only affect the running job
let recovered = dao.recover_orphaned_jobs(&ctx).unwrap();
assert_eq!(recovered, 1, "should recover exactly 1 running job");
// job_id_1 is still completed
let job1 = dao.get_job_by_id(&ctx, job_id_1).unwrap().unwrap();
assert_eq!(job1.status, InsightJobStatus::Completed.as_str());
// job_id_2 is now failed with recovery message
let job2 = dao.get_job_by_id(&ctx, job_id_2).unwrap().unwrap();
assert_eq!(job2.status, InsightJobStatus::Failed.as_str());
assert_eq!(
job2.error_message.as_deref(),
Some("server crashed while running")
);
// Second recovery is a no-op
let recovered2 = dao.recover_orphaned_jobs(&ctx).unwrap();
assert_eq!(recovered2, 0, "no running jobs remain");
}
#[test]
fn complete_job_noop_when_cancelled() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
dao.cancel_job(&ctx, job_id).unwrap();
// Late-completing task tries to mark as completed — should be a no-op
dao.complete_job(&ctx, job_id, 42).unwrap();
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
assert_eq!(
job.status,
InsightJobStatus::Cancelled.as_str(),
"cancelled status must not be overwritten by late complete"
);
assert_eq!(
job.result_insight_id, None,
"insight_id must stay None when complete is a no-op"
);
}
#[test]
fn fail_job_noop_when_cancelled() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
.unwrap();
dao.cancel_job(&ctx, job_id).unwrap();
// Late-failing task tries to mark as failed — should be a no-op
dao.fail_job(&ctx, job_id, "timeout after 120s").unwrap();
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
assert_eq!(
job.status,
InsightJobStatus::Cancelled.as_str(),
"cancelled status must not be overwritten by late fail"
);
assert_eq!(
job.error_message.as_deref(),
Some("cancelled by user"),
"error_message must reflect the cancel, not the late fail"
);
}
#[test]
fn cancel_job_by_id() {
let mut dao = setup_dao();
let ctx = ctx();
let job_id = dao
.create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
.unwrap();
let cancelled = dao.cancel_job(&ctx, job_id).unwrap();
assert!(cancelled, "should cancel running job");
let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
assert_eq!(job.status, InsightJobStatus::Cancelled.as_str());
assert!(job.completed_at.is_some());
// Cancelling again is a no-op
let cancelled2 = dao.cancel_job(&ctx, job_id).unwrap();
assert!(!cancelled2, "already cancelled job should return false");
}
}
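Review bot: The trait doc for `create_job` says it "Cleans up terminal-state rows for the same key first", but the implementation only inserts and then re-selects the id; no delete is issued, so either the comment or the body should change. Two further points in the same file: `complete_job` and `fail_job` end in `.map(|_| ())`, so a caller cannot tell that its update was a no-op against a cancelled job, whereas `cancel_job` returns `rows > 0`; returning the same bool would let the late task skip its follow-up work. The single-running-job rule also rests entirely on the application calling `cancel_active_jobs` first (`create_job_inserts_new_row` shows two running rows for one key), so if that rule is an invariant, a partial unique index on running rows would enforce it in the database.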
+208 -27
@@ -21,6 +21,22 @@ pub trait InsightDao: Sync + Send {
file_path: &str, file_path: &str,
) -> Result<Option<PhotoInsight>, DbError>; ) -> Result<Option<PhotoInsight>, DbError>;
/// Library-scoped variant of `get_insight`. The default `get_insight`
/// finds any `is_current=true` row matching `file_path` across
/// libraries — fine for the photo-grid metadata fetch (cross-library
/// merge), wrong for the chat path: a regenerate on lib1 flips lib1's
/// row to `is_current=false` and inserts a new lib1 row, but
/// lib2's untouched `is_current=true` row for the same rel_path
/// would still satisfy the path-only query and shadow the regen on
/// the next history fetch. Always pass a library_id when you have
/// one (chat / insight write paths always do).
fn get_current_insight_for_library(
&mut self,
context: &opentelemetry::Context,
library_id: i32,
file_path: &str,
) -> Result<Option<PhotoInsight>, DbError>;
/// Return the most recent current insight whose rel_path is one of /// Return the most recent current insight whose rel_path is one of
/// `paths`. Used for content-hash sharing: the caller expands a /// `paths`. Used for content-hash sharing: the caller expands a
/// single file into all rel_paths with the same content_hash, then /// single file into all rel_paths with the same content_hash, then
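Review bot: The new doc comment explains that the path-only `get_insight` can return another library's `is_current=true` row, yet the safer behaviour depends on callers remembering to pick `get_current_insight_for_library`. Consider renaming `get_insight` to something that states its cross-library scope, or adding a matching warning to its own doc comment, so a new call site on a chat or write path does not reach for it by default.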
@@ -31,7 +47,6 @@ pub trait InsightDao: Sync + Send {
paths: &[String], paths: &[String],
) -> Result<Option<PhotoInsight>, DbError>; ) -> Result<Option<PhotoInsight>, DbError>;
#[allow(dead_code)]
fn get_insight_history( fn get_insight_history(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
@@ -66,6 +81,17 @@ pub trait InsightDao: Sync + Send {
approved: bool, approved: bool,
) -> Result<(), DbError>; ) -> Result<(), DbError>;
/// Rate a specific insight version by primary key, regardless of
/// `is_current`. Used by the per-file history view to approve/reject
/// previously generated (superseded) versions, which the path-based
/// `rate_insight` (current row only) cannot reach.
fn rate_insight_by_id(
&mut self,
context: &opentelemetry::Context,
insight_id: i32,
approved: bool,
) -> Result<(), DbError>;
fn get_approved_insights( fn get_approved_insights(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
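Review bot: `rate_insight_by_id` takes only `insight_id` and returns `Result<(), DbError>`, so the DAO neither ties the id to the library and file whose history is being viewed nor reports an id that matched nothing. Either accept `library_id` (and `file_path`) as an extra filter or document that the handler must validate the id first, and return the affected row count the way `update_training_messages` now does.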
@@ -74,13 +100,15 @@ pub trait InsightDao: Sync + Send {
/// Replace the `training_messages` JSON blob on the current row for /// Replace the `training_messages` JSON blob on the current row for
/// `(library_id, rel_path)`. Used by chat-turn append mode to persist /// `(library_id, rel_path)`. Used by chat-turn append mode to persist
/// the extended conversation without inserting a new insight version. /// the extended conversation without inserting a new insight version.
/// Returns the number of rows affected (0 if no current row matched,
/// indicating a concurrent regenerate/reconcile flipped `is_current`).
fn update_training_messages( fn update_training_messages(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
library_id: i32, library_id: i32,
file_path: &str, file_path: &str,
training_messages_json: &str, training_messages_json: &str,
) -> Result<(), DbError>; ) -> Result<usize, DbError>;
} }
pub struct SqliteInsightDao { pub struct SqliteInsightDao {
@@ -111,13 +139,30 @@ impl InsightDao for SqliteInsightDao {
fn store_insight( fn store_insight(
&mut self, &mut self,
context: &opentelemetry::Context, context: &opentelemetry::Context,
insight: InsertPhotoInsight, mut insight: InsertPhotoInsight,
) -> Result<PhotoInsight, DbError> { ) -> Result<PhotoInsight, DbError> {
trace_db_call(context, "insert", "store_insight", |_span| { trace_db_call(context, "insert", "store_insight", |_span| {
use schema::photo_insights::dsl::*; use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao"); let mut connection = self.connection.lock().expect("Unable to get InsightDao");
// Eagerly populate content_hash so this insight follows the
// bytes (CLAUDE.md "Multi-library data model"). Caller-
// supplied hash wins; otherwise look it up from image_exif
// for the (library_id, rel_path) tuple. None is acceptable —
// reconciliation backfills it once the hash lands.
if insight.content_hash.is_none() {
use schema::image_exif as ie;
insight.content_hash = ie::table
.filter(ie::library_id.eq(insight.library_id))
.filter(ie::rel_path.eq(&insight.file_path))
.filter(ie::content_hash.is_not_null())
.select(ie::content_hash)
.first::<Option<String>>(connection.deref_mut())
.ok()
.flatten();
}
// Mark all existing insights for this file as no longer current // Mark all existing insights for this file as no longer current
diesel::update( diesel::update(
photo_insights photo_insights
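Review bot: in the `content_hash` lookup, `.first::<Option<String>>(...).ok().flatten()` turns every Diesel error into `None`, not just a missing row, so a locked or failing `image_exif` read is indistinguishable from "hash not computed yet" and the insight is stored without a hash and without a log entry. Use `.optional()` and propagate, or at least log, the errors that are not a missing row; the comment's "None is acceptable" should cover only the no-row case.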
@@ -126,13 +171,13 @@ impl InsightDao for SqliteInsightDao {
) )
.set(is_current.eq(false)) .set(is_current.eq(false))
.execute(connection.deref_mut()) .execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Update is_current error"))?; .map_err(|e| anyhow::anyhow!("Failed to flip is_current: {}", e))?;
// Insert the new insight as current // Insert the new insight as current
diesel::insert_into(photo_insights) diesel::insert_into(photo_insights)
.values(&insight) .values(&insight)
.execute(connection.deref_mut()) .execute(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Insert error"))?; .map_err(|e| anyhow::anyhow!("Failed to insert insight: {}", e))?;
// Retrieve the inserted record (is_current = true) // Retrieve the inserted record (is_current = true)
photo_insights photo_insights
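Review bot: the `is_current` flip and the insert are still two independent `execute` calls, and nothing in the visible lines opens a transaction. If the insert fails after the update succeeded, every prior insight for the file is already `is_current = false` and no current row is left for it. Wrap the flip, the insert and the read-back in `connection.transaction(...)` so a failed insert rolls the flip back.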
@@ -140,9 +185,12 @@ impl InsightDao for SqliteInsightDao {
.filter(rel_path.eq(&insight.file_path)) .filter(rel_path.eq(&insight.file_path))
.filter(is_current.eq(true)) .filter(is_current.eq(true))
.first::<PhotoInsight>(connection.deref_mut()) .first::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|e| anyhow::anyhow!("Failed to retrieve inserted insight: {}", e))
})
.map_err(|e| {
log::error!("store_insight failed: {}", e);
DbError::new(DbErrorKind::InsertError)
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError))
} }
fn get_insight( fn get_insight(
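Review bot: `store_insight` is the only method in this change that logs with `log::error!` and then builds `DbError::new(DbErrorKind::InsertError)`; every other method moves to `DbError::log(kind, e)`. Unless `DbError::log` does something unwanted on this path, use it here as well so insert failures are logged in the same format and at the same level as the rest of the DAO.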
@@ -160,9 +208,36 @@ impl InsightDao for SqliteInsightDao {
.filter(is_current.eq(true)) .filter(is_current.eq(true))
.first::<PhotoInsight>(connection.deref_mut()) .first::<PhotoInsight>(connection.deref_mut())
.optional() .optional()
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn get_current_insight_for_library(
&mut self,
context: &opentelemetry::Context,
lib_id: i32,
path: &str,
) -> Result<Option<PhotoInsight>, DbError> {
trace_db_call(
context,
"query",
"get_current_insight_for_library",
|_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
photo_insights
.filter(library_id.eq(lib_id))
.filter(rel_path.eq(path))
.filter(is_current.eq(true))
.first::<PhotoInsight>(connection.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
},
)
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_insight_for_paths( fn get_insight_for_paths(
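Review bot: `get_current_insight_for_library` is added without a doc comment, so nothing says how it differs from `get_insight` or when a caller must prefer the library-scoped lookup. Document that on the trait, and add a test that stores the same `rel_path` under two `library_id` values and checks that each lookup returns its own row; the `insert` test helper added further down hardcodes `library_id: 1`, so the new `library_id.eq(lib_id)` filter is never exercised.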
@@ -184,9 +259,9 @@ impl InsightDao for SqliteInsightDao {
.order(generated_at.desc()) .order(generated_at.desc())
.first::<PhotoInsight>(connection.deref_mut()) .first::<PhotoInsight>(connection.deref_mut())
.optional() .optional()
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_insight_history( fn get_insight_history(
@@ -203,9 +278,9 @@ impl InsightDao for SqliteInsightDao {
.filter(rel_path.eq(path)) .filter(rel_path.eq(path))
.order(generated_at.desc()) .order(generated_at.desc())
.load::<PhotoInsight>(connection.deref_mut()) .load::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_insight_by_id( fn get_insight_by_id(
@@ -222,9 +297,9 @@ impl InsightDao for SqliteInsightDao {
.find(insight_id) .find(insight_id)
.first::<PhotoInsight>(connection.deref_mut()) .first::<PhotoInsight>(connection.deref_mut())
.optional() .optional()
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn delete_insight( fn delete_insight(
@@ -240,9 +315,9 @@ impl InsightDao for SqliteInsightDao {
diesel::delete(photo_insights.filter(rel_path.eq(path))) diesel::delete(photo_insights.filter(rel_path.eq(path)))
.execute(connection.deref_mut()) .execute(connection.deref_mut())
.map(|_| ()) .map(|_| ())
.map_err(|_| anyhow::anyhow!("Delete error")) .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_all_insights( fn get_all_insights(
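Review bot: `delete_insight` filters on `rel_path.eq(path)` alone, so under the multi-library model this file now refers to, deleting the insight for one path removes the rows for that path in every library. Since this hunk already touches the function, add a `library_id` filter (or a library-scoped variant alongside `get_current_insight_for_library`). The failure is also still mapped to `DbErrorKind::QueryError`, which will read as a failed SELECT in the logs this change is trying to make diagnosable.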
@@ -258,9 +333,9 @@ impl InsightDao for SqliteInsightDao {
.filter(is_current.eq(true)) .filter(is_current.eq(true))
.order(generated_at.desc()) .order(generated_at.desc())
.load::<PhotoInsight>(connection.deref_mut()) .load::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn rate_insight( fn rate_insight(
@@ -282,9 +357,29 @@ impl InsightDao for SqliteInsightDao {
.set(approved.eq(Some(is_approved))) .set(approved.eq(Some(is_approved)))
.execute(connection.deref_mut()) .execute(connection.deref_mut())
.map(|_| ()) .map(|_| ())
.map_err(|_| anyhow::anyhow!("Update error")) .map_err(|e| anyhow::anyhow!("Update error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::UpdateError)) .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
fn rate_insight_by_id(
&mut self,
context: &opentelemetry::Context,
target_id: i32,
is_approved: bool,
) -> Result<(), DbError> {
trace_db_call(context, "update", "rate_insight_by_id", |_span| {
use schema::photo_insights::dsl::*;
let mut connection = self.connection.lock().expect("Unable to get InsightDao");
diesel::update(photo_insights.find(target_id))
.set(approved.eq(Some(is_approved)))
.execute(connection.deref_mut())
.map(|_| ())
.map_err(|e| anyhow::anyhow!("Update error: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
} }
fn get_approved_insights( fn get_approved_insights(
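Review bot: `rate_insight_by_id` ends in `.map(|_| ())`, which discards the affected-row count, so rating an id that does not exist returns `Ok(())`. This same change makes `update_training_messages` return `usize` for exactly that reason; do the same here, or return a not-found error on 0, so the handler can tell the two cases apart. The update is keyed on the bare primary key with no `library_id` or owner filter, so the doc comment should state that the caller is responsible for checking that the requester may rate that row.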
@@ -301,9 +396,9 @@ impl InsightDao for SqliteInsightDao {
.filter(training_messages.is_not_null()) .filter(training_messages.is_not_null())
.order(generated_at.desc()) .order(generated_at.desc())
.load::<PhotoInsight>(connection.deref_mut()) .load::<PhotoInsight>(connection.deref_mut())
.map_err(|_| anyhow::anyhow!("Query error")) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn update_training_messages( fn update_training_messages(
@@ -312,7 +407,7 @@ impl InsightDao for SqliteInsightDao {
lib_id: i32, lib_id: i32,
path: &str, path: &str,
training_messages_json: &str, training_messages_json: &str,
) -> Result<(), DbError> { ) -> Result<usize, DbError> {
trace_db_call(context, "update", "update_training_messages", |_span| { trace_db_call(context, "update", "update_training_messages", |_span| {
use schema::photo_insights::dsl::*; use schema::photo_insights::dsl::*;
@@ -326,9 +421,95 @@ impl InsightDao for SqliteInsightDao {
) )
.set(training_messages.eq(Some(training_messages_json.to_string()))) .set(training_messages.eq(Some(training_messages_json.to_string())))
.execute(connection.deref_mut()) .execute(connection.deref_mut())
.map(|_| ()) .map_err(|e| anyhow::anyhow!("Update error: {}", e))
.map_err(|_| anyhow::anyhow!("Update error"))
}) })
.map_err(|_| DbError::new(DbErrorKind::UpdateError)) .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::database::test::in_memory_db_connection;
fn dao() -> SqliteInsightDao {
let conn = Arc::new(Mutex::new(in_memory_db_connection()));
SqliteInsightDao::from_connection(conn)
}
/// Build an insight insert with sensible defaults; tests override the
/// fields they care about (path, generated_at, model).
fn insert(path: &str, generated_at: i64, model: &str) -> InsertPhotoInsight {
InsertPhotoInsight {
library_id: 1,
file_path: path.to_string(),
title: format!("title for {model}"),
summary: "summary".to_string(),
generated_at,
model_version: model.to_string(),
is_current: true,
training_messages: None,
backend: "local".to_string(),
fewshot_source_ids: None,
content_hash: None,
num_ctx: None,
temperature: None,
top_p: None,
top_k: None,
min_p: None,
system_prompt: None,
persona_id: None,
prompt_eval_count: None,
eval_count: None,
}
}
#[test]
fn get_insight_history_returns_all_versions_newest_first() {
let cx = opentelemetry::Context::new();
let mut dao = dao();
// store_insight flips prior rows to is_current=false, so three
// generations for the same path leave a 3-row history.
dao.store_insight(&cx, insert("a.jpg", 100, "m1")).unwrap();
dao.store_insight(&cx, insert("a.jpg", 200, "m2")).unwrap();
dao.store_insight(&cx, insert("a.jpg", 300, "m3")).unwrap();
// A different path must not leak into the history.
dao.store_insight(&cx, insert("b.jpg", 250, "other"))
.unwrap();
let history = dao.get_insight_history(&cx, "a.jpg").unwrap();
assert_eq!(history.len(), 3);
assert_eq!(
history.iter().map(|i| i.generated_at).collect::<Vec<_>>(),
vec![300, 200, 100],
"history should be newest-first"
);
// Exactly one version is current (the latest generation).
let current: Vec<_> = history.iter().filter(|i| i.is_current).collect();
assert_eq!(current.len(), 1);
assert_eq!(current[0].generated_at, 300);
}
#[test]
fn rate_insight_by_id_rates_only_the_targeted_version() {
let cx = opentelemetry::Context::new();
let mut dao = dao();
dao.store_insight(&cx, insert("a.jpg", 100, "m1")).unwrap();
dao.store_insight(&cx, insert("a.jpg", 200, "m2")).unwrap();
// History is newest-first: [200 (current), 100 (superseded)].
let history = dao.get_insight_history(&cx, "a.jpg").unwrap();
let old_version = history.iter().find(|i| i.generated_at == 100).unwrap();
assert!(!old_version.is_current);
dao.rate_insight_by_id(&cx, old_version.id, true).unwrap();
let history = dao.get_insight_history(&cx, "a.jpg").unwrap();
let old = history.iter().find(|i| i.generated_at == 100).unwrap();
let current = history.iter().find(|i| i.generated_at == 200).unwrap();
assert_eq!(old.approved, Some(true), "targeted version is rated");
assert_eq!(current.approved, None, "current version is untouched");
} }
} }
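Review bot: the new tests cover `get_insight_history` and `rate_insight_by_id`, but not the two behaviours in this change with the most room for silent failure: `update_training_messages` returning 0 when no current row matches, and `store_insight` populating `content_hash` from `image_exif`. Add a test that calls `update_training_messages` for a path with no current insight and asserts `Ok(0)`, and one that seeds `image_exif` and asserts the stored insight carries the hash.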
File diff suppressed because it is too large
+12 -11
@@ -216,11 +216,12 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
// Validate embedding dimensions if provided (rare for location data) // Validate embedding dimensions if provided (rare for location data)
if let Some(ref emb) = location.embedding if let Some(ref emb) = location.embedding
&& emb.len() != 768 && emb.len() != crate::ai::embedding_dim()
{ {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)", "Invalid embedding dimensions: {} (expected {})",
emb.len() emb.len(),
crate::ai::embedding_dim()
)); ));
} }
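Review bot: the condition and the error message each call `crate::ai::embedding_dim()` separately. Bind it once (`let expected = crate::ai::embedding_dim();`) before the `if let`, so the value compared and the value reported cannot differ and any lookup behind the function is not repeated. The same binding would let the `log::warn!` in `store_locations_batch` report the expected dimension, which it currently omits.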
@@ -273,7 +274,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
source_file: location.source_file, source_file: location.source_file,
}) })
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
} }
fn store_locations_batch( fn store_locations_batch(
@@ -292,7 +293,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
for location in locations { for location in locations {
// Validate embedding if provided (rare) // Validate embedding if provided (rare)
if let Some(ref emb) = location.embedding if let Some(ref emb) = location.embedding
&& emb.len() != 768 && emb.len() != crate::ai::embedding_dim()
{ {
log::warn!( log::warn!(
"Skipping location with invalid embedding dimensions: {}", "Skipping location with invalid embedding dimensions: {}",
@@ -350,7 +351,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
Ok(inserted) Ok(inserted)
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
} }
fn find_nearest_location( fn find_nearest_location(
@@ -385,7 +386,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
Ok(results.into_iter().next().map(|r| r.to_location_record())) Ok(results.into_iter().next().map(|r| r.to_location_record()))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn find_locations_in_range( fn find_locations_in_range(
@@ -413,7 +414,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
.map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect()) .map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e)) .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn find_locations_near_point( fn find_locations_near_point(
@@ -468,7 +469,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
Ok(filtered) Ok(filtered)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn location_exists( fn location_exists(
@@ -502,7 +503,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
Ok(result.count > 0) Ok(result.count > 0)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> { fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -525,6 +526,6 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
Ok(result.count) Ok(result.count)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
} }
+1756 -54
File diff suppressed because it is too large
+315 -2
@@ -1,9 +1,76 @@
use crate::database::schema::{ use crate::database::schema::{
entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, photo_insights, entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs,
users, video_preview_clips, libraries, personas, photo_insights, precomputed_reels, user_ai_prefs, users,
video_preview_clips,
}; };
use serde::Serialize; use serde::Serialize;
/// Possible statuses for an insight generation job.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, FromSqlRow)]
#[serde(rename_all = "snake_case")]
pub enum InsightJobStatus {
Running,
Completed,
Failed,
Cancelled,
}
impl InsightJobStatus {
pub fn as_str(&self) -> &'static str {
match self {
Self::Running => "running",
Self::Completed => "completed",
Self::Failed => "failed",
Self::Cancelled => "cancelled",
}
}
pub fn parse(s: &str) -> Self {
match s {
"running" => Self::Running,
"completed" => Self::Completed,
"failed" => Self::Failed,
"cancelled" => Self::Cancelled,
other => {
log::warn!(
"Unknown InsightJobStatus value: {:?}, treating as failed",
other
);
Self::Failed
}
}
}
}
impl std::fmt::Display for InsightJobStatus {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
/// Type of insight generation (standard vs agentic).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum InsightGenerationType {
Standard,
Agentic,
}
impl InsightGenerationType {
pub fn as_str(&self) -> &'static str {
match self {
Self::Standard => "standard",
Self::Agentic => "agentic",
}
}
}
impl std::fmt::Display for InsightGenerationType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
#[derive(Insertable)] #[derive(Insertable)]
#[diesel(table_name = users)] #[diesel(table_name = users)]
pub struct InsertUser<'a> { pub struct InsertUser<'a> {
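Review bot: `InsightJobStatus::parse` maps any unrecognised string to `Failed` after a warning, so a typo, a value written by a newer build, or a corrupted row is reported as a failed job. Implement `std::str::FromStr` with an error for unknown values, and let the callers that want the lenient behaviour fall back to `Failed` explicitly. `InsightGenerationType` has `as_str` but no parser at all, and `FromSqlRow` is derived on `InsightJobStatus` only, with no `FromSql` impl in the visible lines; drop the derive if it is unused.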
@@ -59,6 +126,16 @@ pub struct InsertImageExif {
pub last_modified: i64, pub last_modified: i64,
pub content_hash: Option<String>, pub content_hash: Option<String>,
pub size_bytes: Option<i64>, pub size_bytes: Option<i64>,
/// 64-bit pHash (DCT) packed as i64. NULL for videos and decode failures.
pub phash_64: Option<i64>,
/// 64-bit dHash (gradient). NULL for videos and decode failures.
pub dhash_64: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`:
/// `"exif"` | `"exiftool"` | `"filename"` | `"fs_time"`. NULL when
/// `date_taken` is NULL (no source resolved it). The per-tick backfill
/// drain re-resolves rows whose source is `"fs_time"` once exiftool
/// has had a chance to run.
pub date_taken_source: Option<String>,
} }
// Field order matches the post-migration column order in `image_exif`. // Field order matches the post-migration column order in `image_exif`.
@@ -86,6 +163,33 @@ pub struct ImageExif {
pub last_modified: i64, pub last_modified: i64,
pub content_hash: Option<String>, pub content_hash: Option<String>,
pub size_bytes: Option<i64>, pub size_bytes: Option<i64>,
pub phash_64: Option<i64>,
pub dhash_64: Option<i64>,
/// When non-null, this row is a soft-marked duplicate of the file
/// whose `content_hash` matches this value. The default `/photos`
/// listing filters such rows out.
pub duplicate_of_hash: Option<String>,
/// Unix seconds at which the resolve was committed.
pub duplicate_decided_at: Option<i64>,
/// Which step of the canonical-date waterfall populated `date_taken`.
/// Plus `"manual"` when the operator has set it via POST /image/exif/date.
pub date_taken_source: Option<String>,
/// Snapshot of the prior `date_taken` taken on first manual override.
/// NULL when no override is active. POST /image/exif/date/clear restores
/// `date_taken` from this column and nulls it back out.
pub original_date_taken: Option<i64>,
/// Snapshot of the prior `date_taken_source` taken on first manual
/// override. NULL when no override is active.
pub original_date_taken_source: Option<String>,
/// L2-normalized CLIP image embedding (raw little-endian float32 bytes;
/// length depends on the model — 768×4 for ViT-L/14, 512×4 for ViT-B/32).
/// NULL until Apollo's CLIP service has encoded this photo via the
/// backfill drain. Used by `/photos/search` for semantic queries.
pub clip_embedding: Option<Vec<u8>>,
/// Which CLIP model produced `clip_embedding` (e.g. `"ViT-L/14"`). A
/// swap of `APOLLO_CLIP_MODEL` re-eligibilizes rows whose stored
/// version differs so the drain rebuilds them.
pub clip_model_version: Option<String>,
} }
#[derive(Insertable)] #[derive(Insertable)]
@@ -108,6 +212,22 @@ pub struct InsertPhotoInsight {
/// generation). Used downstream to filter out contaminated rows when /// generation). Used downstream to filter out contaminated rows when
/// assembling an unbiased training / evaluation set. /// assembling an unbiased training / evaluation set.
pub fewshot_source_ids: Option<String>, pub fewshot_source_ids: Option<String>,
/// Bytes-keyed identity. When present, this insight is considered
/// to belong to the content rather than the path — see CLAUDE.md
/// "Multi-library data model". The DAO populates this from
/// `image_exif.content_hash` at insert time when known; rows
/// inserted before the hash is available stay null and the
/// reconciliation pass backfills them.
pub content_hash: Option<String>,
pub num_ctx: Option<i32>,
pub temperature: Option<f32>,
pub top_p: Option<f32>,
pub top_k: Option<i32>,
pub min_p: Option<f32>,
pub system_prompt: Option<String>,
pub persona_id: Option<String>,
pub prompt_eval_count: Option<i32>,
pub eval_count: Option<i32>,
} }
#[derive(Serialize, Queryable, Clone, Debug)] #[derive(Serialize, Queryable, Clone, Debug)]
@@ -126,6 +246,16 @@ pub struct PhotoInsight {
/// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat). /// `"local"` (Ollama with images) | `"hybrid"` (local vision + OpenRouter chat).
pub backend: String, pub backend: String,
pub fewshot_source_ids: Option<String>, pub fewshot_source_ids: Option<String>,
pub content_hash: Option<String>,
pub num_ctx: Option<i32>,
pub temperature: Option<f32>,
pub top_p: Option<f32>,
pub top_k: Option<i32>,
pub min_p: Option<f32>,
pub system_prompt: Option<String>,
pub persona_id: Option<String>,
pub prompt_eval_count: Option<i32>,
pub eval_count: Option<i32>,
} }
// --- Libraries --- // --- Libraries ---
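Review bot: `PhotoInsight` derives `Serialize`, so the new `system_prompt` and `persona_id` fields, along with the sampling parameters, will be included wherever this struct is serialised into a response. If the system prompt is not meant to reach every client that can read an insight, mark it `#[serde(skip_serializing)]` or map to a response DTO. The new fields other than `content_hash` also carry no doc comments, on either struct.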
@@ -136,6 +266,20 @@ pub struct LibraryRow {
pub name: String, pub name: String,
pub root_path: String, pub root_path: String,
pub created_at: i64, pub created_at: i64,
/// Operator kill switch. `false` = the watcher skips this library
/// entirely (no probe, no ingest, no maintenance) and orphan-GC
/// treats it as out-of-scope for the all-online consensus rule.
/// Toggle via SQL today — there is intentionally no HTTP endpoint
/// for library mutation (see CLAUDE.md "Multi-library data model").
pub enabled: bool,
/// Per-library excluded paths/patterns, stored comma-separated
/// (same shape as the global `EXCLUDED_DIRS` env var). NULL = no
/// extra excludes for this library; the global env var still
/// applies. The runtime `Library` struct parses this into a
/// `Vec<String>` and the walker applies the union of (global,
/// library) excludes when scanning. Use case: mount a parent
/// directory while another library covers a child subtree.
pub excluded_dirs: Option<String>,
} }
#[derive(Insertable)] #[derive(Insertable)]
@@ -144,6 +288,8 @@ pub struct InsertLibrary<'a> {
pub name: &'a str, pub name: &'a str,
pub root_path: &'a str, pub root_path: &'a str,
pub created_at: i64, pub created_at: i64,
pub enabled: bool,
pub excluded_dirs: Option<&'a str>,
} }
// --- Knowledge memory models --- // --- Knowledge memory models ---
@@ -186,6 +332,44 @@ pub struct InsertEntityFact {
pub confidence: f32, pub confidence: f32,
pub status: String, pub status: String,
pub created_at: i64, pub created_at: i64,
/// Which persona authored this fact. Shared entities, persona-tagged
/// facts: each persona accumulates its own voice over the same
/// real-world referents. Defaults to `'default'` for legacy rows
/// (see migration 2026-05-09-000000).
pub persona_id: String,
/// Author's user_id. Required for the composite FK to
/// `personas(user_id, persona_id)` (migration 2026-05-10-000000) and
/// for cross-user fact isolation: two users with the same 'default'
/// persona must not see each other's facts. Always paired with
/// `persona_id` — they're a unit.
pub user_id: i32,
/// Real-world period the fact is/was true (unix seconds). NULL on
/// either side = unbounded — `valid_from IS NULL` reads as
/// "always-true-back-to-the-beginning", `valid_until IS NULL` as
/// "still-true-now-or-unknown". Distinguishes valid time from
/// transaction time (`created_at` is when we recorded the fact,
/// not when it was true in the world). See migration
/// 2026-05-10-000100.
pub valid_from: Option<i64>,
pub valid_until: Option<i64>,
/// Points at the entity_facts.id that replaced this one. Set by
/// the supersede endpoint; status flips to 'superseded' in the
/// same transaction. See migration 2026-05-10-000200.
pub superseded_by: Option<i32>,
/// Provenance for model audit — see migration 2026-05-10-000300.
/// `created_by_model` is the LLM identifier (e.g. "qwen2.5:7b",
/// "anthropic/claude-sonnet-4") or NULL for legacy / manual rows.
/// `created_by_backend` is "local" / "hybrid" / "manual" / NULL.
pub created_by_model: Option<String>,
pub created_by_backend: Option<String>,
/// Audit trail for mutations after creation — see migration
/// 2026-05-10-000500. `last_modified_*` stamp on any update
/// (status flip, valid-time edit, supersede, manual PATCH);
/// `last_modified_at` is unix seconds. NULL on rows that have
/// never been touched since creation.
pub last_modified_by_model: Option<String>,
pub last_modified_by_backend: Option<String>,
pub last_modified_at: Option<i64>,
} }
#[derive(Serialize, Queryable, Clone, Debug)] #[derive(Serialize, Queryable, Clone, Debug)]
@@ -200,6 +384,16 @@ pub struct EntityFact {
pub confidence: f32, pub confidence: f32,
pub status: String, pub status: String,
pub created_at: i64, pub created_at: i64,
pub persona_id: String,
pub user_id: i32,
pub valid_from: Option<i64>,
pub valid_until: Option<i64>,
pub superseded_by: Option<i32>,
pub created_by_model: Option<String>,
pub created_by_backend: Option<String>,
pub last_modified_by_model: Option<String>,
pub last_modified_by_backend: Option<String>,
pub last_modified_at: Option<i64>,
} }
#[derive(Insertable)] #[derive(Insertable)]
@@ -222,6 +416,45 @@ pub struct EntityPhotoLink {
pub role: String, pub role: String,
} }
// --- Personas ---
#[derive(Insertable)]
#[diesel(table_name = personas)]
pub struct InsertPersona<'a> {
pub user_id: i32,
pub persona_id: &'a str,
pub name: &'a str,
pub system_prompt: &'a str,
pub is_built_in: bool,
pub include_all_memories: bool,
pub created_at: i64,
pub updated_at: i64,
/// "Strict mode" — agent reads only see facts with status =
/// 'reviewed' (human-verified). Default false. See migration
/// 2026-05-10-000400.
pub reviewed_only_facts: bool,
/// Gate for the agent's update_fact / supersede_fact tools.
/// Default false — fresh personas let the agent create but not
/// alter or replace. Operator opts in once a model has earned
/// trust. See migration 2026-05-10-000500.
pub allow_agent_corrections: bool,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct Persona {
pub id: i32,
pub user_id: i32,
pub persona_id: String,
pub name: String,
pub system_prompt: String,
pub is_built_in: bool,
pub include_all_memories: bool,
pub created_at: i64,
pub updated_at: i64,
pub reviewed_only_facts: bool,
pub allow_agent_corrections: bool,
}
#[derive(Insertable)] #[derive(Insertable)]
#[diesel(table_name = video_preview_clips)] #[diesel(table_name = video_preview_clips)]
pub struct InsertVideoPreviewClip { pub struct InsertVideoPreviewClip {
@@ -246,3 +479,83 @@ pub struct VideoPreviewClip {
pub created_at: String, pub created_at: String,
pub updated_at: String, pub updated_at: String,
} }
#[derive(Insertable)]
#[diesel(table_name = insight_generation_jobs)]
pub struct InsertInsightGenerationJob {
pub library_id: i32,
#[diesel(column_name = file_path)]
pub path: String,
#[diesel(column_name = generation_type)]
pub gen_type: String,
pub status: String,
pub started_at: i64,
}
#[derive(Queryable, Serialize, Clone, Debug)]
pub struct InsightGenerationJob {
pub id: i32,
pub library_id: i32,
#[diesel(column_name = file_path)]
pub path: String,
#[diesel(column_name = generation_type)]
pub gen_type: String,
pub status: String,
pub started_at: i64,
pub completed_at: Option<i64>,
pub result_insight_id: Option<i32>,
pub error_message: Option<String>,
}
// --- Precomputed reels -------------------------------------------------------
#[derive(Insertable)]
#[diesel(table_name = precomputed_reels)]
pub struct InsertablePrecomputedReel {
pub span: String,
pub library_key: String,
pub cache_key: String,
pub output_path: String,
pub title: String,
pub media_count: i32,
pub render_version: i32,
pub tz_offset_minutes: i32,
pub voice: Option<String>,
pub generated_at: i64,
}
#[derive(Serialize, Queryable, Clone, Debug)]
pub struct PrecomputedReel {
pub id: i32,
pub span: String,
pub library_key: String,
pub cache_key: String,
pub output_path: String,
pub title: String,
pub media_count: i32,
pub render_version: i32,
pub tz_offset_minutes: i32,
pub voice: Option<String>,
pub generated_at: i64,
}
// --- User AI preferences (Section E) ----------------------------------------
#[derive(Queryable, Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)]
#[diesel(table_name = user_ai_prefs)]
pub struct UserAiPrefs {
pub id: i32,
pub voice: Option<String>,
pub tz_offset_minutes: Option<i32>,
pub library: Option<String>,
pub updated_at: i64,
}
#[derive(Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)]
#[diesel(table_name = user_ai_prefs)]
pub struct UpsertUserAiPrefs {
pub voice: Option<String>,
pub tz_offset_minutes: Option<i32>,
pub library: Option<String>,
pub updated_at: i64,
}
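Review bot: `InsertInsightGenerationJob` and `InsightGenerationJob` declare `status` and `gen_type` as `String`, although this change adds `InsightJobStatus` and `InsightGenerationType` for exactly those values, so every call site converts by hand and nothing stops an arbitrary string from being written. Type the fields with the enums, or confine the conversion to the DAO. `InsightGenerationJob` is also `Serialize` with `error_message: Option<String>`; check that what is stored there is fit to return to clients, given that the DAO errors in this change now embed the underlying Diesel message.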
+447
@@ -0,0 +1,447 @@
#![allow(dead_code)]
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::models::{InsertPersona, Persona};
use crate::database::schema;
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Patch shape for update_persona. None = leave field alone. Built-ins are
/// allowed to flip `include_all_memories` but should reject name/prompt
/// edits at the handler layer (built-in copy lives in the migration).
pub struct PersonaPatch {
pub name: Option<String>,
pub system_prompt: Option<String>,
pub include_all_memories: Option<bool>,
pub reviewed_only_facts: Option<bool>,
pub allow_agent_corrections: Option<bool>,
}
/// One row of a bulk migration upload. Fields named to match the JSON
/// shape the mobile client uploads (`POST /personas/migrate`).
pub struct ImportPersona {
pub persona_id: String,
pub name: String,
pub system_prompt: String,
pub is_built_in: bool,
pub created_at: i64,
}
pub trait PersonaDao: Sync + Send {
fn list_personas(
&mut self,
cx: &opentelemetry::Context,
user_id: i32,
) -> Result<Vec<Persona>, DbError>;
fn get_persona(
&mut self,
cx: &opentelemetry::Context,
user_id: i32,
persona_id: &str,
) -> Result<Option<Persona>, DbError>;
fn create_persona(
&mut self,
cx: &opentelemetry::Context,
user_id: i32,
persona_id: &str,
name: &str,
system_prompt: &str,
is_built_in: bool,
include_all_memories: bool,
) -> Result<Persona, DbError>;
fn update_persona(
&mut self,
cx: &opentelemetry::Context,
user_id: i32,
persona_id: &str,
patch: PersonaPatch,
) -> Result<Option<Persona>, DbError>;
fn delete_persona(
&mut self,
cx: &opentelemetry::Context,
user_id: i32,
persona_id: &str,
) -> Result<bool, DbError>;
/// Idempotent bulk import. INSERT OR IGNORE on (user_id, persona_id)
/// — re-uploading the same set is a no-op. Returns the number of rows
/// actually inserted (skipped duplicates don't count).
fn bulk_import(
&mut self,
cx: &opentelemetry::Context,
user_id: i32,
personas: &[ImportPersona],
) -> Result<usize, DbError>;
}
pub struct SqlitePersonaDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqlitePersonaDao {
fn default() -> Self {
Self::new()
}
}
impl SqlitePersonaDao {
pub fn new() -> Self {
Self {
connection: Arc::new(Mutex::new(connect())),
}
}
pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
Self { connection: conn }
}
}
impl PersonaDao for SqlitePersonaDao {
fn list_personas(
&mut self,
cx: &opentelemetry::Context,
uid: i32,
) -> Result<Vec<Persona>, DbError> {
trace_db_call(cx, "query", "list_personas", |_span| {
use schema::personas::dsl::*;
let mut conn = self.connection.lock().expect("PersonaDao lock");
personas
.filter(user_id.eq(uid))
.order(created_at.asc())
.load::<Persona>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn get_persona(
&mut self,
cx: &opentelemetry::Context,
uid: i32,
pid: &str,
) -> Result<Option<Persona>, DbError> {
trace_db_call(cx, "query", "get_persona", |_span| {
use schema::personas::dsl::*;
let mut conn = self.connection.lock().expect("PersonaDao lock");
personas
.filter(user_id.eq(uid))
.filter(persona_id.eq(pid))
.first::<Persona>(conn.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn create_persona(
&mut self,
cx: &opentelemetry::Context,
uid: i32,
pid: &str,
nm: &str,
prompt: &str,
builtin: bool,
include_all: bool,
) -> Result<Persona, DbError> {
trace_db_call(cx, "insert", "create_persona", |_span| {
use schema::personas::dsl::*;
let mut conn = self.connection.lock().expect("PersonaDao lock");
let now = chrono::Utc::now().timestamp_millis();
diesel::insert_into(personas)
.values(InsertPersona {
user_id: uid,
persona_id: pid,
name: nm,
system_prompt: prompt,
is_built_in: builtin,
include_all_memories: include_all,
created_at: now,
updated_at: now,
reviewed_only_facts: false,
allow_agent_corrections: false,
})
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {}", e))?;
personas
.filter(user_id.eq(uid))
.filter(persona_id.eq(pid))
.first::<Persona>(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::InsertError, e))
}
fn update_persona(
&mut self,
cx: &opentelemetry::Context,
uid: i32,
pid: &str,
patch: PersonaPatch,
) -> Result<Option<Persona>, DbError> {
trace_db_call(cx, "update", "update_persona", |_span| {
use schema::personas::dsl::*;
let mut conn = self.connection.lock().expect("PersonaDao lock");
let now = chrono::Utc::now().timestamp_millis();
// Apply each field as its own UPDATE — keeps types simple
// (Diesel's tuple updates don't compose cleanly across optional
// columns) and matches the pattern already in use for entities
// (knowledge_dao.rs::update_entity).
if let Some(ref new_name) = patch.name {
diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
.set((name.eq(new_name), updated_at.eq(now)))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update name error: {}", e))?;
}
if let Some(ref new_prompt) = patch.system_prompt {
diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
.set((system_prompt.eq(new_prompt), updated_at.eq(now)))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update prompt error: {}", e))?;
}
if let Some(new_include_all) = patch.include_all_memories {
diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
.set((include_all_memories.eq(new_include_all), updated_at.eq(now)))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update include_all error: {}", e))?;
}
if let Some(new_reviewed_only) = patch.reviewed_only_facts {
diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
.set((
reviewed_only_facts.eq(new_reviewed_only),
updated_at.eq(now),
))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update reviewed_only_facts error: {}", e))?;
}
if let Some(new_allow_corrections) = patch.allow_agent_corrections {
diesel::update(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
.set((
allow_agent_corrections.eq(new_allow_corrections),
updated_at.eq(now),
))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Update allow_agent_corrections error: {}", e))?;
}
personas
.filter(user_id.eq(uid))
.filter(persona_id.eq(pid))
.first::<Persona>(conn.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Query error: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
fn delete_persona(
&mut self,
cx: &opentelemetry::Context,
uid: i32,
pid: &str,
) -> Result<bool, DbError> {
trace_db_call(cx, "delete", "delete_persona", |_span| {
use schema::personas::dsl::*;
let mut conn = self.connection.lock().expect("PersonaDao lock");
let n = diesel::delete(personas.filter(user_id.eq(uid)).filter(persona_id.eq(pid)))
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Delete error: {}", e))?;
Ok(n > 0)
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn bulk_import(
&mut self,
cx: &opentelemetry::Context,
uid: i32,
rows: &[ImportPersona],
) -> Result<usize, DbError> {
trace_db_call(cx, "insert", "bulk_import_personas", |_span| {
let mut conn = self.connection.lock().expect("PersonaDao lock");
let now = chrono::Utc::now().timestamp_millis();
let mut inserted = 0usize;
// INSERT OR IGNORE on the (user_id, persona_id) UNIQUE so
// re-running migrate is a no-op for personas already on the
// server.
for p in rows {
let n = diesel::sql_query(
"INSERT OR IGNORE INTO personas (user_id, persona_id, name, system_prompt, \
is_built_in, include_all_memories, created_at, updated_at) \
VALUES (?, ?, ?, ?, ?, 0, ?, ?)",
)
.bind::<diesel::sql_types::Integer, _>(uid)
.bind::<diesel::sql_types::Text, _>(&p.persona_id)
.bind::<diesel::sql_types::Text, _>(&p.name)
.bind::<diesel::sql_types::Text, _>(&p.system_prompt)
.bind::<diesel::sql_types::Bool, _>(p.is_built_in)
.bind::<diesel::sql_types::BigInt, _>(p.created_at)
.bind::<diesel::sql_types::BigInt, _>(now)
.execute(conn.deref_mut())
.map_err(|e| anyhow::anyhow!("Insert error: {}", e))?;
inserted += n;
}
Ok(inserted)
})
.map_err(|e| DbError::log(DbErrorKind::InsertError, e))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::database::test::in_memory_db_connection;
fn dao_with_user(username: &str) -> (SqlitePersonaDao, i32) {
use crate::database::schema::users::dsl as u;
let conn = Arc::new(Mutex::new(in_memory_db_connection()));
diesel::insert_into(u::users)
.values((u::username.eq(username), u::password.eq("x")))
.execute(conn.lock().unwrap().deref_mut())
.unwrap();
let user_id: i32 = u::users
.filter(u::username.eq(username))
.select(u::id)
.first(conn.lock().unwrap().deref_mut())
.unwrap();
(SqlitePersonaDao::from_connection(conn), user_id)
}
#[test]
fn create_and_list_round_trip() {
let cx = opentelemetry::Context::new();
let (mut dao, uid) = dao_with_user("alice");
// The migration seeds 3 built-ins for any existing user; alice
// was created post-migration so she starts empty.
let p = dao
.create_persona(&cx, uid, "custom-1", "Custom A", "prompt A", false, false)
.unwrap();
assert_eq!(p.persona_id, "custom-1");
assert_eq!(p.user_id, uid);
assert!(!p.is_built_in);
let list = dao.list_personas(&cx, uid).unwrap();
assert_eq!(list.len(), 1);
assert_eq!(list[0].persona_id, "custom-1");
}
#[test]
fn unique_constraint_blocks_duplicate_persona_id() {
let cx = opentelemetry::Context::new();
let (mut dao, uid) = dao_with_user("bob");
dao.create_persona(&cx, uid, "x", "X", "p", false, false)
.unwrap();
let err = dao.create_persona(&cx, uid, "x", "X2", "p2", false, false);
assert!(
err.is_err(),
"second insert with same persona_id should fail"
);
}
#[test]
fn bulk_import_is_idempotent() {
let cx = opentelemetry::Context::new();
let (mut dao, uid) = dao_with_user("carol");
let rows = vec![
ImportPersona {
persona_id: "custom-a".into(),
name: "A".into(),
system_prompt: "p1".into(),
is_built_in: false,
created_at: 1,
},
ImportPersona {
persona_id: "custom-b".into(),
name: "B".into(),
system_prompt: "p2".into(),
is_built_in: false,
created_at: 2,
},
];
let first = dao.bulk_import(&cx, uid, &rows).unwrap();
assert_eq!(first, 2);
let second = dao.bulk_import(&cx, uid, &rows).unwrap();
assert_eq!(second, 0, "re-import should insert nothing");
assert_eq!(dao.list_personas(&cx, uid).unwrap().len(), 2);
}
#[test]
fn dao_update_does_not_block_built_ins() {
// Documenting contract: the DAO is intentionally permissive —
// `update_persona` will apply name/system_prompt edits to ANY
// row, including built-ins. The guard against editing built-in
// identity (name + systemPrompt) lives in the HTTP handler
// (src/personas.rs::update_persona). If you find yourself
// wanting to add the guard here too, prefer that — defence in
// depth — but keep this test passing so anyone who removes
// the handler guard gets a failing call site, not silent data
// corruption.
let cx = opentelemetry::Context::new();
let (mut dao, uid) = dao_with_user("eve");
dao.create_persona(&cx, uid, "default", "Default", "old", true, false)
.unwrap();
let updated = dao
.update_persona(
&cx,
uid,
"default",
PersonaPatch {
name: Some("Renamed".into()),
system_prompt: Some("new prompt".into()),
include_all_memories: None,
reviewed_only_facts: None,
allow_agent_corrections: None,
},
)
.unwrap()
.unwrap();
assert_eq!(updated.name, "Renamed");
assert_eq!(updated.system_prompt, "new prompt");
assert!(
updated.is_built_in,
"is_built_in flag should be unchanged by patch"
);
}
#[test]
fn update_toggles_include_all_memories() {
let cx = opentelemetry::Context::new();
let (mut dao, uid) = dao_with_user("dan");
dao.create_persona(&cx, uid, "j", "Journal", "p", true, false)
.unwrap();
let updated = dao
.update_persona(
&cx,
uid,
"j",
PersonaPatch {
name: None,
system_prompt: None,
include_all_memories: Some(true),
reviewed_only_facts: None,
allow_agent_corrections: None,
},
)
.unwrap()
.unwrap();
assert!(updated.include_all_memories);
}
}
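Review bot: the comment in `dao_update_does_not_block_built_ins` says that keeping the test passing means anyone who removes the handler guard "gets a failing call site", but the test calls `dao.update_persona` directly and asserts that renaming a built-in succeeds, so it passes whether or not the guard in src/personas.rs exists. The only protection for a built-in's name and system_prompt is then a single handler check, with no test in this hunk that would catch its removal. Either add a handler-level test that expects a rejection for a built-in row, or enforce the rule in `update_persona` by ignoring `patch.name` / `patch.system_prompt` when `is_built_in` is set. Separately, `update_persona` issues up to five independent UPDATE statements plus a final SELECT with no transaction, so a failure partway through leaves the persona partially patched while the caller gets an Err; wrapping the body in `conn.transaction(...)` would make the patch atomic. `bulk_import` also binds `p.is_built_in` straight from the import rows, so the uploaded set decides which rows count as built-in; worth confirming that is intended.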
+439
@@ -0,0 +1,439 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::models::{InsertablePrecomputedReel, PrecomputedReel};
use crate::database::schema;
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Ledger for precomputed memory reels. The nightly agentic job writes a
/// row after each successful render; the `GET /reels/precomputed` handler
/// reads it to gate on freshness and serve the cached MP4.
pub trait PrecomputedReelDao: Sync + Send {
/// Insert a precomputed reel row. Returns the new row's id.
/// Written by the nightly agentic job (Section D).
#[allow(dead_code)]
fn record_reel(
&mut self,
context: &opentelemetry::Context,
row: &InsertablePrecomputedReel,
) -> Result<i32, DbError>;
/// Find the latest precomputed reel for the given (span, library_key).
fn latest_for(
&mut self,
context: &opentelemetry::Context,
span: &str,
library_key: &str,
) -> Result<Option<PrecomputedReel>, DbError>;
/// Return true when a fresh precomputed reel exists for the given
/// (span, library_key, render_version) that was generated at or after
/// `min_generated_at`. Used as a fast existence gate before falling
/// back to `latest_for` (avoids a second query path).
fn exists_fresh(
&mut self,
context: &opentelemetry::Context,
span: &str,
library_key: &str,
render_version: i32,
min_generated_at: i64,
) -> Result<bool, DbError>;
/// Delete all but the newest `keep` rows for (span, library_key), returning
/// the deleted rows so the caller can unlink their output files. Used by the
/// nightly job to retire superseded reels (e.g. yesterday's daily).
#[allow(dead_code)]
fn prune_superseded(
&mut self,
context: &opentelemetry::Context,
span: &str,
library_key: &str,
keep: usize,
) -> Result<Vec<PrecomputedReel>, DbError>;
/// Every cache_key currently in the ledger. Used by the on-disk cache sweep
/// to protect files a ledger row still points at.
#[allow(dead_code)]
fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError>;
}
pub struct SqlitePrecomputedReelDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqlitePrecomputedReelDao {
fn default() -> Self {
Self::new()
}
}
impl SqlitePrecomputedReelDao {
pub fn new() -> Self {
Self {
connection: Arc::new(Mutex::new(connect())),
}
}
#[cfg(test)]
pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
Self { connection: conn }
}
}
impl PrecomputedReelDao for SqlitePrecomputedReelDao {
fn record_reel(
&mut self,
context: &opentelemetry::Context,
row: &InsertablePrecomputedReel,
) -> Result<i32, DbError> {
trace_db_call(context, "insert", "record_reel", |_span| {
use schema::precomputed_reels::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock PrecomputedReelDao");
diesel::insert_into(dsl::precomputed_reels)
.values(row)
.execute(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to insert reel: {}", e))?;
dsl::precomputed_reels
.order(dsl::id.desc())
.select(dsl::id)
.first::<i32>(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to get reel id: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::InsertError, e))
}
fn latest_for(
&mut self,
context: &opentelemetry::Context,
span: &str,
library_key: &str,
) -> Result<Option<PrecomputedReel>, DbError> {
trace_db_call(context, "query", "latest_for", |_span| {
use schema::precomputed_reels::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock PrecomputedReelDao");
dsl::precomputed_reels
.filter(dsl::span.eq(span))
.filter(dsl::library_key.eq(library_key))
.order(dsl::generated_at.desc())
.first::<PrecomputedReel>(connection.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Failed to get latest reel: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn exists_fresh(
&mut self,
context: &opentelemetry::Context,
span: &str,
library_key: &str,
render_version: i32,
min_generated_at: i64,
) -> Result<bool, DbError> {
trace_db_call(context, "query", "exists_fresh", |_span| {
use schema::precomputed_reels::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock PrecomputedReelDao");
let count: i64 = dsl::precomputed_reels
.filter(dsl::span.eq(span))
.filter(dsl::library_key.eq(library_key))
.filter(dsl::render_version.eq(render_version))
.filter(dsl::generated_at.ge(min_generated_at))
.count()
.get_result(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to check fresh reel: {}", e))?;
Ok(count > 0)
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn prune_superseded(
&mut self,
context: &opentelemetry::Context,
span: &str,
library_key: &str,
keep: usize,
) -> Result<Vec<PrecomputedReel>, DbError> {
trace_db_call(context, "delete", "prune_superseded", |_span| {
use schema::precomputed_reels::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock PrecomputedReelDao");
// Newest first; everything past `keep` is superseded. The table
// holds at most a handful of rows per (span, library), so loading
// and slicing in Rust is cheaper than a correlated subquery.
let mut rows: Vec<PrecomputedReel> = dsl::precomputed_reels
.filter(dsl::span.eq(span))
.filter(dsl::library_key.eq(library_key))
.order(dsl::generated_at.desc())
.load::<PrecomputedReel>(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?;
let stale = rows.split_off(rows.len().min(keep));
if !stale.is_empty() {
let ids: Vec<i32> = stale.iter().map(|r| r.id).collect();
diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids)))
.execute(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?;
}
Ok(stale)
})
.map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
}
fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError> {
trace_db_call(context, "query", "all_cache_keys", |_span| {
use schema::precomputed_reels::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock PrecomputedReelDao");
dsl::precomputed_reels
.select(dsl::cache_key)
.load::<String>(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to load cache keys: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
}
#[cfg(test)]
mod tests {
use super::*;
use diesel::Connection;
use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
fn setup_dao() -> SqlitePrecomputedReelDao {
let mut conn = SqliteConnection::establish(":memory:")
.expect("Unable to create in-memory db connection");
conn.run_pending_migrations(DB_MIGRATIONS)
.expect("Failure running DB migrations");
SqlitePrecomputedReelDao::from_connection(Arc::new(Mutex::new(conn)))
}
fn ctx() -> opentelemetry::Context {
opentelemetry::Context::new()
}
fn sample_row() -> InsertablePrecomputedReel {
InsertablePrecomputedReel {
span: "day".to_string(),
library_key: "1".to_string(),
cache_key: "abc123".to_string(),
output_path: "/tmp/reel.mp4".to_string(),
title: "Test Reel".to_string(),
media_count: 10,
render_version: 1,
tz_offset_minutes: 0,
voice: Some("default".to_string()),
generated_at: 1_000_000,
}
}
#[test]
fn record_reel_inserts_and_returns_id() {
let mut dao = setup_dao();
let ctx = ctx();
let row = sample_row();
let id = dao.record_reel(&ctx, &row).unwrap();
assert!(id > 0, "should return a positive id");
}
#[test]
fn record_reel_returns_increasing_ids() {
let mut dao = setup_dao();
let ctx = ctx();
let row = sample_row();
let id1 = dao.record_reel(&ctx, &row).unwrap();
let id2 = dao.record_reel(&ctx, &row).unwrap();
assert!(id2 > id1, "each insert should get a higher id");
}
#[test]
fn latest_for_returns_latest() {
let mut dao = setup_dao();
let ctx = ctx();
let row1 = InsertablePrecomputedReel {
generated_at: 1_000_000,
..sample_row()
};
let row2 = InsertablePrecomputedReel {
generated_at: 2_000_000,
..sample_row()
};
dao.record_reel(&ctx, &row1).unwrap();
dao.record_reel(&ctx, &row2).unwrap();
let latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap();
assert_eq!(latest.generated_at, 2_000_000);
}
#[test]
fn latest_for_scoped_by_span_and_library() {
let mut dao = setup_dao();
let ctx = ctx();
let day_row = InsertablePrecomputedReel {
span: "day".to_string(),
library_key: "1".to_string(),
generated_at: 1_000_000,
..sample_row()
};
let week_row = InsertablePrecomputedReel {
span: "week".to_string(),
library_key: "1".to_string(),
generated_at: 2_000_000,
..sample_row()
};
dao.record_reel(&ctx, &day_row).unwrap();
dao.record_reel(&ctx, &week_row).unwrap();
let day_latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap();
assert_eq!(day_latest.span, "day");
let week_latest = dao.latest_for(&ctx, "week", "1").unwrap().unwrap();
assert_eq!(week_latest.span, "week");
// Different library returns None
let missing = dao.latest_for(&ctx, "day", "99").unwrap();
assert!(missing.is_none());
}
#[test]
fn latest_for_returns_none_when_no_rows() {
let mut dao = setup_dao();
let ctx = ctx();
let result = dao.latest_for(&ctx, "day", "1").unwrap();
assert!(result.is_none());
}
#[test]
fn exists_fresh_returns_true_when_present() {
let mut dao = setup_dao();
let ctx = ctx();
dao.record_reel(&ctx, &sample_row()).unwrap();
let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap();
assert!(exists, "should find the row we just inserted");
}
#[test]
fn exists_fresh_returns_false_when_missing() {
let mut dao = setup_dao();
let ctx = ctx();
let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap();
assert!(!exists, "should not find anything in empty table");
}
#[test]
fn exists_fresh_respects_min_generated_at() {
let mut dao = setup_dao();
let ctx = ctx();
dao.record_reel(&ctx, &sample_row()).unwrap();
// Below the threshold — should exist
let exists = dao.exists_fresh(&ctx, "day", "1", 1, 500_000).unwrap();
assert!(exists);
// Above the threshold — should not exist
let exists = dao.exists_fresh(&ctx, "day", "1", 1, 2_000_000).unwrap();
assert!(!exists);
}
#[test]
fn exists_fresh_respects_render_version() {
let mut dao = setup_dao();
let ctx = ctx();
let row_v1 = InsertablePrecomputedReel {
render_version: 1,
..sample_row()
};
dao.record_reel(&ctx, &row_v1).unwrap();
assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap());
assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap());
}
#[test]
fn prune_superseded_keeps_newest_and_returns_deleted() {
let mut dao = setup_dao();
let ctx = ctx();
// Three day/lib1 reels at increasing timestamps, plus an unrelated one.
for (i, key) in ["k1", "k2", "k3"].iter().enumerate() {
dao.record_reel(
&ctx,
&InsertablePrecomputedReel {
cache_key: key.to_string(),
generated_at: 1_000_000 + i as i64 * 1000,
..sample_row()
},
)
.unwrap();
}
let other = InsertablePrecomputedReel {
library_key: "2".to_string(),
cache_key: "other".to_string(),
..sample_row()
};
dao.record_reel(&ctx, &other).unwrap();
// Keep the newest 2 of (day, "1"); k1 (oldest) is superseded.
let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
assert_eq!(deleted.len(), 1);
assert_eq!(deleted[0].cache_key, "k1");
// The newest 2 survive; the other-library row is untouched.
let keys = dao.all_cache_keys(&ctx).unwrap();
assert_eq!(keys.len(), 3);
assert!(keys.contains(&"k2".to_string()));
assert!(keys.contains(&"k3".to_string()));
assert!(keys.contains(&"other".to_string()));
assert!(!keys.contains(&"k1".to_string()));
}
#[test]
fn prune_superseded_noop_when_within_keep() {
let mut dao = setup_dao();
let ctx = ctx();
dao.record_reel(&ctx, &sample_row()).unwrap();
let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
assert!(deleted.is_empty());
assert_eq!(dao.all_cache_keys(&ctx).unwrap().len(), 1);
}
}
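Review bot: `record_reel` recovers the new id with a second query, `order(dsl::id.desc()).select(dsl::id).first()`, which returns the highest id in the table rather than the id of the row just inserted. The mutex only serializes callers sharing this `Arc<Mutex<SqliteConnection>>`; `new()` opens its own connection via `connect()`, so an insert from another DAO instance between the two statements yields the wrong id. Read `last_insert_rowid()` on the same connection, or run both statements in one transaction. In `prune_superseded`, the test `record_reel_returns_increasing_ids` shows that two rows may carry the same `cache_key` and `output_path`, so a row returned as stale can point at the same file as a row that was kept; since the caller unlinks the returned rows' output files, drop stale rows whose `output_path` still appears among the kept rows, or add a uniqueness constraint. The prune path also orders by `generated_at` alone, so ties are broken arbitrarily, and it reports failures as `DbErrorKind::UpdateError` although the operation is a delete.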
+5 -5
@@ -96,7 +96,7 @@ impl PreviewDao for SqlitePreviewDao {
.map(|_| ()) .map(|_| ())
.map_err(|e| anyhow::anyhow!("Insert error: {}", e)) .map_err(|e| anyhow::anyhow!("Insert error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
} }
fn update_status( fn update_status(
@@ -126,7 +126,7 @@ impl PreviewDao for SqlitePreviewDao {
.map(|_| ()) .map(|_| ())
.map_err(|e| anyhow::anyhow!("Update error: {}", e)) .map_err(|e| anyhow::anyhow!("Update error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::UpdateError)) .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
} }
fn get_preview( fn get_preview(
@@ -148,7 +148,7 @@ impl PreviewDao for SqlitePreviewDao {
Err(e) => Err(anyhow::anyhow!("Query error: {}", e)), Err(e) => Err(anyhow::anyhow!("Query error: {}", e)),
} }
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_previews_batch( fn get_previews_batch(
@@ -170,7 +170,7 @@ impl PreviewDao for SqlitePreviewDao {
.load::<VideoPreviewClip>(connection.deref_mut()) .load::<VideoPreviewClip>(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e)) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_by_status( fn get_by_status(
@@ -188,7 +188,7 @@ impl PreviewDao for SqlitePreviewDao {
.load::<VideoPreviewClip>(connection.deref_mut()) .load::<VideoPreviewClip>(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Query error: {}", e)) .map_err(|e| anyhow::anyhow!("Query error: {}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
} }
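Review bot: in the last three hunks (`get_preview`, `get_previews_batch`, `get_by_status`) the value handed to `DbError::log` is the same "Query error: {}" string with the same `DbErrorKind::QueryError`, so unless `DbError::log` (not part of this diff) adds the operation name, the new log lines cannot be told apart by call site. Put the method name in the anyhow message, for example "get_by_status query error: {}", in the way the reel DAO uses "Failed to get latest reel: {}". Also confirm that `DbError::log` writes the cause only to the server log and that the `DbError` returned to handlers still carries just the kind, since Diesel error text can include table and column names.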
+380
@@ -0,0 +1,380 @@
//! Reconciliation pass for hash-keyed derived data.
//!
//! As `backfill_unhashed_backlog` populates `image_exif.content_hash`
//! for legacy rows, we want the matching `tagged_photo` and
//! `photo_insights` rows — which were inserted before the hash was
//! known — to inherit the hash too. Otherwise reads keep falling back
//! to the rel_path path even when a hash is now available.
//!
//! Two passes:
//! 1. **Hash backfill** — for every `tagged_photo` / `photo_insights`
//! row with NULL `content_hash`, look up the matching
//! `image_exif.content_hash` and write it. SQL-only; idempotent;
//! a no-op once everything is hashed.
//! 2. **Insight scalar merge** — when multiple `photo_insights` rows
//! share a `content_hash` with `is_current = true`, only the
//! earliest `generated_at` keeps `is_current = true` (per the
//! "earliest wins" rule in CLAUDE.md → "Multi-library data
//! model"). Others are demoted, not deleted, so they remain
//! visible in history endpoints.
//!
//! Tags are set-valued under the policy (union on read), so there's no
//! analogous "collapse" pass — duplicate `(tag_id, content_hash)` rows
//! across libraries are harmless and correctly de-duped at read time
//! by the existing `DISTINCT` queries.
//!
//! The pass operates on the database alone — no filesystem access —
//! so it doesn't need the library availability gate.
// The lib doesn't call into this module directly — the watcher (in the
// bin) does. Dead-code analysis at the lib level can't see that, so
// suppress at the module level. Tests still exercise every function.
#![allow(dead_code)]
use diesel::prelude::*;
use diesel::sql_query;
use diesel::sqlite::SqliteConnection;
use log::{debug, info, warn};
/// Outcome of a reconciliation tick. Tracked so the watcher can log
/// progress when something changed and stay quiet when nothing did.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct ReconcileStats {
pub tagged_photo_hashes_filled: usize,
pub photo_insights_hashes_filled: usize,
pub photo_insights_demoted: usize,
}
impl ReconcileStats {
pub fn changed(&self) -> bool {
self.tagged_photo_hashes_filled > 0
|| self.photo_insights_hashes_filled > 0
|| self.photo_insights_demoted > 0
}
}
/// Run the reconciliation pass. Idempotent — safe to call on every
/// watcher tick. Errors are logged but never propagated; reconciliation
/// is best-effort and a transient DB hiccup must not stall the watcher.
pub fn run(conn: &mut SqliteConnection) -> ReconcileStats {
let stats = ReconcileStats {
tagged_photo_hashes_filled: match backfill_tagged_photo_hashes(conn) {
Ok(n) => n,
Err(e) => {
warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
0
}
},
photo_insights_hashes_filled: match backfill_photo_insights_hashes(conn) {
Ok(n) => n,
Err(e) => {
warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
0
}
},
photo_insights_demoted: match collapse_insight_currents(conn) {
Ok(n) => n,
Err(e) => {
warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
0
}
},
};
if stats.changed() {
info!(
"reconcile: filled {} tagged_photo hash(es), {} photo_insights hash(es); demoted {} non-current insight row(s)",
stats.tagged_photo_hashes_filled,
stats.photo_insights_hashes_filled,
stats.photo_insights_demoted,
);
} else {
debug!("reconcile: no changes this tick");
}
stats
}
/// Populate `tagged_photo.content_hash` for any row that still has
/// NULL by joining on `rel_path` against `image_exif`. tagged_photo
/// doesn't carry `library_id`, so a path that exists under multiple
/// libraries with different content is genuinely ambiguous; we pick
/// any non-null hash for that path. Same trade-off as the migration
/// backfill — see `migrations/2026-05-01-000000_hash_keyed_derived_data`.
fn backfill_tagged_photo_hashes(conn: &mut SqliteConnection) -> QueryResult<usize> {
sql_query(
"UPDATE tagged_photo \
SET content_hash = ( \
SELECT content_hash FROM image_exif \
WHERE image_exif.rel_path = tagged_photo.rel_path \
AND image_exif.content_hash IS NOT NULL \
LIMIT 1 \
) \
WHERE content_hash IS NULL \
AND EXISTS ( \
SELECT 1 FROM image_exif \
WHERE image_exif.rel_path = tagged_photo.rel_path \
AND image_exif.content_hash IS NOT NULL \
)",
)
.execute(conn)
}
/// Populate `photo_insights.content_hash` from `image_exif`, keyed on
/// `(library_id, rel_path)`. Unambiguous because photo_insights carries
/// library_id.
fn backfill_photo_insights_hashes(conn: &mut SqliteConnection) -> QueryResult<usize> {
sql_query(
"UPDATE photo_insights \
SET content_hash = ( \
SELECT content_hash FROM image_exif \
WHERE image_exif.library_id = photo_insights.library_id \
AND image_exif.rel_path = photo_insights.rel_path \
AND image_exif.content_hash IS NOT NULL \
LIMIT 1 \
) \
WHERE content_hash IS NULL \
AND EXISTS ( \
SELECT 1 FROM image_exif \
WHERE image_exif.library_id = photo_insights.library_id \
AND image_exif.rel_path = photo_insights.rel_path \
AND image_exif.content_hash IS NOT NULL \
)",
)
.execute(conn)
}
/// Scalar-merge step: when multiple rows share a `content_hash` and
/// claim `is_current = true`, demote all but the earliest by
/// `generated_at` (ties broken by lowest id, deterministic).
///
/// Demoted rows keep their data — only `is_current` flips. Clients that
/// hit `/insights/history` still see the full sequence; only the
/// "current" pointer is unique per hash.
fn collapse_insight_currents(conn: &mut SqliteConnection) -> QueryResult<usize> {
sql_query(
"UPDATE photo_insights \
SET is_current = 0 \
WHERE is_current = 1 \
AND content_hash IS NOT NULL \
AND id NOT IN ( \
SELECT MIN(p2.id) FROM photo_insights p2 \
WHERE p2.is_current = 1 \
AND p2.content_hash = photo_insights.content_hash \
AND p2.generated_at = ( \
SELECT MIN(p3.generated_at) FROM photo_insights p3 \
WHERE p3.is_current = 1 \
AND p3.content_hash = p2.content_hash \
) \
)",
)
.execute(conn)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::database::test::in_memory_db_connection;
fn ensure_library(conn: &mut SqliteConnection, library_id: i32) {
// Migration seeds library id=1; tests that reference id>1 must
// create those rows themselves, otherwise FK enforcement (added
// in the tags-edit migration) rejects image_exif inserts.
diesel::sql_query(
"INSERT OR IGNORE INTO libraries (id, name, root_path, created_at) \
VALUES (?, 'test-' || ?, '/tmp/test-' || ?, 0)",
)
.bind::<diesel::sql_types::Integer, _>(library_id)
.bind::<diesel::sql_types::Integer, _>(library_id)
.bind::<diesel::sql_types::Integer, _>(library_id)
.execute(conn)
.unwrap();
}
fn insert_image_exif(
conn: &mut SqliteConnection,
library_id: i32,
rel_path: &str,
content_hash: Option<&str>,
) {
use crate::database::schema::image_exif;
ensure_library(conn, library_id);
diesel::sql_query(
"INSERT INTO image_exif (library_id, rel_path, created_time, last_modified, content_hash) \
VALUES (?, ?, 0, 0, ?)",
)
.bind::<diesel::sql_types::Integer, _>(library_id)
.bind::<diesel::sql_types::Text, _>(rel_path)
.bind::<diesel::sql_types::Nullable<diesel::sql_types::Text>, _>(content_hash)
.execute(conn)
.unwrap();
// Keep clippy happy that the import is used.
let _ = image_exif::table;
}
fn insert_tagged_photo(conn: &mut SqliteConnection, rel_path: &str, tag_id: i32) {
diesel::sql_query(
"INSERT INTO tagged_photo (rel_path, tag_id, created_time) VALUES (?, ?, 0)",
)
.bind::<diesel::sql_types::Text, _>(rel_path)
.bind::<diesel::sql_types::Integer, _>(tag_id)
.execute(conn)
.unwrap();
}
fn insert_tag(conn: &mut SqliteConnection, id: i32, name: &str) {
diesel::sql_query("INSERT INTO tags (id, name, created_time) VALUES (?, ?, 0)")
.bind::<diesel::sql_types::Integer, _>(id)
.bind::<diesel::sql_types::Text, _>(name)
.execute(conn)
.unwrap();
}
fn insert_insight(
conn: &mut SqliteConnection,
library_id: i32,
rel_path: &str,
generated_at: i64,
is_current: bool,
) -> i32 {
ensure_library(conn, library_id);
diesel::sql_query(
"INSERT INTO photo_insights (library_id, rel_path, title, summary, generated_at, model_version, is_current, backend) \
VALUES (?, ?, 't', 's', ?, 'v', ?, 'local')",
)
.bind::<diesel::sql_types::Integer, _>(library_id)
.bind::<diesel::sql_types::Text, _>(rel_path)
.bind::<diesel::sql_types::BigInt, _>(generated_at)
.bind::<diesel::sql_types::Bool, _>(is_current)
.execute(conn)
.unwrap();
diesel::sql_query("SELECT last_insert_rowid() AS id")
.get_result::<TestId>(conn)
.map(|r| r.id)
.unwrap()
}
#[derive(QueryableByName)]
struct TestId {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
}
#[derive(QueryableByName, Debug)]
struct HashOnly {
#[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
content_hash: Option<String>,
}
#[derive(QueryableByName, Debug)]
struct CurrentRow {
#[diesel(sql_type = diesel::sql_types::Integer)]
id: i32,
#[diesel(sql_type = diesel::sql_types::Bool)]
is_current: bool,
}
#[test]
fn backfill_fills_tagged_photo_hash_when_image_exif_has_one() {
let mut conn = in_memory_db_connection();
insert_tag(&mut conn, 1, "vacation");
insert_tagged_photo(&mut conn, "trip/IMG.jpg", 1);
// No image_exif row yet — backfill no-op.
let stats = run(&mut conn);
assert_eq!(stats.tagged_photo_hashes_filled, 0);
// image_exif row appears with a hash; next reconcile fills it.
insert_image_exif(&mut conn, 1, "trip/IMG.jpg", Some("hashabc"));
let stats = run(&mut conn);
assert_eq!(stats.tagged_photo_hashes_filled, 1);
let row = diesel::sql_query(
"SELECT content_hash FROM tagged_photo WHERE rel_path = 'trip/IMG.jpg'",
)
.get_result::<HashOnly>(&mut conn)
.unwrap();
assert_eq!(row.content_hash.as_deref(), Some("hashabc"));
// Idempotent: a second run is a no-op.
let stats = run(&mut conn);
assert_eq!(stats.tagged_photo_hashes_filled, 0);
}
#[test]
fn backfill_skips_tagged_photo_when_image_exif_has_no_hash() {
let mut conn = in_memory_db_connection();
insert_tag(&mut conn, 1, "vacation");
insert_tagged_photo(&mut conn, "trip/IMG.jpg", 1);
// image_exif exists but its hash is null.
insert_image_exif(&mut conn, 1, "trip/IMG.jpg", None);
let stats = run(&mut conn);
assert_eq!(stats.tagged_photo_hashes_filled, 0);
}
#[test]
fn backfill_fills_photo_insights_hash_scoped_by_library() {
let mut conn = in_memory_db_connection();
// Row in library 1 only — must not be filled by a hash from
// library 2's same-rel_path entry.
insert_image_exif(&mut conn, 1, "shared.jpg", Some("hash-lib1"));
let id1 = insert_insight(&mut conn, 1, "shared.jpg", 100, true);
let stats = run(&mut conn);
assert_eq!(stats.photo_insights_hashes_filled, 1);
let row = diesel::sql_query("SELECT content_hash FROM photo_insights WHERE id = ?")
.bind::<diesel::sql_types::Integer, _>(id1)
.get_result::<HashOnly>(&mut conn)
.unwrap();
assert_eq!(row.content_hash.as_deref(), Some("hash-lib1"));
}
#[test]
fn collapse_keeps_earliest_is_current_per_hash() {
let mut conn = in_memory_db_connection();
// Two libraries, same content_hash via image_exif. Insights
// were generated independently in each library, both currently
// is_current = true. The earlier one wins.
insert_image_exif(&mut conn, 1, "a.jpg", Some("h1"));
insert_image_exif(&mut conn, 2, "a.jpg", Some("h1"));
let earlier = insert_insight(&mut conn, 1, "a.jpg", 100, true);
let later = insert_insight(&mut conn, 2, "a.jpg", 200, true);
// First pass fills the content_hash; second collapses.
let stats = run(&mut conn);
assert_eq!(stats.photo_insights_hashes_filled, 2);
assert_eq!(stats.photo_insights_demoted, 1);
let rows = diesel::sql_query("SELECT id, is_current FROM photo_insights ORDER BY id")
.get_results::<CurrentRow>(&mut conn)
.unwrap();
let earlier_row = rows.iter().find(|r| r.id == earlier).unwrap();
let later_row = rows.iter().find(|r| r.id == later).unwrap();
assert!(
earlier_row.is_current,
"earlier insight should remain current"
);
assert!(!later_row.is_current, "later insight should be demoted");
// Idempotent.
let stats = run(&mut conn);
assert_eq!(stats.photo_insights_demoted, 0);
}
#[test]
fn collapse_does_not_demote_a_solo_current_row() {
let mut conn = in_memory_db_connection();
insert_image_exif(&mut conn, 1, "a.jpg", Some("h1"));
let solo = insert_insight(&mut conn, 1, "a.jpg", 100, true);
let stats = run(&mut conn);
assert_eq!(stats.photo_insights_demoted, 0);
let row = diesel::sql_query("SELECT id, is_current FROM photo_insights WHERE id = ?")
.bind::<diesel::sql_types::Integer, _>(solo)
.get_result::<CurrentRow>(&mut conn)
.unwrap();
assert!(row.is_current);
}
}
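Review bot: backfill_fills_photo_insights_hash_scoped_by_library does not exercise the scoping its comment describes. It inserts an image_exif row for library 1 only, so a backfill that joined on rel_path alone would still pass. Add a second row, for example insert_image_exif(&mut conn, 2, "shared.jpg", Some("hash-lib2")), before run() and keep the assertion on "hash-lib1". Separately, collapse_keeps_earliest_is_current_per_hash only covers distinct generated_at values (100 and 200); a case with equal timestamps would pin down the tie-break.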
+94
@@ -57,6 +57,16 @@ diesel::table! {
confidence -> Float, confidence -> Float,
status -> Text, status -> Text,
created_at -> BigInt, created_at -> BigInt,
persona_id -> Text,
user_id -> Integer,
valid_from -> Nullable<BigInt>,
valid_until -> Nullable<BigInt>,
superseded_by -> Nullable<Integer>,
created_by_model -> Nullable<Text>,
created_by_backend -> Nullable<Text>,
last_modified_by_model -> Nullable<Text>,
last_modified_by_backend -> Nullable<Text>,
last_modified_at -> Nullable<BigInt>,
} }
} }
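Review bot: persona_id -> Text and user_id -> Integer are added as non-nullable columns, while every other column added in this hunk is Nullable. The migration is not part of this hunk; confirm it supplies a DEFAULT or backfills both columns so existing rows stay valid, and that the owner assigned to legacy rows is a deliberate choice, because these two columns decide which user and persona a row belongs to.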
@@ -121,6 +131,15 @@ diesel::table! {
last_modified -> BigInt, last_modified -> BigInt,
content_hash -> Nullable<Text>, content_hash -> Nullable<Text>,
size_bytes -> Nullable<BigInt>, size_bytes -> Nullable<BigInt>,
phash_64 -> Nullable<BigInt>,
dhash_64 -> Nullable<BigInt>,
duplicate_of_hash -> Nullable<Text>,
duplicate_decided_at -> Nullable<BigInt>,
date_taken_source -> Nullable<Text>,
original_date_taken -> Nullable<BigInt>,
original_date_taken_source -> Nullable<Text>,
clip_embedding -> Nullable<Binary>,
clip_model_version -> Nullable<Text>,
} }
} }
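Review bot: phash_64 and dhash_64 are declared Nullable<BigInt>, a signed 64-bit column, so a hash with the top bit set is stored as a negative number. Document the u64 to i64 cast at the write and read sites and compare hashes on the bit pattern (XOR and popcount), never on numeric order. clip_embedding -> Nullable<Binary> also puts a blob on this row, so any query that loads the whole struct pulls the embedding each time; consider selecting it explicitly only where it is needed.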
@@ -130,6 +149,8 @@ diesel::table! {
name -> Text, name -> Text,
root_path -> Text, root_path -> Text,
created_at -> BigInt, created_at -> BigInt,
enabled -> Bool,
excluded_dirs -> Nullable<Text>,
} }
} }
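Review bot: excluded_dirs -> Nullable<Text> has no documented encoding. State in a comment or in the migration whether it is a delimited list or JSON, whether entries are relative to root_path, and how they are normalized before matching, so that a trailing slash or a differently cased path does not make an exclusion silently miss.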
@@ -150,6 +171,22 @@ diesel::table! {
} }
} }
diesel::table! {
personas (id) {
id -> Integer,
user_id -> Integer,
persona_id -> Text,
name -> Text,
system_prompt -> Text,
is_built_in -> Bool,
include_all_memories -> Bool,
created_at -> BigInt,
updated_at -> BigInt,
reviewed_only_facts -> Bool,
allow_agent_corrections -> Bool,
}
}
diesel::table! { diesel::table! {
persons (id) { persons (id) {
id -> Integer, id -> Integer,
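Review bot: personas carries both a surrogate id and a text persona_id scoped by user_id, and the persona_id columns added elsewhere in this file are Text, so they refer to the text key and not to id. schema.rs cannot express uniqueness, so confirm the migration has a UNIQUE index on (user_id, persona_id); without it a lookup by persona_id alone can match more than one row, including a row owned by a different user. Judging by their names, system_prompt and allow_agent_corrections control what the model is told and what it may rewrite, so writes to this table should be checked against the authenticated user_id.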
@@ -178,6 +215,16 @@ diesel::table! {
approved -> Nullable<Bool>, approved -> Nullable<Bool>,
backend -> Text, backend -> Text,
fewshot_source_ids -> Nullable<Text>, fewshot_source_ids -> Nullable<Text>,
content_hash -> Nullable<Text>,
num_ctx -> Nullable<Integer>,
temperature -> Nullable<Float>,
top_p -> Nullable<Float>,
top_k -> Nullable<Integer>,
min_p -> Nullable<Float>,
system_prompt -> Nullable<Text>,
persona_id -> Nullable<Text>,
prompt_eval_count -> Nullable<Integer>,
eval_count -> Nullable<Integer>,
} }
} }
@@ -199,6 +246,7 @@ diesel::table! {
rel_path -> Text, rel_path -> Text,
tag_id -> Integer, tag_id -> Integer,
created_time -> BigInt, created_time -> BigInt,
content_hash -> Nullable<Text>,
} }
} }
@@ -218,6 +266,16 @@ diesel::table! {
} }
} }
diesel::table! {
user_ai_prefs (id) {
id -> Integer,
voice -> Nullable<Text>,
tz_offset_minutes -> Nullable<Integer>,
library -> Nullable<Text>,
updated_at -> BigInt,
}
}
diesel::table! { diesel::table! {
video_preview_clips (id) { video_preview_clips (id) {
id -> Integer, id -> Integer,
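Review bot: user_ai_prefs has no user_id column, while the personas table added in this same file is keyed per user. With a single shared row, the most recent request from any client overwrites voice, tz_offset_minutes and library for everyone; if that is intended for a single-user deployment, say so in a comment, otherwise key the row by user_id. Also, library is Nullable<Text> here while the other tables in this file identify a library with library_id -> Integer; document what the string holds or use the integer id.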
@@ -232,12 +290,44 @@ diesel::table! {
} }
} }
diesel::table! {
insight_generation_jobs (id) {
id -> Integer,
library_id -> Integer,
file_path -> Text,
generation_type -> Text,
status -> Text,
started_at -> BigInt,
completed_at -> Nullable<BigInt>,
result_insight_id -> Nullable<Integer>,
error_message -> Nullable<Text>,
}
}
diesel::table! {
precomputed_reels (id) {
id -> Integer,
span -> Text,
library_key -> Text,
cache_key -> Text,
output_path -> Text,
title -> Text,
media_count -> Integer,
render_version -> Integer,
tz_offset_minutes -> Integer,
voice -> Nullable<Text>,
generated_at -> BigInt,
}
}
diesel::joinable!(entity_facts -> photo_insights (source_insight_id)); diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
diesel::joinable!(entity_photo_links -> entities (entity_id)); diesel::joinable!(entity_photo_links -> entities (entity_id));
diesel::joinable!(entity_photo_links -> libraries (library_id)); diesel::joinable!(entity_photo_links -> libraries (library_id));
diesel::joinable!(face_detections -> libraries (library_id)); diesel::joinable!(face_detections -> libraries (library_id));
diesel::joinable!(face_detections -> persons (person_id)); diesel::joinable!(face_detections -> persons (person_id));
diesel::joinable!(image_exif -> libraries (library_id)); diesel::joinable!(image_exif -> libraries (library_id));
diesel::joinable!(insight_generation_jobs -> libraries (library_id));
diesel::joinable!(personas -> users (user_id));
diesel::joinable!(persons -> entities (entity_id)); diesel::joinable!(persons -> entities (entity_id));
diesel::joinable!(photo_insights -> libraries (library_id)); diesel::joinable!(photo_insights -> libraries (library_id));
diesel::joinable!(tagged_photo -> tags (tag_id)); diesel::joinable!(tagged_photo -> tags (tag_id));
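Review bot: insight_generation_jobs names its path column file_path, while tagged_photo in this file uses rel_path. If the column holds a library-relative path, rename it to rel_path; if it holds an absolute path, say so, since the two are not interchangeable in joins. status and generation_type are free Text, so list the allowed values in a comment or enforce them with a CHECK. precomputed_reels has library_key -> Text and no joinable! to libraries, so nothing in the schema ties a reel to a library row.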
@@ -252,13 +342,17 @@ diesel::allow_tables_to_appear_in_same_query!(
face_detections, face_detections,
favorites, favorites,
image_exif, image_exif,
insight_generation_jobs,
libraries, libraries,
location_history, location_history,
personas,
persons, persons,
photo_insights, photo_insights,
precomputed_reels,
search_history, search_history,
tagged_photo, tagged_photo,
tags, tags,
user_ai_prefs,
users, users,
video_preview_clips, video_preview_clips,
); );
+20 -17
@@ -189,10 +189,11 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
.expect("Unable to get SearchHistoryDao"); .expect("Unable to get SearchHistoryDao");
// Validate embedding dimensions (REQUIRED for searches) // Validate embedding dimensions (REQUIRED for searches)
if search.embedding.len() != 768 { if search.embedding.len() != crate::ai::embedding_dim() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid embedding dimensions: {} (expected 768)", "Invalid embedding dimensions: {} (expected {})",
search.embedding.len() search.embedding.len(),
crate::ai::embedding_dim()
)); ));
} }
@@ -227,7 +228,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
source_file: search.source_file, source_file: search.source_file,
}) })
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
} }
fn store_searches_batch( fn store_searches_batch(
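Review bot: .map_err(|e| DbError::log(DbErrorKind::InsertError, e)) stops discarding the underlying error, but DbError::log is not defined in this hunk. Confirm it records the cause in the server log only and still returns just the DbErrorKind to callers, and check that the anyhow text built inside the closure does not embed the search query itself, since this DAO stores search history.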
@@ -245,7 +246,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
conn.transaction::<_, anyhow::Error, _>(|conn| { conn.transaction::<_, anyhow::Error, _>(|conn| {
for search in searches { for search in searches {
// Validate embedding (REQUIRED) // Validate embedding (REQUIRED)
if search.embedding.len() != 768 { if search.embedding.len() != crate::ai::embedding_dim() {
log::warn!( log::warn!(
"Skipping search with invalid embedding dimensions: {}", "Skipping search with invalid embedding dimensions: {}",
search.embedding.len() search.embedding.len()
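Review bot: the log::warn! in store_searches_batch still prints only search.embedding.len(). Now that the expected size comes from crate::ai::embedding_dim() instead of the literal 768, add the expected value to the message as the other three call sites in this file do, otherwise a skipped batch row cannot be diagnosed from the log. Reading embedding_dim() once before the for loop also avoids calling it per row.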
@@ -283,7 +284,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
Ok(inserted) Ok(inserted)
}) })
.map_err(|_| DbError::new(DbErrorKind::InsertError)) .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
} }
fn find_searches_in_range( fn find_searches_in_range(
@@ -310,7 +311,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
.map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect()) .map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect())
.map_err(|e| anyhow::anyhow!("Query error: {:?}", e)) .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn find_similar_searches( fn find_similar_searches(
@@ -325,10 +326,11 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
.lock() .lock()
.expect("Unable to get SearchHistoryDao"); .expect("Unable to get SearchHistoryDao");
if query_embedding.len() != 768 { if query_embedding.len() != crate::ai::embedding_dim() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)", "Invalid query embedding dimensions: {} (expected {})",
query_embedding.len() query_embedding.len(),
crate::ai::embedding_dim()
)); ));
} }
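Review bot: only query_embedding.len() is checked against crate::ai::embedding_dim() here. Once the dimension is no longer a fixed 768, rows written under a different dimension can have a different length than the query. The similarity loop is outside this hunk, so confirm it skips or rejects stored embeddings whose length differs instead of comparing mismatched vectors.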
@@ -372,7 +374,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
.map(|(_, search)| search) .map(|(_, search)| search)
.collect()) .collect())
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn find_relevant_searches_hybrid( fn find_relevant_searches_hybrid(
@@ -406,10 +408,11 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
// Step 2: If query embedding provided, rank by semantic similarity // Step 2: If query embedding provided, rank by semantic similarity
if let Some(query_emb) = query_embedding { if let Some(query_emb) = query_embedding {
if query_emb.len() != 768 { if query_emb.len() != crate::ai::embedding_dim() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid query embedding dimensions: {} (expected 768)", "Invalid query embedding dimensions: {} (expected {})",
query_emb.len() query_emb.len(),
crate::ai::embedding_dim()
)); ));
} }
@@ -459,7 +462,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
.collect()) .collect())
} }
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn search_exists( fn search_exists(
@@ -490,7 +493,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
Ok(result.count > 0) Ok(result.count > 0)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> { fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -513,6 +516,6 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
Ok(result.count) Ok(result.count)
}) })
.map_err(|_| DbError::new(DbErrorKind::QueryError)) .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
} }
} }
+206
@@ -0,0 +1,206 @@
use diesel::prelude::*;
use diesel::sqlite::SqliteConnection;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};
use crate::database::models::{UpsertUserAiPrefs, UserAiPrefs};
use crate::database::schema;
use crate::database::{DbError, DbErrorKind, connect};
use crate::otel::trace_db_call;
/// Generic single-row table that passively mirrors the latest client AI
/// request parameters (voice, timezone, library). Read by the nightly
/// pre-generation scheduler (Section D) to pick up user preferences.
pub trait UserAiPrefsDao: Sync + Send {
/// Read the single row; `None` when it hasn't been populated yet.
fn get_prefs(
&mut self,
context: &opentelemetry::Context,
) -> Result<Option<UserAiPrefs>, DbError>;
/// Upsert the single row (id is always 1).
#[allow(dead_code)]
fn upsert_prefs(
&mut self,
context: &opentelemetry::Context,
prefs: &UpsertUserAiPrefs,
) -> Result<(), DbError>;
}
pub struct SqliteUserAiPrefsDao {
connection: Arc<Mutex<SqliteConnection>>,
}
impl Default for SqliteUserAiPrefsDao {
fn default() -> Self {
Self::new()
}
}
impl SqliteUserAiPrefsDao {
pub fn new() -> Self {
Self {
connection: Arc::new(Mutex::new(connect())),
}
}
#[cfg(test)]
pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
Self { connection: conn }
}
}
impl UserAiPrefsDao for SqliteUserAiPrefsDao {
fn get_prefs(
&mut self,
context: &opentelemetry::Context,
) -> Result<Option<UserAiPrefs>, DbError> {
trace_db_call(context, "query", "get_prefs", |_span| {
use schema::user_ai_prefs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock UserAiPrefsDao");
dsl::user_ai_prefs
.first::<UserAiPrefs>(connection.deref_mut())
.optional()
.map_err(|e| anyhow::anyhow!("Failed to get prefs: {}", e))
})
.map_err(|e| DbError::log(DbErrorKind::QueryError, e))
}
fn upsert_prefs(
&mut self,
context: &opentelemetry::Context,
prefs: &UpsertUserAiPrefs,
) -> Result<(), DbError> {
trace_db_call(context, "upsert", "upsert_prefs", |_span| {
use schema::user_ai_prefs::dsl;
let mut connection = self
.connection
.lock()
.expect("Unable to lock UserAiPrefsDao");
// Single-row table (id=1): one atomic upsert. The explicit id=1
// makes the conflict target deterministic so the second call
// updates in place rather than tripping the CHECK(id=1) constraint,
// and real insert errors surface instead of being swallowed into a
// separate update branch. The columns are set explicitly (rather
// than via AsChangeset) so a None field overwrites to NULL — the
// row mirrors the latest request exactly, not a merge of past ones.
diesel::insert_into(dsl::user_ai_prefs)
.values((dsl::id.eq(1), prefs))
.on_conflict(dsl::id)
.do_update()
.set((
dsl::voice.eq(&prefs.voice),
dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes),
dsl::library.eq(&prefs.library),
dsl::updated_at.eq(&prefs.updated_at),
))
.execute(connection.deref_mut())
.map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?;
Ok(())
})
.map_err(|e| DbError::log(DbErrorKind::InsertError, e))
}
}
#[cfg(test)]
mod tests {
use super::*;
use diesel::Connection;
use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
fn setup_dao() -> SqliteUserAiPrefsDao {
let mut conn = SqliteConnection::establish(":memory:")
.expect("Unable to create in-memory db connection");
conn.run_pending_migrations(DB_MIGRATIONS)
.expect("Failure running DB migrations");
SqliteUserAiPrefsDao::from_connection(Arc::new(Mutex::new(conn)))
}
fn ctx() -> opentelemetry::Context {
opentelemetry::Context::new()
}
#[test]
fn get_prefs_returns_none_when_empty() {
let mut dao = setup_dao();
let result = dao.get_prefs(&ctx()).unwrap();
assert!(result.is_none());
}
#[test]
fn upsert_prefs_inserts_row() {
let mut dao = setup_dao();
let now = 1_700_000_000i64;
let prefs = UpsertUserAiPrefs {
voice: Some("grandma".to_string()),
tz_offset_minutes: Some(-480),
library: Some("1".to_string()),
updated_at: now,
};
dao.upsert_prefs(&ctx(), &prefs).unwrap();
let row = dao.get_prefs(&ctx()).unwrap().unwrap();
assert_eq!(row.id, 1);
assert_eq!(row.voice, Some("grandma".to_string()));
assert_eq!(row.tz_offset_minutes, Some(-480));
assert_eq!(row.library, Some("1".to_string()));
assert_eq!(row.updated_at, now);
}
#[test]
fn upsert_prefs_replaces_existing() {
let mut dao = setup_dao();
let now1 = 1_700_000_000i64;
let now2 = 1_800_000_000i64;
let prefs1 = UpsertUserAiPrefs {
voice: Some("grandma".to_string()),
tz_offset_minutes: Some(-480),
library: Some("1".to_string()),
updated_at: now1,
};
dao.upsert_prefs(&ctx(), &prefs1).unwrap();
let prefs2 = UpsertUserAiPrefs {
voice: Some("dad".to_string()),
tz_offset_minutes: Some(-300),
library: None,
updated_at: now2,
};
dao.upsert_prefs(&ctx(), &prefs2).unwrap();
let row = dao.get_prefs(&ctx()).unwrap().unwrap();
assert_eq!(row.voice, Some("dad".to_string()));
assert_eq!(row.tz_offset_minutes, Some(-300));
assert!(row.library.is_none());
assert_eq!(row.updated_at, now2);
}
#[test]
fn upsert_partial_fields() {
let mut dao = setup_dao();
let now = 1_700_000_000i64;
let prefs = UpsertUserAiPrefs {
voice: None,
tz_offset_minutes: Some(-480),
library: None,
updated_at: now,
};
dao.upsert_prefs(&ctx(), &prefs).unwrap();
let row = dao.get_prefs(&ctx()).unwrap().unwrap();
assert_eq!(row.tz_offset_minutes, Some(-480));
assert!(row.voice.is_none());
assert!(row.library.is_none());
}
}
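Review bot: #[allow(dead_code)] on upsert_prefs means nothing in this commit writes the row, while the trait doc says the table mirrors the latest client request and the scheduler reads it. Either wire the call into the request handlers here or state in the doc comment that the writer lands later, and drop the allow when it does. In get_prefs, .first::<UserAiPrefs>() has no filter or ordering and relies on the CHECK(id=1) constraint mentioned in the upsert comment; using .find(1) would make the single-row assumption explicit in the query.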

Some files were not shown because too many files have changed in this diff