Raise chat truncation default num_ctx to 32k, env-overridable

The history-truncation budget assumed an 8192-token context whenever a chat request omitted num_ctx, while the llama-swap chat slots serve 20k-131k. Replayed transcripts past ~6k tokens were silently gutted every turn — losing conversation history and destroying llama.cpp KV-cache prefix reuse (full SWA re-prefill per turn). Default is now 32768 (real conversations top out around 16k), with AGENTIC_CHAT_DEFAULT_NUM_CTX to override per deploy, floored at headroom + 1024. Explicit per-request num_ctx still wins. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
Fix clippy lints in backfill and libraries tests
2026-06-09 19:14:02 -04:00 · 2026-06-09 18:29:44 -04:00 · 2026-06-09 18:29:35 -04:00 · 2026-06-09 18:29:20 -04:00 · 2026-06-09 18:29:06 -04:00 · 2026-06-07 18:28:22 -04:00
70 changed files with 11549 additions and 2215 deletions
@@ -53,11 +53,50 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
 # OPENROUTER_HTTP_REFERER=https://your-site.example
 # OPENROUTER_APP_TITLE=ImageApi

+# ── AI Insights — local backend switch ──────────────────────────────────
+# Picks which local LLM stack the server uses for chat, vision describe,
+# and embeddings. `ollama` (default) uses the OLLAMA_* settings above;
+# `llamacpp` uses the LLAMA_SWAP_* settings below. The switch is global
+# and applies to both `backend=local` and `backend=hybrid` (hybrid keeps
+# chat on OpenRouter but still uses this stack for the describe pass).
+# Don't flip mid-deploy without re-embedding existing index rows —
+# mixed vector spaces break similarity search.
+# LLM_BACKEND=ollama
+
+# ── AI Insights — llama.cpp / llama-swap (optional) ─────────────────────
+# Set LLAMA_SWAP_URL plus LLM_BACKEND=llamacpp to swap the local stack
+# off Ollama. Talks OpenAI-compatible /v1 to a llama-swap proxy fronting
+# per-slot llama-server instances. Chat models receive images directly
+# via content-parts (vision-capable models assumed); a separate vision
+# slot is used only by the describe_photo tool and describe-image utility.
+# LLAMA_SWAP_URL=http://localhost:9292/v1
+# LLAMA_SWAP_PRIMARY_MODEL=chat
+# Optional dedicated vision slot for describe_image. Defaults to
+# PRIMARY_MODEL so describe_photo works without extra config.
+# LLAMA_SWAP_VISION_MODEL=vision
+# LLAMA_SWAP_EMBEDDING_MODEL=embed
+# Comma-separated allowlist surfaced by /insights/models when
+# LLM_BACKEND=llamacpp. Every listed model reports has_vision=true.
+# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
+# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
+
+# ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ───────────────────
+# TTS routes through the same llama-swap proxy (a Chatterbox model id), so it
+# only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp.
+# Powers POST /tts/speech and the /tts/voices* endpoints (read-aloud insights
+# + voice cloning in the mobile app).
+# LLAMA_SWAP_TTS_MODEL=chatterbox        # TTS model id in config.yaml
+# LLAMA_SWAP_TTS_VOICE=m                 # default voice when a request omits one
+# LLAMA_SWAP_TTS_REF_SECONDS=30          # max voice-clone reference clip length (s)
+# LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600   # synth timeout (long chunked text)
+
 # ── AI Insights — sibling services (optional) ───────────────────────────
-# Apollo (places + face inference). Single Apollo deploys typically set
-# only APOLLO_API_BASE_URL and let the face client fall back to it.
+# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
+# typically set only APOLLO_API_BASE_URL and let the face + CLIP
+# clients fall back to it.
 # APOLLO_API_BASE_URL=http://apollo.lan:8000
 # APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000
+# APOLLO_CLIP_API_BASE_URL=http://apollo.lan:8000
 # SMS_API_URL=http://localhost:8000
 # SMS_API_TOKEN=

@@ -80,6 +119,23 @@ FACE_DETECT_TIMEOUT_SEC=60
 FACE_BACKLOG_MAX_PER_TICK=64
 FACE_HASH_BACKFILL_MAX_PER_TICK=2000

+# ── CLIP semantic photo search ──────────────────────────────────────────
+# ImageApi calls Apollo's /api/internal/clip/{encode_image,encode_text}
+# to populate per-photo embeddings during the watcher's backlog drain
+# and to encode user queries at /photos/search time. Disabled when
+# neither APOLLO_CLIP_API_BASE_URL nor APOLLO_API_BASE_URL is set.
+#
+# Per-watcher-tick cap on the encode drain. Default 32 ≈ ~1 photo/sec
+# on CPU, ~30 photos/sec on a single-GPU host (Apollo's threadpool
+# is 1 on CUDA, so concurrency is bounded server-side regardless of
+# our setting). Bump on a fresh deploy to clear the backlog faster.
+CLIP_BACKLOG_MAX_PER_TICK=32
+# Client-side parallel encode calls per drain pass. Apollo's GPU pool
+# serializes server-side; this just overlaps file-IO with inference.
+CLIP_ENCODE_CONCURRENCY=4
+# Per-encode HTTP timeout. CPU-only Apollo deploys may need higher.
+CLIP_REQUEST_TIMEOUT_SEC=60
+
 # ── RAG / search ────────────────────────────────────────────────────────
 # Set to `1` to enable cross-encoder reranking on /search results.
 SEARCH_RAG_RERANK=0
@@ -0,0 +1,9 @@
+# Normalize line endings in the repo to LF. Windows checkouts can still
+# present working-copy files as CRLF; this just keeps the committed history
+# stable so contributors on any OS don't see whitespace-only diffs every
+# time someone touches a file.
+* text=auto eol=lf
+
+# Migrations and SQL must be LF — SQLite parsers don't care, but diffing
+# is much cleaner with stable endings.
+*.sql text eol=lf
@@ -473,10 +473,16 @@ GET /memories?path=...&recursive=true
 POST /insights/generate              (non-agentic single-shot)
 POST /insights/generate/agentic      (tool-calling loop; body: { file_path, backend?, model?, ... })
 GET  /insights?path=...&library=...
-GET  /insights/models                (local Ollama models + capabilities)
+GET  /insights/models                (local-backend models + capabilities; Ollama OR llama-swap based on LLM_BACKEND)
 GET  /insights/openrouter/models     (curated OpenRouter allowlist)
 POST /insights/rate                  (thumbs up/down for training data)

+// Text-to-Speech (Chatterbox via llama-swap; needs LLAMA_SWAP_URL)
+POST /tts/speech                     (read-aloud: { text, voice?, ... } -> { audio_base64, format })
+GET  /tts/voices                     (Chatterbox voice library)
+POST /tts/voices/upload              (clone a voice from an uploaded clip; multipart)
+POST /tts/voices/from-library        (clone a voice from a library audio/video file)
+
 // Insight Chat Continuation
 POST /insights/chat                  (single-turn reply, non-streaming)
 POST /insights/chat/stream           (SSE: text / tool_call / tool_result / truncated / done)
@@ -631,8 +637,45 @@ OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small  # Optional, embeddings
 OPENROUTER_HTTP_REFERER=https://your-site.example    # Optional attribution header
 OPENROUTER_APP_TITLE=ImageApi                  # Optional attribution header

+# Local LLM backend switch. `ollama` (default) keeps the OLLAMA_* settings
+# above; `llamacpp` swaps the entire local stack (chat + vision describe +
+# embeddings) over to llama-swap. The switch is global and applies to
+# `backend=local` requests and to `backend=hybrid`'s describe pass (hybrid
+# chat still goes to OpenRouter). Don't flip mid-deploy without
+# re-embedding — mixed vector spaces break similarity search.
+LLM_BACKEND=ollama
+
+# llama.cpp / llama-swap (used when LLM_BACKEND=llamacpp). OpenAI-compatible
+# proxy hosting one or more llama-server processes. Chat models receive
+# images directly via content-parts (all models assumed vision-capable).
+LLAMA_SWAP_URL=http://localhost:9292/v1         # Required when LLM_BACKEND=llamacpp
+LLAMA_SWAP_PRIMARY_MODEL=chat                   # Chat slot id (matches config.yaml)
+LLAMA_SWAP_VISION_MODEL=                        # Dedicated vision slot for describe_image / describe_photo
+                                                # tool. Defaults to PRIMARY_MODEL when unset.
+LLAMA_SWAP_EMBEDDING_MODEL=embed                # Embedding slot id
+LLAMA_SWAP_ALLOWED_MODELS=chat,coder            # Curated allowlist surfaced by GET /insights/models
+                                                # when LLM_BACKEND=llamacpp. All report has_vision=true.
+                                                # Empty = picker shows only the configured primary model.
+LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180          # Per-request timeout; bump for slow CPU offload
+
+# Text-to-speech (Chatterbox served behind llama-swap). Only needs
+# LLAMA_SWAP_URL — independent of LLM_BACKEND. Powers /tts/speech (read-aloud)
+# and /tts/voices* (voice cloning). Reference audio is ffmpeg-normalized to WAV
+# server-side, so any source format works.
+LLAMA_SWAP_TTS_MODEL=chatterbox                # TTS model id in config.yaml (default: chatterbox)
+LLAMA_SWAP_TTS_VOICE=m                         # Default voice when /tts/speech omits one (optional)
+LLAMA_SWAP_TTS_REF_SECONDS=30                  # Max voice-clone reference clip length, seconds
+                                               # (Chatterbox is zero-shot; ~10-20s clean ref is ideal)
+LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600     # Per-request synth timeout (long chunked insights take
+                                               # minutes); overrides the shared client timeout for /tts/speech
+
 # Insight Chat Continuation
 AGENTIC_CHAT_MAX_ITERATIONS=6                  # Cap on tool-calling iterations per chat turn (default 6)
+AGENTIC_CHAT_DEFAULT_NUM_CTX=32768             # Assumed context window for the history-truncation budget
+                                               # when a chat request omits num_ctx (default 32768). Size to
+                                               # the smallest context among the chat models actually served;
+                                               # too small silently guts replayed history every turn (and
+                                               # destroys llama.cpp KV-cache prefix reuse).
 ```

 **AI Insights Fallback Behavior:**
@@ -650,10 +693,50 @@ The `OllamaClient` provides methods to query available models:

 This allows runtime verification of model availability before generating insights.

+**Local backend switch (`LLM_BACKEND`):**
+
+One env var decides which "local" stack the server runs against — `ollama`
+(default) or `llamacpp`. It's global on purpose: chat, vision, and
+embeddings all route through the same backend, so the embedding-vector
+column in SQLite stays in one vector space. Don't flip mid-deploy without
+re-embedding the affected rows — similarity search will collapse.
+
+- `LLM_BACKEND=ollama`: chat, vision, and embeddings use Ollama. Vision
+  capability is probed per-model via `/api/show`.
+- `LLM_BACKEND=llamacpp`: chat models receive images directly via OpenAI
+  content-parts (all models assumed vision-capable). Embeddings hit the
+  `embed` slot. A dedicated `LLAMA_SWAP_VISION_MODEL` slot (defaults to
+  the chat model) handles `describe_image` for the `describe_photo` tool.
+  Requires `LLAMA_SWAP_URL`.
+
+The per-request `backend=hybrid` override is orthogonal: it always sends
+chat to OpenRouter (text-only, images are pre-described and inlined), but
+the describe + embed passes still route through whichever `LLM_BACKEND`
+is configured.
+
+**Backend dispatch (`ResolvedBackend`):**
+
+`InsightGenerator::resolve_backend(kind, overrides)` is the single entry
+point that builds clients for a request. Returns a `ResolvedBackend` with
+two roles: `.chat()` (the agentic/chat client) and `.local()` (local-only
+utility calls: rerank, describe_image, embeddings). `BackendKind` is an
+enum (`Local` | `Hybrid`) replacing the stringly-typed `"local"` /
+`"hybrid"` labels. `SamplingOverrides` groups model/ctx/temp/top_p/top_k/
+min_p per-request overrides. All downstream code (`execute_tool`,
+`run_streaming_agentic_loop`, etc.) takes `&ResolvedBackend` rather than
+individual client references.
+
+`GET /insights/models` returns the local-backend models with capabilities
+in the same envelope shape regardless of `LLM_BACKEND`: Ollama servers
+when `ollama`, llama-swap slots (from `LLAMA_SWAP_ALLOWED_MODELS`) when
+`llamacpp`. No `/insights/llamacpp/models` — the picker reads a single
+endpoint.
+
 **Hybrid Backend (OpenRouter):**
 - Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`.
- Local Ollama still describes the image (vision); the description is inlined
-  into the chat prompt and the agentic loop runs on OpenRouter.
+- Vision describe happens before the agentic loop; the description is inlined
+  into the chat prompt and the agentic loop runs on OpenRouter. Vision
+  routes through whichever `LLM_BACKEND` is configured.
 - `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that
  call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`.
 - No live capability precheck — the operator-curated allowlist is trusted.
@@ -661,6 +744,15 @@ This allows runtime verification of model availability before generating insight
 - `GET /insights/openrouter/models` returns `{ models, default_model, configured }`
  for client picker UIs.

+**Cross-replay matrix (chat continuation):**
+- `local → local` allowed (whether served by Ollama or llama-swap; that's
+  a deploy-time decision, not a request-time one).
+- `hybrid → hybrid` allowed.
+- `hybrid → local` allowed (the inlined description replays as text).
+- `local → hybrid` rejected — the stored transcript has raw images in the
+  first user message and OpenRouter providers don't accept that shape
+  consistently. Regenerate the insight in hybrid mode instead.
+
 **Insight Chat Continuation:**

 After an agentic insight is generated, the full `Vec<ChatMessage>` transcript is
@@ -707,14 +799,17 @@ Per-`(library_id, file_path)` async mutex (`AppState.insight_chat.chat_locks`)
 serialises concurrent turns on the same insight so the JSON blob doesn't race.

 Context management is a soft bound: if the serialized history exceeds
-`num_ctx - 2048` tokens (cheap 4-byte/token heuristic), the oldest
-assistant-tool_call + tool_result pairs are dropped until under budget. The
+`num_ctx - 2048` tokens (cheap 4-byte/token heuristic; `num_ctx` defaults
+to `AGENTIC_CHAT_DEFAULT_NUM_CTX`, 32768, when the request omits it), the
+oldest assistant-tool_call + tool_result pairs are dropped until under budget. The
 initial user message (with any images) and system prompt are always preserved.
 The `truncated` event / flag is surfaced to the client when a drop occurred.

 Configurable env:
 - `AGENTIC_CHAT_MAX_ITERATIONS` — cap on tool-calling iterations per turn
  (default 6). Per-request `max_iterations` is clamped to this cap.
+- `AGENTIC_CHAT_DEFAULT_NUM_CTX` — assumed context window for the truncation
+  budget when the request omits `num_ctx` (default 32768).

 **Apollo Places integration (optional):**

@@ -2051,7 +2051,7 @@ dependencies = [

 [[package]]
 name = "image-api"
-version = "1.1.0"
+version = "1.3.0"
 dependencies = [
 "actix",
 "actix-cors",
@@ -2104,6 +2104,7 @@ dependencies = [
 "tokio",
 "tokio-util",
 "urlencoding",
+ "uuid",
 "walkdir",
 "zerocopy",
 ]
@@ -4391,7 +4392,9 @@ version = "1.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
 dependencies = [
+ "getrandom 0.4.2",
 "js-sys",
+ "serde_core",
 "wasm-bindgen",
 ]

@@ -1,6 +1,6 @@
 [package]
 name = "image-api"
-version = "1.1.0"
+version = "1.3.0"
 authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
 edition = "2024"

@@ -66,6 +66,7 @@ image_hasher = "3.0"
 bk-tree = "0.5"
 async-trait = "0.1"
 indicatif = "0.17"
+uuid = { version = "1.10", features = ["v4", "serde"] }

 # Windows lacks system sqlite3, so re-enable the bundled C build there.
 # Linux/macOS use the system library (faster builds, smaller binary).
@@ -147,6 +147,34 @@ so you can rewrite the saved summary from within chat.
 - `AGENTIC_CHAT_MAX_ITERATIONS` - Cap on tool-calling iterations per chat turn [default: `6`]
  - Per-request `max_iterations` (when sent by the client) is clamped to this cap

+#### Text-to-Speech (Optional)
+Reads insights aloud and manages cloned voices via a Chatterbox model served
+behind the same llama-swap proxy. Only requires `LLAMA_SWAP_URL` (the TTS client
+is built whenever that's set — independent of `LLM_BACKEND`). Endpoints:
+- `POST /tts/speech` — body `{ text, voice?, format?, exaggeration?, cfg_weight?,
+  temperature? }`; returns `{ audio_base64, format }`. Input is cleaned
+  server-side (markdown + emoji stripped) and the generation knobs are clamped
+  to Chatterbox's ranges. Synthesis is serialized (one at a time — the upstream
+  has no GPU lock of its own); a concurrent request gets a fast `429`.
+- `GET /tts/voices` — list the voice library.
+- `POST /tts/voices/upload` — multipart `voice_name` + `voice_file`; clone a
+  voice from an uploaded clip (≤25 MB).
+- `POST /tts/voices/from-library` — body `{ voice_name, path, library? }`; clone
+  from a library file (audio forwarded as-is; video has its audio extracted via
+  ffmpeg).
+
+Env:
+- `LLAMA_SWAP_TTS_MODEL` - TTS model id in llama-swap's `config.yaml` [default: `chatterbox`]
+- `LLAMA_SWAP_TTS_VOICE` - default voice used when a `/tts/speech` request omits `voice` (optional)
+- `LLAMA_SWAP_TTS_REF_SECONDS` - max voice-clone reference clip length in seconds
+  [default: `30`]. Reference audio is ffmpeg-normalized to mono 24 kHz WAV (so any
+  source format works); Chatterbox is zero-shot, so a clean ~10–20s sample is the
+  sweet spot — more rarely helps.
+- `LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS` - per-request synthesis timeout in
+  seconds [default: `600`]. Long insights are chunked + synthesized server-side
+  and can take minutes; this is separate from (and overrides, for `/tts/speech`)
+  the shared `LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS`.
+
 #### Fallback Behavior
 - Primary server is tried first with 5-second connection timeout
 - On failure, automatically falls back to secondary server (if configured)
@@ -0,0 +1,3 @@
+DROP INDEX IF EXISTS idx_image_exif_clip_backfill;
+ALTER TABLE image_exif DROP COLUMN clip_model_version;
+ALTER TABLE image_exif DROP COLUMN clip_embedding;
@@ -0,0 +1,27 @@
+-- CLIP semantic photo search: store a per-photo image embedding so
+-- text queries can rerank against the live library via cosine
+-- similarity. Apollo encodes the bytes via its CLIP service; ImageApi
+-- writes the resulting blob here.
+--
+-- `clip_embedding` is the raw little-endian float32 buffer of an
+-- L2-normalized vector (size depends on the model — 768 floats × 4 bytes
+-- for ViT-L/14, 512 floats × 4 bytes for ViT-B/32). Apollo always returns the
+-- normalized form so the search-time dot product reduces to a plain
+-- cosine similarity.
+--
+-- `clip_model_version` echoes the upstream `APOLLO_CLIP_MODEL` (e.g.
+-- "ViT-L/14"). A model swap shouldn't silently mix geometries — the
+-- backfill drain will make rows eligible again when their stored
+-- model_version differs from the live engine's, and the search route refuses to
+-- mix rows from two model_versions in the same response.
+ALTER TABLE image_exif ADD COLUMN clip_embedding BLOB;
+ALTER TABLE image_exif ADD COLUMN clip_model_version TEXT;
+
+-- Partial index for the backfill drain. Mirrors the shape of
+-- `idx_image_exif_date_backfill`: candidate rows are those with a
+-- known content_hash (so we don't race the unhashed backlog) but no
+-- embedding yet. SELECT cost stays O(missing rows) instead of full
+-- table scan once the column is mostly populated.
+CREATE INDEX IF NOT EXISTS idx_image_exif_clip_backfill
+    ON image_exif (id)
+    WHERE clip_embedding IS NULL AND content_hash IS NOT NULL;
@@ -0,0 +1,3 @@
+DROP INDEX IF EXISTS idx_insight_gen_jobs_status_cleanup;
+DROP INDEX IF EXISTS idx_insight_gen_jobs_file;
+DROP TABLE IF EXISTS insight_generation_jobs;
@@ -0,0 +1,23 @@
+-- Track async insight generation jobs so the client can poll for
+-- completion after the server returns 202 Accepted. Each generation
+-- creates a new row; the application layer cancels prior running
+-- jobs before inserting.
+CREATE TABLE insight_generation_jobs (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    library_id INTEGER NOT NULL DEFAULT 1,
+    file_path TEXT NOT NULL,
+    generation_type TEXT NOT NULL,
+    status TEXT NOT NULL DEFAULT 'running',
+    started_at INTEGER NOT NULL,
+    completed_at INTEGER,
+    result_insight_id INTEGER,
+    error_message TEXT
+);
+
+-- For the status endpoint: fast lookup by (library_id, file_path)
+CREATE INDEX idx_insight_gen_jobs_file
+    ON insight_generation_jobs(library_id, file_path);
+
+-- For startup cleanup (future): prune old completed/failed jobs
+CREATE INDEX idx_insight_gen_jobs_status_cleanup
+    ON insight_generation_jobs(status, started_at);
@@ -0,0 +1,28 @@
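+-- Rollback: restore the UNIQUE(library_id, file_path, generation_type)
+-- constraint by rebuilding the table.
+-- NOTE(review): the INSERT below will fail if several job rows share the
+-- same (library_id, file_path, generation_type) — confirm duplicates are
+-- pruned before rolling back.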
+
+CREATE TABLE insight_generation_jobs_new (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    library_id INTEGER NOT NULL DEFAULT 1,
+    file_path TEXT NOT NULL,
+    generation_type TEXT NOT NULL,
+    status TEXT NOT NULL DEFAULT 'running',
+    started_at INTEGER NOT NULL,
+    completed_at INTEGER,
+    result_insight_id INTEGER,
+    error_message TEXT,
+    UNIQUE(library_id, file_path, generation_type)
+);
+
+INSERT INTO insight_generation_jobs_new
+    SELECT id, library_id, file_path, generation_type, status, started_at, completed_at, result_insight_id, error_message
+    FROM insight_generation_jobs;
+
+DROP TABLE insight_generation_jobs;
+
+ALTER TABLE insight_generation_jobs_new RENAME TO insight_generation_jobs;
+
+CREATE INDEX idx_insight_gen_jobs_file
+    ON insight_generation_jobs(library_id, file_path);
+
+CREATE INDEX idx_insight_gen_jobs_status_cleanup
+    ON insight_generation_jobs(status, started_at);
@@ -0,0 +1,30 @@
+-- Remove UNIQUE(library_id, file_path, generation_type) constraint to allow
+-- multiple job rows per file. This enables proper cancel/regenerate semantics:
+-- a new job is always inserted on regenerate, and the old job is cancelled
+-- independently. The application layer prevents concurrent running jobs.
+
+CREATE TABLE insight_generation_jobs_new (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    library_id INTEGER NOT NULL DEFAULT 1,
+    file_path TEXT NOT NULL,
+    generation_type TEXT NOT NULL,
+    status TEXT NOT NULL DEFAULT 'running',
+    started_at INTEGER NOT NULL,
+    completed_at INTEGER,
+    result_insight_id INTEGER,
+    error_message TEXT
+);
+
+INSERT INTO insight_generation_jobs_new
+    SELECT id, library_id, file_path, generation_type, status, started_at, completed_at, result_insight_id, error_message
+    FROM insight_generation_jobs;
+
+DROP TABLE insight_generation_jobs;
+
+ALTER TABLE insight_generation_jobs_new RENAME TO insight_generation_jobs;
+
+CREATE INDEX idx_insight_gen_jobs_file
+    ON insight_generation_jobs(library_id, file_path);
+
+CREATE INDEX idx_insight_gen_jobs_status_cleanup
+    ON insight_generation_jobs(status, started_at);
@@ -0,0 +1,11 @@
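+-- SQLite doesn't support DROP COLUMN before 3.35.0; recreate the table
+-- without the new columns. This is only needed for rollback.
+-- NOTE(review): CREATE TABLE ... AS SELECT copies only column names and
+-- data; the primary key, constraints, defaults, and indexes of
+-- photo_insights are not carried over — confirm that is acceptable here.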
+CREATE TABLE photo_insights_old AS
+    SELECT id, library_id, rel_path, title, summary, generated_at,
+           model_version, is_current, training_messages, approved,
+           backend, fewshot_source_ids, content_hash
+    FROM photo_insights;
+
+DROP TABLE photo_insights;
+
+ALTER TABLE photo_insights_old RENAME TO photo_insights;
@@ -0,0 +1,8 @@
+-- Persist generation parameters on each insight row for auditing.
+ALTER TABLE photo_insights ADD COLUMN num_ctx INTEGER;
+ALTER TABLE photo_insights ADD COLUMN temperature REAL;
+ALTER TABLE photo_insights ADD COLUMN top_p REAL;
+ALTER TABLE photo_insights ADD COLUMN top_k INTEGER;
+ALTER TABLE photo_insights ADD COLUMN min_p REAL;
+ALTER TABLE photo_insights ADD COLUMN system_prompt TEXT;
+ALTER TABLE photo_insights ADD COLUMN persona_id TEXT;
@@ -0,0 +1,13 @@
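+-- SQLite doesn't support DROP COLUMN before 3.35.0; recreate the table
+-- without the token-count columns. This is only needed for rollback.
+-- NOTE(review): CREATE TABLE ... AS SELECT copies only column names and
+-- data; the primary key, constraints, defaults, and indexes of
+-- photo_insights are not carried over — confirm that is acceptable here.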
+CREATE TABLE photo_insights_old AS
+    SELECT id, library_id, rel_path, title, summary, generated_at,
+           model_version, is_current, training_messages, approved,
+           backend, fewshot_source_ids, content_hash,
+           num_ctx, temperature, top_p, top_k, min_p,
+           system_prompt, persona_id
+    FROM photo_insights;
+
+DROP TABLE photo_insights;
+
+ALTER TABLE photo_insights_old RENAME TO photo_insights;
@@ -0,0 +1,6 @@
+-- Persist token usage on each insight row. Split from
+-- 2026-05-27-000002_add_insight_generation_params because that
+-- migration was already applied on some environments before these
+-- columns were added.
+ALTER TABLE photo_insights ADD COLUMN prompt_eval_count INTEGER;
+ALTER TABLE photo_insights ADD COLUMN eval_count INTEGER;
@@ -0,0 +1,140 @@
+use anyhow::{Result, anyhow};
+
+use crate::ai::llm_client::LlmClient;
+
+/// Which backend a request runs on, as a typed replacement for the
+/// `"local"` / `"hybrid"` string labels.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BackendKind {
+    /// Canonical label `"local"`.
+    Local,
+    /// Canonical label `"hybrid"`.
+    Hybrid,
+}
+
+impl BackendKind {
+    /// Parses a backend label into a [`BackendKind`].
+    ///
+    /// The input is trimmed and lowercased before matching, so
+    /// `"  Local "` and `"HYBRID"` are accepted. An empty (or
+    /// whitespace-only) string is treated as `Local`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error naming the normalized (trimmed, lowercased) input
+    /// when it is neither `"local"`, `"hybrid"`, nor empty.
+    pub fn parse(s: &str) -> Result<Self> {
+        match s.trim().to_lowercase().as_str() {
+            // Empty input falls back to the local backend.
+            "local" | "" => Ok(Self::Local),
+            "hybrid" => Ok(Self::Hybrid),
+            other => Err(anyhow!(
+                "unknown backend '{}'; expected 'local' or 'hybrid'",
+                other
+            )),
+        }
+    }
+
+    /// Returns the canonical lowercase label for this backend; the result
+    /// round-trips through [`BackendKind::parse`].
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Local => "local",
+            Self::Hybrid => "hybrid",
+        }
+    }
+}
+
+/// Formats the backend as its canonical label (same text as
+/// [`BackendKind::as_str`]).
+impl std::fmt::Display for BackendKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+/// Optional per-request overrides grouped into one value. Every field is
+/// `None` when the request did not supply that override.
+pub struct SamplingOverrides {
+    /// Model override.
+    pub model: Option<String>,
+    /// Context-window override.
+    pub num_ctx: Option<i32>,
+    /// Sampling temperature override.
+    pub temperature: Option<f32>,
+    /// `top_p` sampling override.
+    pub top_p: Option<f32>,
+    /// `top_k` sampling override.
+    pub top_k: Option<i32>,
+    /// `min_p` sampling override.
+    pub min_p: Option<f32>,
+}
+
+impl SamplingOverrides {
+    /// Returns `true` when at least one of `temperature`, `top_p`, `top_k`,
+    /// or `min_p` is set. `model` and `num_ctx` are not considered.
+    pub fn has_sampling(&self) -> bool {
+        self.temperature.is_some()
+            || self.top_p.is_some()
+            || self.top_k.is_some()
+            || self.min_p.is_some()
+    }
+}
+
+/// The pair of LLM clients resolved for a single request, plus the
+/// backend kind they were resolved for.
+pub struct ResolvedBackend {
+    /// Client exposed through [`ResolvedBackend::chat`].
+    chat: Box<dyn LlmClient>,
+    /// Client exposed through [`ResolvedBackend::local`].
+    local: Box<dyn LlmClient>,
+    /// Backend kind this value was built for.
+    pub kind: BackendKind,
+    /// `true` when the chat model receives images directly (Ollama with
+    /// vision, or llamacpp). `false` for hybrid where we describe-then-inline.
+    pub images_inline: bool,
+}
+
+impl ResolvedBackend {
+    /// Bundles the two clients with the backend kind and the
+    /// `images_inline` flag. No validation is performed; the arguments are
+    /// stored as given.
+    pub fn new(
+        chat: Box<dyn LlmClient>,
+        local: Box<dyn LlmClient>,
+        kind: BackendKind,
+        images_inline: bool,
+    ) -> Self {
+        Self {
+            chat,
+            local,
+            kind,
+            images_inline,
+        }
+    }
+
+    /// Borrows the chat client.
+    pub fn chat(&self) -> &dyn LlmClient {
+        self.chat.as_ref()
+    }
+
+    /// Borrows the local client.
+    pub fn local(&self) -> &dyn LlmClient {
+        self.local.as_ref()
+    }
+
+    /// Returns whatever the chat client reports as its primary model.
+    pub fn model(&self) -> &str {
+        self.chat.primary_model()
+    }
+}
+
+// Unit tests for `BackendKind` parsing/labels and `SamplingOverrides`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Parsing is case-insensitive, ignores surrounding whitespace, defaults
+    // the empty string to `Local`, and rejects unknown labels.
+    #[test]
+    fn parse_backend_kind() {
+        assert_eq!(BackendKind::parse("local").unwrap(), BackendKind::Local);
+        assert_eq!(BackendKind::parse("hybrid").unwrap(), BackendKind::Hybrid);
+        assert_eq!(BackendKind::parse("  Local ").unwrap(), BackendKind::Local);
+        assert_eq!(BackendKind::parse("HYBRID").unwrap(), BackendKind::Hybrid);
+        assert_eq!(BackendKind::parse("").unwrap(), BackendKind::Local);
+        assert!(BackendKind::parse("vllm").is_err());
+    }
+
+    // `as_str` output must parse back to the same variant.
+    #[test]
+    fn backend_kind_as_str_roundtrips() {
+        assert_eq!(
+            BackendKind::parse(BackendKind::Local.as_str()).unwrap(),
+            BackendKind::Local
+        );
+        assert_eq!(
+            BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(),
+            BackendKind::Hybrid
+        );
+    }
+
+    // `has_sampling` is false with no overrides and true once a sampling
+    // knob (here `temperature`) is set.
+    #[test]
+    fn sampling_overrides_has_sampling() {
+        let empty = SamplingOverrides {
+            model: None,
+            num_ctx: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            min_p: None,
+        };
+        assert!(!empty.has_sampling());
+
+        let with_temp = SamplingOverrides {
+            model: None,
+            num_ctx: Some(4096),
+            temperature: Some(0.7),
+            top_p: None,
+            top_k: None,
+            min_p: None,
+        };
+        assert!(with_temp.has_sampling());
+    }
+}
@@ -0,0 +1,392 @@
+//! Thin async HTTP client for Apollo's `/api/internal/clip/*` endpoints.
+//!
+//! Apollo hosts the OpenAI CLIP inference service (ViT-L/14 by default,
+//! configurable via `APOLLO_CLIP_MODEL`). This client is the ImageApi side
+//! of the contract: shove image bytes through `/encode_image` to populate
+//! `image_exif.clip_embedding` during backfill, and call `/encode_text` to
+//! encode a user's natural-language query at search time. The actual
+//! cosine-similarity rerank runs locally in ImageApi.
+//!
+//! Mirrors `face_client.rs` / `tag_client.rs` shape: optional base URL
+//! (None = disabled — feature off, drain and search no-op), reqwest
+//! client with a generous timeout because GPU inference under a backlog
+//! can queue server-side (Apollo's threadpool is bounded to 1 worker on
+//! CUDA).
+//!
+//! Configured via `APOLLO_CLIP_API_BASE_URL`, falling back to
+//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo
+//! deploys are the common case).
+//!
+//! Wire format:
+//! - `/encode_image`: multipart/form-data with `file=<bytes>` and
+//!   `meta=<json>` (content_hash / library_id / rel_path for logging).
+//! - `/encode_text`: JSON `{"text": "<query>"}`.
+//!
+//! Both return `{model_version, embedding_dim, duration_ms, embedding}`
+//! where `embedding` is base64 of `dim×4` little-endian float32 bytes,
+//! L2-normalized so the rerank reduces to a plain dot product.
+//!
+//! Error mapping (reflected in [`ClipError`]):
+//! - 422 `decode_failed` / `empty_text` → permanent: ImageApi marks the
+//!   row failed or surfaces the empty-query error to the search caller.
+//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker.
+//! - 408 / 413 / 429 → defer (operator-fixable infra issues).
+//! - Any other 4xx → permanent.
+//! - Anything else (other 5xx, network error) → defer.
+
+use anyhow::{Context, Result};
+use base64::Engine;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+#[derive(Debug, Clone, Serialize)]
+pub struct EncodeImageMeta { // serialized to JSON and sent as the multipart `meta` field by `encode_image`
+    pub content_hash: String, // per the module docs, carried for logging
+    pub library_id: i32, // per the module docs, carried for logging
+    pub rel_path: String, // also used as the multipart file name in `encode_image`
+}
+
+#[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)] // duration_ms logged by the backfill drain
+pub struct EncodeResponse { // success body deserialized by `encode_image` and `encode_text`
+    pub model_version: String,
+    pub embedding_dim: i32, // element count; `decode_embedding` expects `embedding_dim * 4` bytes
+    pub duration_ms: i64,
+    /// base64 of `embedding_dim * 4` bytes (LE float32). ImageApi stores
+    /// the decoded bytes verbatim as a BLOB.
+    pub embedding: String,
+}
+
+impl EncodeResponse {
+    /// Decode the base64 wire-format embedding into raw bytes for storage.
+    ///
+    /// The decoded buffer must be exactly `embedding_dim * 4` bytes, so a
+    /// malformed response is reported here instead of turning into a
+    /// length mismatch further downstream.
+    ///
+    /// # Errors
+    /// Fails when the payload is not valid base64, when `embedding_dim` is
+    /// negative or too large to express as a byte count, or when the decoded
+    /// length differs from `embedding_dim * 4`.
+    pub fn decode_embedding(&self) -> Result<Vec<u8>> {
+        let bytes = base64::engine::general_purpose::STANDARD
+            .decode(self.embedding.as_bytes())
+            .context("clip embedding base64 decode")?;
+        // `embedding_dim` is a signed value taken from the response. A plain
+        // `as usize` cast would turn a negative number into a huge length and
+        // the `* 4` could overflow, so convert and multiply with checks.
+        let expected = usize::try_from(self.embedding_dim)
+            .ok()
+            .and_then(|dim| dim.checked_mul(4))
+            .with_context(|| format!("clip embedding_dim invalid: {}", self.embedding_dim))?;
+        if bytes.len() != expected {
+            anyhow::bail!(
+                "clip embedding wrong size: got {} bytes, expected {} ({} * 4)",
+                bytes.len(),
+                expected,
+                self.embedding_dim
+            );
+        }
+        Ok(bytes)
+    }
+}
+
+#[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)] // load_error consumed by future health probe
+pub struct ClipHealth { // JSON body of `/api/internal/clip/health`, as deserialized by `ClipClient::health`
+    pub loaded: bool,
+    pub device: String,
+    pub model_version: String,
+    pub embedding_dim: i32,
+    #[serde(default)] // tolerate a response without this key; it then deserializes to `None`
+    pub load_error: Option<String>,
+}
+
+#[derive(Debug)]
+pub enum ClipError { // failure classes returned by `encode_image` / `encode_text`
+    /// Apollo refused for a reason that won't change on retry (decode
+    /// failure on /encode_image, empty text on /encode_text).
+    Permanent(anyhow::Error),
+    /// Apollo couldn't process this turn but might next time (CUDA OOM,
+    /// engine not loaded, network hiccup).
+    Transient(anyhow::Error),
+    /// Feature is disabled (no `APOLLO_CLIP_API_BASE_URL` /
+    /// `APOLLO_API_BASE_URL`).
+    Disabled,
+}
+
+impl std::fmt::Display for ClipError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ClipError::Permanent(e) => write!(f, "permanent: {e}"),
+            ClipError::Transient(e) => write!(f, "transient: {e}"),
+            ClipError::Disabled => write!(f, "clip client disabled"),
+        }
+    }
+}
+
+impl std::error::Error for ClipError {} // empty impl: default `source()` is kept, so the wrapped cause is only exposed through `Display`
+
+#[derive(Clone)]
+pub struct ClipClient { // HTTP client for the CLIP endpoints; `base_url == None` means the feature is disabled
+    client: Client, // built in `new` with the configured request timeout
+    base_url: Option<String>, // stored without trailing `/` (stripped in `new`)
+}
+
+impl ClipClient {
+    /// Builds a client for the given Apollo base URL (`None` = disabled).
+    ///
+    /// The request timeout is read from `CLIP_REQUEST_TIMEOUT_SEC` (whole
+    /// seconds) and falls back to 60 when the variable is unset or does not
+    /// parse as `u64`. Trailing `/` characters are stripped from the base
+    /// URL so endpoint paths can be appended directly.
+    ///
+    /// # Panics
+    /// Panics if the underlying `reqwest` client cannot be built.
+    pub fn new(base_url: Option<String>) -> Self {
+        let timeout_secs = std::env::var("CLIP_REQUEST_TIMEOUT_SEC")
+            .ok()
+            .and_then(|s| s.parse::<u64>().ok())
+            .unwrap_or(60);
+        let client = Client::builder()
+            .timeout(Duration::from_secs(timeout_secs))
+            .build()
+            .expect("reqwest client build");
+        Self {
+            client,
+            base_url: base_url.map(|u| u.trim_end_matches('/').to_string()),
+        }
+    }
+
+    /// Read both standard env vars. `APOLLO_CLIP_API_BASE_URL` wins;
+    /// fallback to `APOLLO_API_BASE_URL`. Both unset (or blank) → disabled.
+    pub fn from_env() -> Self {
+        // Trim BEFORE the emptiness check so a whitespace-padded value is
+        // stored without its padding instead of yielding an unusable URL.
+        let read = |key: &str| {
+            std::env::var(key)
+                .ok()
+                .map(|raw| raw.trim().to_string())
+                .filter(|url| !url.is_empty())
+        };
+        let base = read("APOLLO_CLIP_API_BASE_URL").or_else(|| read("APOLLO_API_BASE_URL"));
+        Self::new(base)
+    }
+
+    /// True when a base URL is configured.
+    pub fn is_enabled(&self) -> bool {
+        self.base_url.is_some()
+    }
+
+    /// Encode an image to a 768-d (ViT-L/14) or 512-d (ViT-B/32)
+    /// L2-normalized embedding. Used by the backfill drain.
+    ///
+    /// Sends `bytes` as the multipart `file` part (named after
+    /// `meta.rel_path`) together with `meta` serialized as JSON.
+    ///
+    /// # Errors
+    /// - [`ClipError::Disabled`] when no base URL is configured.
+    /// - [`ClipError::Permanent`] when the request cannot be assembled.
+    /// - Whatever the shared send path reports for transport and HTTP errors.
+    pub async fn encode_image(
+        &self,
+        bytes: Vec<u8>,
+        meta: EncodeImageMeta,
+    ) -> std::result::Result<EncodeResponse, ClipError> {
+        let Some(base) = self.base_url.as_deref() else {
+            return Err(ClipError::Disabled);
+        };
+        let url = format!("{}/api/internal/clip/encode_image", base);
+        let meta_json = serde_json::to_string(&meta)
+            .map_err(|e| ClipError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?;
+        // Surface a mime failure as an error. Falling back to an empty part
+        // would upload zero bytes and hide the real problem behind a server
+        // side decode failure. `meta` is no longer needed after
+        // serialization, so the path is moved rather than cloned.
+        let file_part = reqwest::multipart::Part::bytes(bytes)
+            .file_name(meta.rel_path)
+            .mime_str("application/octet-stream")
+            .map_err(|e| ClipError::Permanent(anyhow::anyhow!("file part mime: {e}")))?;
+        let form = reqwest::multipart::Form::new()
+            .text("meta", meta_json)
+            .part("file", file_part);
+        self.send_multipart(&url, form).await
+    }
+
+    /// Encode a natural-language query to an embedding. Used by the
+    /// search route to rank stored image embeddings by cosine sim.
+    ///
+    /// # Errors
+    /// [`ClipError::Disabled`] when no base URL is configured; otherwise
+    /// whatever the shared send path reports.
+    pub async fn encode_text(&self, text: &str) -> std::result::Result<EncodeResponse, ClipError> {
+        let Some(base) = self.base_url.as_deref() else {
+            return Err(ClipError::Disabled);
+        };
+        let url = format!("{}/api/internal/clip/encode_text", base);
+        let body = serde_json::json!({ "text": text });
+        Self::send_encode_request(self.client.post(&url).json(&body)).await
+    }
+
+    /// Engine reachability + device/model report. Used as a startup
+    /// sanity check from the probe binary and (later) the backlog drain.
+    ///
+    /// # Errors
+    /// Fails when the client is disabled, the request fails, the status is
+    /// not a success, or the body does not deserialize as [`ClipHealth`].
+    #[allow(dead_code)] // consumed by probe + drain
+    pub async fn health(&self) -> Result<ClipHealth> {
+        let base = self.base_url.as_deref().context("clip client disabled")?;
+        let url = format!("{}/api/internal/clip/health", base);
+        let resp = self.client.get(&url).send().await?.error_for_status()?;
+        let body: ClipHealth = resp.json().await?;
+        Ok(body)
+    }
+
+    /// POSTs a multipart form to `url` and decodes the encode response.
+    async fn send_multipart(
+        &self,
+        url: &str,
+        form: reqwest::multipart::Form,
+    ) -> std::result::Result<EncodeResponse, ClipError> {
+        Self::send_encode_request(self.client.post(url).multipart(form)).await
+    }
+
+    /// Shared send path for both encode endpoints.
+    ///
+    /// Every transport failure is transient (timeouts and connect errors are
+    /// labelled "network", the rest "request"). A non-success status is
+    /// classified by `classify_error_response`. A success body that does not
+    /// deserialize is reported as transient.
+    async fn send_encode_request(
+        request: reqwest::RequestBuilder,
+    ) -> std::result::Result<EncodeResponse, ClipError> {
+        let resp = request.send().await.map_err(|e| {
+            let stage = if e.is_timeout() || e.is_connect() {
+                "network"
+            } else {
+                "request"
+            };
+            ClipError::Transient(anyhow::anyhow!("clip client {stage}: {e}"))
+        })?;
+        let status = resp.status();
+        if !status.is_success() {
+            // Best effort: an unreadable error body is classified as empty.
+            let body_text = resp.text().await.unwrap_or_default();
+            return Err(classify_error_response(status.as_u16(), &body_text));
+        }
+        resp.json::<EncodeResponse>()
+            .await
+            .map_err(|e| ClipError::Transient(anyhow::anyhow!("clip response decode: {e}")))
+    }
+}
+
+/// Maps an HTTP status and response body to a [`ClipError`] class.
+///
+/// Kept as a pure function so the mapping can be unit-tested without an
+/// HTTP server. Retryable (`Transient`): 503, 408, 413, 429 and every
+/// status outside 400..500. Not retryable (`Permanent`): 422 and all other
+/// 4xx. The message carries the status, the `detail` code extracted from a
+/// JSON body (either a string `detail` or `detail.code`; empty when absent)
+/// and the raw body text.
+fn classify_error_response(status: u16, body_text: &str) -> ClipError {
+    let parsed = serde_json::from_str::<serde_json::Value>(body_text).ok();
+    let detail = parsed.as_ref().and_then(|json| json.get("detail"));
+    let detail_code = match detail {
+        Some(serde_json::Value::String(code)) => code.clone(),
+        Some(other) => other
+            .get("code")
+            .and_then(|code| code.as_str())
+            .unwrap_or_default()
+            .to_string(),
+        None => String::new(),
+    };
+
+    let retryable = match status {
+        422 => false,
+        // 503 is the engine asking to retry; 408 / 413 / 429 are
+        // operator-fixable infra issues, so those defer as well.
+        503 | 408 | 413 | 429 => true,
+        other => !(400..500).contains(&other),
+    };
+
+    let cause = anyhow::anyhow!("clip {} {}: {}", status, detail_code, body_text);
+    if retryable {
+        ClipError::Transient(cause)
+    } else {
+        ClipError::Permanent(cause)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Names the variant `classify_error_response` picks for a status/body.
+    fn class_of(status: u16, body: &str) -> &'static str {
+        match classify_error_response(status, body) {
+            ClipError::Permanent(_) => "permanent",
+            ClipError::Transient(_) => "transient",
+            ClipError::Disabled => "disabled",
+        }
+    }
+
+    /// Builds a response whose embedding is the base64 of `payload`.
+    fn response_with(dim: i32, payload: &[u8]) -> EncodeResponse {
+        use base64::Engine;
+        EncodeResponse {
+            model_version: "ViT-L/14".into(),
+            embedding_dim: dim,
+            duration_ms: 0,
+            embedding: base64::engine::general_purpose::STANDARD.encode(payload),
+        }
+    }
+
+    #[test]
+    fn classify_422_decode_failed_is_permanent() {
+        let body = r#"{"detail":"decode_failed: bad bytes"}"#;
+        assert_eq!(class_of(422, body), "permanent");
+    }
+
+    #[test]
+    fn classify_422_empty_text_is_permanent() {
+        let body = r#"{"detail":"empty_text"}"#;
+        assert_eq!(class_of(422, body), "permanent");
+    }
+
+    #[test]
+    fn classify_503_cuda_oom_is_transient() {
+        let body = r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#;
+        assert_eq!(class_of(503, body), "transient");
+    }
+
+    #[test]
+    fn classify_5xx_is_transient_other_4xx_is_permanent() {
+        assert_eq!(class_of(500, ""), "transient");
+        assert_eq!(class_of(404, "{}"), "permanent");
+    }
+
+    #[test]
+    fn classify_infra_4xx_is_transient() {
+        for (status, body) in [(408, ""), (413, "<html>"), (429, "{}")] {
+            assert_eq!(class_of(status, body), "transient");
+        }
+    }
+
+    #[test]
+    fn decode_embedding_size_mismatch_errors() {
+        // dim=4 says we expect 16 bytes (4 floats × 4 bytes). Encode 8.
+        let resp = response_with(4, &[0u8; 8]);
+        assert!(resp.decode_embedding().is_err());
+    }
+
+    #[test]
+    fn decode_embedding_round_trip() {
+        let bytes: Vec<u8> = (0..16).collect();
+        let resp = response_with(4, &bytes);
+        assert_eq!(resp.decode_embedding().unwrap(), bytes);
+    }
+}
@@ -170,3 +170,55 @@ pub struct ModelCapabilities {
    pub has_vision: bool,
    pub has_tool_calling: bool,
 }
+
+/// Drop everything up to and including the first `</think>` tag.
+///
+/// The input is trimmed first. If a closing tag is found and the trimmed
+/// text after it is non-empty, that text is the result. Otherwise (no tag,
+/// or nothing but whitespace after it) the trimmed input is returned
+/// unchanged, so a reply consisting only of a think block is never reduced
+/// to an empty string.
+pub fn strip_think_blocks(response: &str) -> String {
+    const CLOSE_TAG: &str = "</think>";
+
+    let trimmed = response.trim();
+    let answer = trimmed
+        .split_once(CLOSE_TAG)
+        .map(|(_thoughts, rest)| rest.trim())
+        .filter(|rest| !rest.is_empty());
+
+    answer.unwrap_or(trimmed).to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn strip_think_blocks_removes_leading_think_block() {
+        let cleaned = strip_think_blocks(
+            "<think>\nLet me reason about this.\n</think>\n\nTitle: A Day Out\n\nThe body.",
+        );
+        assert_eq!(cleaned, "Title: A Day Out\n\nThe body.");
+    }
+
+    #[test]
+    fn strip_think_blocks_passes_through_plain_content() {
+        let cleaned = strip_think_blocks("  just an answer  ");
+        assert_eq!(cleaned, "just an answer");
+    }
+
+    #[test]
+    fn strip_think_blocks_keeps_content_when_answer_after_tag_is_empty() {
+        // Nothing follows the think block, so the trimmed original comes
+        // back rather than an empty string.
+        let only_thoughts = "<think>only thoughts</think>";
+        assert_eq!(strip_think_blocks(only_thoughts), only_thoughts);
+    }
+
+    #[test]
+    fn strip_think_blocks_handles_unclosed_tag() {
+        // No closing tag at all: input is returned as-is.
+        let unclosed = "<think>thinking forever";
+        assert_eq!(strip_think_blocks(unclosed), unclosed);
+    }
+}
@@ -1,14 +1,18 @@
 pub mod apollo_client;
+pub mod backend;
+pub mod clip_client;
 pub mod daily_summary_job;
 pub mod face_client;
 pub mod handlers;
 pub mod insight_chat;
 pub mod insight_generator;
+pub mod llamacpp;
 pub mod llm_client;
 pub mod ollama;
 pub mod openrouter;
 pub mod sms_client;
-pub mod tag_client;
+pub mod tts;
+pub mod turn_registry;

 // strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
 #[allow(unused_imports)]
@@ -17,18 +21,25 @@ pub use daily_summary_job::{
    generate_daily_summaries, strip_summary_boilerplate,
 };
 pub use handlers::{
-    chat_history_handler, chat_rewind_handler, chat_stream_handler, chat_turn_handler,
-    delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler,
-    generate_insight_handler, get_all_insights_handler, get_available_models_handler,
-    get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
+    cancel_generation_handler, cancel_turn_handler, chat_history_handler, chat_rewind_handler,
+    chat_stream_handler, chat_turn_handler, delete_insight_handler, export_training_data_handler,
+    generate_agentic_insight_handler, generate_insight_handler, generation_status_handler,
+    get_all_insights_handler, get_available_models_handler, get_insight_handler,
+    get_insight_history_handler, get_openrouter_models_handler, rate_insight_handler,
+    turn_async_handler, turn_replay_handler,
 };
 pub use insight_generator::InsightGenerator;
+pub use llamacpp::LlamaCppClient;
 #[allow(unused_imports)]
 pub use llm_client::{
    ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
 };
 pub use ollama::{EMBEDDING_MODEL, OllamaClient};
 pub use sms_client::{SmsApiClient, SmsMessage};
+pub use tts::{
+    create_voice_from_library_handler, create_voice_upload_handler, list_voices_handler,
+    tts_speech_handler,
+};

 /// Display name used for the user in message transcripts and first-person
 /// prompt text. Reads the `USER_NAME` env var; defaults to `"Me"`. Models
@@ -38,3 +49,88 @@ pub use sms_client::{SmsApiClient, SmsMessage};
 pub fn user_display_name() -> String {
    std::env::var("USER_NAME").unwrap_or_else(|_| "Me".to_string())
 }
+
+/// Reports whether the global "local" LLM stack is llama-swap.
+///
+/// True only when the `LLM_BACKEND` env var, trimmed and lowercased, equals
+/// `llamacpp`. Unset, unreadable, or any other value means Ollama. The
+/// switch is deliberately process-wide: chat, vision describe and embeddings
+/// all follow it, because embeddings drawn from two sources would mix vector
+/// spaces and break similarity search across the index. The per-request
+/// `backend=hybrid` override is orthogonal: chat goes to OpenRouter, and
+/// this switch still selects the stack for the describe pass.
+pub fn local_backend_is_llamacpp() -> bool {
+    match std::env::var("LLM_BACKEND") {
+        Ok(raw) => raw.trim().to_lowercase() == "llamacpp",
+        Err(_) => false,
+    }
+}
+
+/// Embeds a single string with whichever local backend is active.
+///
+/// When [`local_backend_is_llamacpp`] is false the call goes to Ollama.
+/// Otherwise it goes to llama-swap through `llamacpp`, and the last vector
+/// of the returned batch (a batch of one input) is handed back.
+///
+/// # Errors
+/// Fails when llama.cpp is selected but no client was supplied, when the
+/// backend call fails, or when llama-swap returns an empty batch.
+pub async fn embed_one(
+    ollama: &OllamaClient,
+    llamacpp: Option<&LlamaCppClient>,
+    text: &str,
+) -> anyhow::Result<Vec<f32>> {
+    if !local_backend_is_llamacpp() {
+        return ollama.generate_embedding(text).await;
+    }
+
+    // Selected but unconfigured is an operator error; never fall back to
+    // Ollama silently, that would mix embedding sources.
+    let Some(lc) = llamacpp else {
+        anyhow::bail!(
+            "LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
+        );
+    };
+
+    let mut batch = <LlamaCppClient as LlmClient>::generate_embeddings(lc, &[text]).await?;
+    batch
+        .pop()
+        .ok_or_else(|| anyhow::anyhow!("llama-swap returned no embeddings"))
+}
+
+#[cfg(test)]
+mod env_dispatch_tests {
+    use super::*;
+    use std::sync::Mutex;
+
+    /// Serializes the tests below. They all mutate the same process-wide
+    /// `LLM_BACKEND` variable and the test harness runs tests on several
+    /// threads, so without a lock one test can observe another's value.
+    static ENV_LOCK: Mutex<()> = Mutex::new(());
+
+    /// Puts a variable back to its previous state on drop, so it is restored
+    /// even when the test closure panics on a failed assertion.
+    struct RestoreEnv<'a> {
+        key: &'a str,
+        prev: Option<String>,
+    }
+
+    impl Drop for RestoreEnv<'_> {
+        fn drop(&mut self) {
+            // SAFETY: only constructed inside `with_env` after `ENV_LOCK` is
+            // taken and dropped before the lock guard, so no other test in
+            // this module mutates the environment at the same time.
+            match self.prev.take() {
+                Some(v) => unsafe { std::env::set_var(self.key, v) },
+                None => unsafe { std::env::remove_var(self.key) },
+            }
+        }
+    }
+
+    /// Runs `f` with `key` set to `val` (`None` = removed), then restores
+    /// the previous value.
+    fn with_env<F: FnOnce()>(key: &str, val: Option<&str>, f: F) {
+        // A poisoned lock only means an earlier test failed while holding
+        // it; the unit payload cannot be left inconsistent.
+        let _serial = ENV_LOCK
+            .lock()
+            .unwrap_or_else(|poisoned| poisoned.into_inner());
+        // Declared after the lock guard so it is dropped first: the old
+        // value is back in place before the next test may proceed.
+        let _restore = RestoreEnv {
+            key,
+            prev: std::env::var(key).ok(),
+        };
+        // SAFETY: `ENV_LOCK` is held, see `RestoreEnv::drop`.
+        match val {
+            Some(v) => unsafe { std::env::set_var(key, v) },
+            None => unsafe { std::env::remove_var(key) },
+        }
+        f();
+    }
+
+    #[test]
+    fn llm_backend_defaults_to_ollama() {
+        with_env("LLM_BACKEND", None, || {
+            assert!(!local_backend_is_llamacpp());
+        });
+    }
+
+    #[test]
+    fn llm_backend_llamacpp_case_insensitive() {
+        with_env("LLM_BACKEND", Some("LlamaCpp"), || {
+            assert!(local_backend_is_llamacpp());
+        });
+        with_env("LLM_BACKEND", Some("  llamacpp "), || {
+            assert!(local_backend_is_llamacpp());
+        });
+    }
+
+    #[test]
+    fn llm_backend_unknown_value_is_ollama() {
+        with_env("LLM_BACKEND", Some("vllm"), || {
+            assert!(!local_backend_is_llamacpp());
+        });
+    }
+}
@@ -360,18 +360,7 @@ impl OllamaClient {
    /// Extract final answer from thinking model output
    /// Handles <think>...</think> tags and takes everything after
    fn extract_final_answer(&self, response: &str) -> String {
-        let response = response.trim();
-
-        // Look for </think> tag and take everything after it
-        if let Some(pos) = response.find("</think>") {
-            let answer = response[pos + 8..].trim();
-            if !answer.is_empty() {
-                return answer.to_string();
-            }
-        }
-
-        // Fallback: return the whole response trimmed
-        response.to_string()
+        crate::ai::llm_client::strip_think_blocks(response)
    }

    async fn try_generate(
@@ -424,10 +413,7 @@ impl OllamaClient {
        self.generate_with_images(prompt, system, None).await
    }

-    /// Variant of `generate` that sets Ollama's top-level `think: false`.
-    /// Used by latency-sensitive callers like the rerank pass, where the
-    /// task has nothing to reason about and chain-of-thought tokens are
-    /// wasted wall time. Server-side no-op on non-reasoning models.
+    #[allow(dead_code)]
    pub async fn generate_no_think(&self, prompt: &str, system: Option<&str>) -> Result<String> {
        self.generate_with_options(prompt, system, None, Some(false))
            .await
@@ -849,11 +835,14 @@ Analyze the image and use specific details from both the visual content and the
                            if !chunk.message.role.is_empty() {
                                role = chunk.message.role;
                            }
-                            // Ollama only attaches tool_calls on the final chunk.
+                            // Ollama ≥0.8 can stream tool_calls incrementally
+                            // across chunks (older servers attach them all to
+                            // one chunk) — append rather than overwrite so
+                            // calls from earlier chunks survive.
                            if let Some(tcs) = chunk.message.tool_calls
                                && !tcs.is_empty()
                            {
-                                tool_calls = Some(tcs);
+                                append_streamed_tool_calls(&mut tool_calls, tcs);
                            }
                            if chunk.done {
                                prompt_eval_count = chunk.prompt_eval_count;
@@ -1332,8 +1321,20 @@ struct OllamaEmbedResponse {
    embeddings: Vec<Vec<f32>>,
 }

+/// Appends the tool calls of one stream chunk to the running accumulator.
+///
+/// Chunks may each carry only some of the calls, so earlier entries must be
+/// kept: an existing list is extended in order, and an empty accumulator is
+/// initialized with the incoming calls.
+fn append_streamed_tool_calls(
+    acc: &mut Option<Vec<crate::ai::llm_client::ToolCall>>,
+    new: Vec<crate::ai::llm_client::ToolCall>,
+) {
+    match acc {
+        Some(collected) => collected.extend(new),
+        None => *acc = Some(new),
+    }
+}
+
 #[cfg(test)]
 mod tests {
+    use super::append_streamed_tool_calls;
+    use crate::ai::llm_client::{ToolCall, ToolCallFunction};

    #[test]
    fn generate_photo_description_prompt_is_concise() {
@@ -1344,4 +1345,38 @@ mod tests {
                      Focus on the people, location, and activity.";
        assert!(prompt.len() < 200, "Prompt should be concise");
    }
+
+    /// Builds a tool call with the given function name, no id and an empty
+    /// JSON object as arguments.
+    fn call(name: &str) -> ToolCall {
+        let function = ToolCallFunction {
+            name: name.to_owned(),
+            arguments: serde_json::json!({}),
+        };
+        ToolCall { id: None, function }
+    }
+
+    #[test]
+    fn streamed_tool_calls_across_chunks_accumulate() {
+        // One call per chunk, two chunks: both must survive assembly, in
+        // arrival order. Overwriting the accumulator would keep only the
+        // second.
+        let mut acc: Option<Vec<ToolCall>> = None;
+        for name in ["get_sms_messages", "reverse_geocode"] {
+            append_streamed_tool_calls(&mut acc, vec![call(name)]);
+        }
+
+        let calls = acc.expect("tool calls accumulated");
+        let names: Vec<&str> = calls.iter().map(|c| c.function.name.as_str()).collect();
+        assert_eq!(names, ["get_sms_messages", "reverse_geocode"]);
+    }
+
+    #[test]
+    fn streamed_tool_calls_single_chunk_batch_kept_intact() {
+        // All calls attached to a single chunk: the batch is kept whole.
+        let mut acc: Option<Vec<ToolCall>> = None;
+        let batch = vec![call("a"), call("b")];
+        append_streamed_tool_calls(&mut acc, batch);
+        assert_eq!(acc.expect("tool calls accumulated").len(), 2);
+    }
 }
@@ -281,6 +281,9 @@ impl SmsApiClient {
        if let Some(cid) = params.contact_id {
            url.push_str(&format!("&contact_id={}", cid));
        }
+        if let Some(ref c) = params.contact {
+            url.push_str(&format!("&contact={}", urlencoding::encode(c)));
+        }
        if let Some(off) = params.offset {
            url.push_str(&format!("&offset={}", off));
        }
@@ -413,6 +416,9 @@ pub struct SmsSearchParams<'a> {
    pub mode: &'a str,
    pub limit: usize,
    pub contact_id: Option<i64>,
+    /// Contact name (case-insensitive). Resolved to a numeric ID by the
+    /// SMS-API server when `contact_id` is not set.
+    pub contact: Option<String>,
    /// Unix-seconds inclusive lower bound on `date`.
    pub date_from: Option<i64>,
    /// Unix-seconds inclusive upper bound on `date`.
@@ -1,319 +0,0 @@
-//! Thin async HTTP client for Apollo's `/api/internal/tags/*` endpoints.
-//!
-//! Apollo hosts the RAM++ auto-tag inference service alongside insightface.
-//! This client is the ImageApi side — shove image bytes through `/auto` and
-//! get back a list of `(name, confidence)` predictions over RAM++'s
-//! ~4585-tag vocabulary.
-//!
-//! Mirrors `face_client.rs` shape: optional base URL (None = disabled), one
-//! reqwest client with a generous timeout because GPU inference under a
-//! backlog can queue server-side (Apollo's threadpool is bounded to 1
-//! worker on CUDA).
-//!
-//! Configured via `APOLLO_TAG_API_BASE_URL`, falling back to
-//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo
-//! deploys are the common case). Both unset → `is_enabled()` returns false
-//! and the probe binary / future backlog drain no-op.
-//!
-//! Wire format: multipart/form-data with `file=<bytes>` and `meta=<json>`.
-//! `meta` carries `{content_hash, library_id, rel_path, threshold?}` —
-//! Apollo logs the path/lib for traceability and reads `threshold` to
-//! override the engine default for that call (the probe binary uses this
-//! to sweep without restarting Apollo).
-//!
-//! Error mapping (reflected in [`TagDetectError`]):
-//! - 422 `decode_failed` → permanent: ImageApi marks `status='failed'` and
-//!   doesn't retry until a manual rerun.
-//! - 200 with `tags:[]` → `status='no_tags'` marker (success-with-zero).
-//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker
-//!   written.
-//! - Any other 5xx / network error → defer.
-
-use anyhow::{Context, Result};
-use reqwest::Client;
-use serde::{Deserialize, Serialize};
-use std::time::Duration;
-
-#[derive(Debug, Clone, Serialize)]
-pub struct TagMeta {
-    pub content_hash: String,
-    pub library_id: i32,
-    pub rel_path: String,
-    /// Per-call threshold override. Apollo's engine default (0.68 for
-    /// ram_plus_swin_large_14m) is used when unset. The probe binary
-    /// uses this to sweep without restarting Apollo.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub threshold: Option<f32>,
-}
-
-#[derive(Debug, Clone, Deserialize)]
-pub struct TagPrediction {
-    pub name: String,
-    pub confidence: f32,
-}
-
-#[derive(Debug, Clone, Deserialize)]
-pub struct TagResponse {
-    pub model_version: String,
-    pub duration_ms: i64,
-    pub threshold: f32,
-    pub tags: Vec<TagPrediction>,
-}
-
-#[derive(Debug, Clone, Deserialize)]
-#[allow(dead_code)] // Reported by Apollo; load_error consumed by future health probe
-pub struct TagHealth {
-    pub loaded: bool,
-    pub device: String,
-    pub model_version: String,
-    pub image_size: i32,
-    pub threshold: f32,
-    #[serde(default)]
-    pub load_error: Option<String>,
-}
-
-/// Distinguishes permanent failures (don't retry) from transient ones
-/// (defer and retry on next scan tick). Mirrors `FaceDetectError` so the
-/// future backlog drain can use the same marker-row decision tree.
-#[derive(Debug)]
-pub enum TagDetectError {
-    /// Apollo refused the bytes for a reason that won't change on retry
-    /// (decode failure, zero-dim image). Mark `status='failed'`.
-    Permanent(anyhow::Error),
-    /// Apollo couldn't process this turn but might next time (CUDA OOM,
-    /// engine not loaded yet, network hiccup). Don't mark anything.
-    Transient(anyhow::Error),
-    /// Feature is disabled (no APOLLO_TAG_API_BASE_URL / APOLLO_API_BASE_URL).
-    /// Caller should silently no-op.
-    Disabled,
-}
-
-impl std::fmt::Display for TagDetectError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            TagDetectError::Permanent(e) => write!(f, "permanent: {e}"),
-            TagDetectError::Transient(e) => write!(f, "transient: {e}"),
-            TagDetectError::Disabled => write!(f, "tag client disabled"),
-        }
-    }
-}
-
-impl std::error::Error for TagDetectError {}
-
-#[derive(Clone)]
-pub struct TagClient {
-    client: Client,
-    /// `None` → disabled. Trailing slash trimmed at construction so url
-    /// building doesn't double up.
-    base_url: Option<String>,
-}
-
-impl TagClient {
-    pub fn new(base_url: Option<String>) -> Self {
-        // 60 s timeout: GPU inference is fast (~50–150 ms on RTX-class
-        // hardware) but Apollo's 1-worker threadpool means a backlog drain
-        // queues server-side. 60 s is enough headroom for a small queue
-        // depth without surfacing a false transient.
-        let timeout_secs = std::env::var("TAG_DETECT_TIMEOUT_SEC")
-            .ok()
-            .and_then(|s| s.parse::<u64>().ok())
-            .unwrap_or(60);
-        let client = Client::builder()
-            .timeout(Duration::from_secs(timeout_secs))
-            .build()
-            .expect("reqwest client build");
-        Self {
-            client,
-            base_url: base_url.map(|u| u.trim_end_matches('/').to_string()),
-        }
-    }
-
-    /// Construct a client from the standard env vars. APOLLO_TAG_API_BASE_URL
-    /// wins; falls back to APOLLO_API_BASE_URL. Both unset → disabled.
-    pub fn from_env() -> Self {
-        let base = std::env::var("APOLLO_TAG_API_BASE_URL")
-            .ok()
-            .filter(|s| !s.trim().is_empty())
-            .or_else(|| {
-                std::env::var("APOLLO_API_BASE_URL")
-                    .ok()
-                    .filter(|s| !s.trim().is_empty())
-            });
-        Self::new(base)
-    }
-
-    pub fn is_enabled(&self) -> bool {
-        self.base_url.is_some()
-    }
-
-    /// Run RAM++ auto-tagging over `bytes`. Empty `tags[]` is the no-tags
-    /// signal — caller writes a marker row in the persistence phase.
-    pub async fn auto_tag(
-        &self,
-        bytes: Vec<u8>,
-        meta: TagMeta,
-    ) -> std::result::Result<TagResponse, TagDetectError> {
-        let Some(base) = self.base_url.as_deref() else {
-            return Err(TagDetectError::Disabled);
-        };
-        let url = format!("{}/api/internal/tags/auto", base);
-        self.post_multipart(&url, bytes, &meta).await
-    }
-
-    /// Engine reachability + device/model report.
-    #[allow(dead_code)] // consumed by future startup probe
-    pub async fn health(&self) -> Result<TagHealth> {
-        let base = self.base_url.as_deref().context("tag client disabled")?;
-        let url = format!("{}/api/internal/tags/health", base);
-        let resp = self.client.get(&url).send().await?.error_for_status()?;
-        let body: TagHealth = resp.json().await?;
-        Ok(body)
-    }
-
-    async fn post_multipart(
-        &self,
-        url: &str,
-        bytes: Vec<u8>,
-        meta: &TagMeta,
-    ) -> std::result::Result<TagResponse, TagDetectError> {
-        let meta_json = serde_json::to_string(meta)
-            .map_err(|e| TagDetectError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?;
-        let form = reqwest::multipart::Form::new()
-            .text("meta", meta_json)
-            .part(
-                "file",
-                reqwest::multipart::Part::bytes(bytes)
-                    .file_name(meta.rel_path.clone())
-                    .mime_str("application/octet-stream")
-                    .unwrap_or_else(|_| reqwest::multipart::Part::bytes(Vec::new())),
-            );
-
-        let resp = match self.client.post(url).multipart(form).send().await {
-            Ok(r) => r,
-            Err(e) if e.is_timeout() || e.is_connect() => {
-                return Err(TagDetectError::Transient(anyhow::anyhow!(
-                    "tag client network: {e}"
-                )));
-            }
-            Err(e) => {
-                return Err(TagDetectError::Transient(anyhow::anyhow!(
-                    "tag client request: {e}"
-                )));
-            }
-        };
-
-        let status = resp.status();
-        if status.is_success() {
-            let body: TagResponse = resp.json().await.map_err(|e| {
-                TagDetectError::Transient(anyhow::anyhow!("tag response decode: {e}"))
-            })?;
-            return Ok(body);
-        }
-
-        let body_text = resp.text().await.unwrap_or_default();
-        Err(classify_error_response(status.as_u16(), &body_text))
-    }
-}
-
-/// Pulled out as a pure function so the marker-row contract is unit-testable
-/// without spinning up an HTTP server. Behavior matches face_client::classify
-/// so the future backlog drain can share the same retry policy.
-fn classify_error_response(status: u16, body_text: &str) -> TagDetectError {
-    let detail_code = serde_json::from_str::<serde_json::Value>(body_text)
-        .ok()
-        .and_then(|v| {
-            v.get("detail")
-                .and_then(|d| d.as_str().map(str::to_string))
-                .or_else(|| {
-                    v.get("detail")
-                        .and_then(|d| d.get("code"))
-                        .and_then(|c| c.as_str())
-                        .map(str::to_string)
-                })
-        })
-        .unwrap_or_default();
-
-    if status == 422 {
-        return TagDetectError::Permanent(anyhow::anyhow!(
-            "tag detect 422 {}: {}",
-            detail_code,
-            body_text
-        ));
-    }
-    if status == 503 {
-        return TagDetectError::Transient(anyhow::anyhow!(
-            "tag detect 503 {}: {}",
-            detail_code,
-            body_text
-        ));
-    }
-    // 408 / 413 / 429 are operator-fixable infra issues — defer so the
-    // next pass retries naturally once the proxy is fixed (see
-    // face_client::classify_error_response for the cautionary tale).
-    if matches!(status, 408 | 413 | 429) {
-        return TagDetectError::Transient(anyhow::anyhow!(
-            "tag detect {} {}: {}",
-            status,
-            detail_code,
-            body_text
-        ));
-    }
-    if (400..500).contains(&status) {
-        TagDetectError::Permanent(anyhow::anyhow!(
-            "tag detect {} {}: {}",
-            status,
-            detail_code,
-            body_text
-        ))
-    } else {
-        TagDetectError::Transient(anyhow::anyhow!(
-            "tag detect {} {}: {}",
-            status,
-            detail_code,
-            body_text
-        ))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn is_permanent(e: &TagDetectError) -> bool {
-        matches!(e, TagDetectError::Permanent(_))
-    }
-    fn is_transient(e: &TagDetectError) -> bool {
-        matches!(e, TagDetectError::Transient(_))
-    }
-
-    #[test]
-    fn classify_422_decode_failed_is_permanent() {
-        let e = classify_error_response(422, r#"{"detail":"decode_failed: bad bytes"}"#);
-        assert!(is_permanent(&e));
-        assert!(format!("{e}").contains("decode_failed"));
-    }
-
-    #[test]
-    fn classify_503_cuda_oom_is_transient() {
-        let e = classify_error_response(
-            503,
-            r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#,
-        );
-        assert!(is_transient(&e));
-        assert!(format!("{e}").contains("cuda_oom"));
-    }
-
-    #[test]
-    fn classify_5xx_is_transient_other_4xx_is_permanent() {
-        assert!(is_transient(&classify_error_response(500, "")));
-        assert!(is_permanent(&classify_error_response(400, "{}")));
-        assert!(is_permanent(&classify_error_response(404, "{}")));
-    }
-
-    #[test]
-    fn classify_infra_4xx_is_transient() {
-        assert!(is_transient(&classify_error_response(408, "")));
-        assert!(is_transient(&classify_error_response(413, "<html>")));
-        assert!(is_transient(&classify_error_response(429, "{}")));
-    }
-}
@@ -0,0 +1,580 @@
+// TTS endpoints: proxy text-to-speech + voice-library management to the
+// Chatterbox server that sits behind llama-swap (via LlamaCppClient). Speech
+// synthesis returns audio as base64-in-JSON so the mobile app can play it as a
+// `data:` URI without a binary-fetch path. Voice cloning registers a named
+// voice from either an uploaded clip (device) or an existing library file
+// (audio read directly; video has its audio track extracted via ffmpeg).
+
+use actix_multipart::Multipart;
+use actix_web::{HttpRequest, HttpResponse, Responder, get, post, web};
+use anyhow::Context;
+use base64::Engine;
+use bytes::{BufMut, BytesMut};
+use futures::StreamExt;
+use opentelemetry::KeyValue;
+use opentelemetry::trace::{Span, Status, Tracer};
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+use serde_json::json;
+use std::path::Path;
+use std::sync::LazyLock;
+use tokio::sync::Semaphore;
+
+use crate::data::Claims;
+use crate::file_types::{is_audio_file, is_video_file};
+use crate::files::is_valid_full_path;
+use crate::libraries;
+use crate::otel::{extract_context_from_request, global_tracer};
+use crate::state::AppState;
+
+/// Hard cap on an uploaded voice-reference clip. Chatterbox itself caps the
+/// payload (~60s clip); this is a defensive ceiling so a hostile/oversized
+/// upload can't balloon ImageApi memory before we ever forward it.
+const MAX_VOICE_UPLOAD_BYTES: usize = 25 * 1024 * 1024; // 25 MB
+
+/// Serialize speech synthesis: the Chatterbox server has no internal lock or
+/// queue, so concurrent requests contend on the single GPU and cascade into
+/// timeouts. One permit; when busy we fast-fail with 429 rather than queue —
+/// the app surfaces "busy" immediately, and typical jobs clear in well under a
+/// minute. (An abandoned upstream job can still occupy the GPU until it
+/// finishes — that's a wrapper limitation; the chunked-queue plan fixes it.)
+static TTS_PERMIT: LazyLock<Semaphore> = LazyLock::new(|| Semaphore::new(1));
+
+/// Sanitize a user-supplied voice name. The name is forwarded to Chatterbox
+/// where it becomes a filename in the voice-library directory, so every
+/// character outside `[A-Za-z0-9_-]` (path separators, dots, whitespace, …)
+/// is replaced with `-`, dashes at either edge are dropped, and the result is
+/// capped at 64 characters. Returns `None` when nothing usable remains.
+fn sanitize_voice_name(raw: &str) -> Option<String> {
+    let mapped: String = raw
+        .trim()
+        .chars()
+        .map(|c| {
+            if c.is_ascii_alphanumeric() || c == '-' || c == '_' {
+                c
+            } else {
+                '-'
+            }
+        })
+        .collect();
+    // Cap the length BEFORE the final edge trim: truncating afterwards could
+    // cut right behind a replaced character and leave a dangling '-'.
+    let capped: String = mapped.trim_start_matches('-').chars().take(64).collect();
+    let name = capped.trim_end_matches('-');
+    (!name.is_empty()).then(|| name.to_string())
+}
+
+/// Fallback voice for synthesis requests that do not name one, taken from the
+/// `LLAMA_SWAP_TTS_VOICE` environment variable (e.g. `LLAMA_SWAP_TTS_VOICE=m`
+/// reads insights in the cloned voice `m`). Surrounding whitespace is trimmed;
+/// an unset, unreadable, or blank variable yields `None`.
+fn default_voice() -> Option<String> {
+    let configured = std::env::var("LLAMA_SWAP_TTS_VOICE").ok()?;
+    let name = configured.trim();
+    (!name.is_empty()).then(|| name.to_string())
+}
+
+// Markdown / formatting strippers, compiled once. Insight text is markdown,
+// which TTS would otherwise read literally ("star star bold star star").
+static MD_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]*\)").unwrap());
+static MD_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]*\)").unwrap());
+static MD_HEADING: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"(?m)^\s{0,3}#{1,6}\s*").unwrap());
+static MD_BLOCKQUOTE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?m)^\s{0,3}>\s?").unwrap());
+static MD_LIST: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"(?m)^\s{0,3}([-*+]|\d+\.)\s+").unwrap());
+static MD_EMPHASIS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[*_`~]+").unwrap());
+static URL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"https?://\S+").unwrap());
+static MULTISPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[ \t]{2,}").unwrap());
+// Any run of 2+ newlines (incl. whitespace-only blank lines) collapses to ONE
+// newline: Chatterbox inserts a long pause (sometimes ~20s of silence) per
+// blank line, so paragraph breaks must reach it as a single line break at most.
+static MULTINEWLINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\n(?:[ \t]*\n)+").unwrap());
+
+/// Reports whether `c` is an emoji or pictographic symbol — characters most
+/// TTS models either skip or mispronounce. Besides the main emoji blocks this
+/// covers dingbats, misc-technical, variation selectors, and the zero-width
+/// joiner that glues emoji sequences together. `[bracketed]` tags are NOT
+/// matched on purpose: non-turbo Chatterbox ignores them, and a future Turbo
+/// switch uses them as paralinguistic cues.
+fn is_emoji_like(c: char) -> bool {
+    // Inclusive code-point ranges treated as emoji-like.
+    const BLOCKS: [(u32, u32); 6] = [
+        (0x1F000, 0x1FAFF), // emoji, pictographs, supplemental symbols, flags
+        (0x2300, 0x23FF),   // misc technical (⌚ ⏰ ⏳ …)
+        (0x2600, 0x27BF),   // misc symbols + dingbats
+        (0x2B00, 0x2BFF),   // misc symbols & arrows (⭐ ⬆ …)
+        (0xFE00, 0xFE0F),   // variation selectors
+        (0x200D, 0x200D),   // zero-width joiner
+    ];
+    let code = u32::from(c);
+    BLOCKS.iter().any(|&(lo, hi)| (lo..=hi).contains(&code))
+}
+
+/// Normalize insight text for speech: unwrap markdown links/images to their
+/// visible text, drop heading/list/blockquote/emphasis markers and URLs, strip
+/// emoji, and collapse whitespace. Centralized here so every caller (app,
+/// WebUI, curl) gets clean audio.
+///
+/// Note that the emphasis pass removes every run of `*`, `_`, `` ` `` and `~`,
+/// including ones inside words (`snake_case` comes out as `snakecase`).
+fn clean_for_tts(input: &str) -> String {
+    // Images first: MD_LINK would otherwise match the `[alt](src)` tail of an
+    // image and leave its leading `!` behind.
+    let s = MD_IMAGE.replace_all(input, "$1");
+    let s = MD_LINK.replace_all(&s, "$1");
+    let s = MD_HEADING.replace_all(&s, "");
+    let s = MD_BLOCKQUOTE.replace_all(&s, "");
+    let s = MD_LIST.replace_all(&s, "");
+    let s = MD_EMPHASIS.replace_all(&s, "");
+    // Bare URLs become a space (not nothing) so neighbouring words stay apart;
+    // the MULTISPACE pass below squeezes the resulting double spaces.
+    let s = URL_RE.replace_all(&s, " ");
+    let s: String = s.chars().filter(|c| !is_emoji_like(*c)).collect();
+    let s = MULTISPACE.replace_all(&s, " ");
+    let s = MULTINEWLINE.replace_all(&s, "\n");
+    s.trim().to_string()
+}
+
+/// Decode an audio/video file to mono 24 kHz WAV via ffmpeg, returning the WAV
+/// bytes. Chatterbox validates the reference clip by file *extension* and
+/// rejects several formats (e.g. `.aac`, `.opus`), so we always normalize to
+/// WAV regardless of the source container. Only the first
+/// `LLAMA_SWAP_TTS_REF_SECONDS` seconds (default 30) are kept — references only
+/// need a few seconds of clean speech.
+///
+/// Errors if the temp file cannot be created, ffmpeg cannot be spawned or exits
+/// non-zero (its stderr is included in the error), or the output cannot be read.
+async fn run_ffmpeg_to_wav(input_path: &str) -> anyhow::Result<Vec<u8>> {
+    let out = tempfile::Builder::new()
+        .suffix(".wav")
+        .tempfile()
+        .context("creating temp wav")?;
+    let out_s = out.path().to_string_lossy().to_string();
+
+    // Cap the reference clip length. Chatterbox is zero-shot — a clean ~10–20s
+    // sample is the sweet spot and more rarely helps — so we use the first N
+    // seconds. Tune via LLAMA_SWAP_TTS_REF_SECONDS (default 30); a value that
+    // does not parse as a positive integer falls back to the default.
+    let secs = std::env::var("LLAMA_SWAP_TTS_REF_SECONDS")
+        .ok()
+        .and_then(|s| s.trim().parse::<u32>().ok())
+        .filter(|n| *n > 0)
+        .unwrap_or(30)
+        .to_string();
+
+    // -y: overwrite the already-created temp output; -vn: drop any video stream;
+    // -ac 1 / -ar 24000: mono at 24 kHz; -t: duration cap; -f wav: force WAV.
+    let output = tokio::process::Command::new("ffmpeg")
+        .args([
+            "-y", "-i", input_path, "-vn", "-ac", "1", "-ar", "24000", "-t", &secs, "-f", "wav",
+            &out_s,
+        ])
+        .output()
+        .await
+        .context("spawning ffmpeg")?;
+
+    if !output.status.success() {
+        anyhow::bail!("ffmpeg failed: {}", String::from_utf8_lossy(&output.stderr));
+    }
+    // NOTE(review): blocking read inside an async fn. The clip is duration-capped
+    // so the stall is presumably short — consider tokio::fs / spawn_blocking.
+    std::fs::read(&out_s).context("reading transcoded audio")
+}
+
+/// Transcode an uploaded clip held in memory to WAV. ffmpeg reads from a path,
+/// so the bytes are spooled to a temp file first; that file keeps the source
+/// extension (`.bin` when none is known) as a probe hint for ffmpeg.
+async fn transcode_bytes_to_wav(input: &[u8], src_ext: Option<&str>) -> anyhow::Result<Vec<u8>> {
+    let suffix = match src_ext {
+        Some(ext) if !ext.is_empty() => format!(".{ext}"),
+        _ => String::from(".bin"),
+    };
+    let spooled = tempfile::Builder::new()
+        .suffix(&suffix)
+        .tempfile()
+        .context("creating temp input")?;
+    let spooled_path = spooled.path();
+    std::fs::write(spooled_path, input).context("writing temp input")?;
+    // `spooled` stays alive across the await, so the file exists until ffmpeg is done.
+    run_ffmpeg_to_wav(&spooled_path.to_string_lossy()).await
+}
+
+#[derive(Debug, Deserialize)]
+pub struct TtsSpeechRequest {
+    pub text: String,
+    #[serde(default)]
+    pub voice: Option<String>,
+    /// Audio container, e.g. `"mp3"` (default) or `"wav"`.
+    #[serde(default)]
+    pub format: Option<String>,
+    /// Chatterbox knobs (clamped server-side). exaggeration 0.25–2.0 (emotion),
+    /// cfg_weight 0.0–1.0 (pace; ~0.3 for fast speakers, 0 to neutralize a
+    /// reference accent), temperature 0.05–5.0 (randomness).
+    #[serde(default)]
+    pub exaggeration: Option<f32>,
+    #[serde(default)]
+    pub cfg_weight: Option<f32>,
+    #[serde(default)]
+    pub temperature: Option<f32>,
+}
+
+#[derive(Debug, Serialize)]
+pub struct TtsSpeechResponse {
+    pub audio_base64: String,
+    pub format: String,
+}
+
+/// POST /tts/speech — synthesize `text` (optionally in a named `voice`) and
+/// return base64-encoded audio for `data:` URI playback on the client.
+///
+/// The text is passed through `clean_for_tts` first. Responses: 200 with a
+/// `TtsSpeechResponse`; 400 when the cleaned text is empty; 429 when another
+/// synthesis currently holds `TTS_PERMIT`; 502 when the backend call fails;
+/// 503 when no llama.cpp client is configured.
+#[post("/tts/speech")]
+pub async fn tts_speech_handler(
+    http_request: HttpRequest,
+    _claims: Claims,
+    req: web::Json<TtsSpeechRequest>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    let parent_context = extract_context_from_request(&http_request);
+    let mut span = global_tracer().start_with_context("http.tts.speech", &parent_context);
+
+    // Validate on the CLEANED text: input that is only markup/emoji counts as empty.
+    let text = clean_for_tts(&req.text);
+    if text.is_empty() {
+        span.set_status(Status::error("text is required"));
+        return HttpResponse::BadRequest().json(json!({ "error": "text is required" }));
+    }
+    let Some(client) = app_state.llamacpp.as_ref() else {
+        span.set_status(Status::error("tts backend not configured"));
+        return HttpResponse::ServiceUnavailable()
+            .json(json!({ "error": "TTS backend not configured (set LLAMA_SWAP_URL)" }));
+    };
+
+    let format = req
+        .format
+        .as_deref()
+        .filter(|s| !s.is_empty())
+        .unwrap_or("mp3");
+    // Bound to a local so the borrowed fallback below outlives `voice`.
+    let dv = default_voice();
+    let voice = req
+        .voice
+        .as_deref()
+        .filter(|s| !s.is_empty())
+        .or(dv.as_deref());
+
+    span.set_attribute(KeyValue::new("tts.model", client.tts_model.clone()));
+    span.set_attribute(KeyValue::new("tts.format", format.to_string()));
+    span.set_attribute(KeyValue::new("tts.has_voice", voice.is_some()));
+    // Byte length of the cleaned text, not the raw request text.
+    span.set_attribute(KeyValue::new("tts.text_len", text.len() as i64));
+
+    // Clamp generation knobs to Chatterbox's documented ranges before forwarding.
+    let exaggeration = req.exaggeration.map(|x| x.clamp(0.25, 2.0));
+    let cfg_weight = req.cfg_weight.map(|x| x.clamp(0.0, 1.0));
+    let temperature = req.temperature.map(|x| x.clamp(0.05, 5.0));
+
+    // One synthesis at a time (see TTS_PERMIT) — fast-fail when busy.
+    // `_permit` lives until the handler returns, i.e. across the backend call.
+    let Ok(_permit) = TTS_PERMIT.try_acquire() else {
+        span.set_status(Status::error("tts busy"));
+        return HttpResponse::TooManyRequests().json(json!({
+            "error": "TTS is busy with another request — try again shortly"
+        }));
+    };
+
+    match client
+        .text_to_speech(&text, voice, format, exaggeration, cfg_weight, temperature)
+        .await
+    {
+        Ok(bytes) => {
+            span.set_attribute(KeyValue::new("tts.audio_bytes", bytes.len() as i64));
+            span.set_status(Status::Ok);
+            let audio_base64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
+            HttpResponse::Ok().json(TtsSpeechResponse {
+                audio_base64,
+                format: format.to_string(),
+            })
+        }
+        Err(e) => {
+            span.set_status(Status::error("tts synthesis failed"));
+            log::error!("TTS synth failed: {:?}", e);
+            HttpResponse::BadGateway().json(json!({ "error": format!("TTS failed: {e}") }))
+        }
+    }
+}
+
+/// GET /tts/voices — list the Chatterbox voice library. Whatever the backend
+/// client's `list_voices` returns is serialized back to the caller unchanged.
+#[get("/tts/voices")]
+pub async fn list_voices_handler(
+    http_request: HttpRequest,
+    _claims: Claims,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    let parent_context = extract_context_from_request(&http_request);
+    let mut span = global_tracer().start_with_context("http.tts.voices.list", &parent_context);
+
+    let client = match app_state.llamacpp.as_ref() {
+        Some(configured) => configured,
+        None => {
+            span.set_status(Status::error("tts backend not configured"));
+            return HttpResponse::ServiceUnavailable()
+                .json(json!({ "error": "TTS backend not configured" }));
+        }
+    };
+
+    let voices = match client.list_voices().await {
+        Ok(listing) => listing,
+        Err(err) => {
+            span.set_status(Status::error("list_voices failed"));
+            log::error!("list_voices failed: {:?}", err);
+            return HttpResponse::BadGateway().json(json!({ "error": format!("{err}") }));
+        }
+    };
+    span.set_status(Status::Ok);
+    HttpResponse::Ok().json(voices)
+}
+
+/// POST /tts/voices/upload — register a cloned voice from an uploaded audio
+/// clip. Multipart fields: `voice_name` (text) + a file part (`voice_file`).
+///
+/// Any part that carries a filename is treated as the clip; the `voice_name`
+/// text part is sanitized with `sanitize_voice_name`; every other part is
+/// drained and ignored. The clip is transcoded to WAV before it is forwarded.
+///
+/// Responses: 200 with the backend's reply; 400 for a missing/invalid or
+/// oversized name, a missing clip, an interrupted upload, or undecodable
+/// audio; 413 when the clip exceeds `MAX_VOICE_UPLOAD_BYTES`; 502 when the
+/// backend call fails; 503 when no TTS backend is configured.
+#[post("/tts/voices/upload")]
+pub async fn create_voice_upload_handler(
+    http_request: HttpRequest,
+    _claims: Claims,
+    mut payload: Multipart,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    // Ceiling for the `voice_name` text part. The sanitized name keeps at most
+    // 64 characters, so anything approaching this size is not a real name —
+    // and without a cap the text part could be buffered without limit,
+    // bypassing the memory ceiling that MAX_VOICE_UPLOAD_BYTES enforces.
+    const MAX_VOICE_NAME_BYTES: usize = 1024;
+
+    let parent_context = extract_context_from_request(&http_request);
+    let mut span = global_tracer().start_with_context("http.tts.voices.upload", &parent_context);
+
+    let Some(client) = app_state.llamacpp.as_ref() else {
+        span.set_status(Status::error("tts backend not configured"));
+        return HttpResponse::ServiceUnavailable()
+            .json(json!({ "error": "TTS backend not configured" }));
+    };
+
+    let mut voice_name: Option<String> = None;
+    let mut file_bytes = BytesMut::new();
+    let mut filename = "voice.wav".to_string();
+
+    while let Some(Ok(mut part)) = payload.next().await {
+        // Capture disposition fields up front so the immutable borrow ends
+        // before we mutably stream the part body (mirrors handlers/image.rs).
+        let (fname_opt, name_opt) = {
+            let cd = part.content_disposition();
+            (
+                cd.and_then(|c| c.get_filename()).map(|s| s.to_string()),
+                cd.and_then(|c| c.get_name()).map(|s| s.to_string()),
+            )
+        };
+
+        if let Some(fname) = fname_opt {
+            filename = fname;
+            while let Some(chunk) = part.next().await {
+                // A read error means the clip arrived truncated; cloning a
+                // voice from half a sample is worse than failing the request.
+                let Ok(data) = chunk else {
+                    span.set_status(Status::error("upload read failed"));
+                    return HttpResponse::BadRequest()
+                        .json(json!({ "error": "upload was interrupted or malformed" }));
+                };
+                if file_bytes.len() + data.len() > MAX_VOICE_UPLOAD_BYTES {
+                    span.set_status(Status::error("voice clip exceeds limit"));
+                    return HttpResponse::PayloadTooLarge()
+                        .json(json!({ "error": "voice clip exceeds 25 MB" }));
+                }
+                file_bytes.put(data);
+            }
+        } else if name_opt.as_deref() == Some("voice_name") {
+            let mut buf = BytesMut::new();
+            while let Some(chunk) = part.next().await {
+                // Same reasoning as for the clip: never act on a partial name.
+                let Ok(data) = chunk else {
+                    span.set_status(Status::error("upload read failed"));
+                    return HttpResponse::BadRequest()
+                        .json(json!({ "error": "upload was interrupted or malformed" }));
+                };
+                if buf.len() + data.len() > MAX_VOICE_NAME_BYTES {
+                    span.set_status(Status::error("voice_name too long"));
+                    return HttpResponse::BadRequest()
+                        .json(json!({ "error": "voice_name is too long" }));
+                }
+                buf.put(data);
+            }
+            voice_name = Some(String::from_utf8_lossy(&buf).trim().to_string());
+        } else {
+            // Unknown part: drain it so the stream can advance to the next one.
+            while let Some(Ok(_)) = part.next().await {}
+        }
+    }
+
+    let Some(name) = voice_name.as_deref().and_then(sanitize_voice_name) else {
+        span.set_status(Status::error("voice_name is required"));
+        return HttpResponse::BadRequest()
+            .json(json!({ "error": "voice_name is required (alphanumerics, - and _ only)" }));
+    };
+    if file_bytes.is_empty() {
+        span.set_status(Status::error("voice_file is required"));
+        return HttpResponse::BadRequest().json(json!({ "error": "voice_file is required" }));
+    }
+    span.set_attribute(KeyValue::new("tts.voice_name", name.clone()));
+    span.set_attribute(KeyValue::new("tts.upload_bytes", file_bytes.len() as i64));
+
+    // Normalize to WAV so any device format (e.g. .aac / .opus, which Chatterbox
+    // rejects by extension) is accepted.
+    let src_ext = Path::new(&filename).extension().and_then(|e| e.to_str());
+    let wav = match transcode_bytes_to_wav(file_bytes.as_ref(), src_ext).await {
+        Ok(w) => w,
+        Err(e) => {
+            span.set_status(Status::error("audio decode failed"));
+            log::error!("voice upload transcode failed: {:?}", e);
+            return HttpResponse::BadRequest()
+                .json(json!({ "error": "couldn't decode that audio file" }));
+        }
+    };
+
+    match client
+        .create_voice(&name, wav, "reference.wav", "audio/wav")
+        .await
+    {
+        Ok(v) => {
+            span.set_status(Status::Ok);
+            HttpResponse::Ok().json(v)
+        }
+        Err(e) => {
+            span.set_status(Status::error("create_voice failed"));
+            log::error!("create_voice (upload) failed: {:?}", e);
+            HttpResponse::BadGateway().json(json!({ "error": format!("{e}") }))
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+pub struct CreateVoiceFromLibraryRequest {
+    pub voice_name: String,
+    /// Library-relative path to an audio or video file.
+    pub path: String,
+    #[serde(default)]
+    pub library: Option<String>,
+}
+
+/// POST /tts/voices/from-library — register a cloned voice from a file already
+/// in a library. Audio and video alike are ffmpeg-normalized to a mono 24 kHz
+/// WAV reference clip (length capped by LLAMA_SWAP_TTS_REF_SECONDS).
+///
+/// Responses: 200 with the backend's reply; 400 for an invalid voice name or
+/// library, a non-audio/video file, or undecodable audio; 404 when the path
+/// does not resolve to an existing file inside the library; 502 when the
+/// backend call fails; 503 when no TTS backend is configured.
+#[post("/tts/voices/from-library")]
+pub async fn create_voice_from_library_handler(
+    http_request: HttpRequest,
+    _claims: Claims,
+    req: web::Json<CreateVoiceFromLibraryRequest>,
+    app_state: web::Data<AppState>,
+) -> impl Responder {
+    let parent_context = extract_context_from_request(&http_request);
+    let mut span =
+        global_tracer().start_with_context("http.tts.voices.from_library", &parent_context);
+
+    let Some(client) = app_state.llamacpp.as_ref() else {
+        span.set_status(Status::error("tts backend not configured"));
+        return HttpResponse::ServiceUnavailable()
+            .json(json!({ "error": "TTS backend not configured" }));
+    };
+    let Some(voice_name) = sanitize_voice_name(&req.voice_name) else {
+        span.set_status(Status::error("voice_name is required"));
+        return HttpResponse::BadRequest()
+            .json(json!({ "error": "voice_name is required (alphanumerics, - and _ only)" }));
+    };
+
+    let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) {
+        Ok(Some(l)) => l,
+        // The resolver yielded no specific library — use the primary one.
+        Ok(None) => app_state.primary_library(),
+        Err(msg) => {
+            span.set_status(Status::error("invalid library"));
+            return HttpResponse::BadRequest().json(json!({ "error": msg }));
+        }
+    };
+
+    // is_valid_full_path confines the path to the library root (no traversal).
+    let abs = match is_valid_full_path(&library.root_path, &req.path, false) {
+        Some(p) if p.exists() => p,
+        _ => {
+            span.set_status(Status::error("file not found"));
+            return HttpResponse::NotFound().json(json!({ "error": "file not found in library" }));
+        }
+    };
+
+    // Only real audio/video sources are valid voice references — refuse to
+    // slurp arbitrary library files into memory / ffmpeg.
+    if !is_audio_file(&abs) && !is_video_file(&abs) {
+        span.set_status(Status::error("not an audio/video file"));
+        return HttpResponse::BadRequest()
+            .json(json!({ "error": "file is not an audio or video file" }));
+    }
+    span.set_attribute(KeyValue::new("tts.voice_name", voice_name.clone()));
+
+    let wav = match prepare_reference_audio(&abs).await {
+        Ok(b) => b,
+        Err(e) => {
+            span.set_status(Status::error("audio decode failed"));
+            log::error!("voice reference prep failed for {:?}: {:?}", abs, e);
+            return HttpResponse::BadRequest()
+                .json(json!({ "error": "couldn't decode that file's audio" }));
+        }
+    };
+
+    // The upstream filename/MIME are fixed because the clip is always WAV here.
+    match client
+        .create_voice(&voice_name, wav, "reference.wav", "audio/wav")
+        .await
+    {
+        Ok(v) => {
+            span.set_status(Status::Ok);
+            HttpResponse::Ok().json(v)
+        }
+        Err(e) => {
+            span.set_status(Status::error("create_voice failed"));
+            log::error!("create_voice (from-library) failed: {:?}", e);
+            HttpResponse::BadGateway().json(json!({ "error": format!("{e}") }))
+        }
+    }
+}
+
+/// Produce a Chatterbox-ready reference clip from a library file, audio or
+/// video: ffmpeg decodes (or extracts) its audio to mono 24 kHz WAV. ffmpeg is
+/// pointed at the library path itself, so a possibly large video is never
+/// slurped into this process's memory.
+async fn prepare_reference_audio(abs: &Path) -> anyhow::Result<Vec<u8>> {
+    let source = abs.to_string_lossy();
+    run_ffmpeg_to_wav(&source).await
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanitize_voice_name_keeps_safe_chars() {
+        assert_eq!(sanitize_voice_name("m").as_deref(), Some("m"));
+        assert_eq!(
+            sanitize_voice_name("  Cameron ").as_deref(),
+            Some("Cameron")
+        );
+        assert_eq!(
+            sanitize_voice_name("voice_01-a").as_deref(),
+            Some("voice_01-a")
+        );
+    }
+
+    #[test]
+    fn sanitize_voice_name_strips_unsafe_chars() {
+        // Path separators / dots / spaces become '-' and are trimmed at edges.
+        assert_eq!(sanitize_voice_name("a b.c").as_deref(), Some("a-b-c"));
+        assert_eq!(
+            sanitize_voice_name("../etc/passwd").as_deref(),
+            Some("etc-passwd")
+        );
+    }
+
+    #[test]
+    fn sanitize_voice_name_rejects_empty_or_all_unsafe() {
+        assert_eq!(sanitize_voice_name(""), None);
+        assert_eq!(sanitize_voice_name("   "), None);
+        assert_eq!(sanitize_voice_name("../../"), None);
+        assert_eq!(sanitize_voice_name("...."), None);
+    }
+
+    #[test]
+    fn sanitize_voice_name_bounds_length() {
+        let long = "a".repeat(200);
+        assert_eq!(sanitize_voice_name(&long).unwrap().len(), 64);
+    }
+
+    #[test]
+    fn clean_for_tts_strips_markdown() {
+        assert_eq!(
+            clean_for_tts("**Bold** and _italic_ and `code`"),
+            "Bold and italic and code"
+        );
+        assert_eq!(clean_for_tts("# Title\n\nbody"), "Title\nbody");
+        assert_eq!(
+            clean_for_tts("See [docs](http://x.com) now"),
+            "See docs now"
+        );
+        assert_eq!(clean_for_tts("- one\n- two"), "one\ntwo");
+    }
+
+    #[test]
+    fn clean_for_tts_strips_emoji_and_urls() {
+        assert_eq!(clean_for_tts("Hello 😀 world 🎉"), "Hello world");
+        assert_eq!(
+            clean_for_tts("visit https://example.com today"),
+            "visit today"
+        );
+        // ZWJ-glued emoji sequence is fully removed.
+        assert_eq!(clean_for_tts("family 👨‍👩‍👧 photo"), "family photo");
+    }
+
+    #[test]
+    fn clean_for_tts_collapses_blank_lines_to_single_break() {
+        // Chatterbox pauses (sometimes ~20s) per blank line, so paragraph
+        // breaks must collapse to a single newline.
+        assert_eq!(clean_for_tts("para one\n\npara two"), "para one\npara two");
+        assert_eq!(clean_for_tts("a\n\n\n\nb"), "a\nb");
+        // Whitespace-only "blank" lines collapse too.
+        assert_eq!(clean_for_tts("a\n  \t \nb"), "a\nb");
+        // A single newline is left alone.
+        assert_eq!(clean_for_tts("a\nb"), "a\nb");
+    }
+
+    #[test]
+    fn clean_for_tts_preserves_bracket_tags() {
+        // Non-turbo Chatterbox ignores these; a future Turbo uses them as
+        // paralinguistic cues — so we must not strip them.
+        assert_eq!(clean_for_tts("hello [laugh] there"), "hello [laugh] there");
+    }
+}
@@ -0,0 +1,748 @@
+use crate::ai::insight_chat::ChatStreamEvent;
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::sync::Mutex as StdMutex;
+use std::sync::atomic::{AtomicU32, Ordering};
+use std::time::Instant;
+use tokio::sync::{Mutex, Notify};
+use tokio::task::AbortHandle;
+
+/// Maximum number of events buffered per turn. Agentic turns typically
+/// produce ~120 events; 500 provides 4× headroom. When exceeded, oldest
+/// events are evicted from the front.
+const MAX_BUFFERED_EVENTS: usize = 500;
+
+/// Lifecycle state of a chat turn, stored as a `u32` in `TurnEntry::status`.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum TurnStatus {
+    Running = 0,
+    Done = 1,
+    Error = 2,
+    Cancelled = 3,
+}
+
+impl From<u32> for TurnStatus {
+    fn from(raw: u32) -> Self {
+        // 0 and every unrecognised code decode as `Running`.
+        match raw {
+            1 => Self::Done,
+            2 => Self::Error,
+            3 => Self::Cancelled,
+            _ => Self::Running,
+        }
+    }
+}
+
+impl TurnStatus {
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Running => "running",
+            Self::Done => "done",
+            Self::Error => "error",
+            Self::Cancelled => "cancelled",
+        }
+    }
+}
+
+/// Point-in-time snapshot of a turn's metadata, built by `TurnEntry::info`.
+/// Read by the SSE replay handler to emit the initial `turn_info` event
+/// and to decide whether to wait for new events or close immediately.
+#[derive(Debug, Clone)]
+pub struct TurnInfo {
+    pub turn_id: String,
+    pub file_path: String,
+    pub library_id: i32,
+    pub status: TurnStatus,
+    pub total_events_pushed: u32, // every event ever pushed, evicted ones included
+    pub buffered_count: u32, // events still held in the buffer
+}
+
+/// Result of reading events at or after an absolute `skip_before` index.
+#[derive(Debug)]
+pub enum ReplayOutcome {
+    /// New events are available. `next_skip` is the absolute index to pass
+    /// on the next read (i.e. one past the last event returned).
+    Events {
+        events: Vec<ChatStreamEvent>,
+        next_skip: u32,
+    },
+    /// The reader is at or past the live edge — nothing buffered at or after
+    /// `skip_before`. `next_skip` is one past the newest buffered event.
+    CaughtUp { next_skip: u32 },
+    /// `skip_before` points below the buffer's base index: the requested
+    /// events were evicted. Maps to HTTP 410 Gone.
+    Gone,
+}
+
+/// Per-turn state shared between the agentic loop (writer) and all SSE
+/// replay connections (readers).
+pub struct TurnEntry {
+    pub turn_id: String,
+    pub file_path: String,
+    pub library_id: i32,
+    /// Shared event buffer — multiple SSE connections can read independently.
+    /// Each connection tracks its own `skip_before` offset.
+    events: Mutex<Vec<ChatStreamEvent>>,
+    /// Monotonic counter: total events pushed (may exceed events.len()
+    /// due to eviction). Used for skip_before indexing.
+    total_events_pushed: AtomicU32,
+    /// Absolute index of the oldest event still in `events`. Advances by
+    /// one per eviction so `skip_before` stays absolute across connections.
+    base_index: AtomicU32,
+    pub status: AtomicU32, // a `TurnStatus` stored as its `u32` discriminant
+    /// Abort handle for the spawned agentic task, set once after spawn.
+    /// Behind a std `Mutex` because the entry is shared via `Arc` and the
+    /// handle is installed after the entry is already in the registry.
+    abort_handle: StdMutex<Option<AbortHandle>>,
+    pub created_at: Instant,
+    notify: Arc<Notify>, // signalled on every push and on terminal status
+}
+
+impl TurnEntry {
+    /// Create an empty entry in the `Running` state with no abort handle.
+    pub fn new(turn_id: String, file_path: String, library_id: i32) -> Self {
+        Self {
+            turn_id,
+            file_path,
+            library_id,
+            events: Mutex::new(Vec::new()),
+            total_events_pushed: AtomicU32::new(0),
+            base_index: AtomicU32::new(0),
+            status: AtomicU32::new(TurnStatus::Running as u32),
+            abort_handle: StdMutex::new(None),
+            created_at: Instant::now(),
+            notify: Arc::new(Notify::new()),
+        }
+    }
+
+    /// Install the abort handle for the spawned agentic task. Called once,
+    /// right after the task is spawned.
+    pub fn set_abort_handle(&self, handle: AbortHandle) {
+        *self.abort_handle.lock().expect("abort_handle poisoned") = Some(handle);
+    }
+
+    /// Abort the spawned agentic task, if a handle was installed. Returns
+    /// `true` if a task was aborted; the handle is taken, so later calls
+    /// return `false`.
+    pub fn abort(&self) -> bool {
+        if let Some(handle) = self
+            .abort_handle
+            .lock()
+            .expect("abort_handle poisoned")
+            .take()
+        {
+            handle.abort();
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Push an event into the buffer. Evicts the oldest event once the buffer
+    /// is at `MAX_BUFFERED_EVENTS`. Notifies all waiting SSE connections.
+    pub async fn push_event(&self, event: ChatStreamEvent) {
+        {
+            let mut events = self.events.lock().await;
+
+            if events.len() >= MAX_BUFFERED_EVENTS {
+                // At the cap: drop the oldest event and advance the base
+                // index so skip_before stays absolute across connections.
+                events.remove(0);
+                self.base_index.fetch_add(1, Ordering::Relaxed);
+            }
+
+            events.push(event);
+            // Increment while holding the buffer lock so the counter stays in
+            // lock-step with the buffer even if multiple writers ever exist.
+            self.total_events_pushed.fetch_add(1, Ordering::Relaxed);
+        }
+
+        self.notify.notify_waiters();
+    }
+
+    /// Get a snapshot of turn metadata for the `turn_info` SSE event.
+    pub async fn info(&self) -> TurnInfo {
+        let events = self.events.lock().await;
+        let buffered = events.len() as u32;
+        let total = self.total_events_pushed.load(Ordering::Relaxed);
+        drop(events);
+
+        TurnInfo {
+            turn_id: self.turn_id.clone(),
+            file_path: self.file_path.clone(),
+            library_id: self.library_id,
+            status: self.status.load(Ordering::Acquire).into(),
+            total_events_pushed: total,
+            buffered_count: buffered,
+        }
+    }
+
+    /// Set the terminal status and notify all waiters.
+    pub fn set_terminal_status(&self, status: TurnStatus) {
+        self.status.store(status as u32, Ordering::Release);
+        self.notify.notify_waiters();
+    }
+
+    /// Read buffered events at or after absolute index `skip_before` without
+    /// waiting. Distinguishes "evicted" (Gone) from "caught up" (no new
+    /// events yet) — the previous boolean/`Option` API conflated the two.
+    pub async fn replay_from(&self, skip_before: u32) -> ReplayOutcome {
+        let events = self.events.lock().await;
+        let base = self.base_index.load(Ordering::Relaxed);
+
+        // The buffer holds absolute indices [base, base + len). A request
+        // below `base` asked for events that have been evicted.
+        if skip_before < base {
+            return ReplayOutcome::Gone;
+        }
+
+        let offset = (skip_before - base) as usize;
+        let next_skip = base + events.len() as u32;
+        if offset >= events.len() {
+            // Caught up to (or past) the live edge — nothing new yet.
+            return ReplayOutcome::CaughtUp { next_skip };
+        }
+
+        ReplayOutcome::Events {
+            events: events[offset..].to_vec(),
+            next_skip,
+        }
+    }
+
+    /// Wait for the next batch of events past `skip_before`, the turn to
+    /// finish, or eviction. Returns:
+    /// - `Events` when new events are available (drained before any terminal
+    ///   signal so the final `Done`/`Error` is never dropped),
+    /// - `CaughtUp` only when the turn has reached a terminal status and the
+    ///   reader is fully drained (the caller should close the stream),
+    /// - `Gone` when `skip_before` points into evicted territory.
+    pub async fn next_batch(&self, skip_before: u32) -> ReplayOutcome {
+        loop {
+            // Register interest BEFORE inspecting state so a push/terminal that
+            // races between our read and our await can't be lost (Notify's
+            // `notify_waiters` does not store a permit).
+            let notified = self.notify.notified();
+            tokio::pin!(notified);
+            notified.as_mut().enable();
+
+            // Sample the status BEFORE reading the buffer. Checking it after
+            // could see a terminal flip that raced the read and close the
+            // stream on a stale "caught up", dropping the final events.
+            let finished = !self.is_running();
+            match self.replay_from(skip_before).await {
+                ReplayOutcome::CaughtUp { next_skip } if finished => {
+                    return ReplayOutcome::CaughtUp { next_skip };
+                }
+                // Still running when sampled — wait for a push or terminal.
+                ReplayOutcome::CaughtUp { .. } => {}
+                other => return other, // Events or Gone
+            }
+
+            notified.await;
+        }
+    }
+
+    /// Check if this turn is still running.
+    pub fn is_running(&self) -> bool {
+        self.status.load(Ordering::Acquire) == TurnStatus::Running as u32
+    }
+}
+
+/// In-memory registry of all active chat turns. Injected into `AppState`
+/// and shared across all handlers.
+pub struct TurnRegistry {
+    entries: Mutex<HashMap<String, Arc<TurnEntry>>>,
+    timeout_secs: u64,
+}
+
+impl TurnRegistry {
+    pub fn new(timeout_secs: u64) -> Self {
+        Self {
+            entries: Mutex::new(HashMap::new()),
+            timeout_secs,
+        }
+    }
+
+    /// Returns the cleanup timeout in seconds.
+    pub fn timeout_secs(&self) -> u64 {
+        self.timeout_secs
+    }
+
+    /// Insert a turn entry (replacing any with the same id). Returns the turn_id.
+    pub async fn insert(&self, entry: Arc<TurnEntry>) -> String {
+        let turn_id = entry.turn_id.clone();
+        let mut entries = self.entries.lock().await;
+        entries.insert(turn_id.clone(), entry);
+        turn_id
+    }
+
+    /// Look up a turn by id. Returns `None` when no such entry is registered.
+    pub async fn get(&self, turn_id: &str) -> Option<Arc<TurnEntry>> {
+        let entries = self.entries.lock().await;
+        entries.get(turn_id).cloned()
+    }
+
+    /// Remove every entry whose age (whole seconds since `created_at`)
+    /// exceeds the timeout, regardless of turn status. Returns the count
+    /// of entries removed.
+    pub async fn cleanup_stale(&self) -> usize {
+        let mut entries = self.entries.lock().await;
+        let before = entries.len();
+        // `retain` drops stale entries in one pass, with no intermediate
+        // list of ids to clone and look up again.
+        entries.retain(|_, entry| {
+            entry.created_at.elapsed().as_secs() <= self.timeout_secs
+        });
+        let removed = before - entries.len();
+        // Release the registry lock before logging.
+        drop(entries);
+
+        if removed > 0 {
+            log::info!(
+                "TurnRegistry: cleaned up {} stale entries (timeout={}s)",
+                removed,
+                self.timeout_secs
+            );
+        }
+
+        removed
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ai::insight_chat::ChatStreamEvent;
+    use std::time::Duration;
+
+    /// Unwrap the events from a `ReplayOutcome::Events`, panicking otherwise.
+    fn events_of(outcome: ReplayOutcome) -> Vec<ChatStreamEvent> {
+        match outcome {
+            ReplayOutcome::Events { events, .. } => events,
+            other => panic!("expected Events, got {other:?}"),
+        }
+    }
+
+    // ── TurnStatus ──────────────────────────────────────────────────
+
+    #[test]
+    fn turn_status_from_u32_valid_values() {
+        assert_eq!(TurnStatus::from(0), TurnStatus::Running);
+        assert_eq!(TurnStatus::from(1), TurnStatus::Done);
+        assert_eq!(TurnStatus::from(2), TurnStatus::Error);
+        assert_eq!(TurnStatus::from(3), TurnStatus::Cancelled);
+    }
+
+    #[test]
+    fn turn_status_from_u32_unknown_defaults_to_running() {
+        assert_eq!(TurnStatus::from(4), TurnStatus::Running);
+        assert_eq!(TurnStatus::from(u32::MAX), TurnStatus::Running);
+    }
+
+    #[test]
+    fn turn_status_as_str() {
+        assert_eq!(TurnStatus::Running.as_str(), "running");
+        assert_eq!(TurnStatus::Done.as_str(), "done");
+        assert_eq!(TurnStatus::Error.as_str(), "error");
+        assert_eq!(TurnStatus::Cancelled.as_str(), "cancelled");
+    }
+
+    // ── TurnEntry ───────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn turn_entry_push_and_replay() {
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            1,
+        ));
+
+        entry
+            .push_event(ChatStreamEvent::TextDelta("hello".to_string()))
+            .await;
+        entry
+            .push_event(ChatStreamEvent::TextDelta(" world".to_string()))
+            .await;
+
+        let events = events_of(entry.replay_from(0).await);
+        assert_eq!(events.len(), 2);
+    }
+
+    #[tokio::test]
+    async fn turn_entry_replay_with_skip() {
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            1,
+        ));
+
+        for i in 0..5 {
+            entry
+                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
+                .await;
+        }
+
+        // skip_before=0 → all 5 events
+        let all = events_of(entry.replay_from(0).await);
+        assert_eq!(all.len(), 5);
+
+        // skip_before=2 → events 2,3,4 (3 events)
+        let skipped = events_of(entry.replay_from(2).await);
+        assert_eq!(skipped.len(), 3);
+
+        // skip_before=5 → caught up to the live edge (not Gone).
+        assert!(matches!(
+            entry.replay_from(5).await,
+            ReplayOutcome::CaughtUp { next_skip: 5 }
+        ));
+    }
+
+    #[tokio::test]
+    async fn turn_entry_replay_empty_by_default() {
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            1,
+        ));
+        // Empty buffer with skip_before=0 → caught up (nothing to replay yet).
+        assert!(matches!(
+            entry.replay_from(0).await,
+            ReplayOutcome::CaughtUp { next_skip: 0 }
+        ));
+    }
+
+    #[tokio::test]
+    async fn turn_entry_is_running_initially() {
+        let entry = TurnEntry::new("t1".to_string(), "/photo.jpg".to_string(), 1);
+        assert!(entry.is_running());
+    }
+
+    #[tokio::test]
+    async fn turn_entry_set_terminal_status() {
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            1,
+        ));
+        assert!(entry.is_running());
+        entry.set_terminal_status(TurnStatus::Done);
+        assert!(!entry.is_running());
+    }
+
+    #[tokio::test]
+    async fn turn_entry_info() {
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            42,
+        ));
+
+        entry
+            .push_event(ChatStreamEvent::TextDelta("x".to_string()))
+            .await;
+        entry.set_terminal_status(TurnStatus::Done);
+
+        let info = entry.info().await;
+        assert_eq!(info.turn_id, "t1");
+        assert_eq!(info.file_path, "/photo.jpg");
+        assert_eq!(info.library_id, 42);
+        assert_eq!(info.status, TurnStatus::Done);
+        assert_eq!(info.total_events_pushed, 1);
+        assert_eq!(info.buffered_count, 1);
+    }
+
+    #[tokio::test]
+    async fn turn_entry_eviction_caps_buffer() {
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            1,
+        ));
+
+        // Push MAX_BUFFERED_EVENTS + 10 events.
+        for i in 0..(MAX_BUFFERED_EVENTS + 10) {
+            entry
+                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
+                .await;
+        }
+
+        // Asking from absolute 0 after eviction is Gone (0-9 were dropped).
+        assert!(matches!(entry.replay_from(0).await, ReplayOutcome::Gone));
+
+        // Reading from the new base (10) returns the full capped buffer.
+        let events = events_of(entry.replay_from(10).await);
+        assert_eq!(events.len(), MAX_BUFFERED_EVENTS);
+
+        // First event should be at index 10 (0-9 were evicted).
+        if let ChatStreamEvent::TextDelta(s) = &events[0] {
+            assert_eq!(s, "e10");
+        } else {
+            panic!("expected TextDelta");
+        }
+
+        // Last event should be at index MAX_BUFFERED_EVENTS + 9.
+        if let ChatStreamEvent::TextDelta(s) = &events[events.len() - 1] {
+            assert_eq!(s, &format!("e{}", MAX_BUFFERED_EVENTS + 9));
+        } else {
+            panic!("expected TextDelta");
+        }
+    }
+
+    #[tokio::test]
+    async fn turn_entry_replay_evicted_index_is_gone() {
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            1,
+        ));
+
+        // Push one past the cap so exactly one event (index 0) is evicted.
+        for i in 0..=MAX_BUFFERED_EVENTS {
+            entry
+                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
+                .await;
+        }
+
+        // Base is now 1; asking from absolute 0 is evicted territory → Gone.
+        assert!(matches!(entry.replay_from(0).await, ReplayOutcome::Gone));
+
+        // skip_before = MAX_BUFFERED_EVENTS → last event only (index valid).
+        let last = events_of(entry.replay_from(MAX_BUFFERED_EVENTS as u32).await);
+        assert_eq!(last.len(), 1);
+
+        // skip_before = MAX_BUFFERED_EVENTS + 1 → caught up to the live edge.
+        assert!(matches!(
+            entry.replay_from((MAX_BUFFERED_EVENTS + 1) as u32).await,
+            ReplayOutcome::CaughtUp { .. }
+        ));
+    }
+
+    // ── TurnRegistry ────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn turn_registry_insert_and_get() {
+        let registry = TurnRegistry::new(300);
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            1,
+        ));
+        let id = registry.insert(entry).await;
+        assert_eq!(id, "t1");
+
+        let retrieved = registry.get("t1").await;
+        assert!(retrieved.is_some());
+        assert_eq!(retrieved.unwrap().turn_id, "t1");
+    }
+
+    #[tokio::test]
+    async fn turn_registry_get_nonexistent_returns_none() {
+        let registry = TurnRegistry::new(300);
+        assert!(registry.get("nonexistent").await.is_none());
+    }
+
+    #[tokio::test]
+    async fn turn_registry_cleanup_stale_removes_old_entries() {
+        let registry = TurnRegistry::new(0);
+        let mut entry = TurnEntry::new("t1".to_string(), "/photo.jpg".to_string(), 1);
+        entry.created_at = Instant::now() - Duration::from_secs(1);
+        registry.insert(Arc::new(entry)).await;
+
+        let cleaned = registry.cleanup_stale().await;
+        assert_eq!(cleaned, 1);
+        assert!(registry.get("t1").await.is_none());
+    }
+
+    #[tokio::test]
+    async fn turn_registry_cleanup_stale_preserves_recent() {
+        let registry = TurnRegistry::new(3600); // 1 hour
+        let entry = Arc::new(TurnEntry::new(
+            "t1".to_string(),
+            "/photo.jpg".to_string(),
+            1,
+        ));
+        registry.insert(entry).await;
+
+        let cleaned = registry.cleanup_stale().await;
+        assert_eq!(cleaned, 0);
+        assert!(registry.get("t1").await.is_some());
+    }
+
+    #[tokio::test]
+    async fn turn_registry_cleanup_stale_multiple() {
+        let registry = TurnRegistry::new(0);
+
+        for i in 0..5 {
+            let mut entry = TurnEntry::new(format!("t{i}"), "/photo.jpg".to_string(), 1);
+            entry.created_at = Instant::now() - Duration::from_secs(1);
+            registry.insert(Arc::new(entry)).await;
+        }
+
+        let cleaned = registry.cleanup_stale().await;
+        assert_eq!(cleaned, 5);
+    }
+
+    #[tokio::test]
+    async fn turn_registry_timeout_secs() {
+        let registry = TurnRegistry::new(600);
+        assert_eq!(registry.timeout_secs(), 600);
+    }
+
+    // ── next_batch / live replay ────────────────────────────────────
+
+    /// Drain a turn the way the SSE replay handler does: pull batches via
+    /// `next_batch` until the turn is finished and fully drained.
+    async fn drain_to_end(entry: Arc<TurnEntry>) -> Vec<ChatStreamEvent> {
+        let mut out = Vec::new();
+        let mut skip = 0u32;
+        while let ReplayOutcome::Events { events, next_skip } = entry.next_batch(skip).await {
+            out.extend(events);
+            skip = next_skip;
+        }
+        out
+    }
+
+    fn is_terminal(ev: &ChatStreamEvent) -> bool {
+        matches!(ev, ChatStreamEvent::Done { .. } | ChatStreamEvent::Error(_))
+    }
+
+    /// The core guarantee behind the replay rewrite: a reader waiting on
+    /// `next_batch` always receives the terminal event, even though the
+    /// writer flips status to terminal immediately after pushing it.
+    #[tokio::test]
+    async fn next_batch_always_delivers_terminal_event() {
+        for _ in 0..50 {
+            let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
+
+            let writer = entry.clone();
+            let w = tokio::spawn(async move {
+                writer
+                    .push_event(ChatStreamEvent::IterationStart { n: 1, max: 6 })
+                    .await;
+                writer
+                    .push_event(ChatStreamEvent::TextDelta("hi".into()))
+                    .await;
+                // Push terminal then flip status with no await between — the
+                // race that previously dropped the Done on the reader side.
+                writer
+                    .push_event(ChatStreamEvent::Done {
+                        tool_calls_made: 0,
+                        iterations_used: 1,
+                        truncated: false,
+                        prompt_tokens: None,
+                        eval_tokens: None,
+                        num_ctx: None,
+                        amended_insight_id: None,
+                        backend_used: "local".into(),
+                        model_used: "m".into(),
+                        cancelled: false,
+                    })
+                    .await;
+                writer.set_terminal_status(TurnStatus::Done);
+            });
+
+            let events = drain_to_end(entry).await;
+            w.await.unwrap();
+
+            assert!(
+                events.last().is_some_and(is_terminal),
+                "terminal event missing; got {} events",
+                events.len()
+            );
+            assert_eq!(events.len(), 3, "expected IterationStart, TextDelta, Done");
+        }
+    }
+
+    /// A reader that connects before any event is pushed blocks in
+    /// `next_batch` and then receives events as the writer produces them.
+    #[tokio::test]
+    async fn next_batch_waits_for_late_events() {
+        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
+
+        let writer = entry.clone();
+        tokio::spawn(async move {
+            tokio::task::yield_now().await;
+            writer
+                .push_event(ChatStreamEvent::TextDelta("late".into()))
+                .await;
+            writer.set_terminal_status(TurnStatus::Done);
+        });
+
+        // First call blocks until the writer pushes, rather than returning
+        // CaughtUp on the empty buffer of a running turn.
+        match entry.next_batch(0).await {
+            ReplayOutcome::Events { events, next_skip } => {
+                assert_eq!(events.len(), 1);
+                assert_eq!(next_skip, 1);
+            }
+            other => panic!("expected Events, got {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn next_batch_closes_on_terminal_when_caught_up() {
+        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
+        entry
+            .push_event(ChatStreamEvent::TextDelta("x".into()))
+            .await;
+        entry.set_terminal_status(TurnStatus::Done);
+
+        // Caught up (skip past the one buffered event) on a finished turn →
+        // CaughtUp so the handler closes the stream rather than hanging.
+        assert!(matches!(
+            entry.next_batch(1).await,
+            ReplayOutcome::CaughtUp { .. }
+        ));
+    }
+
+    #[tokio::test]
+    async fn next_batch_reports_gone_for_evicted_index() {
+        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
+        for i in 0..=MAX_BUFFERED_EVENTS {
+            entry
+                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
+                .await;
+        }
+        // Index 0 was evicted (base advanced to 1).
+        assert!(matches!(entry.next_batch(0).await, ReplayOutcome::Gone));
+    }
+
+    // ── abort handle (#1 cancellation) ──────────────────────────────
+
+    #[tokio::test]
+    async fn abort_handle_aborts_task_once() {
+        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
+
+        // No handle installed yet → abort is a no-op.
+        assert!(!entry.abort());
+
+        let handle = tokio::spawn(async {
+            // Long-lived task that only ends via abort.
+            futures::future::pending::<()>().await;
+        });
+        entry.set_abort_handle(handle.abort_handle());
+
+        assert!(entry.abort(), "first abort should fire");
+        assert!(!entry.abort(), "handle is taken; second abort is a no-op");
+
+        // The aborted task resolves to a cancellation JoinError.
+        let join = handle.await;
+        assert!(join.unwrap_err().is_cancelled());
+    }
+
+    #[tokio::test]
+    async fn base_index_tracks_eviction() {
+        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
+        for i in 0..(MAX_BUFFERED_EVENTS + 5) {
+            entry
+                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
+                .await;
+        }
+        let info = entry.info().await;
+        // 5 events evicted; total keeps climbing, buffer stays capped.
+        assert_eq!(info.total_events_pushed, (MAX_BUFFERED_EVENTS + 5) as u32);
+        assert_eq!(info.buffered_count, MAX_BUFFERED_EVENTS as u32);
+        // First live index is 5: reading from there yields the full buffer.
+        let from_base = events_of(entry.replay_from(5).await);
+        assert_eq!(from_base.len(), MAX_BUFFERED_EVENTS);
+    }
+}
@@ -220,6 +220,76 @@ pub fn backfill_missing_date_taken(
 /// unscanned image_exif rows directly via the FaceDao anti-join and
 /// hands them to the existing detection pass. Runs on every tick (not
 /// just full scans) so the backlog moves at quick-scan cadence.
+/// Per-tick drain of the CLIP embedding backlog, mirroring
+/// `process_face_backlog`: list up to `CLIP_BACKLOG_MAX_PER_TICK`
+/// (default 32) `(rel_path, content_hash)` candidates via
+/// `ExifDao::list_clip_unencoded_candidates` and hand them to
+/// `clip_watch::run_clip_encoding_pass`. A no-op when the client is
+/// disabled, the listing fails (logged), or nothing is pending.
+///
+/// Idempotent — a row stays in the candidate set until its embedding
+/// lands, so a transient failure (Apollo unreachable, CUDA OOM) just
+/// defers to the next tick. Permanent failures (un-decodable bytes)
+/// currently retry every tick; a status column may be added later.
+pub fn process_clip_backlog(
+    context: &opentelemetry::Context,
+    library: &libraries::Library,
+    clip_client: &crate::ai::clip_client::ClipClient,
+    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+    excluded_dirs: &[String],
+) {
+    if !clip_client.is_enabled() {
+        return;
+    }
+    let per_tick_cap = dotenv::var("CLIP_BACKLOG_MAX_PER_TICK")
+        .ok()
+        .and_then(|raw| raw.parse::<i64>().ok())
+        .filter(|n| *n > 0)
+        .unwrap_or(32);
+
+    // The DAO guard is a temporary: the lock is released once the call returns.
+    let listing = exif_dao
+        .lock()
+        .expect("exif dao")
+        .list_clip_unencoded_candidates(context, library.id, per_tick_cap);
+    let rows: Vec<(String, String)> = match listing {
+        Ok(found) => found,
+        Err(e) => {
+            warn!(
+                "clip_watch: list_clip_unencoded_candidates failed for library '{}': {:?}",
+                library.name, e
+            );
+            return;
+        }
+    };
+    if rows.is_empty() {
+        return;
+    }
+
+    info!(
+        "clip_watch: backlog drain — encoding {} candidate(s) for library '{}' (cap={})",
+        rows.len(),
+        library.name,
+        per_tick_cap
+    );
+
+    let candidates = rows
+        .into_iter()
+        .map(|(rel_path, content_hash)| crate::clip_watch::ClipCandidate {
+            rel_path,
+            content_hash,
+        })
+        .collect::<Vec<_>>();
+
+    crate::clip_watch::run_clip_encoding_pass(
+        library,
+        excluded_dirs,
+        clip_client,
+        Arc::clone(exif_dao),
+        candidates,
+    );
+}
+
 pub fn process_face_backlog(
    context: &opentelemetry::Context,
    library: &libraries::Library,
@@ -459,16 +529,21 @@ mod tests {
        opentelemetry::Context::new()
    }

-    /// Build a tempdir-backed library + DAOs sharing a single in-memory
-    /// SQLite connection (so cross-table joins like
-    /// `list_unscanned_candidates` see consistent state).
-    fn setup() -> (
+    /// Everything `setup` hands back to a test: tempdir, library, shared
+    /// connection, and the two DAOs. Aliased to keep clippy's
+    /// type-complexity lint satisfied.
+    type SetupFixture = (
        TempDir,
        Library,
        Arc<Mutex<diesel::SqliteConnection>>,
        Arc<Mutex<Box<dyn ExifDao>>>,
        Arc<Mutex<Box<dyn FaceDao>>>,
-    ) {
+    );
+
+    /// Build a tempdir-backed library + DAOs sharing a single in-memory
+    /// SQLite connection (so cross-table joins like
+    /// `list_unscanned_candidates` see consistent state).
+    fn setup() -> SetupFixture {
        let tmp = TempDir::new().expect("tempdir");
        let mut conn = in_memory_db_connection();
        // Migration seeds library id=1 with a placeholder root; rewrite it
@@ -195,6 +195,7 @@ async fn main() -> anyhow::Result<()> {
    let generator = InsightGenerator::new(
        ollama,
        None,
+        None,
        sms_client,
        apollo_client,
        insight_dao.clone(),
@@ -1,250 +0,0 @@
-//! Probe binary for RAM++ auto-tagging.
-//!
-//! No DB writes. Walks a library's `image_exif` rows, sends a sample
-//! through Apollo's `/api/internal/tags/auto`, and prints `(path, tags)`
-//! to stdout so the operator can eyeball whether the model's vocabulary
-//! and threshold defaults are appropriate for this library before
-//! committing to the persistence phase (new table, per-tick drain, UI).
-//!
-//! Usage:
-//!     cargo run --release --bin probe_auto_tags -- \
-//!         --library 1 --limit 50 --threshold 0.7
-//!
-//! Env: standard ImageApi `.env`. Requires either
-//! `APOLLO_TAG_API_BASE_URL` or `APOLLO_API_BASE_URL` to be set
-//! (otherwise the client is disabled and the probe bails).
-
-use std::path::{Path, PathBuf};
-use std::sync::{Arc, Mutex};
-use std::time::Instant;
-
-use clap::Parser;
-use log::{info, warn};
-
-use image_api::ai::tag_client::{TagClient, TagDetectError, TagMeta};
-use image_api::database::{ExifDao, SqliteExifDao, connect};
-use image_api::exif;
-use image_api::file_types;
-use image_api::libraries::{self, Library};
-
-#[derive(Parser, Debug)]
-#[command(name = "probe_auto_tags")]
-#[command(about = "Print RAM++ auto-tags for a sample of image_exif rows")]
-struct Args {
-    /// Library id to sample from.
-    #[arg(long)]
-    library: i32,
-
-    /// Max files to probe. The binary scans more rows internally because
-    /// non-image rows (videos, junk) are skipped client-side.
-    #[arg(long, default_value_t = 25)]
-    limit: usize,
-
-    /// Per-call threshold sent to Apollo. Overrides the engine default.
-    /// Lower = more tags per photo, more noise. 0.5–0.75 is the useful
-    /// sweep range for ram_plus_swin_large_14m.
-    #[arg(long, default_value_t = 0.65)]
-    threshold: f32,
-
-    /// Offset into the library's rel_path listing (sorted by id ASC).
-    /// Bump on re-runs to sample a different slice.
-    #[arg(long, default_value_t = 0)]
-    offset: i64,
-
-    /// How many DB rows to scan before giving up on hitting the limit.
-    /// Useful when a library is mostly videos.
-    #[arg(long, default_value_t = 2000)]
-    max_scan: i64,
-}
-
-/// Mirror of `face_watch::read_image_bytes_for_detect` — it's pub(crate)
-/// so we can't import it across the bin boundary. The probe is throwaway
-/// scope; inlining is cleaner than changing the visibility.
-fn read_image_bytes(path: &Path) -> std::io::Result<Vec<u8>> {
-    if file_types::needs_ffmpeg_thumbnail(path)
-        && let Some(preview) = exif::extract_embedded_jpeg_preview(path)
-    {
-        return Ok(preview);
-    }
-    std::fs::read(path)
-}
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    env_logger::init();
-    dotenv::dotenv().ok();
-
-    let args = Args::parse();
-
-    let client = TagClient::from_env();
-    if !client.is_enabled() {
-        anyhow::bail!(
-            "TagClient disabled: set APOLLO_TAG_API_BASE_URL or APOLLO_API_BASE_URL in .env"
-        );
-    }
-
-    // Quick health probe so we fail fast on a misconfig before grinding
-    // through a thousand rows.
-    match client.health().await {
-        Ok(h) => info!(
-            "tag engine: loaded={} device={} model={} threshold_default={}",
-            h.loaded, h.device, h.model_version, h.threshold
-        ),
-        Err(e) => warn!("health probe failed (continuing): {e}"),
-    }
-
-    let mut seed_conn = connect();
-    if let Some(base) = dotenv::var("BASE_PATH").ok().as_deref() {
-        libraries::seed_or_patch_from_env(&mut seed_conn, base);
-    }
-    let libs = libraries::load_all(&mut seed_conn);
-    drop(seed_conn);
-    let lib: Library = libs
-        .into_iter()
-        .find(|l| l.id == args.library)
-        .ok_or_else(|| anyhow::anyhow!("library id {} not found", args.library))?;
-    info!("probing library #{} ({}) at {}", lib.id, lib.name, lib.root_path);
-
-    let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
-    let ctx = opentelemetry::Context::new();
-
-    // Paginate through (id, rel_path) for this library, filter to images
-    // on disk, take `limit`. Page size is tuned so we don't slam the DB
-    // when a library is video-heavy.
-    const PAGE: i64 = 500;
-    let mut offset = args.offset;
-    let mut scanned: i64 = 0;
-    let mut probed = 0usize;
-    let mut ok_count = 0usize;
-    let mut empty_count = 0usize;
-    let mut perm_fail = 0usize;
-    let mut transient_fail = 0usize;
-    let started = Instant::now();
-    let root = PathBuf::from(&lib.root_path);
-
-    'outer: loop {
-        if scanned >= args.max_scan {
-            warn!(
-                "scan cap ({}) reached before hitting limit ({}); bump --max-scan to scan deeper",
-                args.max_scan, args.limit
-            );
-            break;
-        }
-        let rows = {
-            let mut guard = dao.lock().expect("dao lock");
-            guard
-                .list_rel_paths_for_library_page(&ctx, lib.id, PAGE, offset)
-                .map_err(|e| anyhow::anyhow!("list rel_paths: {:?}", e))?
-        };
-        if rows.is_empty() {
-            info!("no more rows after offset {}", offset);
-            break;
-        }
-        offset += rows.len() as i64;
-        scanned += rows.len() as i64;
-
-        for (_id, rel_path) in rows {
-            if probed >= args.limit {
-                break 'outer;
-            }
-            let abs = root.join(&rel_path);
-            // Skip non-images and videos at the path level — same logic
-            // the face backlog drain uses, just inlined.
-            if !file_types::is_image_file(&abs) {
-                continue;
-            }
-            if !abs.exists() {
-                continue;
-            }
-            let bytes = match read_image_bytes(&abs) {
-                Ok(b) => b,
-                Err(e) => {
-                    warn!("read {rel_path}: {e}");
-                    continue;
-                }
-            };
-            // The probe doesn't need a real content_hash — Apollo only
-            // logs it. Pass an empty marker so we don't trip on no-hash
-            // image_exif rows.
-            let meta = TagMeta {
-                content_hash: String::new(),
-                library_id: lib.id,
-                rel_path: rel_path.clone(),
-                threshold: Some(args.threshold),
-            };
-
-            let call_start = Instant::now();
-            match client.auto_tag(bytes, meta).await {
-                Ok(resp) => {
-                    probed += 1;
-                    if resp.tags.is_empty() {
-                        empty_count += 1;
-                        println!(
-                            "[{:>3}] (no tags) {}ms  {}",
-                            probed, resp.duration_ms, rel_path
-                        );
-                    } else {
-                        ok_count += 1;
-                        let preview = resp
-                            .tags
-                            .iter()
-                            .map(|t| format!("{}({:.2})", t.name, t.confidence))
-                            .collect::<Vec<_>>()
-                            .join(", ");
-                        println!(
-                            "[{:>3}] {} tags {}ms  {}\n      {}",
-                            probed,
-                            resp.tags.len(),
-                            resp.duration_ms,
-                            rel_path,
-                            preview
-                        );
-                    }
-                }
-                Err(TagDetectError::Permanent(e)) => {
-                    probed += 1;
-                    perm_fail += 1;
-                    println!(
-                        "[{:>3}] PERMANENT FAIL ({:>4}ms) {}\n      {}",
-                        probed,
-                        call_start.elapsed().as_millis(),
-                        rel_path,
-                        e
-                    );
-                }
-                Err(TagDetectError::Transient(e)) => {
-                    probed += 1;
-                    transient_fail += 1;
-                    println!(
-                        "[{:>3}] TRANSIENT FAIL ({:>4}ms) {}\n      {}",
-                        probed,
-                        call_start.elapsed().as_millis(),
-                        rel_path,
-                        e
-                    );
-                }
-                Err(TagDetectError::Disabled) => {
-                    anyhow::bail!("tag client became disabled mid-run; impossible");
-                }
-            }
-        }
-    }
-
-    let elapsed = started.elapsed();
-    println!();
-    println!("── summary ───────────────────────────────────────");
-    println!("scanned rows         : {scanned}");
-    println!("probed files         : {probed}");
-    println!("  with tags          : {ok_count}");
-    println!("  empty (no tags)    : {empty_count}");
-    println!("  permanent failures : {perm_fail}");
-    println!("  transient failures : {transient_fail}");
-    println!("elapsed              : {:.1}s", elapsed.as_secs_f32());
-    if probed > 0 {
-        println!(
-            "throughput           : {:.2} photos/s",
-            probed as f32 / elapsed.as_secs_f32().max(0.001)
-        );
-    }
-    Ok(())
-}
@@ -0,0 +1,273 @@
+//! Probe binary for CLIP semantic search.
+//!
+//! No DB writes. Walks a library's `image_exif` rows, encodes a sample
+//! via Apollo's `/encode_image`, encodes the user's --query via
+//! `/encode_text`, and prints the top-K most similar photos by cosine
+//! similarity so the operator can eyeball quality before committing to
+//! the persistence phase (column populated by backlog drain, search
+//! endpoint, UI).
+//!
+//! Usage:
+//!     cargo run --release --bin probe_clip_search -- \
+//!         --library 1 --limit 200 --query "a beach at sunset" --top 10
+//!
+//! Env: standard ImageApi `.env`. Requires either
+//! `APOLLO_CLIP_API_BASE_URL` or `APOLLO_API_BASE_URL` to be set.
+
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Mutex};
+use std::time::Instant;
+
+use clap::Parser;
+use log::{info, warn};
+
+use image_api::ai::clip_client::{ClipClient, ClipError, EncodeImageMeta};
+use image_api::database::{ExifDao, SqliteExifDao, connect};
+use image_api::exif;
+use image_api::file_types;
+use image_api::libraries::{self, Library};
+
+#[derive(Parser, Debug)]
+#[command(name = "probe_clip_search")]
+#[command(about = "Top-K CLIP semantic search over a sample of image_exif rows")]
+struct Args {
+    /// Library id to sample from.
+    #[arg(long)]
+    library: i32,
+
+    /// Max files to encode. CPU inference is slow (~1-3 s per photo at
+    /// ViT-L/14); start small and grow once GPU is sorted.
+    #[arg(long, default_value_t = 50)]
+    limit: usize,
+
+    /// Natural-language query. Must not be empty or whitespace-only.
+    #[arg(long)]
+    query: String,
+
+    /// How many top results to print.
+    #[arg(long, default_value_t = 10)]
+    top: usize,
+
+    /// Offset into the library's rel_path listing.
+    #[arg(long, default_value_t = 0)]
+    offset: i64,
+
+    /// How many DB rows to scan before giving up on hitting the limit.
+    #[arg(long, default_value_t = 5000)]
+    max_scan: i64,
+}
+
+/// Same as `face_watch::read_image_bytes_for_detect` (which is pub(crate)).
+/// Inlined for the throwaway probe.
+fn read_image_bytes(path: &Path) -> std::io::Result<Vec<u8>> {
+    if file_types::needs_ffmpeg_thumbnail(path)
+        && let Some(preview) = exif::extract_embedded_jpeg_preview(path)
+    {
+        return Ok(preview); // flagged format with an embedded JPEG preview: send the preview
+    }
+    std::fs::read(path) // otherwise (or when no preview was extracted) send the file bytes as-is
+}
+
+/// Decode base64 (standard alphabet) little-endian f32 bytes; errors on bad base64 or ragged length.
+fn decode_f32_vec(b64: &str) -> anyhow::Result<Vec<f32>> {
+    use base64::Engine; // trait that provides `.decode` on the engine value
+    let bytes = base64::engine::general_purpose::STANDARD.decode(b64.as_bytes())?;
+    if bytes.len() % 4 != 0 {
+        anyhow::bail!("embedding byte length {} not divisible by 4", bytes.len());
+    }
+    let mut out = Vec::with_capacity(bytes.len() / 4);
+    for chunk in bytes.chunks_exact(4) {
+        out.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
+    }
+    Ok(out) // note: an empty payload yields an empty vector, not an error
+}
+
+/// Plain dot product. Apollo L2-normalizes both sides, so this is cosine sim.
+fn dot(a: &[f32], b: &[f32]) -> f32 {
+    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum() // zip stops at the shorter slice
+}
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    dotenv::dotenv().ok(); // load .env first so a RUST_LOG set there reaches the logger
+    env_logger::init();
+
+    let args = Args::parse();
+    if args.query.trim().is_empty() {
+        anyhow::bail!("--query must not be empty");
+    }
+
+    let client = ClipClient::from_env();
+    if !client.is_enabled() {
+        anyhow::bail!(
+            "ClipClient disabled: set APOLLO_CLIP_API_BASE_URL or APOLLO_API_BASE_URL in .env"
+        );
+    }
+
+    match client.health().await {
+        Ok(h) => info!(
+            "clip engine: loaded={} device={} model={} dim={}",
+            h.loaded, h.device, h.model_version, h.embedding_dim
+        ),
+        Err(e) => warn!("health probe failed (continuing): {e}"),
+    }
+
+    let mut seed_conn = connect();
+    if let Some(base) = dotenv::var("BASE_PATH").ok().as_deref() {
+        libraries::seed_or_patch_from_env(&mut seed_conn, base);
+    }
+    let libs = libraries::load_all(&mut seed_conn);
+    drop(seed_conn);
+    let lib: Library = libs
+        .into_iter()
+        .find(|l| l.id == args.library)
+        .ok_or_else(|| anyhow::anyhow!("library id {} not found", args.library))?;
+    info!(
+        "probing library #{} ({}) at {}",
+        lib.id, lib.name, lib.root_path
+    );
+
+    let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
+    let ctx = opentelemetry::Context::new();
+
+    // Encode the query up-front so a failing text encode (service down,
+    // rejected query) aborts the run before the long image-encode loop.
+    let query_resp = client
+        .encode_text(&args.query)
+        .await
+        .map_err(|e| anyhow::anyhow!("encode_text: {e}"))?;
+    let query_vec = decode_f32_vec(&query_resp.embedding)?;
+    info!(
+        "query encoded ({}d, {}ms): {:?}",
+        query_resp.embedding_dim, query_resp.duration_ms, args.query
+    );
+
+    // Page through (id, rel_path), filter to images on disk, encode up
+    // to `limit`. Each encoded photo gets scored against the query; all
+    // scores are collected and sorted once after the loop.
+    const PAGE: i64 = 500;
+    let mut offset = args.offset;
+    let mut scanned: i64 = 0;
+    let mut encoded = 0usize;
+    let mut perm_fail = 0usize;
+    let mut transient_fail = 0usize;
+    let root = PathBuf::from(&lib.root_path);
+    let started = Instant::now();
+    // (similarity, rel_path) — we keep all scored results and sort at
+    // the end. With limit≤few-hundred this is trivial.
+    let mut scores: Vec<(f32, String)> = Vec::with_capacity(args.limit);
+
+    'outer: loop {
+        if scanned >= args.max_scan {
+            warn!(
+                "scan cap ({}) reached before hitting limit ({}); bump --max-scan to scan deeper",
+                args.max_scan, args.limit
+            );
+            break;
+        }
+        let rows = {
+            let mut guard = dao.lock().expect("dao lock");
+            guard
+                .list_rel_paths_for_library_page(&ctx, lib.id, PAGE, offset)
+                .map_err(|e| anyhow::anyhow!("list rel_paths: {:?}", e))?
+        };
+        if rows.is_empty() {
+            info!("no more rows after offset {}", offset);
+            break;
+        }
+        offset += rows.len() as i64;
+        scanned += rows.len() as i64;
+
+        for (_id, rel_path) in rows {
+            if encoded >= args.limit {
+                break 'outer;
+            }
+            let abs = root.join(&rel_path);
+            if !file_types::is_image_file(&abs) || !abs.exists() {
+                continue;
+            }
+            let bytes = match read_image_bytes(&abs) {
+                Ok(b) => b,
+                Err(e) => {
+                    warn!("read {rel_path}: {e}");
+                    continue;
+                }
+            };
+            let meta = EncodeImageMeta {
+                content_hash: String::new(),
+                library_id: lib.id,
+                rel_path: rel_path.clone(),
+            };
+            let call_start = Instant::now();
+            match client.encode_image(bytes, meta).await {
+                Ok(resp) => {
+                    encoded += 1; // counts toward --limit even if the decode below fails
+                    let vec = match decode_f32_vec(&resp.embedding) {
+                        Ok(v) => v,
+                        Err(e) => {
+                            warn!("decode {rel_path}: {e}");
+                            continue;
+                        }
+                    };
+                    if vec.len() != query_vec.len() {
+                        warn!(
+                            "dim mismatch for {rel_path}: image={} query={}",
+                            vec.len(),
+                            query_vec.len()
+                        );
+                        continue;
+                    }
+                    let sim = dot(&vec, &query_vec);
+                    scores.push((sim, rel_path.clone()));
+                    if encoded.is_multiple_of(10) {
+                        info!(
+                            "progress: {} encoded, {:.1}s elapsed",
+                            encoded,
+                            started.elapsed().as_secs_f32()
+                        );
+                    }
+                    let _ = call_start; // keeps the timer "used"; per-call latency is not reported
+                }
+                Err(ClipError::Permanent(e)) => {
+                    perm_fail += 1;
+                    warn!("permanent encode failure for {rel_path}: {e}");
+                }
+                Err(ClipError::Transient(e)) => {
+                    transient_fail += 1;
+                    warn!("transient encode failure for {rel_path}: {e}");
+                }
+                Err(ClipError::Disabled) => {
+                    anyhow::bail!("clip client became disabled mid-run; impossible");
+                }
+            }
+        }
+    }
+
+    scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
+    let elapsed = started.elapsed();
+    println!();
+    println!(
+        "── top {} for query: {:?} ──",
+        args.top.min(scores.len()),
+        args.query
+    );
+    for (i, (sim, path)) in scores.iter().take(args.top).enumerate() {
+        println!("[{:>2}] sim={:.3}  {}", i + 1, sim, path);
+    }
+    println!();
+    println!("── summary ─────────────────────────────────────");
+    println!("query                : {:?}", args.query);
+    println!("scanned rows         : {scanned}");
+    println!("encoded photos       : {encoded}");
+    println!("permanent failures   : {perm_fail}");
+    println!("transient failures   : {transient_fail}");
+    println!("elapsed              : {:.1}s", elapsed.as_secs_f32());
+    if encoded > 0 {
+        println!(
+            "throughput           : {:.2} photos/s ({:.0}ms/photo avg)",
+            encoded as f32 / elapsed.as_secs_f32().max(0.001),
+            elapsed.as_millis() as f32 / encoded as f32
+        );
+    }
+    Ok(())
+}
@@ -0,0 +1,352 @@
+//! `/photos/search?q=<text>` — CLIP semantic photo search.
+//!
+//! The route lives outside `files.rs` to keep that 1500+ line module
+//! focused on EXIF / tag listing. The flow is:
+//!
+//! 1. Parse query params (`q`, `limit`, `offset`, `threshold`, optional scope/version filters).
+//! 2. Call Apollo's `/api/internal/clip/encode_text` to get the query
+//!    vector (L2-normalized 768-d f32 for ViT-L/14).
+//! 3. Load every `(content_hash, clip_embedding)` for the scope from
+//!    `image_exif` via `ExifDao::list_clip_index`. ~28–43 MB for a 14k
+//!    library at ViT-L/14; loaded fresh per request — fast enough for
+//!    v1, optimize via an AppState cache later if needed.
+//! 4. Dot product (= cosine since both sides are L2-normalized), filter
+//!    above `threshold`, top-K by score.
+//! 5. Resolve each surviving hash back to a `(library_id, rel_path)` so
+//!    the frontend can render the photo / hand off to the carousel.
+//!
+//! Response shape is intentionally minimal — paths + score — so the
+//! frontend can reuse existing PhotoGrid rendering by joining against
+//! `/api/photos/match` (or calling `/image/metadata` lazily). Don't
+//! bake camera/EXIF metadata into this route; it would force a fan-out
+//! per result and balloon the response.
+
+use crate::AppState;
+use crate::ai::clip_client::ClipError;
+use crate::database::ExifDao;
+use actix_web::{HttpResponse, Result as ActixResult, web};
+use base64::Engine;
+use serde::{Deserialize, Serialize};
+use std::sync::Mutex;
+
+#[derive(Debug, Deserialize)]
+pub struct SearchQuery {
+    /// Natural-language query. Required; empty or whitespace-only triggers 400.
+    pub q: String,
+    /// Max results to return in this page. Clamped to 1..=200 server-side.
+    /// Defaults to 20. Pair with `offset` for pagination.
+    #[serde(default = "default_limit")]
+    pub limit: usize,
+    /// Zero-based offset into the sorted-and-filtered result set. The
+    /// scoring loop still runs over the full embedding matrix on every
+    /// page (cheap at personal-library scale — sub-100ms — and avoids
+    /// stateful pagination cursors). Defaults to 0.
+    #[serde(default)]
+    pub offset: usize,
+    /// Cosine-similarity floor below which results are dropped.
+    /// 0.20 is the rough "this is plausibly relevant" line for OpenAI
+    /// CLIP; tunable per call (clamped to [-1, 1]). Defaults to 0.20.
+    #[serde(default = "default_threshold")]
+    pub threshold: f32,
+    /// Optional single-library scope. Legacy param — new clients pass
+    /// `library_ids` instead so multi-select scopes (Apollo's HUD library
+    /// chips, FileViewer-React's library picker) actually filter. Kept
+    /// for back-compat; `library_ids` wins when both are supplied.
+    pub library: Option<i32>,
+    /// Optional multi-library scope, comma-separated id list
+    /// (`?library_ids=1,3`). Empty / omitted = every enabled library
+    /// (the historical default). Apollo and FileViewer-React both send
+    /// this when 2+ libraries are selected; the single-library case
+    /// works through either param interchangeably.
+    pub library_ids: Option<String>,
+    /// Optional model-version filter. Defaults to the model version
+    /// reported with the encoded query. Forces a strict join so
+    /// mid-flight model swaps can't mix geometries in a single response.
+    #[serde(default)]
+    pub model_version: Option<String>,
+}
+
+fn default_limit() -> usize {
+    20 // serde default for `SearchQuery::limit` when the param is omitted
+}
+
+fn default_threshold() -> f32 {
+    0.20 // serde default for `SearchQuery::threshold` when the param is omitted
+}
+
+#[derive(Debug, Serialize)]
+pub struct SearchHit {
+    pub library_id: i32, // library of the row found by content hash
+    pub rel_path: String, // first path listed for the hash, else that row's file_path
+    pub content_hash: String,
+    /// Cosine similarity in [-1, 1]. In practice OpenAI CLIP returns
+    /// 0.10–0.40 for the typical photo library.
+    pub score: f32,
+}
+
+#[derive(Debug, Serialize)]
+pub struct SearchResponse {
+    pub query: String, // the trimmed query text that was encoded
+    pub model_version: String, // version reported with the encoded query
+    pub threshold: f32, // effective floor after clamping to [-1, 1]
+    /// Total embeddings scored (= every photo in scope with a stored
+    /// embedding). Same value across pages of the same query.
+    pub considered: usize,
+    /// Count of results above threshold, before pagination. Lets the
+    /// client decide whether a "Load more" button is meaningful and
+    /// stop fetching when ``offset + results.len() >= total_matching``.
+    pub total_matching: usize,
+    pub offset: usize,
+    pub results: Vec<SearchHit>, // can be shorter than the page: unresolved hashes are skipped
+}
+
+#[derive(Debug, Serialize)]
+struct SearchError {
+    error: String, // human-readable reason; JSON body of every non-200 reply from `search_photos`
+}
+
+/// Decode little-endian f32 bytes (a stored `clip_embedding` BLOB, or the
+/// decoded query vector) into a `Vec<f32>`. Returns `None` when the input
+/// is empty or its length is not a multiple of 4, so callers can skip it.
+fn decode_embedding(bytes: &[u8]) -> Option<Vec<f32>> {
+    if bytes.is_empty() || !bytes.len().is_multiple_of(4) {
+        return None;
+    }
+    let mut out = Vec::with_capacity(bytes.len() / 4);
+    for chunk in bytes.chunks_exact(4) {
+        out.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
+    }
+    Some(out)
+}
+
+#[inline]
+fn dot(a: &[f32], b: &[f32]) -> f32 {
+    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum() // dot product; zip stops at the shorter slice
+}
+
+pub async fn search_photos(
+    state: web::Data<AppState>,
+    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
+    query: web::Query<SearchQuery>,
+) -> ActixResult<HttpResponse> {
+    let q_text = query.q.trim().to_string();
+    if q_text.is_empty() {
+        return Ok(HttpResponse::BadRequest().json(SearchError {
+            error: "query parameter `q` is required".into(),
+        }));
+    }
+    if !state.clip_client.is_enabled() {
+        return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
+            error: "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
+        }));
+    }
+
+    let limit = query.limit.clamp(1, 200);
+    let offset = query.offset;
+    let threshold = query.threshold.clamp(-1.0, 1.0);
+
+    // 1. Encode the query text. Fast — Apollo's text encoder is ~50ms
+    // on CPU. Bail with a clear error message if Apollo's down so the
+    // user sees "service unavailable" rather than empty results.
+    let query_resp = match state.clip_client.encode_text(&q_text).await {
+        Ok(r) => r,
+        Err(ClipError::Permanent(e)) => {
+            return Ok(HttpResponse::BadRequest().json(SearchError {
+                error: format!("query rejected: {e}"),
+            }));
+        }
+        Err(ClipError::Transient(e)) => {
+            return Ok(HttpResponse::BadGateway().json(SearchError {
+                error: format!("CLIP service unavailable: {e}"),
+            }));
+        }
+        Err(ClipError::Disabled) => {
+            return Ok(HttpResponse::ServiceUnavailable().json(SearchError {
+                error: "CLIP service disabled".into(),
+            }));
+        }
+    };
+    // decode_embedding works on raw bytes; the wire format is b64.
+    let query_bytes = base64::engine::general_purpose::STANDARD
+        .decode(query_resp.embedding.as_bytes())
+        .unwrap_or_default(); // bad b64 becomes empty bytes, which decode_embedding rejects below
+    let query_vec = match decode_embedding(&query_bytes) {
+        Some(v) => v,
+        None => {
+            return Ok(HttpResponse::BadGateway().json(SearchError {
+                error: "CLIP service returned a malformed query embedding".into(),
+            }));
+        }
+    };
+
+    // 2. Decide which library scope to search. `library_ids` (multi)
+    // wins over the legacy `library` (single) when both are present;
+    // either / both empty falls back to "every enabled library".
+    let library_ids: Vec<i32> = if let Some(raw) = query.library_ids.as_deref() {
+        let mut out: Vec<i32> = Vec::new();
+        for piece in raw.split(',') {
+            let trimmed = piece.trim();
+            if trimmed.is_empty() {
+                continue;
+            }
+            match trimmed.parse::<i32>() {
+                Ok(id) => {
+                    if !out.contains(&id) {
+                        out.push(id);
+                    }
+                }
+                Err(_) => {
+                    return Ok(HttpResponse::BadRequest().json(SearchError {
+                        error: format!("invalid library_ids entry: {trimmed:?}"),
+                    }));
+                }
+            }
+        }
+        out
+    } else if let Some(id) = query.library {
+        vec![id]
+    } else {
+        Vec::new()
+    };
+
+    // 3. Pull the (hash, embedding) matrix. Lock contention here is
+    // bounded — one big SELECT while holding the shared ExifDao mutex,
+    // released (end of this block) before scoring. If this becomes a
+    // hotspot we'll cache the decoded matrix in AppState with TTL.
+    let ctx = opentelemetry::Context::current();
+    let rows: Vec<(String, Vec<u8>)> = {
+        let mut dao = exif_dao.lock().expect("exif dao");
+        match dao.list_clip_index(
+            &ctx,
+            &library_ids,
+            query
+                .model_version
+                .as_deref()
+                .or(Some(&query_resp.model_version)), // default: version reported with the query embedding
+        ) {
+            Ok(r) => r,
+            Err(e) => {
+                log::warn!("clip_search: list_clip_index failed: {:?}", e);
+                return Ok(HttpResponse::InternalServerError().json(SearchError {
+                    error: "failed to load search index".into(),
+                }));
+            }
+        }
+    };
+    let considered = rows.len();
+    if considered == 0 {
+        return Ok(HttpResponse::Ok().json(SearchResponse {
+            query: q_text,
+            model_version: query_resp.model_version,
+            threshold,
+            considered,
+            total_matching: 0,
+            offset,
+            results: Vec::new(),
+        }));
+    }
+
+    // 4. Score. Preallocate for the worst case; rows that fail to decode,
+    // differ in dimension, or score below `threshold` are dropped. Sort once.
+    let mut scored: Vec<(f32, String)> = Vec::with_capacity(considered);
+    for (hash, blob) in rows {
+        let Some(emb) = decode_embedding(&blob) else {
+            continue;
+        };
+        if emb.len() != query_vec.len() {
+            continue;
+        }
+        let sim = dot(&emb, &query_vec);
+        if sim < threshold {
+            continue;
+        }
+        scored.push((sim, hash));
+    }
+    scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
+    let total_matching = scored.len();
+    // Pagination — slice the sorted list at `[offset, offset+limit)`.
+    // Offsets past the end produce empty pages rather than an error so
+    // the client can stop fetching naturally on "load more" past the end.
+    let scored: Vec<(f32, String)> = if offset >= total_matching {
+        Vec::new()
+    } else {
+        let end = (offset + limit).min(total_matching);
+        scored[offset..end].to_vec()
+    };
+
+    if scored.is_empty() {
+        return Ok(HttpResponse::Ok().json(SearchResponse {
+            query: q_text,
+            model_version: query_resp.model_version,
+            threshold,
+            considered,
+            total_matching,
+            offset,
+            results: Vec::new(),
+        }));
+    }
+
+    // 5. Resolve each surviving hash back to a `(library_id, rel_path)`.
+    // `get_rel_paths_for_hashes` returns every rel_path per hash; we pick
+    // the first one for the result. Apollo / the UI can fetch alternatives
+    // via /image/metadata when needed.
+    let hashes: Vec<String> = scored.iter().map(|(_, h)| h.clone()).collect();
+    let path_map = {
+        let mut dao = exif_dao.lock().expect("exif dao");
+        match dao.get_rel_paths_for_hashes(&ctx, &hashes) {
+            Ok(m) => m,
+            Err(e) => {
+                log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
+                return Ok(HttpResponse::InternalServerError().json(SearchError {
+                    error: "failed to resolve photo paths".into(),
+                }));
+            }
+        }
+    };
+
+    // We need (library_id, rel_path) — get_rel_paths_for_hashes only
+    // returns rel_paths. Cross-reference via find_by_content_hash to
+    // pick the library too. One call per hash on this page (at most
+    // 200, default 20), all under a single lock acquisition.
+    let mut results = Vec::with_capacity(scored.len());
+    {
+        let mut dao = exif_dao.lock().expect("exif dao");
+        for (score, hash) in scored {
+            let row = match dao.find_by_content_hash(&ctx, &hash) {
+                Ok(Some(r)) => r,
+                Ok(None) => continue,
+                Err(e) => {
+                    log::warn!(
+                        "clip_search: find_by_content_hash failed for {}: {:?}",
+                        hash,
+                        e
+                    );
+                    continue;
+                }
+            };
+            // Prefer get_rel_paths_for_hashes's first entry, falling back
+            // to the ImageExif row. NOTE(review): `library_id` always comes
+            // from `row`; confirm both lookups agree for cross-library dupes.
+            let rel_path = path_map
+                .get(&hash)
+                .and_then(|paths| paths.first().cloned())
+                .unwrap_or(row.file_path);
+            results.push(SearchHit {
+                library_id: row.library_id,
+                rel_path,
+                content_hash: hash,
+                score,
+            });
+        }
+    }
+
+    Ok(HttpResponse::Ok().json(SearchResponse {
+        query: q_text,
+        model_version: query_resp.model_version,
+        threshold,
+        considered,
+        total_matching,
+        offset,
+        results,
+    }))
+}
@@ -0,0 +1,246 @@
+//! CLIP-encoding pass for the file watcher.
+//!
+//! `process_clip_backlog` in `backfill.rs` calls [`run_clip_encoding_pass`]
+//! with the page of candidates returned by
+//! `ExifDao::list_clip_unencoded_candidates`. We walk those, fan out K
+//! parallel encode calls to Apollo, and persist the resulting embeddings
+//! into `image_exif.clip_embedding` / `clip_model_version`.
+//!
+//! Unlike the face pipeline, CLIP has no marker rows — a permanent
+//! failure (un-decodable bytes) leaves the row's `clip_embedding` NULL
+//! and the drain will retry on the next tick. For personal-library
+//! scale this is fine; the per-tick cap bounds the wasted work, and
+//! `file_types::is_image_file` filters out videos / non-media client-
+//! side so most permanent failures are decoded-but-corrupt files (rare).
+//!
+//! The watcher thread isn't in any pre-existing async context, so we
+//! build a short-lived tokio runtime per pass and `block_on` the join
+//! of K encode futures. Concurrency knob: `CLIP_ENCODE_CONCURRENCY`
+//! (default 4 — lower than faces because Apollo's CLIP path doesn't
+//! release the GIL between preprocess and forward as cleanly).
+
+use crate::ai::clip_client::{ClipClient, ClipError, EncodeImageMeta};
+use crate::database::ExifDao;
+use crate::exif;
+use crate::file_types;
+use crate::libraries::Library;
+use crate::memories::PathExcluder;
+use log::{debug, info, warn};
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+use tokio::sync::Semaphore;
+
+/// One file the watcher would like to CLIP-encode. Built from the DAO
+/// `list_clip_unencoded_candidates` result — needs the `content_hash`
+/// for traceability in Apollo's log lines, even though the embedding
+/// itself is keyed on `(library_id, rel_path)` for the back-write.
+#[derive(Debug, Clone)]
+pub struct ClipCandidate {
+    /// Path relative to the library root. It is joined onto the root to
+    /// locate the file on disk and is passed to the embedding back-write.
+    pub rel_path: String,
+    /// Content hash of the file; copied into the encode request metadata.
+    pub content_hash: String,
+}
+
+/// Synchronous entry point for one CLIP-encoding pass.
+///
+/// Returns once every candidate has been processed (or definitively
+/// skipped). It is a no-op when the client is disabled, when `candidates`
+/// is empty, or when filtering leaves nothing to encode, so the caller can
+/// call it unconditionally.
+///
+/// * `library` — supplies the id, root path and name used for path
+///   resolution and log lines.
+/// * `excluded_dirs` — forwarded to [`filter_excluded`].
+/// * `clip_client` — cloned into every spawned encode task.
+/// * `exif_dao` — shared DAO handle used for the embedding back-write.
+/// * `candidates` — the files to attempt in this pass.
+///
+/// Concurrency comes from `CLIP_ENCODE_CONCURRENCY` (default 4; unset,
+/// unparsable or zero values fall back to the default). A short-lived
+/// multi-thread tokio runtime is built per call; if that fails the pass is
+/// skipped with a warning. Failures are logged, never returned.
+pub fn run_clip_encoding_pass(
+    library: &Library,
+    excluded_dirs: &[String],
+    clip_client: &ClipClient,
+    exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
+    candidates: Vec<ClipCandidate>,
+) {
+    if !clip_client.is_enabled() {
+        return;
+    }
+    if candidates.is_empty() {
+        return;
+    }
+
+    let base = Path::new(&library.root_path);
+    let filtered = filter_excluded(base, excluded_dirs, candidates, Some(&library.name));
+    if filtered.is_empty() {
+        return;
+    }
+
+    let concurrency: usize = std::env::var("CLIP_ENCODE_CONCURRENCY")
+        .ok()
+        .and_then(|s| s.parse().ok())
+        .filter(|n: &usize| *n > 0)
+        .unwrap_or(4);
+
+    info!(
+        "clip_watch: encoding {} candidate(s) for library '{}' (concurrency {})",
+        filtered.len(),
+        library.name,
+        concurrency
+    );
+
+    let rt = match tokio::runtime::Builder::new_multi_thread()
+        .worker_threads(2)
+        .enable_all()
+        .build()
+    {
+        Ok(rt) => rt,
+        Err(e) => {
+            warn!("clip_watch: failed to build tokio runtime: {e}");
+            return;
+        }
+    };
+
+    let library_id = library.id;
+    let library_root = library.root_path.clone();
+    rt.block_on(async move {
+        // Every candidate gets its own task up front; the semaphore is what
+        // actually bounds how many are reading / encoding at once.
+        let sem = Arc::new(Semaphore::new(concurrency));
+        let mut handles = Vec::with_capacity(filtered.len());
+        for cand in filtered {
+            let permit_sem = sem.clone();
+            let clip_client = clip_client.clone();
+            let exif_dao = exif_dao.clone();
+            let library_root = library_root.clone();
+            handles.push(tokio::spawn(async move {
+                let _permit = permit_sem.acquire().await.expect("clip semaphore");
+                process_one(library_id, &library_root, cand, &clip_client, exif_dao).await;
+            }));
+        }
+        for h in handles {
+            // A join error means the task panicked or was cancelled. Log it
+            // rather than dropping the candidate without a trace.
+            if let Err(e) = h.await {
+                warn!("clip_watch: encode task did not complete: {e}");
+            }
+        }
+    });
+}
+
+/// Encode a single candidate and persist its embedding.
+///
+/// Reads the image bytes, sends them to the CLIP client and, on success,
+/// writes the decoded embedding and model version back through the DAO.
+/// Every failure path only logs and returns; nothing is recorded against
+/// the row by this function.
+async fn process_one(
+    library_id: i32,
+    library_root: &str,
+    cand: ClipCandidate,
+    clip_client: &ClipClient,
+    exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
+) {
+    let ClipCandidate {
+        rel_path,
+        content_hash,
+    } = cand;
+
+    let image_path = Path::new(library_root).join(&rel_path);
+    let bytes = match read_image_bytes_for_encode(&image_path) {
+        Ok(bytes) => bytes,
+        Err(err) => {
+            // Same rationale as face_watch: don't mark — the file may
+            // have been moved/renamed mid-scan; let the next pass retry.
+            warn!(
+                "clip_watch: read failed for {} (lib {}): {}",
+                rel_path, library_id, err
+            );
+            return;
+        }
+    };
+
+    // Taken before the encode await; handed to the DAO write further down.
+    let ctx = opentelemetry::Context::current();
+    let meta = EncodeImageMeta {
+        content_hash,
+        library_id,
+        rel_path: rel_path.clone(),
+    };
+
+    let resp = match clip_client.encode_image(bytes, meta).await {
+        Ok(resp) => resp,
+        Err(ClipError::Permanent(err)) => {
+            // No marker — the row sits with NULL embedding and the drain
+            // retries next pass. For personal-library scale the cost of
+            // re-attempting permanently-broken files is bounded by the
+            // per-tick cap. If this becomes a recurring noise source,
+            // add a `clip_status` column with `failed` semantics like
+            // face_detections has.
+            warn!(
+                "clip_watch: permanent failure on {} (will retry next pass): {}",
+                rel_path, err
+            );
+            return;
+        }
+        Err(ClipError::Transient(err)) => {
+            debug!(
+                "clip_watch: transient on {}: {} (will retry next pass)",
+                rel_path, err
+            );
+            return;
+        }
+        // Defensive — the entry-point already checked is_enabled().
+        Err(ClipError::Disabled) => return,
+    };
+
+    let emb_bytes = match resp.decode_embedding() {
+        Ok(decoded) => decoded,
+        Err(err) => {
+            warn!("clip_watch: bad embedding for {}: {:?}", rel_path, err);
+            return;
+        }
+    };
+
+    let mut dao = exif_dao.lock().expect("exif dao");
+    let written = dao.backfill_clip_embedding(
+        &ctx,
+        library_id,
+        &rel_path,
+        &emb_bytes,
+        &resp.model_version,
+    );
+    match written {
+        Ok(_) => debug!(
+            "clip_watch: {} → dim={} ({}ms, {})",
+            rel_path, resp.embedding_dim, resp.duration_ms, resp.model_version
+        ),
+        Err(err) => warn!(
+            "clip_watch: backfill_clip_embedding failed for {}: {:?}",
+            rel_path, err
+        ),
+    }
+}
+
+/// Keep only the candidates worth sending to the encoder.
+///
+/// A candidate is dropped when `file_types::is_image_file` rejects its
+/// absolute path (`base` joined with `rel_path`), or when `excluded_dirs`
+/// is non-empty and the `PathExcluder` built from it reports that path as
+/// excluded. Each drop is logged at debug level; `library_name` is used
+/// only in those log lines (`<unknown>` when absent). Survivors keep their
+/// input order. Counterpart of `face_watch::filter_excluded`.
+pub fn filter_excluded(
+    base: &Path,
+    excluded_dirs: &[String],
+    candidates: Vec<ClipCandidate>,
+    library_name: Option<&str>,
+) -> Vec<ClipCandidate> {
+    let lib_label = library_name.unwrap_or("<unknown>");
+    // No excluded dirs means no excluder is built at all.
+    let excluder = (!excluded_dirs.is_empty()).then(|| PathExcluder::new(base, excluded_dirs));
+
+    let mut kept = candidates;
+    kept.retain(|cand| {
+        let abs = base.join(&cand.rel_path);
+
+        // The extension check runs first, ahead of the exclusion check.
+        if !file_types::is_image_file(&abs) {
+            debug!(
+                "clip_watch: skipping non-image '{}' (lib {})",
+                cand.rel_path, lib_label
+            );
+            return false;
+        }
+
+        let excluded = excluder.as_ref().is_some_and(|ex| ex.is_excluded(&abs));
+        if excluded {
+            debug!(
+                "clip_watch: skipping excluded '{}' (lib {})",
+                cand.rel_path, lib_label
+            );
+        }
+        !excluded
+    });
+    kept
+}
+
+/// Load the bytes that will be sent for CLIP encoding.
+///
+/// For paths where `file_types::needs_ffmpeg_thumbnail` is true, the
+/// embedded JPEG preview from `exif::extract_embedded_jpeg_preview` is
+/// returned when one is available. In every other case — including when
+/// no preview is found — the file is read from disk as-is. Same approach
+/// as `face_watch::read_image_bytes_for_detect`: RAW / HEIC files don't
+/// decode in Apollo's PIL pipeline, so the preview stands in for them.
+///
+/// # Errors
+/// Propagates the `std::fs::read` error when the direct read fails.
+pub fn read_image_bytes_for_encode(path: &Path) -> std::io::Result<Vec<u8>> {
+    let embedded = if file_types::needs_ffmpeg_thumbnail(path) {
+        exif::extract_embedded_jpeg_preview(path)
+    } else {
+        None
+    };
+
+    match embedded {
+        Some(preview) => Ok(preview),
+        None => std::fs::read(path),
+    }
+}
@@ -50,14 +50,32 @@ pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
    thumbs_dir.join(shard).join(format!("{}.jpg", hash))
 }

+/// Location of the hash-keyed large preview:
+/// `<thumbs_dir>/_large/<shard>/<hash>.jpg`, where `<shard>` is whatever
+/// `shard_prefix(hash)` yields. Sharing the thumbnail root avoids a second
+/// env var for deployments; the `_large/` namespace keeps the larger
+/// derivative from colliding with the existing 200px shards.
+pub fn large_preview_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
+    let file_name = format!("{}.jpg", hash);
+    let mut path = thumbs_dir.join("_large");
+    path.push(shard_prefix(hash));
+    path.push(file_name);
+    path
+}
+
+/// Location of the hash-keyed xlarge preview:
+/// `<thumbs_dir>/_xlarge/<shard>/<hash>.jpg`, where `<shard>` is whatever
+/// `shard_prefix(hash)` yields.
+pub fn xlarge_preview_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
+    let file_name = format!("{}.jpg", hash);
+    let mut path = thumbs_dir.join("_xlarge");
+    path.push(shard_prefix(hash));
+    path.push(file_name);
+    path
+}
+
 /// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`.
 /// The playlist lives at `playlist.m3u8` inside this directory and its
-/// segments are co-located so HLS relative references Just Work.
-///
-/// Allow-dead until Branch B/C rewires the HLS pipeline to use it; the
-/// helper lives here today so Branch A's path layout decisions stay
-/// adjacent to thumbnail/legacy ones.
-#[allow(dead_code)]
+/// segments are co-located so HLS relative references Just Work. See
+/// [`crate::video::hls_paths`] for the filename constants and the
+/// per-file helpers built on this dir.
 pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf {
    let shard = shard_prefix(hash);
    video_dir.join(shard).join(hash)
@@ -123,6 +141,9 @@ mod tests {
        let p = thumbnail_path(thumbs, "abcdef0123");
        assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg"));

+        let l = large_preview_path(thumbs, "abcdef0123");
+        assert_eq!(l, PathBuf::from("/tmp/thumbs/_large/ab/abcdef0123.jpg"));
+
        let video = Path::new("/tmp/video");
        let d = hls_dir(video, "1234deadbeef");
        assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef"));
@@ -194,6 +194,8 @@ pub enum MediaType {
 #[serde(rename_all = "lowercase")]
 pub enum PhotoSize {
    Full,
+    XLarge,
+    Large,
    Thumb,
 }

@@ -274,7 +274,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
                source_file: event.source_file,
            })
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn store_events_batch(
@@ -348,7 +348,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            Ok(inserted)
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn find_events_in_range(
@@ -373,7 +373,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
            .map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_similar_events(
@@ -429,7 +429,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_relevant_events_hybrid(
@@ -500,7 +500,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
                Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect())
            }
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn event_exists(
@@ -528,7 +528,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            Ok(result.count > 0)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -551,6 +551,6 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            Ok(result.count)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }
 }
@@ -190,7 +190,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
                model_version: summary.model_version,
            })
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn find_similar_summaries(
@@ -286,7 +286,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {

            Ok(top_results)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_similar_summaries_with_time_weight(
@@ -408,7 +408,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {

            Ok(top_results)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn summary_exists(
@@ -435,7 +435,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {

            Ok(count > 0)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_summary_count(
@@ -457,7 +457,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
            .map(|r| r.count)
            .map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn has_any_summaries(&mut self, context: &opentelemetry::Context) -> Result<bool, DbError> {
@@ -481,7 +481,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {

            Ok(!rows.is_empty())
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }
 }

@@ -0,0 +1,681 @@
+use diesel::prelude::*;
+use diesel::sqlite::SqliteConnection;
+use std::ops::DerefMut;
+use std::sync::{Arc, Mutex};
+
+use crate::database::models::{
+    InsertInsightGenerationJob, InsightGenerationJob, InsightGenerationType, InsightJobStatus,
+};
+use crate::database::schema;
+use crate::database::{DbError, DbErrorKind, connect};
+use crate::otel::trace_db_call;
+
+/// Tracks async insight generation jobs. Each call to `create_job` inserts
+/// a new row; the application layer prevents concurrent running jobs by
+/// cancelling the old one before creating a new one.
+///
+/// Every state-changing method other than `create_job` only touches rows
+/// that are still in the running status, so a job that has already been
+/// completed, failed or cancelled is never overwritten.
+pub trait InsightGenerationJobDao: Sync + Send {
+    /// Insert a new running job and return its id. Always creates a new
+    /// row (no upsert).
+    ///
+    /// NOTE(review): this comment previously also said terminal-state rows
+    /// for the same key are cleaned up first. The SQLite implementation in
+    /// this file performs no such cleanup — confirm which is intended.
+    fn create_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        file_path: &str,
+        generation_type: InsightGenerationType,
+    ) -> Result<i32, DbError>;
+
+    /// Mark a job as completed with the resulting insight id. Only updates
+    /// if the job is still in "running" status (prevents overwriting a
+    /// cancelled job with a late-completing task). A job that is no longer
+    /// running is left untouched and the call still returns `Ok(())`.
+    fn complete_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        job_id: i32,
+        insight_id: i32,
+    ) -> Result<(), DbError>;
+
+    /// Mark a job as failed with an error message. Only updates if the job
+    /// is still in "running" status; otherwise nothing changes and the
+    /// call still returns `Ok(())`.
+    fn fail_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        job_id: i32,
+        error_message: &str,
+    ) -> Result<(), DbError>;
+
+    /// Cancel a specific job by id. Only updates if the job is still
+    /// in "running" status. Returns true if a row was updated.
+    fn cancel_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        job_id: i32,
+    ) -> Result<bool, DbError>;
+
+    /// Cancel all running jobs for a given file (matched on library id and
+    /// file path, across generation types). Returns the number of jobs
+    /// cancelled.
+    fn cancel_active_jobs(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        file_path: &str,
+    ) -> Result<usize, DbError>;
+
+    /// Find the latest running job for a given file. Returns None if no
+    /// running job exists.
+    fn get_active_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        file_path: &str,
+    ) -> Result<Option<InsightGenerationJob>, DbError>;
+
+    /// Find any job by id regardless of status. Returns None when no row
+    /// has that id.
+    fn get_job_by_id(
+        &mut self,
+        context: &opentelemetry::Context,
+        job_id: i32,
+    ) -> Result<Option<InsightGenerationJob>, DbError>;
+
+    /// Mark all jobs still in "running" status as "failed" with a recovery
+    /// error message. Returns the number of jobs recovered.
+    fn recover_orphaned_jobs(&mut self, context: &opentelemetry::Context)
+    -> Result<usize, DbError>;
+}
+
+/// SQLite-backed [`InsightGenerationJobDao`]. All access goes through a
+/// single connection guarded by a mutex.
+pub struct SqliteInsightGenerationJobDao {
+    // Locked for the duration of each DAO call.
+    connection: Arc<Mutex<SqliteConnection>>,
+}
+
+impl Default for SqliteInsightGenerationJobDao {
+    /// Equivalent to [`SqliteInsightGenerationJobDao::new`].
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SqliteInsightGenerationJobDao {
+    /// Build a DAO around a fresh connection obtained from `connect()`.
+    pub fn new() -> Self {
+        Self {
+            connection: Arc::new(Mutex::new(connect())),
+        }
+    }
+
+    /// Test-only constructor that wraps a caller-supplied connection
+    /// (e.g. an in-memory database with migrations already applied).
+    #[cfg(test)]
+    pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
+        Self { connection: conn }
+    }
+}
+
+impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
+    /// Insert a new running job row and return its id.
+    ///
+    /// Every call inserts a fresh row; nothing is updated or deleted. The
+    /// id is recovered by selecting the highest-id running row for the
+    /// same `(library_id, file_path, generation_type)`. The insert and that
+    /// lookup share one transaction, so a row written through another
+    /// connection cannot land between them and be returned by mistake.
+    ///
+    /// # Errors
+    /// Returns `DbErrorKind::InsertError` when the insert or the id lookup
+    /// fails.
+    fn create_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        file_path: &str,
+        generation_type: InsightGenerationType,
+    ) -> Result<i32, DbError> {
+        trace_db_call(context, "insert", "create_job", |_span| {
+            use schema::insight_generation_jobs::dsl;
+
+            let mut connection = self
+                .connection
+                .lock()
+                .expect("Unable to lock InsightGenerationJobDao");
+
+            let now = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs() as i64;
+
+            let new_job = InsertInsightGenerationJob {
+                library_id,
+                path: file_path.to_string(),
+                gen_type: generation_type.to_string(),
+                status: InsightJobStatus::Running.to_string(),
+                started_at: now,
+            };
+
+            // Insert + id lookup are one unit of work: either both succeed
+            // or the insert is rolled back.
+            connection.transaction::<i32, anyhow::Error, _>(|conn| {
+                diesel::insert_into(dsl::insight_generation_jobs)
+                    .values(&new_job)
+                    .execute(conn)
+                    .map_err(|e| anyhow::anyhow!("Failed to insert job: {}", e))?;
+
+                dsl::insight_generation_jobs
+                    .filter(
+                        dsl::library_id
+                            .eq(library_id)
+                            .and(dsl::file_path.eq(file_path))
+                            .and(dsl::generation_type.eq(generation_type.as_str()))
+                            .and(dsl::status.eq(InsightJobStatus::Running.as_str())),
+                    )
+                    .select(dsl::id)
+                    .order(dsl::id.desc())
+                    .first::<i32>(conn)
+                    .map_err(|e| anyhow::anyhow!("Failed to get job id: {}", e))
+            })
+        })
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+    }
+
+    /// Move a running job to the completed status, stamping the completion
+    /// time and the id of the insight it produced.
+    ///
+    /// The update is filtered on both the id and the running status, so a
+    /// job that already left the running state matches no rows and is left
+    /// alone; that case still returns `Ok(())`.
+    fn complete_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        job_id: i32,
+        insight_id: i32,
+    ) -> Result<(), DbError> {
+        trace_db_call(context, "update", "complete_job", |_span| {
+            use schema::insight_generation_jobs::dsl;
+
+            let mut db = self
+                .connection
+                .lock()
+                .expect("Unable to lock InsightGenerationJobDao");
+
+            let finished_at = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs() as i64;
+
+            let from_state = InsightJobStatus::Running;
+            let to_state = InsightJobStatus::Completed;
+
+            // Guarding on the running status keeps a late-finishing task
+            // from overwriting a job that was cancelled in the meantime.
+            let outcome = diesel::update(
+                dsl::insight_generation_jobs
+                    .filter(dsl::id.eq(job_id))
+                    .filter(dsl::status.eq(from_state.as_str())),
+            )
+            .set((
+                dsl::status.eq(to_state.as_str()),
+                dsl::completed_at.eq(Some(finished_at)),
+                dsl::result_insight_id.eq(Some(insight_id)),
+            ))
+            .execute(db.deref_mut());
+
+            match outcome {
+                Ok(_) => Ok(()),
+                Err(e) => Err(anyhow::anyhow!("Failed to complete job: {}", e)),
+            }
+        })
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+
+    /// Move a running job to the failed status, stamping the completion
+    /// time and storing `error_message`.
+    ///
+    /// Rows that are no longer running are not matched by the update, so
+    /// they stay as they are and the call still returns `Ok(())`.
+    fn fail_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        job_id: i32,
+        error_message: &str,
+    ) -> Result<(), DbError> {
+        trace_db_call(context, "update", "fail_job", |_span| {
+            use schema::insight_generation_jobs::dsl;
+
+            let mut db = self
+                .connection
+                .lock()
+                .expect("Unable to lock InsightGenerationJobDao");
+
+            let finished_at = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs() as i64;
+
+            let from_state = InsightJobStatus::Running;
+            let to_state = InsightJobStatus::Failed;
+
+            // Only a job that is still running may be marked failed.
+            let outcome = diesel::update(
+                dsl::insight_generation_jobs
+                    .filter(dsl::id.eq(job_id))
+                    .filter(dsl::status.eq(from_state.as_str())),
+            )
+            .set((
+                dsl::status.eq(to_state.as_str()),
+                dsl::completed_at.eq(Some(finished_at)),
+                dsl::error_message.eq(Some(error_message.to_string())),
+            ))
+            .execute(db.deref_mut());
+
+            match outcome {
+                Ok(_) => Ok(()),
+                Err(e) => Err(anyhow::anyhow!("Failed to fail job: {}", e)),
+            }
+        })
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+
+    /// Cancel one job by id, provided it is still running.
+    ///
+    /// Sets the cancelled status, the completion time and the message
+    /// `"cancelled by user"`. Returns `true` when a row was updated and
+    /// `false` when no running row had that id.
+    fn cancel_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        job_id: i32,
+    ) -> Result<bool, DbError> {
+        trace_db_call(context, "update", "cancel_job", |_span| {
+            use schema::insight_generation_jobs::dsl;
+
+            let mut db = self
+                .connection
+                .lock()
+                .expect("Unable to lock InsightGenerationJobDao");
+
+            let finished_at = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs() as i64;
+
+            let from_state = InsightJobStatus::Running;
+            let to_state = InsightJobStatus::Cancelled;
+
+            diesel::update(
+                dsl::insight_generation_jobs
+                    .filter(dsl::id.eq(job_id))
+                    .filter(dsl::status.eq(from_state.as_str())),
+            )
+            .set((
+                dsl::status.eq(to_state.as_str()),
+                dsl::completed_at.eq(Some(finished_at)),
+                dsl::error_message.eq(Some("cancelled by user".to_string())),
+            ))
+            .execute(db.deref_mut())
+            // Zero affected rows means there was nothing left to cancel.
+            .map(|affected| affected > 0)
+            .map_err(|e| anyhow::anyhow!("Failed to cancel job: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+
+    /// Cancel every running job for `(library_id, file_path)`.
+    ///
+    /// Each matched row gets the cancelled status, the completion time and
+    /// the message `"cancelled by newer request"`. Returns how many rows
+    /// were updated (zero when nothing was running).
+    fn cancel_active_jobs(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        file_path: &str,
+    ) -> Result<usize, DbError> {
+        trace_db_call(context, "update", "cancel_active_jobs", |_span| {
+            use schema::insight_generation_jobs::dsl;
+
+            let mut db = self
+                .connection
+                .lock()
+                .expect("Unable to lock InsightGenerationJobDao");
+
+            let finished_at = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs() as i64;
+
+            let from_state = InsightJobStatus::Running;
+            let to_state = InsightJobStatus::Cancelled;
+
+            // The generation type is not part of the filter.
+            diesel::update(
+                dsl::insight_generation_jobs
+                    .filter(dsl::library_id.eq(library_id))
+                    .filter(dsl::file_path.eq(file_path))
+                    .filter(dsl::status.eq(from_state.as_str())),
+            )
+            .set((
+                dsl::status.eq(to_state.as_str()),
+                dsl::completed_at.eq(Some(finished_at)),
+                dsl::error_message.eq(Some("cancelled by newer request".to_string())),
+            ))
+            .execute(db.deref_mut())
+            .map_err(|e| anyhow::anyhow!("Failed to cancel active jobs: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+
+    /// Fetch the running job with the highest id for
+    /// `(library_id, file_path)`, or `None` when no job is running for
+    /// that file.
+    fn get_active_job(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        file_path: &str,
+    ) -> Result<Option<InsightGenerationJob>, DbError> {
+        trace_db_call(context, "query", "get_active_job", |_span| {
+            use schema::insight_generation_jobs::dsl;
+
+            let mut db = self
+                .connection
+                .lock()
+                .expect("Unable to lock InsightGenerationJobDao");
+
+            let running = InsightJobStatus::Running;
+
+            // Newest first, so `first` picks the most recently created row.
+            let found = dsl::insight_generation_jobs
+                .filter(dsl::library_id.eq(library_id))
+                .filter(dsl::file_path.eq(file_path))
+                .filter(dsl::status.eq(running.as_str()))
+                .order(dsl::id.desc())
+                .first::<InsightGenerationJob>(db.deref_mut())
+                .optional();
+
+            found.map_err(|e| anyhow::anyhow!("Failed to get active job: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+    }
+
+    /// Look a job up by id, whatever its status. Returns `None` when no
+    /// row has that id.
+    fn get_job_by_id(
+        &mut self,
+        context: &opentelemetry::Context,
+        job_id: i32,
+    ) -> Result<Option<InsightGenerationJob>, DbError> {
+        trace_db_call(context, "query", "get_job_by_id", |_span| {
+            use schema::insight_generation_jobs::dsl;
+
+            let mut db = self
+                .connection
+                .lock()
+                .expect("Unable to lock InsightGenerationJobDao");
+
+            let by_id = dsl::insight_generation_jobs.filter(dsl::id.eq(job_id));
+            let found = by_id
+                .first::<InsightGenerationJob>(db.deref_mut())
+                .optional();
+
+            found.map_err(|e| anyhow::anyhow!("Failed to get job: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+    }
+
+    /// Mark every job still in the running status as failed, with the
+    /// message `"server crashed while running"` and the current time as
+    /// its completion time. Returns the number of rows updated.
+    fn recover_orphaned_jobs(
+        &mut self,
+        context: &opentelemetry::Context,
+    ) -> Result<usize, DbError> {
+        trace_db_call(context, "update", "recover_orphaned_jobs", |_span| {
+            use schema::insight_generation_jobs::dsl;
+
+            let mut db = self
+                .connection
+                .lock()
+                .expect("Unable to lock InsightGenerationJobDao");
+
+            let finished_at = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs() as i64;
+
+            let from_state = InsightJobStatus::Running;
+            let to_state = InsightJobStatus::Failed;
+
+            // No id or file filter: this sweeps the whole table.
+            diesel::update(
+                dsl::insight_generation_jobs.filter(dsl::status.eq(from_state.as_str())),
+            )
+            .set((
+                dsl::status.eq(to_state.as_str()),
+                dsl::completed_at.eq(Some(finished_at)),
+                dsl::error_message.eq(Some("server crashed while running".to_string())),
+            ))
+            .execute(db.deref_mut())
+            .map_err(|e| anyhow::anyhow!("Failed to recover orphaned jobs: {}", e))
+        })
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use diesel::Connection;
+    use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
+
+    const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
+
+    fn setup_dao() -> SqliteInsightGenerationJobDao {
+        let mut conn = SqliteConnection::establish(":memory:")
+            .expect("Unable to create in-memory db connection");
+        conn.run_pending_migrations(DB_MIGRATIONS)
+            .expect("Failure running DB migrations");
+        SqliteInsightGenerationJobDao::from_connection(Arc::new(Mutex::new(conn)))
+    }
+
+    fn ctx() -> opentelemetry::Context {
+        opentelemetry::Context::new()
+    }
+
+    #[test]
+    fn create_job_inserts_new_row() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        let job_id_1 = dao
+            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
+            .unwrap();
+
+        let job_id_2 = dao
+            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
+            .unwrap();
+
+        assert_ne!(job_id_1, job_id_2, "each create_job call inserts a new row");
+    }
+
+    #[test]
+    fn complete_job_sets_result() {
+        let mut dao = setup_dao();
+        let ctx = ctx();
+
+        let job_id = dao
+            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
+            .unwrap();
+
+        dao.complete_job(&ctx, job_id, 42).unwrap();
+
+        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
+        assert_eq!(job.status, InsightJobStatus::Completed.as_str());
+        assert_eq!(job.result_insight_id, Some(42));
+        assert!(job.completed_at.is_some());
+    }
+
+    /// Failing a job stores the `Failed` status, the supplied error text and
+    /// a `completed_at` value.
+    #[test]
+    fn fail_job_sets_error() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+
+        let new_job_id = job_dao
+            .create_job(&cx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
+            .unwrap();
+        job_dao.fail_job(&cx, new_job_id, "model timeout").unwrap();
+
+        let stored = job_dao.get_job_by_id(&cx, new_job_id).unwrap().unwrap();
+        assert!(stored.completed_at.is_some());
+        assert_eq!(stored.error_message.as_deref(), Some("model timeout"));
+        assert_eq!(stored.status, InsightJobStatus::Failed.as_str());
+    }
+
+    /// A job is reported by `get_active_job` right after creation and stops
+    /// being reported once it has been completed.
+    #[test]
+    fn get_active_job_returns_none_when_completed() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+        let photo = "photos/test.jpg";
+
+        let new_job_id = job_dao
+            .create_job(&cx, 1, photo, InsightGenerationType::Standard)
+            .unwrap();
+
+        // Freshly created: the lookup must return exactly this job.
+        let before = job_dao.get_active_job(&cx, 1, photo).unwrap();
+        assert_eq!(before.map(|job| job.id), Some(new_job_id));
+
+        job_dao.complete_job(&cx, new_job_id, 1).unwrap();
+
+        // Completed: nothing is active for this library/path any more.
+        let after = job_dao.get_active_job(&cx, 1, photo).unwrap();
+        assert!(after.is_none());
+    }
+
+    /// `cancel_active_jobs` reports how many jobs it cancelled, leaves the
+    /// row readable with the `Cancelled` status, and returns 0 on a repeat call.
+    #[test]
+    fn cancel_active_jobs() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+        let photo = "photos/test.jpg";
+
+        let new_job_id = job_dao
+            .create_job(&cx, 1, photo, InsightGenerationType::Standard)
+            .unwrap();
+
+        let first_pass = job_dao.cancel_active_jobs(&cx, 1, photo).unwrap();
+        assert_eq!(first_pass, 1, "should cancel 1 running job");
+
+        // The cancelled job is no longer reported as active...
+        assert!(job_dao.get_active_job(&cx, 1, photo).unwrap().is_none());
+
+        // ...but its row still exists and carries the cancelled status.
+        let stored = job_dao.get_job_by_id(&cx, new_job_id).unwrap().unwrap();
+        assert_eq!(stored.status, InsightJobStatus::Cancelled.as_str());
+
+        // A second pass finds nothing left to cancel.
+        let second_pass = job_dao.cancel_active_jobs(&cx, 1, photo).unwrap();
+        assert_eq!(second_pass, 0, "should return 0 when no running job");
+    }
+
+    /// The same path in two libraries yields two independent jobs:
+    /// completing one must not affect the other's active lookup.
+    #[test]
+    fn get_active_job_scoped_by_library() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+        let photo = "photos/test.jpg";
+
+        let lib1_job = job_dao
+            .create_job(&cx, 1, photo, InsightGenerationType::Standard)
+            .unwrap();
+        let lib2_job = job_dao
+            .create_job(&cx, 2, photo, InsightGenerationType::Standard)
+            .unwrap();
+        assert_ne!(
+            lib1_job, lib2_job,
+            "different libraries should have separate jobs"
+        );
+
+        // Finish only the job that belongs to library 1.
+        job_dao.complete_job(&cx, lib1_job, 1).unwrap();
+
+        // Library 1 has nothing active any more.
+        assert!(job_dao.get_active_job(&cx, 1, photo).unwrap().is_none());
+
+        // Library 2 still reports its own job.
+        let lib2_active = job_dao.get_active_job(&cx, 2, photo).unwrap();
+        assert_eq!(lib2_active.map(|job| job.id), Some(lib2_job));
+    }
+
+    /// `get_job_by_id` returns the row both while the job is running and
+    /// after it has been completed.
+    #[test]
+    fn get_job_by_id_finds_any_status() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+
+        let new_job_id = job_dao
+            .create_job(&cx, 1, "photos/test.jpg", InsightGenerationType::Standard)
+            .unwrap();
+
+        // Lookup while the job is still running.
+        let while_running = job_dao.get_job_by_id(&cx, new_job_id).unwrap().unwrap();
+        assert_eq!(while_running.status, InsightJobStatus::Running.as_str());
+
+        job_dao.complete_job(&cx, new_job_id, 99).unwrap();
+
+        // Lookup after completion: same row, updated status and result id.
+        let when_done = job_dao.get_job_by_id(&cx, new_job_id).unwrap().unwrap();
+        assert_eq!(when_done.result_insight_id, Some(99));
+        assert_eq!(when_done.status, InsightJobStatus::Completed.as_str());
+    }
+
+    /// `recover_orphaned_jobs` fails only the jobs that are still running,
+    /// leaves completed ones alone, and is a no-op on a second call.
+    #[test]
+    fn recover_orphaned_jobs() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+
+        // Two jobs start out running; the first one is completed right away.
+        let finished_job = job_dao
+            .create_job(&cx, 1, "photos/a.jpg", InsightGenerationType::Standard)
+            .unwrap();
+        let orphaned_job = job_dao
+            .create_job(&cx, 1, "photos/b.jpg", InsightGenerationType::Agentic)
+            .unwrap();
+        job_dao.complete_job(&cx, finished_job, 1).unwrap();
+
+        // Only the job that is still running may be touched by recovery.
+        let first_sweep = job_dao.recover_orphaned_jobs(&cx).unwrap();
+        assert_eq!(first_sweep, 1, "should recover exactly 1 running job");
+
+        // The completed job keeps its status.
+        let finished_row = job_dao.get_job_by_id(&cx, finished_job).unwrap().unwrap();
+        assert_eq!(finished_row.status, InsightJobStatus::Completed.as_str());
+
+        // The orphan is now failed and carries the recovery message.
+        let orphaned_row = job_dao.get_job_by_id(&cx, orphaned_job).unwrap().unwrap();
+        assert_eq!(orphaned_row.status, InsightJobStatus::Failed.as_str());
+        assert_eq!(
+            orphaned_row.error_message.as_deref(),
+            Some("server crashed while running")
+        );
+
+        // Running recovery again finds nothing to do.
+        let second_sweep = job_dao.recover_orphaned_jobs(&cx).unwrap();
+        assert_eq!(second_sweep, 0, "no running jobs remain");
+    }
+
+    /// A `complete_job` call that arrives after the job was cancelled must
+    /// leave both the status and the (empty) result id untouched.
+    #[test]
+    fn complete_job_noop_when_cancelled() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+
+        let new_job_id = job_dao
+            .create_job(&cx, 1, "photos/test.jpg", InsightGenerationType::Standard)
+            .unwrap();
+        job_dao.cancel_job(&cx, new_job_id).unwrap();
+
+        // Simulates a late-finishing task reporting success after the cancel.
+        job_dao.complete_job(&cx, new_job_id, 42).unwrap();
+
+        let stored = job_dao.get_job_by_id(&cx, new_job_id).unwrap().unwrap();
+        assert_eq!(
+            stored.status,
+            InsightJobStatus::Cancelled.as_str(),
+            "cancelled status must not be overwritten by late complete"
+        );
+        assert_eq!(
+            stored.result_insight_id, None,
+            "insight_id must stay None when complete is a no-op"
+        );
+    }
+
+    /// A `fail_job` call that arrives after the job was cancelled must leave
+    /// the cancelled status and the cancel's error message in place.
+    #[test]
+    fn fail_job_noop_when_cancelled() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+
+        let new_job_id = job_dao
+            .create_job(&cx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
+            .unwrap();
+        job_dao.cancel_job(&cx, new_job_id).unwrap();
+
+        // Simulates a late-failing task reporting its error after the cancel.
+        job_dao
+            .fail_job(&cx, new_job_id, "timeout after 120s")
+            .unwrap();
+
+        let stored = job_dao.get_job_by_id(&cx, new_job_id).unwrap().unwrap();
+        assert_eq!(
+            stored.status,
+            InsightJobStatus::Cancelled.as_str(),
+            "cancelled status must not be overwritten by late fail"
+        );
+        assert_eq!(
+            stored.error_message.as_deref(),
+            Some("cancelled by user"),
+            "error_message must reflect the cancel, not the late fail"
+        );
+    }
+
+    /// `cancel_job` returns `true` the first time (status becomes
+    /// `Cancelled`, `completed_at` is set) and `false` on a repeat call.
+    #[test]
+    fn cancel_job_by_id() {
+        let cx = ctx();
+        let mut job_dao = setup_dao();
+
+        let new_job_id = job_dao
+            .create_job(&cx, 1, "photos/test.jpg", InsightGenerationType::Standard)
+            .unwrap();
+
+        let first_attempt = job_dao.cancel_job(&cx, new_job_id).unwrap();
+        assert!(first_attempt, "should cancel running job");
+
+        let stored = job_dao.get_job_by_id(&cx, new_job_id).unwrap().unwrap();
+        assert!(stored.completed_at.is_some());
+        assert_eq!(stored.status, InsightJobStatus::Cancelled.as_str());
+
+        // The job is already cancelled, so a second attempt changes nothing.
+        let second_attempt = job_dao.cancel_job(&cx, new_job_id).unwrap();
+        assert!(!second_attempt, "already cancelled job should return false");
+    }
+}
@@ -47,7 +47,6 @@ pub trait InsightDao: Sync + Send {
        paths: &[String],
    ) -> Result<Option<PhotoInsight>, DbError>;

-    #[allow(dead_code)]
    fn get_insight_history(
        &mut self,
        context: &opentelemetry::Context,
@@ -82,6 +81,17 @@ pub trait InsightDao: Sync + Send {
        approved: bool,
    ) -> Result<(), DbError>;

+    /// Rate a specific insight version by primary key, regardless of
+    /// `is_current`. Used by the per-file history view to approve/reject
+    /// previously generated (superseded) versions, which the path-based
+    /// `rate_insight` (current row only) cannot reach.
+    ///
+    /// `insight_id` is the primary key of the version to rate; `approved`
+    /// is the rating to store (`true` approves, `false` rejects).
+    ///
+    /// NOTE(review): the `SqliteInsightDao` implementation discards the
+    /// affected-row count, so an `insight_id` that matches no row still
+    /// yields `Ok(())` — confirm callers do not rely on a not-found error.
+    fn rate_insight_by_id(
+        &mut self,
+        context: &opentelemetry::Context,
+        insight_id: i32,
+        approved: bool,
+    ) -> Result<(), DbError>;
+
    fn get_approved_insights(
        &mut self,
        context: &opentelemetry::Context,
@@ -90,13 +100,15 @@ pub trait InsightDao: Sync + Send {
    /// Replace the `training_messages` JSON blob on the current row for
    /// `(library_id, rel_path)`. Used by chat-turn append mode to persist
    /// the extended conversation without inserting a new insight version.
+    /// Returns the number of rows affected (0 if no current row matched,
+    /// indicating a concurrent regenerate/reconcile flipped `is_current`).
    fn update_training_messages(
        &mut self,
        context: &opentelemetry::Context,
        library_id: i32,
        file_path: &str,
        training_messages_json: &str,
-    ) -> Result<(), DbError>;
+    ) -> Result<usize, DbError>;
 }

 pub struct SqliteInsightDao {
@@ -159,13 +171,13 @@ impl InsightDao for SqliteInsightDao {
            )
            .set(is_current.eq(false))
            .execute(connection.deref_mut())
-            .map_err(|_| anyhow::anyhow!("Update is_current error"))?;
+            .map_err(|e| anyhow::anyhow!("Failed to flip is_current: {}", e))?;

            // Insert the new insight as current
            diesel::insert_into(photo_insights)
                .values(&insight)
                .execute(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Insert error"))?;
+                .map_err(|e| anyhow::anyhow!("Failed to insert insight: {}", e))?;

            // Retrieve the inserted record (is_current = true)
            photo_insights
@@ -173,9 +185,12 @@ impl InsightDao for SqliteInsightDao {
                .filter(rel_path.eq(&insight.file_path))
                .filter(is_current.eq(true))
                .first::<PhotoInsight>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Failed to retrieve inserted insight: {}", e))
+        })
+        .map_err(|e| {
+            log::error!("store_insight failed: {}", e);
+            DbError::new(DbErrorKind::InsertError)
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn get_insight(
@@ -193,9 +208,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(is_current.eq(true))
                .first::<PhotoInsight>(connection.deref_mut())
                .optional()
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_current_insight_for_library(
@@ -219,10 +234,10 @@ impl InsightDao for SqliteInsightDao {
                    .filter(is_current.eq(true))
                    .first::<PhotoInsight>(connection.deref_mut())
                    .optional()
-                    .map_err(|_| anyhow::anyhow!("Query error"))
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
            },
        )
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_insight_for_paths(
@@ -244,9 +259,9 @@ impl InsightDao for SqliteInsightDao {
                .order(generated_at.desc())
                .first::<PhotoInsight>(connection.deref_mut())
                .optional()
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_insight_history(
@@ -263,9 +278,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(rel_path.eq(path))
                .order(generated_at.desc())
                .load::<PhotoInsight>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_insight_by_id(
@@ -282,9 +297,9 @@ impl InsightDao for SqliteInsightDao {
                .find(insight_id)
                .first::<PhotoInsight>(connection.deref_mut())
                .optional()
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn delete_insight(
@@ -300,9 +315,9 @@ impl InsightDao for SqliteInsightDao {
            diesel::delete(photo_insights.filter(rel_path.eq(path)))
                .execute(connection.deref_mut())
                .map(|_| ())
-                .map_err(|_| anyhow::anyhow!("Delete error"))
+                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_all_insights(
@@ -318,9 +333,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(is_current.eq(true))
                .order(generated_at.desc())
                .load::<PhotoInsight>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn rate_insight(
@@ -342,9 +357,29 @@ impl InsightDao for SqliteInsightDao {
            .set(approved.eq(Some(is_approved)))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|_| anyhow::anyhow!("Update error"))
+            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+
+    /// Stores `is_approved` in the `approved` column of the row whose primary
+    /// key is `target_id`, whether or not that row is the current version.
+    ///
+    /// Any Diesel error is wrapped as "Update error: …" and surfaced as a
+    /// `DbErrorKind::UpdateError`.
+    ///
+    /// NOTE(review): the affected-row count is dropped, so a `target_id` that
+    /// matches no row still returns `Ok(())`.
+    fn rate_insight_by_id(
+        &mut self,
+        context: &opentelemetry::Context,
+        target_id: i32,
+        is_approved: bool,
+    ) -> Result<(), DbError> {
+        trace_db_call(context, "update", "rate_insight_by_id", |_span| {
+            use schema::photo_insights::dsl::*;
+
+            let mut guard = self.connection.lock().expect("Unable to get InsightDao");
+            let rating_update =
+                diesel::update(photo_insights.find(target_id)).set(approved.eq(Some(is_approved)));
+
+            match rating_update.execute(guard.deref_mut()) {
+                Ok(_rows_touched) => Ok(()),
+                Err(e) => Err(anyhow::anyhow!("Update error: {}", e)),
+            }
+        })
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
     }

    fn get_approved_insights(
@@ -361,9 +396,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(training_messages.is_not_null())
                .order(generated_at.desc())
                .load::<PhotoInsight>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn update_training_messages(
@@ -372,7 +407,7 @@ impl InsightDao for SqliteInsightDao {
        lib_id: i32,
        path: &str,
        training_messages_json: &str,
-    ) -> Result<(), DbError> {
+    ) -> Result<usize, DbError> {
        trace_db_call(context, "update", "update_training_messages", |_span| {
            use schema::photo_insights::dsl::*;

@@ -386,9 +421,95 @@ impl InsightDao for SqliteInsightDao {
            )
            .set(training_messages.eq(Some(training_messages_json.to_string())))
            .execute(connection.deref_mut())
-            .map(|_| ())
-            .map_err(|_| anyhow::anyhow!("Update error"))
+            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::database::test::in_memory_db_connection;
+
+    /// Wraps a fresh in-memory connection in a `SqliteInsightDao`.
+    fn dao() -> SqliteInsightDao {
+        SqliteInsightDao::from_connection(Arc::new(Mutex::new(in_memory_db_connection())))
+    }
+
+    /// Produces an insertable insight for library 1. Only the path, the
+    /// generation timestamp and the model vary between tests; every other
+    /// field gets a fixed placeholder or `None`.
+    fn insert(path: &str, generated_at: i64, model: &str) -> InsertPhotoInsight {
+        InsertPhotoInsight {
+            library_id: 1,
+            file_path: path.to_owned(),
+            title: format!("title for {model}"),
+            summary: String::from("summary"),
+            generated_at,
+            model_version: model.to_owned(),
+            is_current: true,
+            training_messages: None,
+            backend: String::from("local"),
+            fewshot_source_ids: None,
+            content_hash: None,
+            num_ctx: None,
+            temperature: None,
+            top_p: None,
+            top_k: None,
+            min_p: None,
+            system_prompt: None,
+            persona_id: None,
+            prompt_eval_count: None,
+            eval_count: None,
+        }
+    }
+
+    /// History for a path lists every stored version, newest first, with
+    /// only the latest one flagged as current.
+    #[test]
+    fn get_insight_history_returns_all_versions_newest_first() {
+        let cx = opentelemetry::Context::new();
+        let mut insight_dao = dao();
+
+        // Each store_insight call flips the earlier rows to
+        // is_current=false, so three generations leave a 3-row history.
+        for (stamp, model) in [(100, "m1"), (200, "m2"), (300, "m3")] {
+            insight_dao
+                .store_insight(&cx, insert("a.jpg", stamp, model))
+                .unwrap();
+        }
+        // An unrelated path must stay out of a.jpg's history.
+        insight_dao
+            .store_insight(&cx, insert("b.jpg", 250, "other"))
+            .unwrap();
+
+        let versions = insight_dao.get_insight_history(&cx, "a.jpg").unwrap();
+        assert_eq!(versions.len(), 3);
+
+        let stamps: Vec<_> = versions.iter().map(|row| row.generated_at).collect();
+        assert_eq!(stamps, vec![300, 200, 100], "history should be newest-first");
+
+        // Exactly one version is current, and it is the latest generation.
+        let mut current_rows = versions.iter().filter(|row| row.is_current);
+        assert_eq!(current_rows.next().map(|row| row.generated_at), Some(300));
+        assert!(current_rows.next().is_none());
+    }
+
+    /// Rating by primary key touches only the addressed (superseded) row and
+    /// leaves the current version unrated.
+    #[test]
+    fn rate_insight_by_id_rates_only_the_targeted_version() {
+        let cx = opentelemetry::Context::new();
+        let mut insight_dao = dao();
+
+        for (stamp, model) in [(100, "m1"), (200, "m2")] {
+            insight_dao
+                .store_insight(&cx, insert("a.jpg", stamp, model))
+                .unwrap();
+        }
+
+        // Newest-first history: [200 (current), 100 (superseded)].
+        let before = insight_dao.get_insight_history(&cx, "a.jpg").unwrap();
+        let superseded = before.iter().find(|row| row.generated_at == 100).unwrap();
+        assert!(!superseded.is_current);
+
+        insight_dao
+            .rate_insight_by_id(&cx, superseded.id, true)
+            .unwrap();
+
+        let after = insight_dao.get_insight_history(&cx, "a.jpg").unwrap();
+        let rated = after.iter().find(|row| row.generated_at == 100).unwrap();
+        let latest = after.iter().find(|row| row.generated_at == 200).unwrap();
+        assert_eq!(rated.approved, Some(true), "targeted version is rated");
+        assert_eq!(latest.approved, None, "current version is untouched");
     }
 }
@@ -235,6 +235,7 @@ pub trait KnowledgeDao: Sync + Send {
    ///   - entity_type: optional, restricts nodes to one type
    ///   - node_limit: caps the number of nodes; lower-fact-count
    ///     entities drop first
+    ///
    /// Edges between dropped entities are pruned. Persona scoping
    /// affects fact_count + edge inclusion (rejected / superseded
    /// excluded; All vs Single mirrors the existing pattern).
@@ -581,7 +582,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
            }
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn get_entity_by_id(
@@ -598,7 +599,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_entity_by_name(
@@ -623,7 +624,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .load::<Entity>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_entities_with_embeddings(
@@ -648,7 +649,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .load::<Entity>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn list_entities(
@@ -705,7 +706,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {

            Ok((results, total))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn list_entities_with_fact_counts(
@@ -893,7 +894,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {

            Ok((pairs, total))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_predicate_stats(
@@ -937,7 +938,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            let mut conn = self.connection.lock().expect("KnowledgeDao lock");
            let mut q = sql_query(sql).into_boxed();
            match persona {
-                PersonaFilter::Single { user_id, persona_id } => {
+                PersonaFilter::Single {
+                    user_id,
+                    persona_id,
+                } => {
                    q = q
                        .bind::<Integer, _>(*user_id)
                        .bind::<Text, _>(persona_id.clone());
@@ -953,7 +957,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
            Ok(rows.into_iter().map(|r| (r.predicate, r.cnt)).collect())
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn bulk_reject_facts_by_predicate(
@@ -977,7 +981,10 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            // rows flip — REVIEWED survives so the curator can preserve
            // a hand-approved exception under the same predicate.
            let touched = match persona {
-                PersonaFilter::Single { user_id: uid, persona_id: pid } => diesel::update(
+                PersonaFilter::Single {
+                    user_id: uid,
+                    persona_id: pid,
+                } => diesel::update(
                    entity_facts
                        .filter(predicate.eq(target_predicate))
                        .filter(user_id.eq(*uid))
@@ -1009,7 +1016,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            };
            Ok(touched)
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn build_entity_graph(
@@ -1187,7 +1194,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {

            Ok(EntityGraph { nodes, edges })
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_consolidation_proposals(
@@ -1282,8 +1289,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                        Some(v) => v,
                        None => continue,
                    };
-                    for b in (a + 1)..indices.len() {
-                        let ib = indices[b];
+                    for &ib in &indices[a + 1..] {
                        let vb = match &decoded[ib] {
                            Some(v) => v,
                            None => continue,
@@ -1343,7 +1349,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            result.truncate(max_groups);
            Ok(result)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_persona_breakdowns_for_entities(
@@ -1405,7 +1411,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            }
            Ok(out)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn update_entity_status(
@@ -1423,7 +1429,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn update_entity(
@@ -1469,7 +1475,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn delete_entity(
@@ -1559,7 +1565,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            })
            .map_err(|e| anyhow::anyhow!("Merge transaction error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    // -----------------------------------------------------------------------
@@ -1630,7 +1636,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                Ok((inserted, true)) // true = newly created
            }
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn get_facts_for_entity(
@@ -1656,7 +1662,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            q.load::<EntityFact>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn list_facts(
@@ -1713,7 +1719,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {

            Ok((results, total))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn update_fact(
@@ -1795,7 +1801,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn update_facts_insight_id(
@@ -1817,7 +1823,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            .map(|_| ())
            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn delete_fact(&mut self, cx: &opentelemetry::Context, fact_id: i32) -> Result<(), DbError> {
@@ -2009,7 +2015,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            .map(|_| ())
            .map_err(|e| anyhow::anyhow!("Insert error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn delete_photo_links_for_file(
@@ -2025,7 +2031,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_links_for_photo(
@@ -2041,7 +2047,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .load::<EntityPhotoLink>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_links_for_entity(
@@ -2057,7 +2063,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .load::<EntityPhotoLink>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    // -----------------------------------------------------------------------
@@ -2105,7 +2111,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                facts: recent_facts,
            })
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }
 }

@@ -273,7 +273,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
                source_file: location.source_file,
            })
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn store_locations_batch(
@@ -350,7 +350,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(inserted)
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn find_nearest_location(
@@ -385,7 +385,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(results.into_iter().next().map(|r| r.to_location_record()))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_locations_in_range(
@@ -413,7 +413,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
            .map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_locations_near_point(
@@ -468,7 +468,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(filtered)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn location_exists(
@@ -502,7 +502,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(result.count > 0)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -525,6 +525,6 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(result.count)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }
 }
@@ -45,6 +45,7 @@ pub struct DuplicateRow {

 pub mod calendar_dao;
 pub mod daily_summary_dao;
+pub mod insight_generation_job_dao;
 pub mod insights_dao;
 pub mod knowledge_dao;
 pub mod location_dao;
@@ -57,6 +58,7 @@ pub mod search_dao;

 pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
 pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
+pub use insight_generation_job_dao::{InsightGenerationJobDao, SqliteInsightGenerationJobDao};
 pub use insights_dao::{InsightDao, SqliteInsightDao};
 pub use knowledge_dao::{
    ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch,
@@ -191,14 +193,26 @@ pub fn connect() -> SqliteConnection {
    conn
 }

-#[derive(Debug)]
+/// Error returned by the DAO layer: a coarse `kind` plus, optionally, the
+/// rendered text of the failure that caused it.
 pub struct DbError {
+    /// Category of the operation that failed.
    pub kind: DbErrorKind,
+    /// Rendered message of the underlying error. Set by `DbError::log`;
+    /// `DbError::new` leaves it as `None`.
+    pub source: Option<String>,
 }

 impl DbError {
    fn new(kind: DbErrorKind) -> Self {
-        DbError { kind }
+        DbError { kind, source: None }
+    }
+
+    /// Build a `DbError` of the given `kind` from any displayable source
+    /// error. The rendered message is emitted at `error` level and then
+    /// stored in `source`, so `map_err` closures that call this keep the
+    /// text of the underlying failure instead of discarding it.
+    fn log(kind: DbErrorKind, source: impl std::fmt::Display) -> Self {
+        let detail = source.to_string();
+        log::error!("DB {:?}: {}", kind, detail);
+        let source = Some(detail);
+        DbError { kind, source }
    }

    fn exists() -> Self {
@@ -206,6 +220,26 @@ impl DbError {
    }
 }

+// Hand-written Debug: the `source` field is only printed when it is set,
+// and its text is written verbatim (not re-quoted via `{:?}`).
+impl std::fmt::Debug for DbError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(detail) = self.source.as_ref() {
+            return write!(
+                f,
+                "DbError {{ kind: {:?}, source: {} }}",
+                self.kind, detail
+            );
+        }
+        write!(f, "DbError {{ kind: {:?} }}", self.kind)
+    }
+}
+
+// Display renders `<kind>` alone, or `<kind>: <source text>` when a source
+// message was captured.
+impl std::fmt::Display for DbError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(detail) = self.source.as_ref() {
+            return write!(f, "{:?}: {}", self.kind, detail);
+        }
+        write!(f, "{:?}", self.kind)
+    }
+}
+
+// Empty impl: `DbError` keeps the underlying failure only as text in its
+// `source` field, so the trait's default methods are used unchanged.
+impl std::error::Error for DbError {}
+
 #[derive(Debug, PartialEq)]
 pub enum DbErrorKind {
    AlreadyExists,
@@ -260,7 +294,7 @@ impl FavoriteDao for SqliteFavoriteDao {
                    path: favorite_path,
                })
                .execute(connection.deref_mut())
-                .map_err(|_| DbError::new(DbErrorKind::InsertError))
+                .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
        } else {
            Err(DbError::exists())
        }
@@ -281,7 +315,7 @@ impl FavoriteDao for SqliteFavoriteDao {
        favorites
            .filter(userid.eq(user_id))
            .load::<Favorite>(self.connection.lock().unwrap().deref_mut())
-            .map_err(|_| DbError::new(DbErrorKind::QueryError))
+            .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> {
@@ -290,7 +324,7 @@ impl FavoriteDao for SqliteFavoriteDao {
        diesel::update(favorites.filter(rel_path.eq(old_path)))
            .set(rel_path.eq(new_path))
            .execute(self.connection.lock().unwrap().deref_mut())
-            .map_err(|_| DbError::new(DbErrorKind::UpdateError))?;
+            .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))?;
        Ok(())
    }

@@ -301,7 +335,7 @@ impl FavoriteDao for SqliteFavoriteDao {
            .select(rel_path)
            .distinct()
            .load(self.connection.lock().unwrap().deref_mut())
-            .map_err(|_| DbError::new(DbErrorKind::QueryError))
+            .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }
 }

@@ -414,6 +448,27 @@ pub trait ExifDao: Sync + Send {
        size_bytes: i64,
    ) -> Result<(), DbError>;

+    /// Every distinct non-NULL `content_hash` across all libraries. Used
+    /// by HLS orphan cleanup to identify hash dirs under `$VIDEO_PATH`
+    /// whose source video no longer exists. Cheap query (single column,
+    /// indexed) but unbounded in size — the result feeds a HashSet
+    /// membership check, so a 100k-photo library produces ~100k strings.
+    ///
+    /// The SQLite implementation applies no ORDER BY, so the order of the
+    /// returned hashes is unspecified.
+    fn list_distinct_content_hashes(
+        &mut self,
+        context: &opentelemetry::Context,
+    ) -> Result<Vec<String>, DbError>;
+
+    /// Every row in `image_exif` for `library_id`, as
+    /// `(rel_path, content_hash)`. The hash is Option because rows
+    /// mid-backfill carry NULL. Used by HLS readiness stats; callers
+    /// filter by extension client-side because the DB schema doesn't
+    /// carry media type.
+    ///
+    /// The SQLite implementation applies no ORDER BY and no limit, so the
+    /// row order is unspecified and the result grows with the library.
+    fn list_paths_and_hashes_for_library(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+    ) -> Result<Vec<(String, Option<String>)>, DbError>;
+
    /// Return image_exif rows that need their `date_taken` resolved by the
    /// canonical-date waterfall (see `crate::date_resolver`): `date_taken
    /// IS NULL`. Returns `(library_id, rel_path)`. The caller filters to
@@ -449,6 +504,61 @@ pub trait ExifDao: Sync + Send {
        source: &str,
    ) -> Result<(), DbError>;

+    /// Find image_exif rows needing a CLIP embedding for semantic search:
+    /// `clip_embedding IS NULL AND content_hash IS NOT NULL`, ordered by id
+    /// ASC, limited. Hash-less rows wait for `backfill_unhashed_backlog` to
+    /// hash them first — embedding a row we can't key on bytes is wasted
+    /// work that the next library/move detection would invalidate. Backed
+    /// by the partial index `idx_image_exif_clip_backfill`.
+    ///
+    /// Returns `(rel_path, content_hash)` for the given library only. Video
+    /// rows are returned too (the query does not filter by media type);
+    /// the caller filters them out via `file_types::is_image_file` before
+    /// sending to Apollo, mirroring `face_watch::filter_excluded`.
+    ///
+    /// **Model upgrades** (re-encoding everything on a new
+    /// `APOLLO_CLIP_MODEL`) are handled out-of-band — run
+    /// `UPDATE image_exif SET clip_embedding = NULL
+    ///  WHERE clip_model_version != '<new model>';`
+    /// and the drain picks up the freshly-nulled rows on the next tick.
+    /// Mixing in-flight model versions in a single query is intentionally
+    /// not the drain's problem.
+    fn list_clip_unencoded_candidates(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        limit: i64,
+    ) -> Result<Vec<(String, String)>, DbError>;
+
+    /// Persist a CLIP embedding for an existing row. Touches
+    /// `clip_embedding` and `clip_model_version` only — leaves every
+    /// other column alone so the drain can't accidentally clobber EXIF /
+    /// hash / date-resolver state that other paths have written.
+    ///
+    /// The row is addressed by `(library_id, rel_path)`. In the SQLite
+    /// implementation an update that matches zero rows still returns
+    /// `Ok(())`; only a failed statement yields an `UpdateError`.
+    fn backfill_clip_embedding(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id: i32,
+        rel_path: &str,
+        embedding: &[u8],
+        model_version: &str,
+    ) -> Result<(), DbError>;
+
+    /// Load every `(content_hash, clip_embedding)` pair from the live
+    /// image_exif rows for the given libraries, optionally filtered to a
+    /// single `model_version` (cosine sim across mixed geometries is
+    /// meaningless). Used by `/photos/search` to rerank against the query
+    /// embedding in-memory.
+    ///
+    /// An empty `library_ids` slice applies no library filter in the
+    /// SQLite implementation, so rows from every library are considered.
+    ///
+    /// Returns one pair per content_hash. If a hash appears on more than
+    /// one row (e.g. under more than one library), the first row wins:
+    /// the SQLite implementation orders by id ASC explicitly and dedupes
+    /// in memory. Hash-less and embedding-less rows are excluded by the
+    /// SQL filter.
+    fn list_clip_index(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_ids: &[i32],
+        model_version: Option<&str>,
+    ) -> Result<Vec<(String, Vec<u8>)>, DbError>;
+
    /// Operator-driven date_taken override (POST /image/exif/date). Snapshots
    /// the prior `(date_taken, date_taken_source)` into the `original_*`
    /// pair on first override, then writes the new value with
@@ -481,9 +591,9 @@ pub trait ExifDao: Sync + Send {
    /// whose calendar position matches the request's span:
    ///   - `"day"`   — same month + day-of-month (any year)
    ///   - `"week"`  — same week-of-year (SQLite `%W`, Monday-anchored —
-    ///                 close to but not exactly ISO week 8601; the
-    ///                 boundary cases at year-start/end can shift by ±1
-    ///                 vs the prior request-time `iso_week()` filter)
+    ///     close to but not exactly ISO week 8601; the boundary cases
+    ///     at year-start/end can shift by ±1 vs the prior request-time
+    ///     `iso_week()` filter)
    ///   - `"month"` — same month (any year)
    ///
    /// `tz_offset_minutes` is applied to both sides of the strftime
@@ -845,7 +955,7 @@ impl ExifDao for SqliteExifDao {
                .first::<ImageExif>(connection.deref_mut())
                .map_err(|e| anyhow::anyhow!("Post-insert lookup failed: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn get_exif(
@@ -872,7 +982,7 @@ impl ExifDao for SqliteExifDao {
                Err(_) => Err(anyhow::anyhow!("Query error")),
            }
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn update_exif(
@@ -909,15 +1019,15 @@ impl ExifDao for SqliteExifDao {
                last_modified.eq(&exif_data.last_modified),
            ))
            .execute(connection.deref_mut())
-            .map_err(|_| anyhow::anyhow!("Update error"))?;
+            .map_err(|e| anyhow::anyhow!("Update error: {}", e))?;

            image_exif
                .filter(library_id.eq(exif_data.library_id))
                .filter(rel_path.eq(&exif_data.file_path))
                .first::<ImageExif>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn delete_exif(&mut self, context: &opentelemetry::Context, path: &str) -> Result<(), DbError> {
@@ -927,9 +1037,9 @@ impl ExifDao for SqliteExifDao {
            diesel::delete(image_exif.filter(rel_path.eq(path)))
                .execute(self.connection.lock().unwrap().deref_mut())
                .map(|_| ())
-                .map_err(|_| anyhow::anyhow!("Delete error"))
+                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_all_with_date_taken(
@@ -960,9 +1070,9 @@ impl ExifDao for SqliteExifDao {
                        .filter_map(|(path, dt)| dt.map(|ts| (path, ts)))
                        .collect()
                })
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_exif_batch(
@@ -986,9 +1096,9 @@ impl ExifDao for SqliteExifDao {
            query
                .filter(rel_path.eq_any(file_paths))
                .load::<ImageExif>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn query_by_exif(
@@ -1047,9 +1157,9 @@ impl ExifDao for SqliteExifDao {

            query
                .load::<ImageExif>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_camera_makes(
@@ -1074,9 +1184,9 @@ impl ExifDao for SqliteExifDao {
                        .filter_map(|(make, cnt)| make.map(|m| (m, cnt)))
                        .collect()
                })
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn update_file_path(
@@ -1093,10 +1203,10 @@ impl ExifDao for SqliteExifDao {
            diesel::update(image_exif.filter(rel_path.eq(old_path)))
                .set(rel_path.eq(new_path))
                .execute(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Update error"))?;
+                .map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
            Ok(())
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn get_all_file_paths(
@@ -1111,9 +1221,9 @@ impl ExifDao for SqliteExifDao {
            image_exif
                .select(rel_path)
                .load(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_all_with_gps(
@@ -1181,7 +1291,7 @@ impl ExifDao for SqliteExifDao {

            Ok(filtered)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_rows_missing_hash(
@@ -1200,9 +1310,9 @@ impl ExifDao for SqliteExifDao {
                .order(id.asc())
                .limit(limit)
                .load::<(i32, String)>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn backfill_content_hash(
@@ -1226,9 +1336,53 @@ impl ExifDao for SqliteExifDao {
            .set((content_hash.eq(hash), size_bytes.eq(size_val)))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|_| anyhow::anyhow!("Update error"))
+            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+    }
+
+    /// Collect every distinct, non-NULL `content_hash` stored in
+    /// `image_exif`, across all libraries. Any failure is logged and
+    /// surfaced as a `QueryError`.
+    fn list_distinct_content_hashes(
+        &mut self,
+        context: &opentelemetry::Context,
+    ) -> Result<Vec<String>, DbError> {
+        trace_db_call(context, "query", "list_distinct_content_hashes", |_span| {
+            use schema::image_exif::dsl::*;
+
+            let mut guard = self.connection.lock().expect("Unable to get ExifDao");
+
+            // The column is nullable, so rows load as Option<String> even
+            // though the filter already excludes NULLs.
+            let loaded = image_exif
+                .filter(content_hash.is_not_null())
+                .select(content_hash)
+                .distinct()
+                .load::<Option<String>>(guard.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+
+            let hashes: Vec<String> = loaded.into_iter().flatten().collect();
+            Ok(hashes)
+        })
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+    }
+
+    /// Load `(rel_path, content_hash)` for every `image_exif` row that
+    /// belongs to `lib_id`. The hash stays an `Option` because the column
+    /// is nullable. Any failure is logged and surfaced as a `QueryError`.
+    fn list_paths_and_hashes_for_library(
+        &mut self,
+        context: &opentelemetry::Context,
+        lib_id: i32,
+    ) -> Result<Vec<(String, Option<String>)>, DbError> {
+        trace_db_call(
+            context,
+            "query",
+            "list_paths_and_hashes_for_library",
+            |_span| {
+                use schema::image_exif::dsl::*;
+
+                let mut guard = self.connection.lock().expect("Unable to get ExifDao");
+
+                let pairs = image_exif
+                    .filter(library_id.eq(lib_id))
+                    .select((rel_path, content_hash))
+                    .load::<(String, Option<String>)>(guard.deref_mut())
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+
+                Ok(pairs)
+            },
+        )
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_rows_needing_date_backfill(
@@ -1255,10 +1409,10 @@ impl ExifDao for SqliteExifDao {
                    .order(id.asc())
                    .limit(limit)
                    .load::<(i32, String)>(connection.deref_mut())
-                    .map_err(|_| anyhow::anyhow!("Query error"))
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
            },
        )
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn backfill_date_taken(
@@ -1322,6 +1476,146 @@ impl ExifDao for SqliteExifDao {
        })
    }

+    /// Return up to `limit` `(rel_path, content_hash)` pairs from
+    /// `library_id_val` whose row has a content hash but no CLIP embedding
+    /// yet, lowest id first. Any failure is logged and surfaced as a
+    /// `QueryError`.
+    fn list_clip_unencoded_candidates(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id_val: i32,
+        limit: i64,
+    ) -> Result<Vec<(String, String)>, DbError> {
+        trace_db_call(
+            context,
+            "query",
+            "list_clip_unencoded_candidates",
+            |_span| {
+                use schema::image_exif::dsl::*;
+
+                let mut guard = self.connection.lock().expect("Unable to get ExifDao");
+
+                // The (clip_embedding IS NULL AND content_hash IS NOT NULL)
+                // predicate is what the partial index
+                // `idx_image_exif_clip_backfill` is meant to serve. Ordering
+                // by id ASC keeps drain progress monotone across ticks.
+                let candidates = image_exif
+                    .filter(library_id.eq(library_id_val))
+                    .filter(clip_embedding.is_null())
+                    .filter(content_hash.is_not_null())
+                    .select((rel_path, content_hash.assume_not_null()))
+                    .order(id.asc())
+                    .limit(limit)
+                    .load::<(String, String)>(guard.deref_mut())
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+
+                Ok(candidates)
+            },
+        )
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+    }
+
+    /// Store `embedding` and `model_version` on the `image_exif` row
+    /// addressed by `(library_id_val, rel_path_val)`; no other column is
+    /// written.
+    ///
+    /// An update that matches zero rows is logged at debug level and
+    /// still returns `Ok(())`. A failed statement is logged at warn level
+    /// and returned as an `UpdateError` whose `source` carries the
+    /// underlying message, matching what the other DAO methods expose.
+    fn backfill_clip_embedding(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_id_val: i32,
+        rel_path_val: &str,
+        embedding: &[u8],
+        model_version: &str,
+    ) -> Result<(), DbError> {
+        trace_db_call(context, "update", "backfill_clip_embedding", |_span| {
+            use schema::image_exif::dsl::*;
+
+            let mut connection = self.connection.lock().expect("Unable to get ExifDao");
+
+            let result = diesel::update(
+                image_exif
+                    .filter(library_id.eq(library_id_val))
+                    .filter(rel_path.eq(rel_path_val)),
+            )
+            .set((
+                clip_embedding.eq(embedding),
+                clip_model_version.eq(model_version),
+            ))
+            .execute(connection.deref_mut());
+
+            match result {
+                Ok(rows) => {
+                    if rows == 0 {
+                        // Same race as backfill_date_taken — row vanished
+                        // between the candidate query and this write. Not
+                        // a hard error; the drain re-scans next tick.
+                        log::debug!(
+                            "backfill_clip_embedding: 0 rows matched lib={} {} \
+                             (row likely retired by missing-file scan)",
+                            library_id_val,
+                            rel_path_val
+                        );
+                    }
+                    Ok(())
+                }
+                Err(e) => Err(anyhow::anyhow!(
+                    "diesel update failed (lib={}, rel_path={}, model={}): {}",
+                    library_id_val,
+                    rel_path_val,
+                    model_version,
+                    e
+                )),
+            }
+        })
+        .map_err(|e| {
+            // Keep the warn level used for this drain path, but do not
+            // drop the cause: callers that print the DbError should see
+            // the same detail that reaches the log.
+            log::warn!("backfill_clip_embedding: {}", e);
+            DbError {
+                kind: DbErrorKind::UpdateError,
+                source: Some(e.to_string()),
+            }
+        })
+    }
+
+    /// Load `(content_hash, clip_embedding)` pairs for rows that have
+    /// both values, optionally restricted to `library_ids_val` (an empty
+    /// slice means no library filter) and to `model_version_filter`.
+    /// Rows are read in id order and only the first row per hash is kept.
+    /// Any failure is logged and surfaced as a `QueryError`.
+    fn list_clip_index(
+        &mut self,
+        context: &opentelemetry::Context,
+        library_ids_val: &[i32],
+        model_version_filter: Option<&str>,
+    ) -> Result<Vec<(String, Vec<u8>)>, DbError> {
+        trace_db_call(context, "query", "list_clip_index", |_span| {
+            use schema::image_exif::dsl::*;
+
+            let mut guard = self.connection.lock().expect("Unable to get ExifDao");
+
+            // A row is searchable only when both the hash and the
+            // embedding are present; the optional filters are layered on
+            // top of that base predicate.
+            let mut selection = image_exif
+                .filter(content_hash.is_not_null())
+                .filter(clip_embedding.is_not_null())
+                .into_boxed();
+            if !library_ids_val.is_empty() {
+                selection = selection.filter(library_id.eq_any(library_ids_val));
+            }
+            if let Some(wanted_model) = model_version_filter {
+                selection = selection.filter(clip_model_version.eq(wanted_model));
+            }
+
+            // id ASC makes the earliest-ingested row the one that survives
+            // deduplication and keeps the output order stable across calls.
+            let loaded: Vec<(String, Vec<u8>)> = selection
+                .select((
+                    content_hash.assume_not_null(),
+                    clip_embedding.assume_not_null(),
+                ))
+                .order(id.asc())
+                .load::<(String, Vec<u8>)>(guard.deref_mut())
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+
+            // First occurrence of each hash wins; later rows carrying the
+            // same hash are dropped.
+            let mut seen: std::collections::HashSet<String> =
+                std::collections::HashSet::with_capacity(loaded.len());
+            let mut unique = Vec::with_capacity(loaded.len());
+            unique.extend(
+                loaded
+                    .into_iter()
+                    .filter(|(hash, _)| seen.insert(hash.clone())),
+            );
+            Ok(unique)
+        })
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+    }
+
    fn set_manual_date_taken(
        &mut self,
        context: &opentelemetry::Context,
@@ -1479,7 +1773,7 @@ impl ExifDao for SqliteExifDao {
                })
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_by_content_hash(
@@ -1496,9 +1790,9 @@ impl ExifDao for SqliteExifDao {
                .filter(content_hash.eq(hash))
                .first::<ImageExif>(connection.deref_mut())
                .optional()
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_rel_paths_sharing_content(
@@ -1521,7 +1815,7 @@ impl ExifDao for SqliteExifDao {
                .select(content_hash)
                .first::<Option<String>>(connection.deref_mut())
                .optional()
-                .map_err(|_| anyhow::anyhow!("Query error"))?
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?
                .flatten();

            let paths = match hash {
@@ -1530,13 +1824,13 @@ impl ExifDao for SqliteExifDao {
                    .select(rel_path)
                    .distinct()
                    .load::<String>(connection.deref_mut())
-                    .map_err(|_| anyhow::anyhow!("Query error"))?,
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))?,
                None => vec![rel_path_val.to_string()],
            };

            Ok(paths)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_rel_paths_for_library(
@@ -1553,9 +1847,9 @@ impl ExifDao for SqliteExifDao {
                .filter(library_id.eq(library_id_val))
                .select(rel_path)
                .load::<String>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_content_hash_anywhere(
@@ -1575,9 +1869,9 @@ impl ExifDao for SqliteExifDao {
                .first::<Option<String>>(connection.deref_mut())
                .optional()
                .map(|opt| opt.flatten())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_rel_paths_by_hash(
@@ -1595,9 +1889,9 @@ impl ExifDao for SqliteExifDao {
                .select(rel_path)
                .distinct()
                .load::<String>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_rel_paths_for_hashes(
@@ -1624,14 +1918,14 @@ impl ExifDao for SqliteExifDao {
                    .select((content_hash.assume_not_null(), rel_path))
                    .distinct()
                    .load::<(String, String)>(connection.deref_mut())
-                    .map_err(|_| anyhow::anyhow!("Query error"))?;
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
                for (hash, path) in rows {
                    out.entry(hash).or_default().push(path);
                }
            }
            Ok(out)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn list_rel_paths_for_libraries(
@@ -1697,9 +1991,9 @@ impl ExifDao for SqliteExifDao {

            query
                .load::<(i32, String)>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn delete_exif_by_library(
@@ -1718,9 +2012,9 @@ impl ExifDao for SqliteExifDao {
            )
            .execute(self.connection.lock().unwrap().deref_mut())
            .map(|_| ())
-            .map_err(|_| anyhow::anyhow!("Delete error"))
+            .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn count_for_library(
@@ -1735,9 +2029,9 @@ impl ExifDao for SqliteExifDao {
                .filter(library_id.eq(library_id_val))
                .count()
                .get_result::<i64>(self.connection.lock().unwrap().deref_mut())
-                .map_err(|_| anyhow::anyhow!("Count error"))
+                .map_err(|e| anyhow::anyhow!("Count error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn list_rel_paths_for_library_page(
@@ -1761,10 +2055,10 @@ impl ExifDao for SqliteExifDao {
                    .limit(limit)
                    .offset(offset)
                    .load::<(i32, String)>(self.connection.lock().unwrap().deref_mut())
-                    .map_err(|_| anyhow::anyhow!("Query error"))
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
            },
        )
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_rows_missing_perceptual_hash(
@@ -1809,10 +2103,10 @@ impl ExifDao for SqliteExifDao {
                    .order(id.asc())
                    .limit(limit)
                    .load::<(i32, String)>(connection.deref_mut())
-                    .map_err(|_| anyhow::anyhow!("Query error"))
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
            },
        )
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn backfill_perceptual_hash(
@@ -1836,11 +2130,12 @@ impl ExifDao for SqliteExifDao {
            .set((phash_64.eq(phash_val), dhash_64.eq(dhash_val)))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|_| anyhow::anyhow!("Update error"))
+            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

+    #[allow(clippy::type_complexity)]
    fn list_duplicates_exact(
        &mut self,
        context: &opentelemetry::Context,
@@ -1867,7 +2162,7 @@ impl ExifDao for SqliteExifDao {
                    q = q.filter(library_id.eq(lib));
                }
                q.load::<String>(connection.deref_mut())
-                    .map_err(|_| anyhow::anyhow!("Query error"))?
+                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))?
            };

            if dup_hashes.is_empty() {
@@ -1914,7 +2209,7 @@ impl ExifDao for SqliteExifDao {
                Option<i64>,
            )> = q
                .load(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))?;
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;

            Ok(rows
                .into_iter()
@@ -1933,9 +2228,10 @@ impl ExifDao for SqliteExifDao {
                })
                .collect())
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

+    #[allow(clippy::type_complexity)]
    fn list_perceptual_candidates(
        &mut self,
        context: &opentelemetry::Context,
@@ -1995,7 +2291,7 @@ impl ExifDao for SqliteExifDao {
                Option<i64>,
            )> = q
                .load(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))?;
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;

            // Dedup keyed on content_hash, keeping the first occurrence
            // (deterministic by the SQL ORDER BY: lowest library_id,
@@ -2021,7 +2317,7 @@ impl ExifDao for SqliteExifDao {
            }
            Ok(out)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn list_image_paths(
@@ -2046,9 +2342,9 @@ impl ExifDao for SqliteExifDao {
                q = q.filter(duplicate_of_hash.is_null());
            }
            q.load::<(i32, String)>(connection.deref_mut())
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn lookup_duplicate_row(
@@ -2108,9 +2404,9 @@ impl ExifDao for SqliteExifDao {
                        duplicate_decided_at: r.10,
                    })
                })
-                .map_err(|_| anyhow::anyhow!("Query error"))
+                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn set_duplicate_of(
@@ -2137,9 +2433,9 @@ impl ExifDao for SqliteExifDao {
            ))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|_| anyhow::anyhow!("Update error"))
+            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn clear_duplicate_of(
@@ -2164,9 +2460,9 @@ impl ExifDao for SqliteExifDao {
            ))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|_| anyhow::anyhow!("Update error"))
+            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn union_perceptual_tags(
@@ -2204,9 +2500,9 @@ impl ExifDao for SqliteExifDao {
            .bind::<diesel::sql_types::Text, _>(survivor_hash)
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|_| anyhow::anyhow!("Tag union error"))
+            .map_err(|e| anyhow::anyhow!("Tag union error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }
 }

@@ -1,9 +1,75 @@
 use crate::database::schema::{
-    entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, personas,
-    photo_insights, users, video_preview_clips,
+    entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs,
+    libraries, personas, photo_insights, users, video_preview_clips,
 };
 use serde::Serialize;

+/// Possible statuses for an insight generation job.
+///
+/// Serde serializes the variants in `snake_case`, which yields the same
+/// lowercase strings that [`InsightJobStatus::as_str`] returns.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, FromSqlRow)]
+#[serde(rename_all = "snake_case")]
+pub enum InsightJobStatus {
+    Running,
+    Completed,
+    Failed,
+    Cancelled,
+}
+
+impl InsightJobStatus {
+    /// Returns the lowercase string form of this status
+    /// (`"running"`, `"completed"`, `"failed"` or `"cancelled"`).
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Running => "running",
+            Self::Completed => "completed",
+            Self::Failed => "failed",
+            Self::Cancelled => "cancelled",
+        }
+    }
+
+    /// Parses a status string of the form produced by [`Self::as_str`].
+    ///
+    /// Matching is exact (case-sensitive, no trimming). This never fails:
+    /// an unrecognized value is logged at `warn` level and mapped to
+    /// [`Self::Failed`].
+    pub fn parse(s: &str) -> Self {
+        match s {
+            "running" => Self::Running,
+            "completed" => Self::Completed,
+            "failed" => Self::Failed,
+            "cancelled" => Self::Cancelled,
+            // Unknown input: warn and fall back instead of returning an error.
+            other => {
+                log::warn!(
+                    "Unknown InsightJobStatus value: {:?}, treating as failed",
+                    other
+                );
+                Self::Failed
+            }
+        }
+    }
+}
+
+/// Displays the status as the same string returned by
+/// [`InsightJobStatus::as_str`].
+impl std::fmt::Display for InsightJobStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+/// Type of insight generation (standard vs agentic).
+///
+/// Serde serializes the variants in `snake_case`, which yields the same
+/// lowercase strings that [`InsightGenerationType::as_str`] returns.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum InsightGenerationType {
+    Standard,
+    Agentic,
+}
+
+impl InsightGenerationType {
+    /// Returns the lowercase string form of this generation type
+    /// (`"standard"` or `"agentic"`).
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Standard => "standard",
+            Self::Agentic => "agentic",
+        }
+    }
+}
+
+/// Displays the generation type as the same string returned by
+/// [`InsightGenerationType::as_str`].
+impl std::fmt::Display for InsightGenerationType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
 #[derive(Insertable)]
 #[diesel(table_name = users)]
 pub struct InsertUser<'a> {
@@ -114,6 +180,15 @@ pub struct ImageExif {
    /// Snapshot of the prior `date_taken_source` taken on first manual
    /// override. NULL when no override is active.
    pub original_date_taken_source: Option<String>,
+    /// L2-normalized CLIP image embedding (raw little-endian float32 bytes;
+    /// length depends on the model — 768×4 for ViT-L/14, 512×4 for ViT-B/32).
+    /// NULL until Apollo's CLIP service has encoded this photo via the
+    /// backfill drain. Used by `/photos/search` for semantic queries.
+    pub clip_embedding: Option<Vec<u8>>,
+    /// Which CLIP model produced `clip_embedding` (e.g. `"ViT-L/14"`). A
+    /// swap of `APOLLO_CLIP_MODEL` re-eligibilizes rows whose stored
+    /// version differs so the drain rebuilds them.
+    pub clip_model_version: Option<String>,
 }

 #[derive(Insertable)]
@@ -143,6 +218,15 @@ pub struct InsertPhotoInsight {
    /// inserted before the hash is available stay null and the
    /// reconciliation pass backfills them.
    pub content_hash: Option<String>,
+    pub num_ctx: Option<i32>,
+    pub temperature: Option<f32>,
+    pub top_p: Option<f32>,
+    pub top_k: Option<i32>,
+    pub min_p: Option<f32>,
+    pub system_prompt: Option<String>,
+    pub persona_id: Option<String>,
+    pub prompt_eval_count: Option<i32>,
+    pub eval_count: Option<i32>,
 }

 #[derive(Serialize, Queryable, Clone, Debug)]
@@ -162,6 +246,15 @@ pub struct PhotoInsight {
    pub backend: String,
    pub fewshot_source_ids: Option<String>,
    pub content_hash: Option<String>,
+    pub num_ctx: Option<i32>,
+    pub temperature: Option<f32>,
+    pub top_p: Option<f32>,
+    pub top_k: Option<i32>,
+    pub min_p: Option<f32>,
+    pub system_prompt: Option<String>,
+    pub persona_id: Option<String>,
+    pub prompt_eval_count: Option<i32>,
+    pub eval_count: Option<i32>,
 }

 // --- Libraries ---
@@ -385,3 +478,30 @@ pub struct VideoPreviewClip {
    pub created_at: String,
    pub updated_at: String,
 }
+
+/// Insertable row for the `insight_generation_jobs` table.
+///
+/// Only the columns listed here are supplied on insert. The `path` and
+/// `gen_type` fields are mapped onto the `file_path` and
+/// `generation_type` columns through `column_name`.
+#[derive(Insertable)]
+#[diesel(table_name = insight_generation_jobs)]
+pub struct InsertInsightGenerationJob {
+    pub library_id: i32,
+    #[diesel(column_name = file_path)]
+    pub path: String,
+    // Stored as text; presumably an `InsightGenerationType::as_str`
+    // value — confirm against the caller.
+    #[diesel(column_name = generation_type)]
+    pub gen_type: String,
+    // Stored as text; presumably an `InsightJobStatus::as_str` value —
+    // confirm against the caller.
+    pub status: String,
+    // NOTE(review): integer timestamp; the unit (seconds vs. milliseconds)
+    // is not visible from this struct.
+    pub started_at: i64,
+}
+
+/// A row read back from the `insight_generation_jobs` table.
+///
+/// `Queryable` maps columns to fields by position, so the field order
+/// here must stay in step with the column order of the query (it
+/// currently matches the order declared in the `table!` schema).
+#[derive(Queryable, Serialize, Clone, Debug)]
+pub struct InsightGenerationJob {
+    pub id: i32,
+    pub library_id: i32,
+    #[diesel(column_name = file_path)]
+    pub path: String,
+    #[diesel(column_name = generation_type)]
+    pub gen_type: String,
+    pub status: String,
+    pub started_at: i64,
+    // The three fields below are nullable columns in the schema.
+    pub completed_at: Option<i64>,
+    pub result_insight_id: Option<i32>,
+    pub error_message: Option<String>,
+}
@@ -119,7 +119,7 @@ impl PersonaDao for SqlitePersonaDao {
                .load::<Persona>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_persona(
@@ -138,7 +138,7 @@ impl PersonaDao for SqlitePersonaDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn create_persona(
@@ -178,7 +178,7 @@ impl PersonaDao for SqlitePersonaDao {
                .first::<Persona>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn update_persona(
@@ -241,7 +241,7 @@ impl PersonaDao for SqlitePersonaDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn delete_persona(
@@ -258,7 +258,7 @@ impl PersonaDao for SqlitePersonaDao {
                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))?;
            Ok(n > 0)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn bulk_import(
@@ -294,7 +294,7 @@ impl PersonaDao for SqlitePersonaDao {
            }
            Ok(inserted)
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }
 }

@@ -96,7 +96,7 @@ impl PreviewDao for SqlitePreviewDao {
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!("Insert error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn update_status(
@@ -126,7 +126,7 @@ impl PreviewDao for SqlitePreviewDao {
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
+        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
    }

    fn get_preview(
@@ -148,7 +148,7 @@ impl PreviewDao for SqlitePreviewDao {
                Err(e) => Err(anyhow::anyhow!("Query error: {}", e)),
            }
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_previews_batch(
@@ -170,7 +170,7 @@ impl PreviewDao for SqlitePreviewDao {
                .load::<VideoPreviewClip>(connection.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_by_status(
@@ -188,7 +188,7 @@ impl PreviewDao for SqlitePreviewDao {
                .load::<VideoPreviewClip>(connection.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }
 }

@@ -57,30 +57,28 @@ impl ReconcileStats {
 /// watcher tick. Errors are logged but never propagated; reconciliation
 /// is best-effort and a transient DB hiccup must not stall the watcher.
 pub fn run(conn: &mut SqliteConnection) -> ReconcileStats {
-    let mut stats = ReconcileStats::default();
-
-    stats.tagged_photo_hashes_filled = match backfill_tagged_photo_hashes(conn) {
-        Ok(n) => n,
-        Err(e) => {
-            warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
-            0
-        }
-    };
-
-    stats.photo_insights_hashes_filled = match backfill_photo_insights_hashes(conn) {
-        Ok(n) => n,
-        Err(e) => {
-            warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
-            0
-        }
-    };
-
-    stats.photo_insights_demoted = match collapse_insight_currents(conn) {
-        Ok(n) => n,
-        Err(e) => {
-            warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
-            0
-        }
+    let stats = ReconcileStats {
+        tagged_photo_hashes_filled: match backfill_tagged_photo_hashes(conn) {
+            Ok(n) => n,
+            Err(e) => {
+                warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
+                0
+            }
+        },
+        photo_insights_hashes_filled: match backfill_photo_insights_hashes(conn) {
+            Ok(n) => n,
+            Err(e) => {
+                warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
+                0
+            }
+        },
+        photo_insights_demoted: match collapse_insight_currents(conn) {
+            Ok(n) => n,
+            Err(e) => {
+                warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
+                0
+            }
+        },
    };

    if stats.changed() {
@@ -138,6 +138,8 @@ diesel::table! {
        date_taken_source -> Nullable<Text>,
        original_date_taken -> Nullable<BigInt>,
        original_date_taken_source -> Nullable<Text>,
+        clip_embedding -> Nullable<Binary>,
+        clip_model_version -> Nullable<Text>,
    }
 }

@@ -214,6 +216,15 @@ diesel::table! {
        backend -> Text,
        fewshot_source_ids -> Nullable<Text>,
        content_hash -> Nullable<Text>,
+        num_ctx -> Nullable<Integer>,
+        temperature -> Nullable<Float>,
+        top_p -> Nullable<Float>,
+        top_k -> Nullable<Integer>,
+        min_p -> Nullable<Float>,
+        system_prompt -> Nullable<Text>,
+        persona_id -> Nullable<Text>,
+        prompt_eval_count -> Nullable<Integer>,
+        eval_count -> Nullable<Integer>,
    }
 }

@@ -269,12 +280,27 @@ diesel::table! {
    }
 }

+// Schema for the `insight_generation_jobs` table (primary key `id`).
+// The column order and types here are mirrored by the
+// `InsightGenerationJob` model struct.
+diesel::table! {
+    insight_generation_jobs (id) {
+        id -> Integer,
+        library_id -> Integer,
+        file_path -> Text,
+        generation_type -> Text,
+        status -> Text,
+        started_at -> BigInt,
+        completed_at -> Nullable<BigInt>,
+        result_insight_id -> Nullable<Integer>,
+        error_message -> Nullable<Text>,
+    }
+}
+
 diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
 diesel::joinable!(entity_photo_links -> entities (entity_id));
 diesel::joinable!(entity_photo_links -> libraries (library_id));
 diesel::joinable!(face_detections -> libraries (library_id));
 diesel::joinable!(face_detections -> persons (person_id));
 diesel::joinable!(image_exif -> libraries (library_id));
+diesel::joinable!(insight_generation_jobs -> libraries (library_id));
 diesel::joinable!(personas -> users (user_id));
 diesel::joinable!(persons -> entities (entity_id));
 diesel::joinable!(photo_insights -> libraries (library_id));
@@ -290,6 +316,7 @@ diesel::allow_tables_to_appear_in_same_query!(
    face_detections,
    favorites,
    image_exif,
+    insight_generation_jobs,
    libraries,
    location_history,
    personas,
@@ -227,7 +227,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
                source_file: search.source_file,
            })
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn store_searches_batch(
@@ -283,7 +283,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {

            Ok(inserted)
        })
-        .map_err(|_| DbError::new(DbErrorKind::InsertError))
+        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
    }

    fn find_searches_in_range(
@@ -310,7 +310,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
            .map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_similar_searches(
@@ -372,7 +372,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
                .map(|(_, search)| search)
                .collect())
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn find_relevant_searches_hybrid(
@@ -459,7 +459,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
                    .collect())
            }
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn search_exists(
@@ -490,7 +490,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {

            Ok(result.count > 0)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }

    fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -513,6 +513,6 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {

            Ok(result.count)
        })
-        .map_err(|_| DbError::new(DbErrorKind::QueryError))
+        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
    }
 }
@@ -1024,9 +1024,14 @@ impl FaceDao for SqliteFaceDao {
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
-                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
-                    .first(conn.deref_mut())
-                    .with_context(|| "stats: scanned")?
+                q.select(
+                    #[allow(deprecated)]
+                    {
+                        diesel::dsl::count_distinct(face_detections::content_hash)
+                    },
+                )
+                .first(conn.deref_mut())
+                .with_context(|| "stats: scanned")?
            };
            let with_faces: i64 = {
                let mut q = face_detections::table
@@ -1035,9 +1040,14 @@ impl FaceDao for SqliteFaceDao {
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
-                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
-                    .first(conn.deref_mut())
-                    .with_context(|| "stats: with_faces")?
+                q.select(
+                    #[allow(deprecated)]
+                    {
+                        diesel::dsl::count_distinct(face_detections::content_hash)
+                    },
+                )
+                .first(conn.deref_mut())
+                .with_context(|| "stats: with_faces")?
            };
            let no_faces: i64 = {
                let mut q = face_detections::table
@@ -1046,9 +1056,14 @@ impl FaceDao for SqliteFaceDao {
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
-                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
-                    .first(conn.deref_mut())
-                    .with_context(|| "stats: no_faces")?
+                q.select(
+                    #[allow(deprecated)]
+                    {
+                        diesel::dsl::count_distinct(face_detections::content_hash)
+                    },
+                )
+                .first(conn.deref_mut())
+                .with_context(|| "stats: no_faces")?
            };
            let failed: i64 = {
                let mut q = face_detections::table
@@ -1057,9 +1072,14 @@ impl FaceDao for SqliteFaceDao {
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
-                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
-                    .first(conn.deref_mut())
-                    .with_context(|| "stats: failed")?
+                q.select(
+                    #[allow(deprecated)]
+                    {
+                        diesel::dsl::count_distinct(face_detections::content_hash)
+                    },
+                )
+                .first(conn.deref_mut())
+                .with_context(|| "stats: failed")?
            };
            // Image-extension filter mirrors `list_unscanned_candidates` so
            // SCANNED can actually reach 100%: videos sit in `image_exif` but
@@ -2118,7 +2138,10 @@ async fn update_face_handler<D: FaceDao>(
            // the short context string we surface in the response body —
            // SQLITE_BUSY here usually means another DAO's writer held the
            // lock past `busy_timeout` (5s), which is invisible in `{}`.
-            warn!("PATCH /image/faces/{}: 500 — update_face failed: {:#}", id, e);
+            warn!(
+                "PATCH /image/faces/{}: 500 — update_face failed: {:#}",
+                id, e
+            );
            return HttpResponse::InternalServerError().body(e.to_string());
        }
    };
@@ -53,6 +53,7 @@ pub fn walk_library_files(base_path: &Path, excluded_dirs: &[String]) -> Vec<Dir
 /// used by the watcher's quick-scan tick to skip the long tail. Files
 /// whose metadata can't be read are kept; the caller's batch EXIF lookup
 /// dedups against existing rows.
+#[allow(dead_code)]
 pub fn enumerate_indexable_files(
    base_path: &Path,
    excluded_dirs: &[String],
@@ -22,8 +22,42 @@ pub fn needs_ffmpeg_thumbnail(path: &Path) -> bool {
 /// Supported video file extensions
 pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];

+/// Audio file extensions accepted as voice-clone references (TTS). Mirrors
+/// the formats Chatterbox can decode (wav/mp3/flac/m4a/aac/ogg), plus the
+/// `oga` and `opus` extensions that are also listed here.
+pub const AUDIO_EXTENSIONS: &[&str] = &["wav", "mp3", "flac", "m4a", "aac", "ogg", "oga", "opus"];
+
+/// Basenames that are filesystem metadata rather than real media. Each
+/// entry is matched exactly (case-sensitively) against the final path
+/// component. Extend this list when a new platform sidecar appears —
+/// e.g. Windows `Thumbs.db` / `desktop.ini` would belong here too if
+/// libraries containing them are ever ingested.
+const METADATA_FILENAMES: &[&str] = &[".DS_Store"];
+
+/// True if the basename is a filesystem metadata sidecar that should be
+/// invisible to every media predicate.
+///
+/// macOS writes `._<name>` AppleDouble companions when copying to
+/// non-HFS volumes — each holds the extended attributes of `<name>`,
+/// NOT a copy of the bytes. Same extension as the real file, so a
+/// pure-extension match treats `._photo.jpg` as a JPEG, ships it to
+/// the decoder, and accumulates failed rows: face_detections
+/// `status='failed'`, clip_embedding `status='failed'`, plus a
+/// pointless `image_exif` row whose `content_hash` will be the hash
+/// of the metadata blob. The downstream noise (failed-row counts that
+/// never go to zero, 422 bursts to Apollo, evictor timer reset by
+/// those 422s) is the visible damage. `.DS_Store` is the per-directory
+/// version (Finder view state) — no extension, but cheap to guard
+/// here too in case some future predicate matches by content type.
+///
+/// Returns `false` when the path has no final component or that
+/// component is not valid UTF-8.
+pub fn is_filesystem_metadata(path: &Path) -> bool {
+    // Only the basename matters; parent directories are never inspected.
+    let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
+        return false;
+    };
+    // AppleDouble prefix, or an exact match against the known sidecar names.
+    name.starts_with("._") || METADATA_FILENAMES.contains(&name)
+}
+
 /// Check if a path has an image extension
 pub fn is_image_file(path: &Path) -> bool {
+    if is_filesystem_metadata(path) {
+        return false;
+    }
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        let ext_lower = ext.to_lowercase();
        IMAGE_EXTENSIONS.contains(&ext_lower.as_str())
@@ -34,6 +68,9 @@ pub fn is_image_file(path: &Path) -> bool {

 /// Check if a path has a video extension
 pub fn is_video_file(path: &Path) -> bool {
+    if is_filesystem_metadata(path) {
+        return false;
+    }
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        let ext_lower = ext.to_lowercase();
        VIDEO_EXTENSIONS.contains(&ext_lower.as_str())
@@ -42,6 +79,19 @@ pub fn is_video_file(path: &Path) -> bool {
    }
 }

+/// Check if a path has an audio extension (voice-clone references)
+///
+/// Filesystem metadata sidecars are rejected before the extension is
+/// examined. The extension is lowercased first, so the match against
+/// `AUDIO_EXTENSIONS` is case-insensitive. Paths with no extension, or
+/// with a non-UTF-8 extension, return `false`.
+pub fn is_audio_file(path: &Path) -> bool {
+    if is_filesystem_metadata(path) {
+        return false;
+    }
+    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
+        let ext_lower = ext.to_lowercase();
+        AUDIO_EXTENSIONS.contains(&ext_lower.as_str())
+    } else {
+        false
+    }
+}
+
 /// Check if a path has a supported media extension (image or video)
 pub fn is_media_file(path: &Path) -> bool {
    is_image_file(path) || is_video_file(path)
@@ -98,4 +148,46 @@ mod tests {
        assert!(!is_media_file(Path::new("document.txt")));
        assert!(!is_media_file(Path::new("no_extension")));
    }
+
+    /// AppleDouble `._*` sidecars must be rejected by the image, video
+    /// and media predicates, whatever the extension case or directory depth.
+    #[test]
+    fn test_apple_double_excluded_from_media() {
+        // The bug-of-record: ImageApi was shipping macOS AppleDouble
+        // sidecars to Apollo's CLIP/face decoders, accumulating failed
+        // rows and pinning Apollo's eviction timer with the 422 burst.
+        // Predicate-level guard means every downstream walker
+        // (face_watch, backfill, clip_watch, watcher) inherits the fix
+        // without touching their filters.
+        assert!(!is_image_file(Path::new("._photo.jpg")));
+        assert!(!is_image_file(Path::new("dir/._photo.JPG")));
+        assert!(!is_image_file(Path::new("a/b/._DSC_2182-S.jpg")));
+        assert!(!is_video_file(Path::new("._video.mp4")));
+        assert!(!is_media_file(Path::new("._photo.png")));
+        // A real file that merely starts with "_" (no leading dot) is
+        // not AppleDouble — must NOT be filtered.
+        assert!(is_image_file(Path::new("_photo.jpg")));
+    }
+
+    /// `.DS_Store` is classified as filesystem metadata and rejected by
+    /// the image, video and media predicates, with or without a parent
+    /// directory.
+    #[test]
+    fn test_ds_store_excluded() {
+        // Finder per-directory metadata. No image extension so
+        // is_image_file would already say false; the guard makes the
+        // predicate's *reason* explicit and covers a hypothetical
+        // future caller matching by basename.
+        assert!(!is_image_file(Path::new(".DS_Store")));
+        assert!(!is_video_file(Path::new(".DS_Store")));
+        assert!(!is_media_file(Path::new("some/dir/.DS_Store")));
+        assert!(is_filesystem_metadata(Path::new(".DS_Store")));
+        assert!(is_filesystem_metadata(Path::new("dir/.DS_Store")));
+    }
+
+    /// Ordinary dotfiles (a single leading dot, not `._`) are still
+    /// treated as media when their extension matches.
+    #[test]
+    fn test_dotfiles_other_than_apple_double_are_unaffected() {
+        // We deliberately scope to `._*` + the exact .DS_Store name —
+        // not all dotfiles — because a user could plausibly name a
+        // cover image `.cover.jpg` and we shouldn't silently drop it.
+        // If that turns out to be wrong, broaden here; for now,
+        // narrow + explicit > broad + surprising.
+        assert!(is_image_file(Path::new(".cover.jpg")));
+        assert!(!is_filesystem_metadata(Path::new(".cover.jpg")));
+    }
 }
@@ -1511,6 +1511,8 @@ mod tests {
            date_taken_source,
            original_date_taken: None,
            original_date_taken_source: None,
+            clip_embedding: None,
+            clip_model_version: None,
        }
    }

@@ -1550,6 +1552,8 @@ mod tests {
                date_taken_source: data.date_taken_source.clone(),
                original_date_taken: None,
                original_date_taken_source: None,
+                clip_embedding: None,
+                clip_model_version: None,
            })
        }

@@ -1596,6 +1600,8 @@ mod tests {
                date_taken_source: data.date_taken_source.clone(),
                original_date_taken: None,
                original_date_taken_source: None,
+                clip_embedding: None,
+                clip_model_version: None,
            })
        }

@@ -1689,6 +1695,21 @@ mod tests {
            Ok(())
        }

+        /// Test stub: ignores the context and always reports no content
+        /// hashes.
+        fn list_distinct_content_hashes(
+            &mut self,
+            _context: &opentelemetry::Context,
+        ) -> Result<Vec<String>, DbError> {
+            Ok(Vec::new())
+        }
+
+        /// Test stub: ignores its arguments and always returns an empty
+        /// list of (path, hash) pairs.
+        fn list_paths_and_hashes_for_library(
+            &mut self,
+            _context: &opentelemetry::Context,
+            _library_id: i32,
+        ) -> Result<Vec<(String, Option<String>)>, DbError> {
+            Ok(Vec::new())
+        }
+
        fn get_rows_needing_date_backfill(
            &mut self,
            _context: &opentelemetry::Context,
@@ -1917,6 +1938,35 @@ mod tests {
        ) -> Result<(), DbError> {
            Ok(())
        }
+
+        /// Test stub: ignores its arguments and always returns no
+        /// candidates.
+        fn list_clip_unencoded_candidates(
+            &mut self,
+            _context: &opentelemetry::Context,
+            _library_id: i32,
+            _limit: i64,
+        ) -> Result<Vec<(String, String)>, DbError> {
+            Ok(Vec::new())
+        }
+
+        /// Test stub: discards the embedding and model version and
+        /// reports success without storing anything.
+        fn backfill_clip_embedding(
+            &mut self,
+            _context: &opentelemetry::Context,
+            _library_id: i32,
+            _rel_path: &str,
+            _embedding: &[u8],
+            _model_version: &str,
+        ) -> Result<(), DbError> {
+            Ok(())
+        }
+
+        /// Test stub: ignores its arguments and always returns an empty
+        /// index.
+        fn list_clip_index(
+            &mut self,
+            _context: &opentelemetry::Context,
+            _library_ids: &[i32],
+            _model_version: Option<&str>,
+        ) -> Result<Vec<(String, Vec<u8>)>, DbError> {
+            Ok(Vec::new())
+        }
    }

    mod api {
@@ -82,6 +82,209 @@ pub async fn get_image(

    if let Some((library, path)) = resolved {
        let image_size = req.size.unwrap_or(PhotoSize::Full);
+
+        // `size=large|xlarge` is only meaningful for stills — there's no
+        // useful "resized video preview" tier. Videos fall back to the
+        // existing thumb pipeline (which already handles gif/static
+        // selection). `mut` so preview branches can downgrade to `Full`
+        // after a generation failure.
+        let mut image_size = if (image_size == PhotoSize::Large || image_size == PhotoSize::XLarge)
+            && file_types::is_video_file(&path)
+        {
+            PhotoSize::Thumb
+        } else {
+            image_size
+        };
+
+        if image_size == PhotoSize::Large {
+            let relative_path = path
+                .strip_prefix(&library.root_path)
+                .expect("Error stripping library root prefix from large preview");
+            let relative_path_str = relative_path.to_string_lossy().replace('\\', "/");
+            let thumbs = Path::new(&app_state.thumbnail_path);
+            let large_dir = thumbs.join("_large");
+
+            // Lookup chain mirrors the Thumb branch — hash-keyed first so
+            // multi-library deployments share derivative bytes across
+            // libraries, then library-scoped legacy as the fallback for
+            // rows that aren't hashed yet.
+            let hash_large_path: Option<PathBuf> = {
+                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+                match dao.get_exif(&context, &relative_path_str) {
+                    Ok(Some(row)) => row
+                        .content_hash
+                        .as_deref()
+                        .map(|h| content_hash::large_preview_path(thumbs, h)),
+                    _ => None,
+                }
+            };
+            let scoped_legacy_large_path =
+                content_hash::library_scoped_legacy_path(&large_dir, library.id, relative_path);
+
+            let existing = hash_large_path
+                .as_ref()
+                .filter(|p| p.exists())
+                .cloned()
+                .or_else(|| {
+                    if scoped_legacy_large_path.exists() {
+                        Some(scoped_legacy_large_path.clone())
+                    } else {
+                        None
+                    }
+                });
+
+            if let Some(found) = existing
+                && let Ok(file) = NamedFile::open(&found)
+            {
+                span.set_status(Status::Ok);
+                return file
+                    .use_etag(true)
+                    .use_last_modified(true)
+                    .prefer_utf8(true)
+                    .into_response(&request);
+            }
+
+            // Cache miss — generate. Resize + JPEG-encode can take 100–500ms
+            // for a 24MP source (longer for RAW), so run on the blocking pool
+            // to keep the actix worker free. Prefer the hash-keyed
+            // destination when a hash is known so the result is reusable
+            // across libraries that hold the same bytes.
+            let dest = hash_large_path
+                .clone()
+                .unwrap_or_else(|| scoped_legacy_large_path.clone());
+            let src = path.clone();
+            let dest_for_block = dest.clone();
+            let generated = web::block(move || {
+                if let Some(parent) = dest_for_block.parent() {
+                    std::fs::create_dir_all(parent)?;
+                }
+                // Write to a sibling tempfile then atomically rename so a
+                // concurrent reader never observes a half-written JPEG.
+                let tmp = dest_for_block.with_extension("jpg.tmp");
+                crate::thumbnails::generate_large_preview(&src, &tmp)?;
+                std::fs::rename(&tmp, &dest_for_block)?;
+                Ok::<(), std::io::Error>(())
+            })
+            .await;
+
+            match generated {
+                Ok(Ok(())) => {
+                    if let Ok(file) = NamedFile::open(&dest) {
+                        span.set_status(Status::Ok);
+                        return file
+                            .use_etag(true)
+                            .use_last_modified(true)
+                            .prefer_utf8(true)
+                            .into_response(&request);
+                    }
+                }
+                Ok(Err(e)) => {
+                    warn!(
+                        "Large preview generation failed for {:?}: {} — falling back to original",
+                        path, e
+                    );
+                }
+                Err(e) => {
+                    warn!(
+                        "Large preview blocking-pool error for {:?}: {} — falling back to original",
+                        path, e
+                    );
+                }
+            }
+            // Fall through to the Full branch below so the caller gets
+            // *something* useful (the original bytes — or the RAW
+            // embedded preview, which is what the Full branch returns for
+            // unrenderable RAW containers) instead of a 404.
+            image_size = PhotoSize::Full;
+        }
+
+        if image_size == PhotoSize::XLarge {
+            let relative_path = path
+                .strip_prefix(&library.root_path)
+                .expect("Error stripping library root prefix from xlarge preview");
+            let relative_path_str = relative_path.to_string_lossy().replace('\\', "/");
+            let thumbs = Path::new(&app_state.thumbnail_path);
+            let xlarge_dir = thumbs.join("_xlarge");
+
+            let hash_xlarge_path: Option<PathBuf> = {
+                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+                match dao.get_exif(&context, &relative_path_str) {
+                    Ok(Some(row)) => row
+                        .content_hash
+                        .as_deref()
+                        .map(|h| content_hash::xlarge_preview_path(thumbs, h)),
+                    _ => None,
+                }
+            };
+            let scoped_legacy_xlarge_path =
+                content_hash::library_scoped_legacy_path(&xlarge_dir, library.id, relative_path);
+
+            let existing = hash_xlarge_path
+                .as_ref()
+                .filter(|p| p.exists())
+                .cloned()
+                .or_else(|| {
+                    if scoped_legacy_xlarge_path.exists() {
+                        Some(scoped_legacy_xlarge_path.clone())
+                    } else {
+                        None
+                    }
+                });
+
+            if let Some(found) = existing
+                && let Ok(file) = NamedFile::open(&found)
+            {
+                span.set_status(Status::Ok);
+                return file
+                    .use_etag(true)
+                    .use_last_modified(true)
+                    .prefer_utf8(true)
+                    .into_response(&request);
+            }
+
+            let dest = hash_xlarge_path
+                .clone()
+                .unwrap_or_else(|| scoped_legacy_xlarge_path.clone());
+            let src = path.clone();
+            let dest_for_block = dest.clone();
+            let generated = web::block(move || {
+                if let Some(parent) = dest_for_block.parent() {
+                    std::fs::create_dir_all(parent)?;
+                }
+                let tmp = dest_for_block.with_extension("jpg.tmp");
+                crate::thumbnails::generate_xlarge_preview(&src, &tmp)?;
+                std::fs::rename(&tmp, &dest_for_block)?;
+                Ok::<(), std::io::Error>(())
+            })
+            .await;
+
+            match generated {
+                Ok(Ok(())) => {
+                    if let Ok(file) = NamedFile::open(&dest) {
+                        span.set_status(Status::Ok);
+                        return file
+                            .use_etag(true)
+                            .use_last_modified(true)
+                            .prefer_utf8(true)
+                            .into_response(&request);
+                    }
+                }
+                Ok(Err(e)) => {
+                    warn!(
+                        "XLarge preview generation failed for {:?}: {} — falling back to original",
+                        path, e
+                    );
+                }
+                Err(e) => {
+                    warn!(
+                        "XLarge preview blocking-pool error for {:?}: {} — falling back to original",
+                        path, e
+                    );
+                }
+            }
+            image_size = PhotoSize::Full;
+        }
+
        if image_size == PhotoSize::Thumb {
            let relative_path = path
                .strip_prefix(&library.root_path)
@@ -183,14 +386,15 @@ pub async fn get_image(
        // review JPEG, ~1–2 MP). Falls through to NamedFile if no preview is
        // available, which preserves the historical behavior for callers
        // that genuinely want the original bytes.
-        if image_size == PhotoSize::Full && exif::is_tiff_raw(&path) {
-            if let Some(preview) = exif::extract_embedded_jpeg_preview(&path) {
-                span.set_status(Status::Ok);
-                return HttpResponse::Ok()
-                    .content_type("image/jpeg")
-                    .insert_header(("Cache-Control", "public, max-age=3600"))
-                    .body(preview);
-            }
+        if image_size == PhotoSize::Full
+            && exif::is_tiff_raw(&path)
+            && let Some(preview) = exif::extract_embedded_jpeg_preview(&path)
+        {
+            span.set_status(Status::Ok);
+            return HttpResponse::Ok()
+                .content_type("image/jpeg")
+                .insert_header(("Cache-Control", "public, max-age=3600"))
+                .body(preview);
        }

        if let Ok(file) = NamedFile::open(&path) {
@@ -706,7 +910,7 @@ pub async fn set_image_date(
        Ok(row) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(build_metadata_response_for_date_mutation(
-                &library,
+                library,
                &normalized_path,
                row,
            ))
@@ -757,7 +961,7 @@ pub async fn clear_image_date(
        Ok(row) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(build_metadata_response_for_date_mutation(
-                &library,
+                library,
                &normalized_path,
                row,
            ))
@@ -11,190 +11,312 @@ use actix_web::{
    web::{self, Data},
 };
 use log::{debug, error, info, warn};
+use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, Tracer};
-use opentelemetry::{KeyValue, global};
+use serde::Serialize;

+use crate::content_hash;
 use crate::data::{
    Claims, PreviewClipRequest, PreviewStatusItem, PreviewStatusRequest, PreviewStatusResponse,
    ThumbnailRequest,
 };
-use crate::database::PreviewDao;
+use crate::database::{ExifDao, PreviewDao};
 use crate::files::is_valid_full_path;
 use crate::libraries;
 use crate::otel::{extract_context_from_request, global_tracer};
 use crate::state::AppState;
-use crate::video::actors::{GeneratePreviewClipMessage, ProcessMessage, create_playlist};
+use crate::video::actors::{
+    GeneratePreviewClipMessage, QueueVideosMessage, VideoToQueue, probe_video_stream_meta,
+};
+use crate::video::hls_paths;
+
+/// Response body for `POST /video/generate`. Clients consume
+/// `playlist_url` (hash-keyed, stable across libraries and renames)
+/// and poll for readiness via the URL itself.
+#[derive(Serialize, Debug)]
+struct GenerateVideoResponse {
+    /// Hash-keyed URL to the HLS playlist. Resolves to
+    /// `$VIDEO_PATH/<shard>/<hash>/playlist.m3u8` server-side. Relative
+    /// segment refs inside the playlist resolve correctly because the
+    /// client resolves them relative to this URL's directory.
+    playlist_url: String,
+    /// blake3 content hash of the source video. Stable per byte content,
+    /// so duplicate uploads / archive ingests share one set of HLS
+    /// output.
+    content_hash: String,
+    /// `true` iff the playlist file is already on disk. `false` means a
+    /// transcode was queued, or was skipped because an earlier attempt left
+    /// a failure sentinel; clients retry the URL (or lean on HLS.js retries).
+    ready: bool,
+    /// Source-video frame rate in Hz, probed via ffprobe. `None` (omitted
+    /// from the JSON) when the probe failed or ffprobe couldn't parse either
+    /// rate field; clients then use their own frame-stepping default (typically 30).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    frame_rate: Option<f32>,
+}

 #[post("/video/generate")]
 pub async fn generate_video(
    _claims: Claims,
    request: HttpRequest,
    app_state: Data<AppState>,
+    exif_dao: Data<std::sync::Mutex<Box<dyn ExifDao>>>,
    body: web::Json<ThumbnailRequest>,
 ) -> impl Responder {
    let tracer = global_tracer();
-
    let context = extract_context_from_request(&request);
    let mut span = tracer.start_with_context("generate_video", &context);

-    let filename = PathBuf::from(&body.path);
+    let preferred_library = libraries::resolve_library_param(&app_state, body.library.as_deref())
+        .ok()
+        .flatten()
+        .unwrap_or_else(|| app_state.primary_library());

-    if let Some(name) = filename.file_name() {
-        let filename = name.to_str().expect("Filename should convert to string");
-        // KNOWN ISSUE (multi-library): playlist filename is the basename
-        // alone, so two source files with the same basename — whether in
-        // different libraries or different subdirs of one library —
-        // overwrite each other's playlists while ffmpeg runs. The
-        // hash-keyed `content_hash::hls_dir` is the long-term answer
-        // (see CLAUDE.md "Multi-library data model"); rewiring the
-        // actor pipeline to use it is out of scope for this branch.
-        // The orphan-cleanup job above already walks every library so
-        // it doesn't false-delete archive playlists.
-        let playlist = format!("{}/{}.m3u8", app_state.video_path, filename);
+    // Try the resolved library first, then fall back to any other library
+    // that actually contains the file — handles union-mode requests where
+    // the mobile client passes no library but the file lives in a
+    // non-primary library. Track which library won so the DB lookup is
+    // scoped correctly.
+    let resolved = is_valid_full_path(&preferred_library.root_path, &body.path, false)
+        .filter(|p| p.exists())
+        .map(|p| (preferred_library.id, preferred_library.root_path.clone(), p))
+        .or_else(|| {
+            app_state.libraries.iter().find_map(|lib| {
+                if lib.id == preferred_library.id {
+                    return None;
+                }
+                is_valid_full_path(&lib.root_path, &body.path, false)
+                    .filter(|p| p.exists())
+                    .map(|p| (lib.id, lib.root_path.clone(), p))
+            })
+        });

-        let library = libraries::resolve_library_param(&app_state, body.library.as_deref())
-            .ok()
-            .flatten()
-            .unwrap_or_else(|| app_state.primary_library());
+    let Some((resolved_library_id, resolved_root, full_path)) = resolved else {
+        span.set_status(Status::error(format!("invalid path {:?}", &body.path)));
+        return HttpResponse::BadRequest().finish();
+    };

-        // Try the resolved library first, then fall back to any other library
-        // that actually contains the file — handles union-mode requests where
-        // the mobile client passes no library but the file lives in a
-        // non-primary library.
-        let resolved = is_valid_full_path(&library.root_path, &body.path, false)
-            .filter(|p| p.exists())
-            .or_else(|| {
-                app_state.libraries.iter().find_map(|lib| {
-                    if lib.id == library.id {
-                        return None;
-                    }
-                    is_valid_full_path(&lib.root_path, &body.path, false).filter(|p| p.exists())
-                })
-            });
+    // Build the rel_path used to look up the row. Forward-slash normalized
+    // so the lookup matches DB rows on Windows — see `rel_path_for_lookup`.
+    let full_path_str = full_path.to_string_lossy().to_string();
+    let rel_path = rel_path_for_lookup(&full_path_str, &resolved_root);

-        if let Some(path) = resolved {
-            if let Ok(child) = create_playlist(path.to_str().unwrap(), &playlist).await {
-                span.add_event(
-                    "playlist_created".to_string(),
-                    vec![KeyValue::new("playlist-name", filename.to_string())],
+    // DB lookup first. Cheap and avoids re-reading the file off disk for
+    // already-ingested videos.
+    let hash_from_db: Option<String> = {
+        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+        match dao.get_exif_batch(
+            &context,
+            Some(resolved_library_id),
+            std::slice::from_ref(&rel_path),
+        ) {
+            Ok(rows) => rows.into_iter().next().and_then(|r| r.content_hash),
+            Err(e) => {
+                warn!(
+                    "exif_dao.get_exif_batch failed for {} (lib {}): {:?}",
+                    rel_path, resolved_library_id, e
                );
-
-                span.set_status(Status::Ok);
-                app_state.stream_manager.do_send(ProcessMessage(
-                    playlist.clone(),
-                    child,
-                    // opentelemetry::Context::new().with_span(span),
-                ));
-            }
-        } else {
-            span.set_status(Status::error(format!("invalid path {:?}", &body.path)));
-            return HttpResponse::BadRequest().finish();
-        }
-
-        HttpResponse::Ok().json(playlist)
-    } else {
-        let message = format!("Unable to get file name: {:?}", filename);
-        error!("{}", message);
-        span.set_status(Status::error(message));
-
-        HttpResponse::BadRequest().finish()
-    }
-}
-
-#[get("/video/stream")]
-pub async fn stream_video(
-    request: HttpRequest,
-    _: Claims,
-    path: web::Query<ThumbnailRequest>,
-    app_state: Data<AppState>,
-) -> impl Responder {
-    let tracer = global::tracer("image-server");
-    let context = extract_context_from_request(&request);
-    let mut span = tracer.start_with_context("stream_video", &context);
-
-    let playlist = &path.path;
-    debug!("Playlist: {}", playlist);
-
-    // Only serve files under video_path (HLS playlists) or base_path (source videos)
-    if playlist.starts_with(&app_state.video_path)
-        || is_valid_full_path(&app_state.base_path, playlist, false).is_some()
-    {
-        match NamedFile::open(playlist) {
-            Ok(file) => {
-                span.set_status(Status::Ok);
-                file.into_response(&request)
-            }
-            _ => {
-                span.set_status(Status::error(format!("playlist not found {}", playlist)));
-                HttpResponse::NotFound().finish()
+                None
            }
        }
+    };
+
+    // Best-effort fallback: compute on-the-fly when the DB row hasn't
+    // been written or is mid-backfill. Read-only — no library mutation.
+    let content_hash_str = match hash_from_db {
+        Some(h) => h,
+        None => match content_hash::compute(&full_path) {
+            Ok(id) => id.content_hash,
+            Err(e) => {
+                error!(
+                    "Failed to compute content_hash for {}: {}",
+                    full_path.display(),
+                    e
+                );
+                span.set_status(Status::error(format!("hash compute failed: {}", e)));
+                return HttpResponse::InternalServerError().finish();
+            }
+        },
+    };
+
+    let video_dir = std::path::Path::new(&app_state.video_path);
+    let playlist_path = hls_paths::playlist_for_hash(video_dir, &content_hash_str);
+    let sentinel_path = hls_paths::sentinel_for_hash(video_dir, &content_hash_str);
+    let ready = playlist_path.exists();
+
+    if !ready && !sentinel_path.exists() {
+        // Kick off generation via the existing actor pipeline. Fire-and-
+        // forget — the playlist appears at `playlist_path` once ffmpeg
+        // + rename complete. The client polls the URL.
+        info!(
+            "/video/generate: queueing playlist for {} (hash={})",
+            full_path.display(),
+            &content_hash_str[..content_hash_str.len().min(16)]
+        );
+        app_state.playlist_manager.do_send(QueueVideosMessage {
+            videos: vec![VideoToQueue {
+                video_path: full_path.clone(),
+                content_hash: content_hash_str.clone(),
+            }],
+        });
+        span.add_event(
+            "playlist_queued",
+            vec![KeyValue::new("content_hash", content_hash_str.clone())],
+        );
+    } else if ready {
+        span.add_event(
+            "playlist_already_present",
+            vec![KeyValue::new("content_hash", content_hash_str.clone())],
+        );
    } else {
-        span.set_status(Status::error(format!("playlist not valid {}", playlist)));
-        HttpResponse::BadRequest().finish()
+        // Sentinel present — past transcode attempt failed. Return the
+        // URL anyway (it'll 404 / 5xx at fetch time) so the client gets
+        // a deterministic answer. Operator must delete the sentinel to
+        // force a retry.
+        warn!(
+            "/video/generate: unsupported sentinel present for {} (hash={}); not re-queueing",
+            full_path.display(),
+            &content_hash_str[..content_hash_str.len().min(16)]
+        );
    }
+
+    let playlist_url = format!(
+        "/video/hls/{}/{}",
+        content_hash_str,
+        hls_paths::PLAYLIST_FILENAME
+    );
+
+    // Probe the source for frame rate so the mobile scrubber can step at
+    // the right interval. Cheap (~tens of ms) and only runs once per video
+    // open. Probe failures degrade silently — clients have a fallback.
+    let frame_rate = probe_video_stream_meta(&full_path.to_string_lossy())
+        .await
+        .frame_rate;
+
+    span.set_status(Status::Ok);
+    HttpResponse::Ok().json(GenerateVideoResponse {
+        playlist_url,
+        content_hash: content_hash_str,
+        ready,
+        frame_rate,
+    })
 }

-#[get("/video/{path}")]
-pub async fn get_video_part(
+/// Serve HLS playlist or segment files under the hash-keyed layout
+/// `$VIDEO_PATH/<shard>/<hash>/<file>`. The matched `{file}` must be
+/// either `playlist.m3u8` or a `segment_NNN.ts` style segment; any other
+/// shape is 400'd to defend against operators stashing other content in
+/// the hash dir.
+#[get("/video/hls/{hash}/{file}")]
+pub async fn stream_hls_file(
    request: HttpRequest,
    _: Claims,
-    path: web::Path<ThumbnailRequest>,
+    path: web::Path<(String, String)>,
    app_state: Data<AppState>,
 ) -> impl Responder {
    let tracer = global_tracer();
    let context = extract_context_from_request(&request);
-    let mut span = tracer.start_with_context("get_video_part", &context);
+    let mut span = tracer.start_with_context("stream_hls_file", &context);

-    let part = &path.path;
-    debug!("Video part: {}", part);
+    let (hash, file) = path.into_inner();
+    if !is_valid_hash(&hash) {
+        span.set_status(Status::error("invalid hash"));
+        return HttpResponse::BadRequest().body("invalid hash");
+    }
+    if !is_allowed_hls_filename(&file) {
+        span.set_status(Status::error("invalid file"));
+        return HttpResponse::BadRequest().body("invalid file");
+    }

-    let mut file_part = PathBuf::new();
-    file_part.push(app_state.video_path.clone());
-    file_part.push(part);
+    let shard = &hash[..2];
+    let file_path = PathBuf::from(&app_state.video_path)
+        .join(shard)
+        .join(&hash)
+        .join(&file);

-    // Guard against directory traversal attacks
+    // Path-traversal guard: canonicalize both sides and require the file
+    // to live under `app_state.video_path`. `is_valid_hash` /
+    // `is_allowed_hls_filename` already block dangerous strings, but
+    // belt-and-suspenders here is cheap.
    let canonical_base = match std::fs::canonicalize(&app_state.video_path) {
-        Ok(path) => path,
+        Ok(p) => p,
        Err(e) => {
-            error!("Failed to canonicalize video path: {:?}", e);
-            span.set_status(Status::error("Invalid video path configuration"));
+            error!("Failed to canonicalize VIDEO_PATH: {:?}", e);
+            span.set_status(Status::error("VIDEO_PATH not canonicalisable"));
            return HttpResponse::InternalServerError().finish();
        }
    };
-
-    let canonical_file = match std::fs::canonicalize(&file_part) {
-        Ok(path) => path,
+    let canonical_file = match std::fs::canonicalize(&file_path) {
+        Ok(p) => p,
        Err(_) => {
-            warn!("Video part not found or invalid: {:?}", file_part);
-            span.set_status(Status::error(format!("Video part not found '{}'", part)));
+            debug!("HLS file not found: {}", file_path.display());
+            span.set_status(Status::error("not found"));
            return HttpResponse::NotFound().finish();
        }
    };
-
-    // Ensure the resolved path is still within the video directory
    if !canonical_file.starts_with(&canonical_base) {
-        warn!("Directory traversal attempt detected: {:?}", part);
-        span.set_status(Status::error("Invalid video path"));
+        warn!(
+            "Path traversal attempt: {} resolved outside VIDEO_PATH",
+            file_path.display()
+        );
+        span.set_status(Status::error("traversal"));
        return HttpResponse::Forbidden().finish();
    }

    match NamedFile::open(&canonical_file) {
-        Ok(file) => {
+        Ok(f) => {
            span.set_status(Status::Ok);
-            file.into_response(&request)
+            f.into_response(&request)
        }
-        _ => {
-            error!("Video part not found: {:?}", file_part);
-            span.set_status(Status::error(format!(
-                "Video part not found '{}'",
-                file_part.to_str().unwrap()
-            )));
+        Err(_) => {
+            span.set_status(Status::error("not found"));
            HttpResponse::NotFound().finish()
        }
    }
 }

+/// Accepts exactly 64 ASCII hex digits, in either case. Deliberately strict
+/// so no path-like string ever reaches the filesystem join.
+fn is_valid_hash(s: &str) -> bool {
+    s.len() == 64 && s.as_bytes().iter().all(u8::is_ascii_hexdigit)
+}
+
+/// Compute the forward-slash `rel_path` used to look up a video's
+/// `image_exif` row, from its absolute path string and the library root.
+/// `file_scan` stores rel_paths with `/` on every OS and `get_exif_batch`
+/// matches exactly, so a raw Windows strip (`Melissa\clip.mp4`) would miss
+/// and force a full re-hash per request. The root is stripped only at a
+/// separator boundary (`/media/pic` vs `/media/pic2`); else the path is kept.
+fn rel_path_for_lookup(full_path_str: &str, resolved_root: &str) -> String {
+    let is_sep = |c: char| c == '/' || c == '\\';
+    let root_ends_sep = resolved_root.ends_with(is_sep);
+    full_path_str
+        .strip_prefix(resolved_root)
+        .filter(|r| root_ends_sep || r.is_empty() || r.starts_with(is_sep))
+        .unwrap_or(full_path_str)
+        .trim_start_matches(is_sep)
+        .replace('\\', "/")
+}
+
+/// Whitelist of file names servable from a hash dir: `playlist.m3u8`
+/// (`hls_paths::PLAYLIST_FILENAME`) and `segment_NNN.ts` segments, the
+/// shape `PlaylistGenerator` writes via `hls_paths::SEGMENT_TEMPLATE`.
+/// Everything else (`.tmp`, `.unsupported`, dotfiles, ...) is rejected and
+/// answered with 400 — those are internal artifacts clients never request.
+fn is_allowed_hls_filename(name: &str) -> bool {
+    if name == hls_paths::PLAYLIST_FILENAME {
+        return true;
+    }
+    // Between the fixed prefix and suffix: a non-empty run of ASCII digits.
+    let digits = name
+        .strip_prefix("segment_")
+        .and_then(|rest| rest.strip_suffix(".ts"));
+    match digits {
+        Some(num) => !num.is_empty() && num.bytes().all(|b| b.is_ascii_digit()),
+        None => false,
+    }
+}
+
 #[get("/video/preview")]
 pub async fn get_video_preview(
    _claims: Claims,
@@ -427,6 +549,98 @@ mod tests {
    use crate::testhelpers::TestPreviewDao;
    use actix_web::App;

+    #[test]
+    fn is_valid_hash_requires_64_ascii_hex() {
+        assert!(is_valid_hash(&"a".repeat(64)));
+        assert!(is_valid_hash(&"F".repeat(64))); // uppercase hex is accepted too
+        assert!(is_valid_hash(&format!("ab{}", "0".repeat(62))));
+
+        assert!(!is_valid_hash(&"a".repeat(63))); // one char short
+        assert!(!is_valid_hash(&"a".repeat(65))); // one char long
+        // Anything outside the hex alphabet — including '/', '.', '..' —
+        // is rejected up front so the path-traversal canonicalisation
+        // never has to defend the boundary alone.
+        assert!(!is_valid_hash(&format!("/{}", "a".repeat(63))));
+        assert!(!is_valid_hash(&format!("..{}", "a".repeat(62))));
+        assert!(!is_valid_hash(&"g".repeat(64))); // right length, non-hex letter
+    }
+
+    #[test]
+    fn is_allowed_hls_filename_accepts_only_playlist_and_segments() {
+        assert!(is_allowed_hls_filename("playlist.m3u8"));
+        assert!(is_allowed_hls_filename("segment_000.ts"));
+        assert!(is_allowed_hls_filename("segment_999.ts"));
+        assert!(is_allowed_hls_filename("segment_0.ts")); // no fixed digit width required
+
+        // Internal artifacts the client should never request.
+        assert!(!is_allowed_hls_filename("playlist.m3u8.tmp"));
+        assert!(!is_allowed_hls_filename("playlist.unsupported"));
+        // Traversal / path components — defence in depth alongside
+        // the actix path matcher itself.
+        assert!(!is_allowed_hls_filename(".."));
+        assert!(!is_allowed_hls_filename("../etc/passwd"));
+        assert!(!is_allowed_hls_filename("segment_abc.ts")); // index must be digits
+        assert!(!is_allowed_hls_filename("segment_.ts")); // index must be non-empty
+        assert!(!is_allowed_hls_filename(""));
+    }
+
+    #[test]
+    fn rel_path_for_lookup_normalizes_windows_separators() {
+        // Windows: backslash root + backslash full path. The stored row is
+        // forward-slash (`Melissa/clip.mp4`), so without normalization the
+        // exact-match lookup misses and the handler re-hashes the whole file.
+        assert_eq!(
+            rel_path_for_lookup(r"Z:\Media\pic\Melissa\clip.mp4", r"Z:\Media\pic"),
+            "Melissa/clip.mp4"
+        );
+    }
+
+    #[test]
+    fn rel_path_for_lookup_handles_unix_separators() {
+        assert_eq!(
+            rel_path_for_lookup("/media/pic/Melissa/clip.mp4", "/media/pic"),
+            "Melissa/clip.mp4"
+        ); // forward-slash input: only the root and the `/` after it are removed
+    }
+
+    #[test]
+    fn rel_path_for_lookup_file_at_root_has_no_separator() {
+        // A file directly in the library root has no internal separator, so the
+        // backslash mismatch never showed up here — guard against regressions anyway.
+        assert_eq!(
+            rel_path_for_lookup(r"Z:\Media\pic\clip.mp4", r"Z:\Media\pic"),
+            "clip.mp4"
+        );
+        assert_eq!(
+            rel_path_for_lookup("/media/pic/clip.mp4", "/media/pic"),
+            "clip.mp4"
+        );
+    }
+
+    #[test]
+    fn rel_path_for_lookup_strips_leading_separators() {
+        // Leading separators of either style are trimmed once the root is
+        // stripped — including a doubled `/` left right after the root.
+        assert_eq!(
+            rel_path_for_lookup(r"Z:\Media\pic\sub\a.mp4", r"Z:\Media\pic"),
+            "sub/a.mp4"
+        );
+        assert_eq!(
+            rel_path_for_lookup("/media/pic//sub/a.mp4", "/media/pic"),
+            "sub/a.mp4"
+        );
+    }
+
+    #[test]
+    fn rel_path_for_lookup_falls_back_when_root_does_not_match() {
+        // If the root doesn't prefix the path (e.g. a stale mount), nothing is
+        // stripped: the whole path comes back with separators normalized, no panic.
+        assert_eq!(
+            rel_path_for_lookup(r"D:\other\Melissa\clip.mp4", r"Z:\Media\pic"),
+            "D:/other/Melissa/clip.mp4"
+        );
+    }
+
    fn make_token() -> String {
        let claims = Claims::valid_user("1".to_string());
        jsonwebtoken::encode(
@@ -0,0 +1,409 @@
+//! Per-library HLS readiness: Prometheus gauges + `/hls/stats` endpoint.
+//!
+//! The new hash-keyed pipeline transcodes lazily — most of a freshly
+//! mounted library is "pending" for the first hour, and operators want
+//! a live read on "how much work is left, am I CPU-bound, do I need to
+//! bump `HLS_CONCURRENCY`." This module supplies both surfaces against
+//! the same compute path:
+//!
+//! - **Prometheus gauges** `imageserver_hls_videos_total{library}`,
+//!   `..._with_playlist{library}`, `..._pending{library}`,
+//!   `..._unsupported{library}`. Updated every watcher full-scan tick
+//!   and on every `/hls/stats` request, so the freshness matches
+//!   whichever surface the operator is watching.
+//!
+//! - **`GET /hls/stats`** returns a JSON snapshot of the same counts
+//!   plus a top-level cross-library aggregate. Claims-protected
+//!   (matches every other authenticated read in this crate).
+//!
+//! Cost is O(distinct video hashes per library), each row needing a
+//! single `stat()` on the playlist file. On a 100k-video library that's
+//! noticeable; on a typical home library (few thousand) it's noise.
+//! We call from explicit triggers only — never per-request from
+//! middleware — so the cost is bounded.
+
+use std::collections::HashSet;
+use std::path::Path;
+use std::sync::{Arc, Mutex};
+
+use actix_web::{HttpResponse, Responder, get, web};
+use lazy_static::lazy_static;
+use log::{info, warn};
+use prometheus::IntGaugeVec;
+use serde::Serialize;
+
+use crate::data::Claims;
+use crate::database::ExifDao;
+use crate::file_types;
+use crate::libraries::Library;
+use crate::state::AppState;
+use crate::video::hls_paths;
+
+lazy_static! { // Readiness gauges, one series per `library` label; set in `publish_gauges`.
+    pub static ref HLS_VIDEOS_TOTAL: IntGaugeVec = IntGaugeVec::new(
+        prometheus::Opts::new(
+            "imageserver_hls_videos_total",
+            "Distinct video content hashes per library known to image_exif",
+        ),
+        &["library"],
+    )
+    .expect("HLS_VIDEOS_TOTAL");
+    pub static ref HLS_VIDEOS_WITH_PLAYLIST: IntGaugeVec = IntGaugeVec::new(
+        prometheus::Opts::new(
+            "imageserver_hls_videos_with_playlist",
+            "Videos whose hash-keyed HLS playlist is already on disk",
+        ),
+        &["library"],
+    )
+    .expect("HLS_VIDEOS_WITH_PLAYLIST");
+    pub static ref HLS_VIDEOS_PENDING: IntGaugeVec = IntGaugeVec::new(
+        prometheus::Opts::new(
+            "imageserver_hls_videos_pending",
+            "Videos whose hash-keyed HLS playlist is not yet on disk",
+        ),
+        &["library"],
+    )
+    .expect("HLS_VIDEOS_PENDING");
+    pub static ref HLS_VIDEOS_UNSUPPORTED: IntGaugeVec = IntGaugeVec::new(
+        prometheus::Opts::new(
+            "imageserver_hls_videos_unsupported",
+            "Videos with an `.unsupported` sentinel — ffmpeg refused; \
+             operator must delete to retry",
+        ),
+        &["library"],
+    )
+    .expect("HLS_VIDEOS_UNSUPPORTED");
+}
+
+/// Per-library HLS readiness snapshot.
+#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
+pub struct HlsLibraryStats {
+    /// Id of the library this snapshot describes.
+    pub library_id: i32,
+    /// Library name; also used as the `library` label value on the gauges.
+    pub library: String,
+    /// Distinct video content hashes (dedupes intra-library bytes-at-N-paths).
+    pub total: usize,
+    /// Of `total`, hashes whose `playlist.m3u8` is on disk.
+    pub with_playlist: usize,
+    /// Of `total`, hashes with no playlist but a `.unsupported` sentinel.
+    /// These won't progress until an operator deletes the sentinel.
+    pub unsupported: usize,
+    /// `total - (with_playlist + unsupported)` — videos awaiting transcode.
+    pub pending: usize,
+    /// Video rows under this library whose `image_exif.content_hash` is
+    /// still NULL (mid-backfill). Excluded from `total` because they're
+    /// invisible to the hash-keyed pipeline; surfaced so the operator can
+    /// see "hash backfill, then transcode" pipeline depth.
+    pub hashless_videos: usize,
+}
+
+/// JSON body of `GET /hls/stats`: per-library rows plus cross-library sums.
+#[derive(Serialize, Debug)]
+pub struct HlsStatsResponse {
+    pub libraries: Vec<HlsLibraryStats>,
+    pub total: usize,
+    pub with_playlist: usize,
+    pub pending: usize,
+    pub unsupported: usize,
+    pub hashless_videos: usize,
+}
+
+/// For each library: compute its readiness snapshot, publish it to the
+/// Prometheus gauges, and collect it for the caller to serialise.
+/// Publishing only `set`s gauges, so repeated calls overwrite old values.
+pub fn compute_and_publish(
+    libraries: &[Library],
+    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+    video_dir: &Path,
+) -> Vec<HlsLibraryStats> {
+    let otel_ctx = opentelemetry::Context::new();
+    // `inspect` publishes each snapshot right after it is computed, so
+    // the compute → publish order per library is unchanged.
+    libraries
+        .iter()
+        .map(|library| compute_for_library(&otel_ctx, library, exif_dao, video_dir))
+        .inspect(publish_gauges)
+        .collect()
+}
+
+/// Writes one library's counts to the four readiness gauges.
+fn publish_gauges(s: &HlsLibraryStats) {
+    // Every gauge is keyed by the same single label: the library name.
+    let label = [s.library.as_str()];
+    let readings = [
+        (&*HLS_VIDEOS_TOTAL, s.total),
+        (&*HLS_VIDEOS_WITH_PLAYLIST, s.with_playlist),
+        (&*HLS_VIDEOS_PENDING, s.pending),
+        (&*HLS_VIDEOS_UNSUPPORTED, s.unsupported),
+    ];
+    for (gauge, count) in readings {
+        gauge.with_label_values(&label).set(count as i64);
+    }
+}
+
+fn compute_for_library(
+    ctx: &opentelemetry::Context,
+    lib: &Library,
+    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
+    video_dir: &Path,
+) -> HlsLibraryStats {
+    // Hold the DAO lock only for the query. A failed query is logged and
+    // treated as "no rows", which yields an all-zero snapshot.
+    let rows = {
+        let mut guard = exif_dao.lock().expect("Unable to lock ExifDao");
+        let queried = guard.list_paths_and_hashes_for_library(ctx, lib.id);
+        queried.unwrap_or_else(|e| {
+            warn!(
+                "hls_stats: list_paths_and_hashes_for_library failed for lib {}: {:?}",
+                lib.id, e
+            );
+            Vec::new()
+        })
+    };
+    stats_from_rows(lib, &rows, video_dir)
+}
+
+/// Pure function — same compute as [`compute_for_library`] but works
+/// on caller-supplied `(rel_path, content_hash)` rows. Split out so
+/// tests don't need a full `ExifDao` mock. Non-video rows are skipped;
+/// hashed videos are deduplicated by hash, then probed under `video_dir`.
+fn stats_from_rows(
+    lib: &Library,
+    rows: &[(String, Option<String>)],
+    video_dir: &Path,
+) -> HlsLibraryStats {
+    // Borrow the hashes out of `rows` instead of cloning one `String`
+    // per row — the set never outlives this call.
+    let mut hashes: HashSet<&str> = HashSet::new();
+    let mut hashless_videos = 0usize;
+    for (rel_path, hash_opt) in rows {
+        if !file_types::is_video_file(Path::new(rel_path)) {
+            continue;
+        }
+        match hash_opt.as_deref() {
+            Some(h) => {
+                hashes.insert(h);
+            }
+            None => hashless_videos += 1,
+        }
+    }
+
+    let mut with_playlist = 0usize;
+    let mut unsupported = 0usize;
+    for &h in &hashes {
+        if hls_paths::playlist_for_hash(video_dir, h).exists() {
+            with_playlist += 1;
+        } else if hls_paths::sentinel_for_hash(video_dir, h).exists() {
+            unsupported += 1;
+        }
+    }
+    let total = hashes.len();
+    let pending = total.saturating_sub(with_playlist + unsupported);
+
+    HlsLibraryStats {
+        library_id: lib.id,
+        library: lib.name.clone(),
+        total,
+        with_playlist,
+        unsupported,
+        pending,
+        hashless_videos,
+    }
+}
+
+/// Emits one info-level line with the readiness totals across all
+/// libraries, followed by a per-library breakdown. The watcher calls
+/// this at the end of a full-scan tick so operators tailing the log get
+/// the headline numbers without scraping Prometheus.
+pub fn log_summary(stats: &[HlsLibraryStats]) {
+    let (mut total, mut with_playlist, mut pending) = (0usize, 0usize, 0usize);
+    let (mut unsupported, mut hashless) = (0usize, 0usize);
+    let mut per_lib: Vec<String> = Vec::with_capacity(stats.len());
+    // Single pass: accumulate the sums and render each library's fragment.
+    for s in stats {
+        total += s.total;
+        with_playlist += s.with_playlist;
+        pending += s.pending;
+        unsupported += s.unsupported;
+        hashless += s.hashless_videos;
+        per_lib.push(format!(
+            "{}={}/{} pending={} unsupported={} hashless={}",
+            s.library, s.with_playlist, s.total, s.pending, s.unsupported, s.hashless_videos,
+        ));
+    }
+
+    info!(
+        "HLS readiness: {}/{} playlists on disk, {} pending, {} unsupported, {} hashless videos | per-library: [{}]",
+        with_playlist,
+        total,
+        pending,
+        unsupported,
+        hashless,
+        per_lib.join(", "),
+    );
+}
+
+#[get("/hls/stats")]
+pub async fn hls_stats_handler(
+    _claims: Claims,
+    app_state: web::Data<AppState>,
+    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
+) -> impl Responder {
+    let libraries = app_state.libraries.clone();
+    let video_dir = std::path::PathBuf::from(&app_state.video_path);
+    let exif_dao = exif_dao.into_inner();
+
+    // The compute does synchronous DB + file IO, so it runs on the blocking
+    // pool. A failed task is logged and reported as an empty library list.
+    let job = move || compute_and_publish(&libraries, &exif_dao, &video_dir);
+    let stats = web::block(job).await.unwrap_or_else(|e| {
+        warn!("/hls/stats: blocking task failed: {:?}", e);
+        Vec::new()
+    });
+
+    let sum_of = |pick: fn(&HlsLibraryStats) -> usize| -> usize {
+        stats.iter().map(pick).sum()
+    };
+    let total = sum_of(|s| s.total);
+    let with_playlist = sum_of(|s| s.with_playlist);
+    let pending = sum_of(|s| s.pending);
+    let unsupported = sum_of(|s| s.unsupported);
+    let hashless_videos = sum_of(|s| s.hashless_videos);
+
+    HttpResponse::Ok().json(HlsStatsResponse {
+        libraries: stats,
+        total,
+        with_playlist,
+        pending,
+        unsupported,
+        hashless_videos,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    /// Enabled library fixture; only `id` and `name` are read by the stats code.
+    fn lib(id: i32, name: &str) -> Library {
+        Library {
+            id,
+            name: name.to_owned(),
+            root_path: String::new(),
+            enabled: true,
+            excluded_dirs: Vec::new(),
+        }
+    }
+
+    /// Converts borrowed `(rel_path, hash)` fixtures into the owned row shape.
+    fn rows(vs: Vec<(&str, Option<&str>)>) -> Vec<(String, Option<String>)> {
+        vs.into_iter()
+            .map(|(path, hash)| (path.to_owned(), hash.map(str::to_owned)))
+            .collect()
+    }
+
+    /// Creates an empty file at `dir/rel`, creating parent directories first.
+    fn touch(dir: &Path, rel: &str) {
+        let target = dir.join(rel);
+        std::fs::create_dir_all(target.parent().unwrap()).unwrap();
+        std::fs::write(target, b"").unwrap();
+    }
+
+    /// A 64-character stand-in content hash made of one repeated character.
+    fn hash_of(fill: char) -> String {
+        fill.to_string().repeat(64)
+    }
+
+    #[test]
+    fn videos_only_count_in_total() {
+        let scratch = tempdir().unwrap();
+        let (img, mp4, mov) = (hash_of('a'), hash_of('b'), hash_of('c'));
+        let input = rows(vec![
+            ("photos/IMG.jpg", Some(&img)), // image: ignored
+            ("clip.mp4", Some(&mp4)),
+            ("vid.mov", Some(&mov)),
+        ]);
+        let got = stats_from_rows(&lib(1, "main"), &input, scratch.path());
+        // Nothing exists under the scratch dir, so every video is pending.
+        assert_eq!((got.total, got.pending), (2, 2));
+        assert_eq!((got.with_playlist, got.unsupported), (0, 0));
+        assert_eq!(got.hashless_videos, 0);
+    }
+
+    #[test]
+    fn hash_dedup_collapses_duplicate_rel_paths() {
+        let scratch = tempdir().unwrap();
+        let (shared, unique) = (hash_of('a'), hash_of('b'));
+        let input = rows(vec![
+            ("a/clip.mp4", Some(&shared)),
+            ("b/clip.mp4", Some(&shared)), // same bytes, dup
+            ("other.mp4", Some(&unique)),
+        ]);
+        let got = stats_from_rows(&lib(1, "main"), &input, scratch.path());
+        assert_eq!(got.total, 2, "duplicate hashes collapse");
+    }
+
+    #[test]
+    fn playlist_existence_promotes_to_with_playlist() {
+        let scratch = tempdir().unwrap();
+        let hash = hash_of('a');
+        touch(scratch.path(), &format!("aa/{}/playlist.m3u8", hash));
+
+        let input = rows(vec![("clip.mp4", Some(&hash))]);
+        let got = stats_from_rows(&lib(1, "main"), &input, scratch.path());
+        assert_eq!(got.total, 1);
+        assert_eq!(got.with_playlist, 1);
+        assert_eq!(got.pending, 0);
+    }
+
+    #[test]
+    fn sentinel_existence_promotes_to_unsupported() {
+        let scratch = tempdir().unwrap();
+        let hash = hash_of('b');
+        touch(scratch.path(), &format!("bb/{}/playlist.unsupported", hash));
+
+        let input = rows(vec![("clip.mov", Some(&hash))]);
+        let got = stats_from_rows(&lib(1, "main"), &input, scratch.path());
+        // Only a sentinel exists for this hash, so it counts as unsupported.
+        assert_eq!((got.total, got.unsupported), (1, 1));
+        assert_eq!((got.with_playlist, got.pending), (0, 0));
+    }
+
+    #[test]
+    fn null_hash_videos_are_hashless_not_total() {
+        let scratch = tempdir().unwrap();
+        let hashed = hash_of('a');
+        let input = rows(vec![("clip.mp4", None), ("other.mp4", Some(&hashed))]);
+        let got = stats_from_rows(&lib(1, "main"), &input, scratch.path());
+        assert_eq!(got.total, 1, "hashless row excluded from total");
+        assert_eq!(got.hashless_videos, 1);
+    }
+
+    #[test]
+    fn publish_gauges_sets_per_library_value() {
+        // Each published count is distinct and non-zero, so a gauge wired to
+        // the wrong field, or never set at all, fails the check below.
+        let s = HlsLibraryStats {
+            library_id: 7,
+            library: "test_publish_a".into(),
+            total: 6,
+            with_playlist: 3,
+            pending: 2,
+            unsupported: 1,
+            hashless_videos: 0,
+        };
+        publish_gauges(&s);
+        let published = [
+            (&*HLS_VIDEOS_TOTAL, 6),
+            (&*HLS_VIDEOS_WITH_PLAYLIST, 3),
+            (&*HLS_VIDEOS_PENDING, 2),
+            (&*HLS_VIDEOS_UNSUPPORTED, 1),
+        ];
+        for (gauge, want) in published {
+            let got = gauge.with_label_values(&["test_publish_a"]).get();
+            assert_eq!(got, want, "gauge for library test_publish_a");
+        }
+    }
+}
@@ -444,8 +444,7 @@ where
            )
            .service(web::resource("/graph").route(web::get().to(get_graph::<D>)))
            .service(
-                web::resource("/predicate-stats")
-                    .route(web::get().to(get_predicate_stats::<D>)),
+                web::resource("/predicate-stats").route(web::get().to(get_predicate_stats::<D>)),
            )
            .service(
                web::resource("/predicates/{predicate}/bulk-reject")
@@ -804,38 +803,36 @@ async fn synthesize_merge<D: KnowledgeDao + 'static>(
            .json(serde_json::json!({"error": "source_id and target_id must differ"}));
    }

-    let cx = opentelemetry::Context::current();
-    let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
+    let (source, target) = {
+        let cx = opentelemetry::Context::current();
+        let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");

-    let source = match dao.get_entity_by_id(&cx, body.source_id) {
-        Ok(Some(e)) => e,
-        Ok(None) => {
-            return HttpResponse::BadRequest()
-                .json(serde_json::json!({"error": "source entity not found"}));
-        }
-        Err(e) => {
-            log::error!("synthesize_merge source lookup: {:?}", e);
-            return HttpResponse::InternalServerError()
-                .json(serde_json::json!({"error": "Database error"}));
-        }
+        let source = match dao.get_entity_by_id(&cx, body.source_id) {
+            Ok(Some(e)) => e,
+            Ok(None) => {
+                return HttpResponse::BadRequest()
+                    .json(serde_json::json!({"error": "source entity not found"}));
+            }
+            Err(e) => {
+                log::error!("synthesize_merge source lookup: {:?}", e);
+                return HttpResponse::InternalServerError()
+                    .json(serde_json::json!({"error": "Database error"}));
+            }
+        };
+        let target = match dao.get_entity_by_id(&cx, body.target_id) {
+            Ok(Some(e)) => e,
+            Ok(None) => {
+                return HttpResponse::BadRequest()
+                    .json(serde_json::json!({"error": "target entity not found"}));
+            }
+            Err(e) => {
+                log::error!("synthesize_merge target lookup: {:?}", e);
+                return HttpResponse::InternalServerError()
+                    .json(serde_json::json!({"error": "Database error"}));
+            }
+        };
+        (source, target)
    };
-    let target = match dao.get_entity_by_id(&cx, body.target_id) {
-        Ok(Some(e)) => e,
-        Ok(None) => {
-            return HttpResponse::BadRequest()
-                .json(serde_json::json!({"error": "target entity not found"}));
-        }
-        Err(e) => {
-            log::error!("synthesize_merge target lookup: {:?}", e);
-            return HttpResponse::InternalServerError()
-                .json(serde_json::json!({"error": "Database error"}));
-        }
-    };
-
-    // Drop the DAO lock before the LLM call — the generate request
-    // is the slow part (seconds) and we don't want to block other
-    // knowledge reads while it runs.
-    drop(dao);

    let source_desc = if source.description.trim().is_empty() {
        "(none)".to_string()
@@ -1261,12 +1258,8 @@ async fn bulk_reject_predicate<D: KnowledgeDao + 'static>(
    let persona = resolve_persona_filter(&req, &claims, &persona_dao);
    let cx = opentelemetry::Context::current();
    let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
-    match dao.bulk_reject_facts_by_predicate(
-        &cx,
-        &persona,
-        &predicate,
-        Some(("manual", "manual")),
-    ) {
+    match dao.bulk_reject_facts_by_predicate(&cx, &persona, &predicate, Some(("manual", "manual")))
+    {
        Ok(rejected) => HttpResponse::Ok().json(BulkRejectResponse { rejected }),
        Err(e) => {
            log::error!("bulk_reject_predicate error: {:?}", e);
@@ -7,6 +7,8 @@ pub mod ai;
 pub mod auth;
 pub mod bin_progress;
 pub mod cleanup;
+pub mod clip_search;
+pub mod clip_watch;
 pub mod content_hash;
 pub mod data;
 pub mod database;
@@ -94,7 +94,7 @@ pub fn parse_excluded_dirs_column(raw: Option<&str>) -> Vec<String> {
    match raw {
        None => Vec::new(),
        Some(s) => s
-            .split(|c: char| matches!(c, ',' | '\n' | '\r'))
+            .split([',', '\n', '\r'])
            .map(str::trim)
            .filter(|s| !s.is_empty())
            .map(String::from)
@@ -148,10 +148,7 @@ pub fn validate_excluded_dirs_entry(entry: &str) -> Result<String, String> {
    if let Some(rel) = trimmed.strip_prefix('/') {
        // Path form. Reject `..` traversal — `base.join(\"../x\")` doesn't
        // canonicalise, so `path.starts_with(...)` never matches.
-        if rel
-            .split('/')
-            .any(|seg| seg == "..")
-        {
+        if rel.split('/').any(|seg| seg == "..") {
            return Err(format!(
                "'{}': '..' segments don't normalise — the prefix-match never fires",
                trimmed
@@ -542,7 +539,10 @@ pub async fn patch_library(
        {
            Ok(n) => affected = affected.max(n),
            Err(e) => {
-                warn!("PATCH /libraries/{}: enabled update failed: {:?}", lib_id, e);
+                warn!(
+                    "PATCH /libraries/{}: enabled update failed: {:?}",
+                    lib_id, e
+                );
                return HttpResponse::InternalServerError().body(format!("{}", e));
            }
        }
@@ -600,7 +600,9 @@ pub async fn patch_library(
            );
            HttpResponse::Ok().json(lib)
        }
-        None => HttpResponse::NotFound().body(format!("library id {} not found after update", lib_id)),
+        None => {
+            HttpResponse::NotFound().body(format!("library id {} not found after update", lib_id))
+        }
    }
 }

@@ -930,10 +932,7 @@ mod tests {

    #[test]
    fn validate_strips_trailing_slash_on_path_entries() {
-        assert_eq!(
-            validate_excluded_dirs_entry("/photos/").unwrap(),
-            "/photos"
-        );
+        assert_eq!(validate_excluded_dirs_entry("/photos/").unwrap(), "/photos");
        assert_eq!(
            validate_excluded_dirs_entry("/photos//").unwrap(),
            "/photos"
@@ -1053,7 +1052,7 @@ mod tests {
            enabled: true,
            excluded_dirs: Vec::new(),
        };
-        let map = new_health_map(&[lib.clone()]);
+        let map = new_health_map(std::slice::from_ref(&lib));

        // First probe: empty dir, no prior data — Online.
        let s1 = refresh_health(&map, &lib, false);
@@ -296,6 +296,7 @@ impl GcStats {
            || self.revived > 0
    }

+    #[allow(dead_code)]
    pub fn total_deleted(&self) -> usize {
        self.deleted_face_detections + self.deleted_tagged_photo + self.deleted_photo_insights
    }
@@ -26,12 +26,13 @@ use crate::files::{RealFileSystem, move_file};
 use crate::service::ServiceBuilder;
 use crate::state::AppState;
 use crate::tags::*;
-use crate::video::actors::ScanDirectoryMessage;
 use log::{error, info};

 mod ai;
 mod auth;
 mod backfill;
+mod clip_search;
+mod clip_watch;
 mod content_hash;
 mod data;
 mod database;
@@ -46,6 +47,7 @@ mod file_types;
 mod files;
 mod geo;
 mod handlers;
+mod hls_stats;
 mod libraries;
 mod library_maintenance;
 mod perceptual_hash;
@@ -73,6 +75,32 @@ fn main() -> std::io::Result<()> {

    run_migrations(&mut connect()).expect("Failed to run migrations");

+    // Recover orphaned insight generation jobs from a previous crash.
+    {
+        use crate::database::{InsightGenerationJobDao, SqliteInsightGenerationJobDao};
+        let mut dao = SqliteInsightGenerationJobDao::new();
+        let ctx = opentelemetry::Context::new();
+        match dao.recover_orphaned_jobs(&ctx) {
+            Ok(n) if n > 0 => {
+                info!("Recovered {} orphaned insight generation jobs", n);
+            }
+            Ok(_) => {}
+            Err(e) => {
+                log::warn!("Failed to recover orphaned insight jobs: {:?}", e);
+            }
+        }
+    }
+
+    // One-shot retirement of the pre-content-hash HLS layout. Idempotent
+    // — a second boot finds nothing and reports zero deletions, so it's
+    // safe to leave wired in until the module is removed in a later
+    // release. Runs before the actor pipeline starts so we never race a
+    // PlaylistGenerator write against this rm.
+    {
+        let video_path = env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env");
+        video::legacy_migration::retire_legacy_hls_output(std::path::Path::new(&video_path));
+    }
+
    let system = actix::System::new();
    system.block_on(async {
        // Just use basic logger when running a non-release build
@@ -117,15 +145,32 @@ fn main() -> std::io::Result<()> {
            .registry
            .register(Box::new(thumbnails::VIDEO_GAUGE.clone()))
            .unwrap();
+        // HLS readiness gauges. Updated by the watcher every full-scan
+        // tick and on every `/hls/stats` request. See `hls_stats`.
+        prometheus
+            .registry
+            .register(Box::new(hls_stats::HLS_VIDEOS_TOTAL.clone()))
+            .unwrap();
+        prometheus
+            .registry
+            .register(Box::new(hls_stats::HLS_VIDEOS_WITH_PLAYLIST.clone()))
+            .unwrap();
+        prometheus
+            .registry
+            .register(Box::new(hls_stats::HLS_VIDEOS_PENDING.clone()))
+            .unwrap();
+        prometheus
+            .registry
+            .register(Box::new(hls_stats::HLS_VIDEOS_UNSUPPORTED.clone()))
+            .unwrap();

        let app_state = app_data.clone();
-        for lib in &app_state.libraries {
-            app_state.playlist_manager.do_send(ScanDirectoryMessage {
-                directory: lib.root_path.clone(),
-            });
-        }

-        // Start file watcher with playlist manager and preview generator
+        // Start file watcher with playlist manager and preview generator.
+        // The watcher's first tick is configured to be a full scan (see
+        // `watch_files`), so every library's missing HLS playlists are
+        // queued on that first iteration — no separate startup walk
+        // needed.
        let playlist_mgr_for_watcher = app_state.playlist_manager.as_ref().clone();
        let preview_gen_for_watcher = app_state.preview_clip_generator.as_ref().clone();
        // Both background jobs read from the shared `live_libraries` lock
@@ -137,6 +182,7 @@ fn main() -> std::io::Result<()> {
            playlist_mgr_for_watcher,
            preview_gen_for_watcher,
            app_state.face_client.clone(),
+            app_state.clip_client.clone(),
            app_state.excluded_dirs.clone(),
            app_state.library_health.clone(),
        );
@@ -151,6 +197,28 @@ fn main() -> std::io::Result<()> {
            app_state.library_health.clone(),
        );

+        // Periodically clean up stale turn entries from the in-memory
+        // registry. Sweeps every `timeout_secs` (clamped to 1..=300 s) and
+        // drops entries older than that timeout.
+        {
+            let registry = app_state.turn_registry.clone();
+            let timeout_secs = registry.timeout_secs();
+            tokio::spawn(async move {
+                // Sweep at least every 5 minutes, and never less often than the
+                // timeout itself — otherwise entries could linger up to ~2× the
+                // configured timeout before being reclaimed.
+                let interval_secs = timeout_secs.clamp(1, 300);
+                let interval = tokio::time::Duration::from_secs(interval_secs);
+                loop {
+                    tokio::time::sleep(interval).await;
+                    let cleaned = registry.cleanup_stale().await;
+                    if cleaned > 0 {
+                        log::info!("TurnRegistry: cleaned up {cleaned} stale entries");
+                    }
+                }
+            });
+        }
+
        // Spawn background job to generate daily conversation summaries
        {
            use crate::ai::generate_daily_summaries;
@@ -253,14 +321,20 @@ fn main() -> std::io::Result<()> {
                .service(
                    web::resource("/photos/exif").route(web::get().to(files::list_exif_summary)),
                )
+                .service(
+                    // Semantic search via CLIP embeddings. See
+                    // src/clip_search.rs for the request/response shape.
+                    web::resource("/photos/search")
+                        .route(web::get().to(clip_search::search_photos)),
+                )
                .service(web::resource("/file/move").post(move_file::<RealFileSystem>))
                .service(handlers::image::get_image)
                .service(handlers::image::upload_image)
                .service(handlers::video::generate_video)
-                .service(handlers::video::stream_video)
+                .service(handlers::video::stream_hls_file)
                .service(handlers::video::get_video_preview)
                .service(handlers::video::get_preview_status)
-                .service(handlers::video::get_video_part)
+                .service(hls_stats::hls_stats_handler)
                .service(handlers::favorites::favorites)
                .service(handlers::favorites::put_add_favorite)
                .service(handlers::favorites::delete_favorite)
@@ -272,17 +346,27 @@ fn main() -> std::io::Result<()> {
                .service(memories::list_memories)
                .service(ai::generate_insight_handler)
                .service(ai::generate_agentic_insight_handler)
+                .service(ai::generation_status_handler)
+                .service(ai::cancel_generation_handler)
                .service(ai::get_insight_handler)
                .service(ai::delete_insight_handler)
                .service(ai::get_all_insights_handler)
+                .service(ai::get_insight_history_handler)
                .service(ai::get_available_models_handler)
                .service(ai::get_openrouter_models_handler)
                .service(ai::chat_turn_handler)
                .service(ai::chat_stream_handler)
                .service(ai::chat_history_handler)
                .service(ai::chat_rewind_handler)
+                .service(ai::turn_async_handler)
+                .service(ai::turn_replay_handler)
+                .service(ai::cancel_turn_handler)
                .service(ai::rate_insight_handler)
                .service(ai::export_training_data_handler)
+                .service(ai::tts_speech_handler)
+                .service(ai::list_voices_handler)
+                .service(ai::create_voice_upload_handler)
+                .service(ai::create_voice_from_library_handler)
                .service(libraries::list_libraries)
                .service(libraries::patch_library)
                .add_feature(add_tag_services::<_, SqliteTagDao>)
@@ -1,13 +1,16 @@
 use crate::ai::apollo_client::ApolloClient;
+use crate::ai::clip_client::ClipClient;
 use crate::ai::face_client::FaceClient;
 use crate::ai::insight_chat::{ChatLockMap, InsightChatService};
+use crate::ai::llamacpp::LlamaCppClient;
 use crate::ai::openrouter::OpenRouterClient;
+use crate::ai::turn_registry::TurnRegistry;
 use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
 use crate::database::{
-    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
-    SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao,
-    SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
-    connect,
+    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao,
+    LocationHistoryDao, SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao,
+    SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao, SqliteKnowledgeDao,
+    SqliteLocationHistoryDao, SqliteSearchHistoryDao, connect,
 };
 use crate::database::{PreviewDao, SqlitePreviewDao};
 use crate::faces;
@@ -17,6 +20,7 @@ use crate::video::actors::{
    PlaylistGenerator, PreviewClipGenerator, StreamActor, VideoPlaylistManager,
 };
 use actix::{Actor, Addr};
+use std::collections::HashMap;
 use std::env;
 use std::sync::{Arc, Mutex, RwLock};

@@ -61,15 +65,25 @@ pub struct AppState {
    /// Curated list of OpenRouter model ids exposed to clients. Sourced from
    /// `OPENROUTER_ALLOWED_MODELS` (comma-separated). Empty when unset.
    pub openrouter_allowed_models: Vec<String>,
+    /// `None` when `LLAMA_SWAP_URL` is not configured. Consulted only when a
+    /// request explicitly opts into `backend=llamacpp`. Same shape as the
+    /// `openrouter` slot — present here so handlers can route to it without
+    /// threading through the generator.
+    #[allow(dead_code)]
+    pub llamacpp: Option<Arc<LlamaCppClient>>,
+    /// Curated list of llama-swap model ids exposed to clients. Sourced from
+    /// `LLAMA_SWAP_ALLOWED_MODELS` (comma-separated). Empty when unset; the
+    /// server then falls back to `LLAMA_SWAP_PRIMARY_MODEL`.
+    pub llamacpp_allowed_models: Vec<String>,
    pub sms_client: SmsApiClient,
    pub insight_generator: InsightGenerator,
    /// Chat continuation service. Hold an Arc so handlers can clone cheaply.
    pub insight_chat: Arc<InsightChatService>,
-    /// Face inference client (calls Apollo's `/api/internal/faces/*`).
-    /// Disabled (`is_enabled() == false`) when neither `APOLLO_FACE_API_BASE_URL`
-    /// nor `APOLLO_API_BASE_URL` is set; the file-watch hook (Phase 3) and
-    /// manual-face-create handler short-circuit in that case.
+    pub turn_registry: Arc<TurnRegistry>,
    pub face_client: FaceClient,
+    pub clip_client: ClipClient,
+    pub insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
+    pub insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
 }

 impl AppState {
@@ -100,18 +114,24 @@ impl AppState {
        ollama: OllamaClient,
        openrouter: Option<Arc<OpenRouterClient>>,
        openrouter_allowed_models: Vec<String>,
+        llamacpp: Option<Arc<LlamaCppClient>>,
+        llamacpp_allowed_models: Vec<String>,
        sms_client: SmsApiClient,
        insight_generator: InsightGenerator,
        insight_chat: Arc<InsightChatService>,
+        turn_registry: Arc<TurnRegistry>,
        preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
        face_client: FaceClient,
+        clip_client: ClipClient,
+        insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
+        insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
    ) -> Self {
        assert!(
            !libraries_vec.is_empty(),
            "AppState::new requires at least one library"
        );
        let base_path = libraries_vec[0].root_path.clone();
-        let playlist_generator = PlaylistGenerator::new();
+        let playlist_generator = PlaylistGenerator::new(video_path.clone());
        let video_playlist_manager =
            VideoPlaylistManager::new(video_path.clone(), playlist_generator.start());

@@ -139,10 +159,16 @@ impl AppState {
            ollama,
            openrouter,
            openrouter_allowed_models,
+            llamacpp,
+            llamacpp_allowed_models,
            sms_client,
            insight_generator,
            insight_chat,
+            turn_registry,
            face_client,
+            clip_client,
+            insight_job_dao,
+            insight_job_handles,
        }
    }

@@ -179,6 +205,9 @@ impl Default for AppState {
        let openrouter = build_openrouter_from_env();
        let openrouter_allowed_models = parse_openrouter_allowed_models();

+        let llamacpp = build_llamacpp_from_env();
+        let llamacpp_allowed_models = parse_llamacpp_allowed_models();
+
        let sms_api_url =
            env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
        let sms_api_token = env::var("SMS_API_TOKEN").ok();
@@ -198,6 +227,9 @@ impl Default for AppState {
            .or_else(|| env::var("APOLLO_API_BASE_URL").ok());
        let face_client = FaceClient::new(face_client_url);

+        // CLIP inference client. Same env var fallback as face_client.
+        let clip_client = ClipClient::from_env();
+
        // Initialize DAOs
        let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
@@ -225,6 +257,12 @@ impl Default for AppState {
        let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
            Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));

+        // Initialize insight generation job DAO (async generation tracking)
+        let insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>> =
+            Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new())));
+        let insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>> =
+            Arc::new(Mutex::new(HashMap::new()));
+
        // Load base path and ensure the primary library row reflects it.
        let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
        let mut seed_conn = connect();
@@ -240,6 +278,7 @@ impl Default for AppState {
        let insight_generator = InsightGenerator::new(
            ollama.clone(),
            openrouter.clone(),
+            llamacpp.clone(),
            sms_client.clone(),
            apollo_client.clone(),
            insight_dao.clone(),
@@ -261,12 +300,18 @@ impl Default for AppState {
            Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
        let insight_chat = Arc::new(InsightChatService::new(
            Arc::new(insight_generator.clone()),
-            ollama.clone(),
-            openrouter.clone(),
            insight_dao.clone(),
            chat_locks,
        ));

+        // Turn registry for reconnectable chat turns. 5-minute timeout for
+        // stale turns (background cleaner drops entries older than this).
+        let timeout_secs: u64 = env::var("INSIGHT_CHAT_TURN_TIMEOUT_SECS")
+            .ok()
+            .and_then(|v| v.parse().ok())
+            .unwrap_or(300);
+        let turn_registry = Arc::new(TurnRegistry::new(timeout_secs));
+
        // Ensure preview clips directory exists
        let preview_clips_path =
            env::var("PREVIEW_CLIPS_DIRECTORY").unwrap_or_else(|_| "preview_clips".to_string());
@@ -284,11 +329,17 @@ impl Default for AppState {
            ollama,
            openrouter,
            openrouter_allowed_models,
+            llamacpp,
+            llamacpp_allowed_models,
            sms_client,
            insight_generator,
            insight_chat,
+            turn_registry,
            preview_dao,
            face_client,
+            clip_client,
+            insight_job_dao,
+            insight_job_handles,
        )
    }
 }
@@ -324,10 +375,45 @@ fn parse_openrouter_allowed_models() -> Vec<String> {
        .collect()
 }

+/// Construct the shared `LlamaCppClient` from the `LLAMA_SWAP_*` environment.
+///
+/// Returns `None` when `LLAMA_SWAP_URL` cannot be read from the environment;
+/// otherwise the client is always built, regardless of `LLM_BACKEND` (the
+/// agentic / chat paths decide separately whether to route through it).
+///
+/// Optional overrides, each applied only when the variable is present:
+/// - `LLAMA_SWAP_PRIMARY_MODEL` — passed straight to `LlamaCppClient::new`
+/// - `LLAMA_SWAP_EMBEDDING_MODEL` — `set_embedding_model`
+/// - `LLAMA_SWAP_VISION_MODEL` — `set_vision_model`
+/// - `LLAMA_SWAP_TTS_MODEL` — `set_tts_model`
+///
+/// When an override is absent the client keeps whatever slot id it was
+/// constructed with (the bundled `llama-swap/config.yaml` names are
+/// `chat` / `vision` / `embed`).
+fn build_llamacpp_from_env() -> Option<Arc<LlamaCppClient>> {
+    let base_url = env::var("LLAMA_SWAP_URL").ok()?;
+    let read_slot = |key: &str| env::var(key).ok();
+
+    let mut client = LlamaCppClient::new(Some(base_url), read_slot("LLAMA_SWAP_PRIMARY_MODEL"));
+    if let Some(slot) = read_slot("LLAMA_SWAP_EMBEDDING_MODEL") {
+        client.set_embedding_model(slot);
+    }
+    if let Some(slot) = read_slot("LLAMA_SWAP_VISION_MODEL") {
+        client.set_vision_model(slot);
+    }
+    if let Some(slot) = read_slot("LLAMA_SWAP_TTS_MODEL") {
+        client.set_tts_model(slot);
+    }
+    Some(Arc::new(client))
+}
+
+/// Read `LLAMA_SWAP_ALLOWED_MODELS` and split it into individual model ids.
+///
+/// The value is treated as a comma-separated list: each entry is trimmed
+/// and blank entries are dropped. An unset (or unreadable) variable yields
+/// an empty vec. The result feeds the model picker surfaced by
+/// `/insights/models` when `LLM_BACKEND=llamacpp`.
+fn parse_llamacpp_allowed_models() -> Vec<String> {
+    let raw = env::var("LLAMA_SWAP_ALLOWED_MODELS").unwrap_or_default();
+    raw.split(',')
+        .filter_map(|entry| {
+            let id = entry.trim();
+            (!id.is_empty()).then(|| id.to_string())
+        })
+        .collect()
+}
+
 #[cfg(test)]
 impl AppState {
    /// Creates an AppState instance for testing with temporary directories
    pub fn test_state() -> Self {
+        use crate::database::insight_generation_job_dao::SqliteInsightGenerationJobDao;
        use actix::Actor;
        // Create a base temporary directory
        let temp_dir = tempfile::tempdir().expect("Failed to create temp directory");
@@ -386,6 +472,7 @@ impl AppState {
        let insight_generator = InsightGenerator::new(
            ollama.clone(),
            None,
+            None,
            sms_client.clone(),
            apollo_client.clone(),
            insight_dao.clone(),
@@ -405,12 +492,13 @@ impl AppState {
            Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
        let insight_chat = Arc::new(InsightChatService::new(
            Arc::new(insight_generator.clone()),
-            ollama.clone(),
-            None,
            insight_dao.clone(),
            chat_locks,
        ));

+        // Turn registry for test state.
+        let turn_registry = Arc::new(TurnRegistry::new(300));
+
        // Initialize test preview DAO
        let preview_dao: Arc<Mutex<Box<dyn PreviewDao>>> =
            Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new())));
@@ -434,11 +522,17 @@ impl AppState {
            ollama,
            None,
            Vec::new(),
+            None,
+            Vec::new(),
            sms_client,
            insight_generator,
            insight_chat,
+            turn_registry,
            preview_dao,
            FaceClient::new(None), // disabled in test
+            ClipClient::new(None), // disabled in test
+            Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test
+            Arc::new(Mutex::new(HashMap::new())), // placeholder for test
        )
    }
 }
@@ -144,6 +144,7 @@ impl PreviewDao for TestPreviewDao {
        } else {
            Err(DbError {
                kind: DbErrorKind::UpdateError,
+                source: None,
            })
        }
    }
@@ -8,7 +8,10 @@
 //! skip them silently.

 use std::path::{Path, PathBuf};
+use std::process::Command;

+use image::GenericImageView;
+use image::codecs::jpeg::JpegEncoder;
 use lazy_static::lazy_static;
 use log::{debug, error, info, warn};
 use opentelemetry::{
@@ -26,6 +29,26 @@ use crate::libraries;
 use crate::otel::global_tracer;
 use crate::video::actors::{generate_image_thumbnail_ffmpeg, generate_video_thumbnail};

+/// Maximum long-edge size (px) for the large preview tier. Tuned to look
+/// crisp full-screen on a 3× phone (≈1290×2796 native) and to hold up
+/// through a few stops of pinch-zoom before the original streams in.
+/// Bigger doesn't help: callers that need true full resolution request
+/// `size=full` and the handler streams the original bytes.
+pub const LARGE_PREVIEW_MAX_DIM: u32 = 2048;
+
+/// JPEG quality for the large and xlarge preview tiers. 85 is the
+/// conventional "indistinguishable from source at viewing size" point —
+/// well above the `image` crate's default ~75, but well below quality-90+
+/// territory where file size doubles for no perceptible win.
+const LARGE_PREVIEW_JPEG_QUALITY: u8 = 85;
+
+/// Maximum long-edge size (px) for the xlarge preview tier. Bridges the
+/// gap between `large` (2048px, ~16MB decoded) and the original bytes
+/// (potentially 48+ MP / ~192MB decoded). At 4096px the decoded bitmap is
+/// ~64MB — enough for 2-3× pinch-zoom on any phone before the viewer
+/// needs to stream the true original.
+pub const XLARGE_PREVIEW_MAX_DIM: u32 = 4096;
+
 lazy_static! {
    pub static ref IMAGE_GAUGE: IntGauge = IntGauge::new(
        "imageserver_image_total",
@@ -89,6 +112,186 @@ pub fn generate_image_thumbnail(src: &Path, thumb_path: &Path) -> std::io::Resul
    Ok(())
 }

+/// Build the on-demand large preview for `src` and write it to `dest`.
+///
+/// The pixels come from the first source that applies:
+/// 1. the embedded JPEG preview, when `exif::extract_embedded_jpeg_preview`
+///    finds one (avoids a raw-sensor decode for RAW files);
+/// 2. ffmpeg, when `file_types::needs_ffmpeg_thumbnail` flags the file —
+///    delegated wholesale to [`generate_large_preview_ffmpeg`];
+/// 3. a direct decode through `image::open`.
+///
+/// Branches 1 and 3 apply the EXIF orientation (treated as `1` when it
+/// cannot be read) and hand the image to [`encode_large_jpeg`], which caps
+/// the long edge at [`LARGE_PREVIEW_MAX_DIM`] without upscaling. The ffmpeg
+/// branch scales on its own and does not pass through
+/// `exif::apply_orientation`.
+///
+/// The parent directory of `dest` is not created here; callers must have
+/// done so already.
+///
+/// # Errors
+/// `InvalidData` when the embedded preview or the source file cannot be
+/// decoded; otherwise whatever the encode / ffmpeg step reports.
+pub fn generate_large_preview(src: &Path, dest: &Path) -> std::io::Result<()> {
+    let orientation = exif::read_orientation(src).unwrap_or(1);
+
+    let decoded = match exif::extract_embedded_jpeg_preview(src) {
+        Some(preview) => image::load_from_memory(&preview).map_err(|e| {
+            std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!("decode embedded preview {:?}: {}", src, e),
+            )
+        })?,
+        // No embedded preview and the image crate can't read it: ffmpeg
+        // produces the final file by itself, so we are done after this call.
+        None if file_types::needs_ffmpeg_thumbnail(src) => {
+            return generate_large_preview_ffmpeg(src, dest);
+        }
+        None => image::open(src).map_err(|e| {
+            std::io::Error::new(std::io::ErrorKind::InvalidData, format!("{:?}: {}", src, e))
+        })?,
+    };
+
+    encode_large_jpeg(exif::apply_orientation(decoded, orientation), dest)
+}
+
+/// Resize-if-needed + JPEG-encode at [`LARGE_PREVIEW_JPEG_QUALITY`]. Used by
+/// both the embedded-preview and image-crate-decode branches of
+/// `generate_large_preview`.
+///
+/// `img` is scaled down only when its long edge exceeds
+/// [`LARGE_PREVIEW_MAX_DIM`]; smaller images are encoded at native size.
+/// `dest` is created (or truncated) and must live in an existing directory.
+///
+/// # Errors
+/// Returns an `io::Error` when `dest` cannot be created, when the JPEG
+/// encoder fails, or when the buffered bytes cannot be flushed to disk.
+fn encode_large_jpeg(img: image::DynamicImage, dest: &Path) -> std::io::Result<()> {
+    let (w, h) = img.dimensions();
+    let max_dim = w.max(h);
+    // Avoid upscaling tiny sources — pointless work and adds nothing for
+    // the viewer. `thumbnail` would scale up freely; explicit guard.
+    let scaled = if max_dim > LARGE_PREVIEW_MAX_DIM {
+        img.thumbnail(LARGE_PREVIEW_MAX_DIM, LARGE_PREVIEW_MAX_DIM)
+    } else {
+        img
+    };
+    let file = std::fs::File::create(dest)
+        .map_err(|e| std::io::Error::other(format!("create {:?}: {}", dest, e)))?;
+    let mut writer = std::io::BufWriter::new(file);
+    // Scope the encoder so its mutable borrow of `writer` has ended before
+    // the explicit flush below.
+    {
+        let mut encoder = JpegEncoder::new_with_quality(&mut writer, LARGE_PREVIEW_JPEG_QUALITY);
+        encoder
+            .encode_image(&scaled)
+            .map_err(|e| std::io::Error::other(format!("encode {:?}: {}", dest, e)))?;
+    }
+    // `BufWriter`'s Drop flushes but discards any error, which would report
+    // success for a truncated file (e.g. disk full). Flush explicitly so the
+    // failure reaches the caller instead of a broken preview being cached.
+    std::io::Write::flush(&mut writer)
+        .map_err(|e| std::io::Error::other(format!("flush {:?}: {}", dest, e)))
+}
+
+/// ffmpeg path for HEIC/HEIF (image crate can't decode these). Mirrors
+/// [`crate::video::actors::generate_image_thumbnail_ffmpeg`] but scales
+/// to the large-preview cap instead of 200.
+///
+/// Runs `ffmpeg` synchronously, grabbing a single frame from `src` and
+/// writing it to `dest` as an MJPEG-encoded image (overwriting `dest`).
+///
+/// # Errors
+/// Propagates the spawn error when `ffmpeg` cannot be started, and returns
+/// an error carrying the exit status plus trimmed stderr when it exits
+/// unsuccessfully.
+fn generate_large_preview_ffmpeg(src: &Path, dest: &Path) -> std::io::Result<()> {
+    // Cap the long edge without upscaling: landscape input (iw > ih) gets
+    // width min(iw, cap) and height -1 (ffmpeg derives it from the aspect
+    // ratio); everything else gets height min(ih, cap) and width -1.
+    let cap = LARGE_PREVIEW_MAX_DIM;
+    let scale_filter =
+        format!("scale='if(gt(iw,ih),min(iw,{cap}),-1)':'if(gt(iw,ih),-1,min(ih,{cap}))'");
+
+    let run = Command::new("ffmpeg")
+        .args(["-y", "-i"])
+        .arg(src)
+        .args(["-vframes", "1"])
+        .args(["-vf", scale_filter.as_str()])
+        // ffmpeg's mjpeg qscale: 2 ≈ ~q95, 5 ≈ ~q85, 10 ≈ ~q70. We pick
+        // 5 to match the non-ffmpeg branch's q85 target.
+        .args(["-q:v", "5"])
+        .args(["-f", "image2"])
+        .args(["-c:v", "mjpeg"])
+        .arg(dest)
+        .output()?;
+
+    if run.status.success() {
+        return Ok(());
+    }
+    let stderr = String::from_utf8_lossy(&run.stderr);
+    Err(std::io::Error::other(format!(
+        "ffmpeg failed ({}): {}",
+        run.status,
+        stderr.trim()
+    )))
+}
+
+/// Build the on-demand xlarge preview for `src` and write it to `dest`.
+///
+/// Follows the same decode order as [`generate_large_preview`], targeting
+/// [`XLARGE_PREVIEW_MAX_DIM`] instead:
+/// 1. embedded JPEG preview, when `exif::extract_embedded_jpeg_preview`
+///    returns one;
+/// 2. [`generate_xlarge_preview_ffmpeg`], when
+///    `file_types::needs_ffmpeg_thumbnail` flags the file;
+/// 3. a direct decode through `image::open`.
+///
+/// Branches 1 and 3 apply the EXIF orientation (treated as `1` when it
+/// cannot be read) before [`encode_xlarge_jpeg`], which never upscales.
+///
+/// # Errors
+/// `InvalidData` when decoding fails; otherwise whatever the encode /
+/// ffmpeg step reports.
+pub fn generate_xlarge_preview(src: &Path, dest: &Path) -> std::io::Result<()> {
+    let orientation = exif::read_orientation(src).unwrap_or(1);
+
+    let decoded = match exif::extract_embedded_jpeg_preview(src) {
+        Some(preview) => image::load_from_memory(&preview).map_err(|e| {
+            std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!("decode embedded preview {:?}: {}", src, e),
+            )
+        })?,
+        // ffmpeg writes the finished file itself; nothing left to encode.
+        None if file_types::needs_ffmpeg_thumbnail(src) => {
+            return generate_xlarge_preview_ffmpeg(src, dest);
+        }
+        None => image::open(src).map_err(|e| {
+            std::io::Error::new(std::io::ErrorKind::InvalidData, format!("{:?}: {}", src, e))
+        })?,
+    };
+
+    encode_xlarge_jpeg(exif::apply_orientation(decoded, orientation), dest)
+}
+
+/// Resize-if-needed + JPEG-encode at [`LARGE_PREVIEW_JPEG_QUALITY`] for the
+/// xlarge tier.
+///
+/// `img` is scaled down only when its long edge exceeds
+/// [`XLARGE_PREVIEW_MAX_DIM`]; smaller images are encoded at native size.
+/// `dest` is created (or truncated) and must live in an existing directory.
+///
+/// # Errors
+/// Returns an `io::Error` when `dest` cannot be created, when the JPEG
+/// encoder fails, or when the buffered bytes cannot be flushed to disk.
+fn encode_xlarge_jpeg(img: image::DynamicImage, dest: &Path) -> std::io::Result<()> {
+    let (w, h) = img.dimensions();
+    let max_dim = w.max(h);
+    // Explicit guard: only shrink, never enlarge a source below the cap.
+    let scaled = if max_dim > XLARGE_PREVIEW_MAX_DIM {
+        img.thumbnail(XLARGE_PREVIEW_MAX_DIM, XLARGE_PREVIEW_MAX_DIM)
+    } else {
+        img
+    };
+    let file = std::fs::File::create(dest)
+        .map_err(|e| std::io::Error::other(format!("create {:?}: {}", dest, e)))?;
+    let mut writer = std::io::BufWriter::new(file);
+    // Scope the encoder so its mutable borrow of `writer` has ended before
+    // the explicit flush below.
+    {
+        let mut encoder = JpegEncoder::new_with_quality(&mut writer, LARGE_PREVIEW_JPEG_QUALITY);
+        encoder
+            .encode_image(&scaled)
+            .map_err(|e| std::io::Error::other(format!("encode {:?}: {}", dest, e)))?;
+    }
+    // `BufWriter`'s Drop flushes but discards any error, which would report
+    // success for a truncated file (e.g. disk full). Flush explicitly so the
+    // failure reaches the caller instead of a broken preview being cached.
+    std::io::Write::flush(&mut writer)
+        .map_err(|e| std::io::Error::other(format!("flush {:?}: {}", dest, e)))
+}
+
+/// ffmpeg-backed variant of the xlarge preview, used when
+/// `generate_xlarge_preview` finds that the file needs ffmpeg to decode.
+///
+/// Runs `ffmpeg` synchronously, grabbing a single frame from `src` and
+/// writing it to `dest` as an MJPEG-encoded image (overwriting `dest`),
+/// with the long edge capped at [`XLARGE_PREVIEW_MAX_DIM`].
+///
+/// # Errors
+/// Propagates the spawn error when `ffmpeg` cannot be started, and returns
+/// an error carrying the exit status plus trimmed stderr when it exits
+/// unsuccessfully.
+fn generate_xlarge_preview_ffmpeg(src: &Path, dest: &Path) -> std::io::Result<()> {
+    // Landscape input (iw > ih): width min(iw, cap), height -1 (derived by
+    // ffmpeg from the aspect ratio). Otherwise: height min(ih, cap), width -1.
+    let cap = XLARGE_PREVIEW_MAX_DIM;
+    let scale_filter =
+        format!("scale='if(gt(iw,ih),min(iw,{cap}),-1)':'if(gt(iw,ih),-1,min(ih,{cap}))'");
+
+    let run = Command::new("ffmpeg")
+        .args(["-y", "-i"])
+        .arg(src)
+        .args(["-vframes", "1"])
+        .args(["-vf", scale_filter.as_str()])
+        .args(["-q:v", "5"])
+        .args(["-f", "image2"])
+        .args(["-c:v", "mjpeg"])
+        .arg(dest)
+        .output()?;
+
+    if run.status.success() {
+        return Ok(());
+    }
+    let stderr = String::from_utf8_lossy(&run.stderr);
+    Err(std::io::Error::other(format!(
+        "ffmpeg failed ({}): {}",
+        run.status,
+        stderr.trim()
+    )))
+}
+
 pub fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) {
    let tracer = global_tracer();
    let span = tracer.start("creating thumbnails");
@@ -1,18 +1,18 @@
+use crate::content_hash;
 use crate::database::PreviewDao;
 use crate::libraries::Library;
 use crate::otel::global_tracer;
-use crate::thumbnails::is_video;
 use crate::video::ffmpeg::{generate_preview_clip, get_duration_seconds_blocking};
+use crate::video::hls_paths;
 use actix::prelude::*;
-use log::{debug, error, info, trace, warn};
+use log::{debug, error, info, warn};
 use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, Tracer};
 use std::io::Result;
 use std::path::{Path, PathBuf};
-use std::process::{Child, Command, ExitStatus, Stdio};
+use std::process::{Command, Stdio};
 use std::sync::{Arc, Mutex};
 use tokio::sync::Semaphore;
-use walkdir::{DirEntry, WalkDir};
 // ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8
 // ffmpeg -i "filename.mp4" -preset veryfast -c:v libx264 -f hls -hls_list_size 100 -hls_time 2 -crf 24 -vf scale=1080:-2,setsar=1:1 attempt/vid_out.m3u8

@@ -22,89 +22,14 @@ impl Actor for StreamActor {
    type Context = Context<Self>;
 }

-pub struct ProcessMessage(pub String, pub Child);
-
-impl Message for ProcessMessage {
-    type Result = Result<ExitStatus>;
-}
-
-impl Handler<ProcessMessage> for StreamActor {
-    type Result = Result<ExitStatus>;
-
-    fn handle(&mut self, msg: ProcessMessage, _ctx: &mut Self::Context) -> Self::Result {
-        trace!("Message received");
-        let mut process = msg.1;
-        let result = process.wait();
-
-        debug!(
-            "Finished waiting for: {:?}. Code: {:?}",
-            msg.0,
-            result
-                .as_ref()
-                .map_or(-1, |status| status.code().unwrap_or(-1))
-        );
-        result
-    }
-}
-
-pub fn playlist_file_for(playlist_dir: &str, video_path: &Path) -> PathBuf {
-    let filename = video_path
-        .file_name()
-        .and_then(|n| n.to_str())
-        .unwrap_or("unknown");
-    PathBuf::from(format!("{}/{}.m3u8", playlist_dir, filename))
-}
-
-/// Sentinel path written next to a would-be playlist when ffmpeg cannot
-/// transcode the source (e.g. truncated mp4 with no moov atom). Its presence
-/// causes future scans to skip the file instead of re-running ffmpeg every
-/// pass. Delete the `.unsupported` file to force a retry.
-pub fn playlist_unsupported_sentinel(playlist_file: &Path) -> PathBuf {
-    let mut s = playlist_file.as_os_str().to_owned();
-    s.push(".unsupported");
-    PathBuf::from(s)
-}
-
-pub async fn create_playlist(video_path: &str, playlist_file: &str) -> Result<Child> {
-    if Path::new(playlist_file).exists() {
-        debug!("Playlist already exists: {}", playlist_file);
-        return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
-    }
-
-    let result = Command::new("ffmpeg")
-        .arg("-i")
-        .arg(video_path)
-        .arg("-c:v")
-        .arg("h264")
-        .arg("-crf")
-        .arg("21")
-        .arg("-preset")
-        .arg("veryfast")
-        .arg("-hls_time")
-        .arg("3")
-        .arg("-hls_list_size")
-        .arg("0")
-        .arg("-hls_playlist_type")
-        .arg("vod")
-        .arg("-vf")
-        .arg("scale='min(1080,iw)':-2,setsar=1:1")
-        .arg(playlist_file)
-        .stdout(Stdio::null())
-        .stderr(Stdio::null())
-        .spawn();
-
-    let start_time = std::time::Instant::now();
-    loop {
-        actix::clock::sleep(std::time::Duration::from_secs(1)).await;
-
-        if Path::new(playlist_file).exists()
-            || std::time::Instant::now() - start_time > std::time::Duration::from_secs(5)
-        {
-            break;
-        }
-    }
-
-    result
+/// A video paired with its content hash, ready to be queued for HLS
+/// playlist generation. Hash is required because all output paths are
+/// keyed on it; callers that lack a hash (rows mid-backfill) must skip
+/// the video rather than fabricate one.
+#[derive(Debug, Clone)]
+pub struct VideoToQueue {
+    /// Path of the source video; forwarded unchanged to the playlist
+    /// generator and used in log output.
+    pub video_path: PathBuf,
+    /// Content hash of the video; the playlist and "unsupported" sentinel
+    /// locations are derived from it.
+    pub content_hash: String,
 }

 pub fn generate_video_thumbnail(path: &Path, destination: &Path) -> std::io::Result<()> {
@@ -197,16 +122,36 @@ pub fn generate_image_thumbnail_ffmpeg(path: &Path, destination: &Path) -> std::
 /// Video stream metadata needed to pick HLS encode settings. Populated by
 /// a single ffprobe call to avoid spawning multiple subprocesses per video.
 #[derive(Debug, Default)]
-struct VideoStreamMeta {
-    is_h264: bool,
+pub struct VideoStreamMeta {
+    pub is_h264: bool,
    /// Rotation in degrees (0/90/180/270). Checks both the legacy `rotate`
    /// stream tag and the modern display-matrix side data.
-    rotation: i32,
+    pub rotation: i32,
+    /// Frames per second. Prefers `avg_frame_rate` (handles VFR better than
+    /// `r_frame_rate`, which lies on variable-framerate sources). `None`
+    /// when ffprobe couldn't parse either field — caller picks a fallback.
+    pub frame_rate: Option<f32>,
+}
+
+/// Convert one of ffprobe's rational frame-rate strings (`"30000/1001"`,
+/// `"60/1"`) into frames per second.
+///
+/// Returns `None` when the input has no `/`, when either side fails to
+/// parse as a float, when the denominator is (effectively) zero — which
+/// covers ffprobe's `"0/0"` "unknown" marker — or when the quotient is
+/// non-finite, not strictly positive, or 1000 and above. Rejecting those
+/// keeps a malformed probe from poisoning the scrubber's step size.
+fn parse_ffprobe_rational(s: &str) -> Option<f32> {
+    let (numerator, denominator) = s.split_once('/')?;
+    let numerator = numerator.parse::<f32>().ok()?;
+    let denominator = denominator.parse::<f32>().ok()?;
+    if denominator.abs() < f32::EPSILON {
+        return None;
+    }
+    let fps = numerator / denominator;
+    if fps.is_finite() && fps > 0.0 && fps < 1000.0 {
+        Some(fps)
+    } else {
+        None
+    }
 }

 /// Probe video stream metadata in one ffprobe call. Returns default (codec
-/// unknown, rotation 0) on any failure — callers fall back to transcoding.
-async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
+/// unknown, rotation 0, fps None) on any failure — callers fall back to
+/// transcoding / a default framerate.
+pub async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
    let output = tokio::process::Command::new("ffprobe")
        .arg("-v")
        .arg("error")
@@ -214,8 +159,16 @@ async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
        .arg("v:0")
        .arg("-print_format")
        .arg("json")
+        // NOTE: request `stream_side_data_list` (stream-level side data, read
+        // from the moov atom), NOT the bare `side_data_list` section. On modern
+        // ffprobe the latter is the *frame* side-data section, which forces
+        // ffprobe to enumerate every frame — reading the entire mdat over the
+        // network. For non-faststart phone clips on an SMB mount that turned a
+        // metadata probe into a full-file read (tens of seconds per open). The
+        // Display Matrix rotation we need is present at stream level, so this
+        // keeps codec/fps/rotation while reading only the header.
        .arg("-show_entries")
-        .arg("stream=codec_name:stream_tags=rotate:side_data_list")
+        .arg("stream=codec_name,r_frame_rate,avg_frame_rate:stream_tags=rotate:stream_side_data_list")
        .arg(video_path)
        .output()
        .await;
@@ -266,12 +219,29 @@ async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
        })
        .unwrap_or(0);

+    // ffprobe reports frame rates as rational strings like "30000/1001".
+    // Prefer avg_frame_rate (handles VFR) and fall back to r_frame_rate.
+    let frame_rate = stream
+        .get("avg_frame_rate")
+        .and_then(|v| v.as_str())
+        .and_then(parse_ffprobe_rational)
+        .or_else(|| {
+            stream
+                .get("r_frame_rate")
+                .and_then(|v| v.as_str())
+                .and_then(parse_ffprobe_rational)
+        });
+
    debug!(
-        "Probed {}: codec_h264={}, rotation={}°",
-        video_path, is_h264, rotation
+        "Probed {}: codec_h264={}, rotation={}°, fps={:?}",
+        video_path, is_h264, rotation, frame_rate
    );

-    VideoStreamMeta { is_h264, rotation }
+    VideoStreamMeta {
+        is_h264,
+        rotation,
+        frame_rate,
+    }
 }

 /// Probe the max keyframe interval (GOP) in the first ~30s of a video.
@@ -331,17 +301,17 @@ async fn get_max_gop_seconds(video_path: &str) -> Option<f64> {
 }

 pub struct VideoPlaylistManager {
-    playlist_dir: PathBuf,
+    video_dir: PathBuf,
    playlist_generator: Addr<PlaylistGenerator>,
 }

 impl VideoPlaylistManager {
    pub fn new<P: Into<PathBuf>>(
-        playlist_dir: P,
+        video_dir: P,
        playlist_generator: Addr<PlaylistGenerator>,
    ) -> Self {
        Self {
-            playlist_dir: playlist_dir.into(),
+            video_dir: video_dir.into(),
            playlist_generator,
        }
    }
@@ -351,144 +321,68 @@ impl Actor for VideoPlaylistManager {
    type Context = Context<Self>;
 }

-impl Handler<ScanDirectoryMessage> for VideoPlaylistManager {
-    type Result = ResponseFuture<()>;
-
-    fn handle(&mut self, msg: ScanDirectoryMessage, _ctx: &mut Self::Context) -> Self::Result {
-        let tracer = global_tracer();
-        let mut span = tracer.start("videoplaylistmanager.scan_directory");
-
-        let start = std::time::Instant::now();
-        info!(
-            "Starting scan directory for video playlist generation: {}",
-            msg.directory
-        );
-
-        let playlist_output_dir = self.playlist_dir.clone();
-        let playlist_dir_str = playlist_output_dir.to_str().unwrap().to_string();
-
-        let video_files = WalkDir::new(&msg.directory)
-            .into_iter()
-            .filter_map(|e| e.ok())
-            .filter(|e| e.file_type().is_file())
-            .filter(is_video)
-            .filter(|e| {
-                let playlist = playlist_file_for(&playlist_dir_str, e.path());
-                !playlist.exists() && !playlist_unsupported_sentinel(&playlist).exists()
-            })
-            .collect::<Vec<DirEntry>>();
-
-        let scan_dir_name = msg.directory.clone();
-        let playlist_generator = self.playlist_generator.clone();
-
-        Box::pin(async move {
-            for e in video_files {
-                let path = e.path();
-                let path_as_str = path.to_str().unwrap();
-                debug!(
-                    "Sending generate playlist message for path: {}",
-                    path_as_str
-                );
-
-                match playlist_generator
-                    .send(GeneratePlaylistMessage {
-                        playlist_path: playlist_output_dir.to_str().unwrap().to_string(),
-                        video_path: PathBuf::from(path),
-                    })
-                    .await
-                    .expect("Failed to send generate playlist message")
-                {
-                    Ok(_) => {
-                        span.add_event(
-                            "Playlist generated",
-                            vec![KeyValue::new("video_path", path_as_str.to_string())],
-                        );
-
-                        debug!(
-                            "Successfully generated playlist for file: '{}'",
-                            path_as_str
-                        );
-                    }
-                    Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
-                        debug!("Playlist already exists for '{:?}', skipping", path);
-                    }
-                    Err(e) => {
-                        warn!("Failed to generate playlist for path '{:?}'. {:?}", path, e);
-                    }
-                }
-            }
-
-            span.add_event(
-                "Finished directory scan",
-                vec![KeyValue::new("directory", scan_dir_name.to_string())],
-            );
-            info!(
-                "Finished directory scan of '{}' in {:?}",
-                scan_dir_name,
-                start.elapsed()
-            );
-        })
-    }
-}
-
 impl Handler<QueueVideosMessage> for VideoPlaylistManager {
    type Result = ();

    fn handle(&mut self, msg: QueueVideosMessage, _ctx: &mut Self::Context) -> Self::Result {
-        if msg.video_paths.is_empty() {
+        if msg.videos.is_empty() {
            return;
        }

-        info!(
-            "Queueing {} videos for HLS playlist generation",
-            msg.video_paths.len()
-        );
-
-        let playlist_output_dir = self.playlist_dir.clone();
-        let playlist_dir_str = playlist_output_dir.to_str().unwrap().to_string();
+        let video_dir = self.video_dir.clone();
        let playlist_generator = self.playlist_generator.clone();

-        for video_path in msg.video_paths {
-            let playlist = playlist_file_for(&playlist_dir_str, &video_path);
-            if playlist.exists() || playlist_unsupported_sentinel(&playlist).exists() {
+        let mut queued = 0usize;
+        let mut already_present = 0usize;
+        for VideoToQueue {
+            video_path,
+            content_hash,
+        } in msg.videos
+        {
+            let playlist = hls_paths::playlist_for_hash(&video_dir, &content_hash);
+            let sentinel = hls_paths::sentinel_for_hash(&video_dir, &content_hash);
+            if playlist.exists() || sentinel.exists() {
+                already_present += 1;
                continue;
            }
-            let path_str = video_path.to_string_lossy().to_string();
-            debug!("Queueing playlist generation for: {}", path_str);
-
+            debug!(
+                "Queueing playlist generation for {} (hash={})",
+                video_path.display(),
+                short_hash(&content_hash)
+            );
            playlist_generator.do_send(GeneratePlaylistMessage {
-                playlist_path: playlist_dir_str.clone(),
                video_path,
+                content_hash,
            });
+            queued += 1;
        }
+        info!(
+            "Queue tick: {} queued, {} skipped (playlist or sentinel already on disk)",
+            queued, already_present
+        );
    }
 }

-#[derive(Message)]
-#[rtype(result = "()")]
-pub struct ScanDirectoryMessage {
-    pub(crate) directory: String,
-}
-
 #[derive(Message)]
 #[rtype(result = "()")]
 pub struct QueueVideosMessage {
-    pub video_paths: Vec<PathBuf>,
+    pub videos: Vec<VideoToQueue>,
 }

 #[derive(Message)]
 #[rtype(result = "Result<()>")]
 pub struct GeneratePlaylistMessage {
    pub video_path: PathBuf,
-    pub playlist_path: String,
+    pub content_hash: String,
 }

 pub struct PlaylistGenerator {
    semaphore: Arc<Semaphore>,
+    video_dir: PathBuf,
 }

 impl PlaylistGenerator {
-    pub(crate) fn new() -> Self {
+    pub(crate) fn new<P: Into<PathBuf>>(video_dir: P) -> Self {
        // Concurrency is tunable via HLS_CONCURRENCY so operators can dial
        // it to their hardware: 1 on weak Synology boxes to avoid thermal
        // throttling, higher on desktops with spare cores.
@@ -500,6 +394,7 @@ impl PlaylistGenerator {
        info!("PlaylistGenerator: concurrency={}", concurrency);
        PlaylistGenerator {
            semaphore: Arc::new(Semaphore::new(concurrency)),
+            video_dir: video_dir.into(),
        }
    }
 }
@@ -513,20 +408,23 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {

    fn handle(&mut self, msg: GeneratePlaylistMessage, _ctx: &mut Self::Context) -> Self::Result {
        let video_file = msg.video_path.to_str().unwrap().to_owned();
-        let playlist_path = msg.playlist_path.as_str().to_owned();
+        let content_hash_str = msg.content_hash.clone();
        let semaphore = self.semaphore.clone();
+        let video_dir = self.video_dir.clone();

-        let playlist_file = format!(
-            "{}/{}.m3u8",
-            playlist_path,
-            msg.video_path.file_name().unwrap().to_str().unwrap()
-        );
+        let hash_dir = content_hash::hls_dir(&video_dir, &content_hash_str);
+        let playlist_path = hls_paths::playlist_for_hash(&video_dir, &content_hash_str);
+        let sentinel_path = hls_paths::sentinel_for_hash(&video_dir, &content_hash_str);
+        let segment_template = hls_paths::segment_template_for_hash(&video_dir, &content_hash_str);
+        let playlist_file = playlist_path.to_string_lossy().to_string();
+        let segment_pattern = segment_template.to_string_lossy().to_string();

        let tracer = global_tracer();
        let mut span = tracer
            .span_builder("playlistgenerator.generate_playlist")
            .with_attributes(vec![
                KeyValue::new("video_file", video_file.clone()),
+                KeyValue::new("content_hash", content_hash_str.clone()),
                KeyValue::new("playlist_file", playlist_file.clone()),
            ])
            .start(&tracer);
@@ -550,7 +448,7 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                )],
            );

-            if Path::new(&playlist_file).exists() {
+            if playlist_path.exists() {
                debug!("Playlist already exists: {}", playlist_file);
                span.set_status(Status::error(format!(
                    "Playlist already exists: {}",
@@ -559,6 +457,19 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
            }

+            // Ensure the shard + hash directory exist. Idempotent — the
+            // dir may already be present from a prior attempt that wrote
+            // a sentinel before being cleared for retry.
+            if let Err(e) = tokio::fs::create_dir_all(&hash_dir).await {
+                error!(
+                    "Failed to create HLS hash dir {}: {}",
+                    hash_dir.display(),
+                    e
+                );
+                span.set_status(Status::error(format!("mkdir failed: {}", e)));
+                return Err(e);
+            }
+
            // One ffprobe call for codec + rotation metadata.
            let stream_meta = probe_video_stream_meta(&video_file).await;
            let is_h264 = stream_meta.is_h264;
@@ -619,16 +530,11 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                span.add_event("Transcoding to h264", vec![]);
            }

-            // Encode to a .tmp playlist and explicit segment names so a failed
-            // encode leaves predictable artifacts we can clean up — and so a
-            // concurrent scan doesn't see a half-written .m3u8 as "done".
+            // Encode to a .tmp playlist alongside the final inside the
+            // hash dir, so a concurrent scan never sees a half-written
+            // .m3u8 as "done". Segments use the hash-keyed template;
+            // ffmpeg writes them next to the playlist (relative refs).
            let playlist_tmp = format!("{}.tmp", playlist_file);
-            let video_stem = msg
-                .video_path
-                .file_name()
-                .and_then(|n| n.to_str())
-                .unwrap_or("video");
-            let segment_pattern = format!("{}/{}_%03d.ts", playlist_path, video_stem);

            let mut cmd = tokio::process::Command::new("ffmpeg");
            cmd.arg("-y").arg("-i").arg(&video_file);
@@ -717,12 +623,12 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
            let success = matches!(&ffmpeg_result, Ok(out) if out.status.success());

            if success {
-                if let Err(e) = tokio::fs::rename(&playlist_tmp, &playlist_file).await {
+                if let Err(e) = tokio::fs::rename(&playlist_tmp, &playlist_path).await {
                    error!(
                        "ffmpeg succeeded but rename {} -> {} failed: {}",
                        playlist_tmp, playlist_file, e
                    );
-                    cleanup_partial_hls(&playlist_tmp, playlist_path.as_str(), video_stem).await;
+                    cleanup_partial_hls(&hash_dir).await;
                    span.set_status(Status::error(format!("rename failed: {}", e)));
                    return Err(e);
                }
@@ -739,18 +645,17 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                    Err(e) => format!("ffmpeg failed: {}", e),
                };
                error!("ffmpeg failed for {}: {}", video_file, detail);
-                cleanup_partial_hls(&playlist_tmp, playlist_path.as_str(), video_stem).await;
-                let sentinel = playlist_unsupported_sentinel(Path::new(&playlist_file));
-                if let Err(se) = tokio::fs::write(&sentinel, b"").await {
+                cleanup_partial_hls(&hash_dir).await;
+                if let Err(se) = tokio::fs::write(&sentinel_path, b"").await {
                    warn!(
                        "Failed to write playlist sentinel {}: {}",
-                        sentinel.display(),
+                        sentinel_path.display(),
                        se
                    );
                } else {
                    info!(
                        "Wrote playlist sentinel {} so future scans skip {}",
-                        sentinel.display(),
+                        sentinel_path.display(),
                        video_file
                    );
                }
@@ -761,29 +666,47 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
    }
 }

-/// Delete the temp playlist and any segment files that ffmpeg may have written
-/// before failing. Called both on ffmpeg error and on rename failure so a
-/// retry on the next scan starts from a clean slate.
-async fn cleanup_partial_hls(playlist_tmp: &str, playlist_dir: &str, video_stem: &str) {
-    let _ = tokio::fs::remove_file(playlist_tmp).await;
-
-    let segment_prefix = format!("{}_", video_stem);
-    let Ok(mut entries) = tokio::fs::read_dir(playlist_dir).await else {
+/// Remove the leftovers of a failed or abandoned encode from `hash_dir`.
+///
+/// Every directory entry except the unsupported-source sentinel
+/// (`hls_paths::UNSUPPORTED_SENTINEL_FILENAME`) is passed to `remove_file`,
+/// which covers the `.tmp` playlist and any segments written so far. No
+/// caller shown here writes the sentinel before calling this; it is spared
+/// so the function stays safe to call after a sentinel write as well.
+///
+/// Best-effort: an unreadable directory returns silently, the scan stops at
+/// the first entry that fails to read, and a failed removal only logs a
+/// warning.
+async fn cleanup_partial_hls(hash_dir: &Path) {
+    let Ok(mut entries) = tokio::fs::read_dir(hash_dir).await else {
         return;
     };
     while let Ok(Some(entry)) = entries.next_entry().await {
-        let Some(name) = entry.file_name().to_str().map(str::to_owned) else {
+        let path = entry.path();
+        // A name that is not valid UTF-8 can never equal the sentinel
+        // name, so such entries fall through to removal.
+        let is_sentinel = path
+            .file_name()
+            .and_then(|n| n.to_str())
+            .map(|n| n == hls_paths::UNSUPPORTED_SENTINEL_FILENAME)
+            .unwrap_or(false);
+        if is_sentinel {
             continue;
-        };
-        if name.starts_with(&segment_prefix)
-            && name.ends_with(".ts")
-            && let Err(e) = tokio::fs::remove_file(entry.path()).await
-        {
-            warn!("Failed to remove partial segment {}: {}", name, e);
+        }
+        if let Err(e) = tokio::fs::remove_file(&path).await {
+            warn!(
+                "Failed to remove partial HLS file {}: {}",
+                path.display(),
+                e
+            );
         }
     }
 }

+/// Truncate a content hash to its first 16 characters for log lines.
+///
+/// Sixteen characters keep log volume sane while still being long enough
+/// that distinct hashes don't collide in practice. The cut is made on a
+/// character boundary; inputs of 16 characters or fewer come back whole.
+fn short_hash(hash: &str) -> &str {
+    match hash.char_indices().nth(16) {
+        Some((cut, _)) => &hash[..cut],
+        None => hash,
+    }
+}
+
 #[derive(Message)]
 #[rtype(result = "()")]
 pub struct GeneratePreviewClipMessage {
@@ -908,3 +831,50 @@ impl Handler<GeneratePreviewClipMessage> for PreviewClipGenerator {
        })
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::parse_ffprobe_rational;
+
+    /// Assert that the parser returns `None` for every string in `inputs`.
+    fn assert_all_rejected(inputs: &[&str]) {
+        for &input in inputs {
+            assert_eq!(parse_ffprobe_rational(input), None, "input: {input:?}");
+        }
+    }
+
+    #[test]
+    fn parses_common_rational_framerates() {
+        // (ffprobe string, expected fps, tolerance)
+        let cases = [
+            // NTSC 29.97 fps
+            ("30000/1001", 29.970_03, 1e-3),
+            // Plain integer fps
+            ("30/1", 30.0, 1e-6),
+            ("60/1", 60.0, 1e-6),
+            // iPhone slow-mo
+            ("240/1", 240.0, 1e-6),
+        ];
+        for (raw, expected, tolerance) in cases {
+            let fps = parse_ffprobe_rational(raw).unwrap();
+            assert!((fps - expected).abs() < tolerance, "input: {raw:?}");
+        }
+    }
+
+    #[test]
+    fn rejects_ffprobe_unknown_sentinel() {
+        // ffprobe prints 0/0 when it does not know the rate; that must
+        // not be read as 0 fps.
+        assert_all_rejected(&["0/0"]);
+    }
+
+    #[test]
+    fn rejects_malformed_input() {
+        assert_all_rejected(&["", "30", "/1", "30/", "abc/def"]);
+    }
+
+    #[test]
+    fn rejects_non_positive_results() {
+        // A negative numerator gives a negative fps and a zero numerator
+        // gives zero fps; neither is meaningful for frame stepping.
+        assert_all_rejected(&["-30/1", "0/1"]);
+    }
+
+    #[test]
+    fn rejects_out_of_range() {
+        // Anything above 1000 fps is almost certainly garbage probe
+        // output rather than a real source (real high-speed capture
+        // tops out near 1 kHz).
+        assert_all_rejected(&["999999/1"]);
+    }
+}
@@ -0,0 +1,84 @@
+//! Path layout for hash-keyed HLS output.
+//!
+//! Source-of-truth is [`crate::content_hash::hls_dir`], which gives
+//! `<video_dir>/<hash[..2]>/<hash>/`. The playlist, the per-segment files,
+//! and the "ffmpeg refused" sentinel all live inside that directory so a
+//! `.m3u8` written with relative segment references resolves correctly
+//! at serve time without any URL rewriting.
+
+use std::path::{Path, PathBuf};
+
+use crate::content_hash;
+
+/// Standard filename for the HLS playlist inside a hash dir. Fixed so
+/// the URL contract is `playlist.m3u8` regardless of the source video's
+/// original basename.
+pub const PLAYLIST_FILENAME: &str = "playlist.m3u8";
+
+/// Sentinel filename written when ffmpeg refused to transcode the
+/// source. Presence in the hash dir tells future scans to skip the file
+/// instead of re-running ffmpeg every tick. Delete to force a retry.
+pub const UNSUPPORTED_SENTINEL_FILENAME: &str = "playlist.unsupported";
+
+/// File-name part of the segment template passed to ffmpeg via
+/// `-hls_segment_filename`; `segment_template_for_hash` joins it onto the
+/// hash dir. `%03d` is the placeholder for the segment index, zero-padded
+/// to three digits (`segment_000.ts`, `segment_001.ts`, ...). Segments
+/// live inside the hash dir, so the playlist's relative refs resolve to
+/// siblings automatically.
+pub const SEGMENT_TEMPLATE: &str = "segment_%03d.ts";
+
+/// Location of the HLS playlist for the video whose content hash is
+/// `hash`: [`PLAYLIST_FILENAME`] inside that hash's directory under
+/// `video_dir`.
+pub fn playlist_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
+    let hash_dir = content_hash::hls_dir(video_dir, hash);
+    hash_dir.join(PLAYLIST_FILENAME)
+}
+
+/// Location of the unsupported-source sentinel for `hash`:
+/// [`UNSUPPORTED_SENTINEL_FILENAME`] inside that hash's directory under
+/// `video_dir`.
+pub fn sentinel_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
+    let hash_dir = content_hash::hls_dir(video_dir, hash);
+    hash_dir.join(UNSUPPORTED_SENTINEL_FILENAME)
+}
+
+/// Value to hand to ffmpeg as `-hls_segment_filename` for `hash`:
+/// [`SEGMENT_TEMPLATE`] inside that hash's directory under `video_dir`.
+/// The result is absolute whenever `video_dir` is.
+pub fn segment_template_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
+    let hash_dir = content_hash::hls_dir(video_dir, hash);
+    hash_dir.join(SEGMENT_TEMPLATE)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Video root and content hash shared by the single-hash layout tests.
+    const ROOT: &str = "/var/video";
+    const HASH: &str = "abcdef0123456789";
+
+    #[test]
+    fn playlist_path_lives_inside_sharded_hash_dir() {
+        let expected = PathBuf::from("/var/video/ab/abcdef0123456789/playlist.m3u8");
+        assert_eq!(playlist_for_hash(Path::new(ROOT), HASH), expected);
+    }
+
+    #[test]
+    fn sentinel_path_lives_alongside_playlist() {
+        let expected = PathBuf::from("/var/video/ab/abcdef0123456789/playlist.unsupported");
+        assert_eq!(sentinel_for_hash(Path::new(ROOT), HASH), expected);
+    }
+
+    #[test]
+    fn segment_template_lives_alongside_playlist() {
+        let expected = PathBuf::from("/var/video/ab/abcdef0123456789/segment_%03d.ts");
+        assert_eq!(segment_template_for_hash(Path::new(ROOT), HASH), expected);
+    }
+
+    #[test]
+    fn distinct_hashes_yield_distinct_dirs() {
+        let root = Path::new(ROOT);
+        let first = playlist_for_hash(root, "1111aaaa");
+        let second = playlist_for_hash(root, "2222bbbb");
+        assert_ne!(first.parent(), second.parent());
+    }
+}
@@ -0,0 +1,243 @@
+//! One-shot retirement of the pre-content-hash HLS output layout.
+//!
+//! Before the hash-keyed layout landed, the actor pipeline wrote every
+//! playlist as `$VIDEO_PATH/<source-basename>.m3u8` with sibling
+//! `<source-basename>_NNN.ts` segments and a `<source-basename>.m3u8.unsupported`
+//! sentinel on ffmpeg failure. The new pipeline (see
+//! [`crate::video::hls_paths`]) puts everything inside a hash-keyed
+//! subdirectory, so the legacy flat files are orphaned the moment the
+//! upgraded binary boots — they're not served, not refreshed, and not
+//! GC'd by the new orphan cleanup (which deliberately ignores anything
+//! that doesn't sit inside a `<shard>/<hash>/` dir).
+//!
+//! This migration runs once on startup. It walks `$VIDEO_PATH` at depth
+//! 1, deletes every `.m3u8` / `.m3u8.tmp` / `.m3u8.unsupported` / `.ts`
+//! file, and reports a single info line. It is idempotent — a second
+//! run finds nothing and reports zero deletions, so it's safe to leave
+//! wired in across releases until the codebase finally drops the
+//! module.
+//!
+//! Sub-directories under `$VIDEO_PATH` are intentionally left alone:
+//! every legitimate child of `$VIDEO_PATH` in the new layout is a
+//! 2-char shard directory holding hash subdirs, and those are managed
+//! by `cleanup_orphaned_playlists`.
+
+use std::path::Path;
+
+use log::{info, warn};
+
+/// Tally of what a single migration run did.
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+pub struct RetireStats {
+    /// Removed `.m3u8` playlists.
+    pub deleted_playlists: usize,
+    /// Removed `.ts` segments.
+    pub deleted_segments: usize,
+    /// Removed `.m3u8.unsupported` sentinels.
+    pub deleted_sentinels: usize,
+    /// Removed `.m3u8.tmp` files.
+    pub deleted_tmp: usize,
+    /// Failures encountered during the run.
+    pub errors: usize,
+}
+
+impl RetireStats {
+    /// Number of files removed across all four categories; `errors` is
+    /// not part of the sum.
+    pub fn total_deleted(&self) -> usize {
+        [
+            self.deleted_playlists,
+            self.deleted_segments,
+            self.deleted_sentinels,
+            self.deleted_tmp,
+        ]
+        .iter()
+        .sum()
+    }
+}
+
+/// Delete every legacy basename-keyed HLS artifact at the root of
+/// `video_dir` and return counts so the caller can log or inspect them.
+///
+/// Only regular files directly inside `video_dir` are considered; anything
+/// else (notably the hash shard directories of the new layout) is skipped,
+/// as are files whose names `classify` does not recognise. If `video_dir`
+/// cannot be read at all, a warning is logged and zeroed stats are
+/// returned. Entries that cannot be read or stat'ed are skipped with a
+/// warning. `errors` counts only failed removals.
+pub fn retire_legacy_hls_output(video_dir: &Path) -> RetireStats {
+    let mut stats = RetireStats::default();
+
+    let read = match std::fs::read_dir(video_dir) {
+        Ok(r) => r,
+        Err(e) => {
+            warn!(
+                "Legacy HLS migration: cannot read {} ({}); skipping",
+                video_dir.display(),
+                e
+            );
+            return stats;
+        }
+    };
+
+    for entry in read {
+        // Surface per-entry failures instead of dropping them silently,
+        // so a partly failed migration leaves a trace in the logs.
+        let entry = match entry {
+            Ok(entry) => entry,
+            Err(e) => {
+                warn!(
+                    "Legacy HLS migration: cannot read an entry of {} ({}); skipping it",
+                    video_dir.display(),
+                    e
+                );
+                continue;
+            }
+        };
+        let file_type = match entry.file_type() {
+            Ok(t) => t,
+            Err(e) => {
+                warn!(
+                    "Legacy HLS migration: cannot determine file type of {} ({}); skipping it",
+                    entry.path().display(),
+                    e
+                );
+                continue;
+            }
+        };
+        if !file_type.is_file() {
+            // Hash shard directories live here in the new layout.
+            continue;
+        }
+        let path = entry.path();
+        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
+            continue;
+        };
+
+        let Some(bucket) = classify(name) else {
+            continue;
+        };
+
+        match std::fs::remove_file(&path) {
+            Ok(()) => match bucket {
+                LegacyKind::Playlist => stats.deleted_playlists += 1,
+                LegacyKind::Segment => stats.deleted_segments += 1,
+                LegacyKind::Sentinel => stats.deleted_sentinels += 1,
+                LegacyKind::Tmp => stats.deleted_tmp += 1,
+            },
+            Err(e) => {
+                warn!(
+                    "Legacy HLS migration: failed to remove {}: {}",
+                    path.display(),
+                    e
+                );
+                stats.errors += 1;
+            }
+        }
+    }
+
+    if stats.total_deleted() > 0 || stats.errors > 0 {
+        info!(
+            "Legacy HLS migration: deleted {} playlist(s), {} segment(s), {} sentinel(s), {} tmp; {} error(s)",
+            stats.deleted_playlists,
+            stats.deleted_segments,
+            stats.deleted_sentinels,
+            stats.deleted_tmp,
+            stats.errors,
+        );
+    } else {
+        info!(
+            "Legacy HLS migration: nothing to do under {}",
+            video_dir.display()
+        );
+    }
+
+    stats
+}
+
+/// Category of legacy artifact, as decided by `classify`; selects which
+/// `RetireStats` counter a successful removal increments.
+#[derive(Debug, Clone, Copy)]
+enum LegacyKind {
+    /// Name ends in `.m3u8`.
+    Playlist,
+    /// Name ends in `.ts`.
+    Segment,
+    /// Name ends in `.m3u8.unsupported`.
+    Sentinel,
+    /// Name ends in `.m3u8.tmp`.
+    Tmp,
+}
+
+/// Map a file name found at the `$VIDEO_PATH` root to the kind of legacy
+/// HLS artifact it is. Returns `None` for anything else (operator-stashed
+/// files, new-layout files, which don't live here, and so on) so the
+/// caller leaves it alone.
+///
+/// Matching is by case-sensitive suffix only.
+fn classify(name: &str) -> Option<LegacyKind> {
+    // Most specific suffixes first. No name can end in more than one of
+    // these, so the order is for readability rather than correctness.
+    const SUFFIXES: [(&str, LegacyKind); 4] = [
+        (".m3u8.unsupported", LegacyKind::Sentinel),
+        (".m3u8.tmp", LegacyKind::Tmp),
+        (".m3u8", LegacyKind::Playlist),
+        (".ts", LegacyKind::Segment),
+    ];
+
+    SUFFIXES
+        .iter()
+        .find(|(suffix, _)| name.ends_with(*suffix))
+        .map(|&(_, kind)| kind)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use tempfile::tempdir;
+
+    #[test]
+    fn classify_recognises_each_legacy_artifact() {
+        let playlist = classify("IMG_0341.MOV.m3u8");
+        assert!(matches!(playlist, Some(LegacyKind::Playlist)));
+        let segment = classify("IMG_0341.MOV_000.ts");
+        assert!(matches!(segment, Some(LegacyKind::Segment)));
+        let sentinel = classify("IMG_0341.MOV.m3u8.unsupported");
+        assert!(matches!(sentinel, Some(LegacyKind::Sentinel)));
+        let tmp = classify("IMG_0341.MOV.m3u8.tmp");
+        assert!(matches!(tmp, Some(LegacyKind::Tmp)));
+
+        // Unrelated names, including a bare shard-dir name ("ab"), are
+        // not legacy artifacts.
+        for name in ["README.md", "ab", ".keep"] {
+            assert!(classify(name).is_none());
+        }
+    }
+
+    #[test]
+    fn retire_deletes_legacy_and_leaves_hash_dirs() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+        let put = |rel: &str, bytes: &[u8]| fs::write(root.join(rel), bytes).unwrap();
+
+        // Legacy artifacts at root.
+        put("IMG_0341.MOV.m3u8", b"#EXTM3U");
+        put("IMG_0341.MOV_000.ts", b"\x00");
+        put("IMG_0341.MOV_001.ts", b"\x00");
+        put("clip.MP4.m3u8.unsupported", b"");
+        put("partial.m3u8.tmp", b"");
+
+        // New-layout hash dir we must NOT touch.
+        let hash_dir = root.join("ab").join("a".repeat(64));
+        fs::create_dir_all(&hash_dir).unwrap();
+        fs::write(hash_dir.join("playlist.m3u8"), b"#EXTM3U").unwrap();
+        fs::write(hash_dir.join("segment_000.ts"), b"\x00").unwrap();
+
+        // Unrelated file we must NOT touch.
+        put("README.md", b"don't touch me");
+
+        let stats = retire_legacy_hls_output(root);
+        assert_eq!(
+            stats,
+            RetireStats {
+                deleted_playlists: 1,
+                deleted_segments: 2,
+                deleted_sentinels: 1,
+                deleted_tmp: 1,
+                errors: 0,
+            }
+        );
+
+        // Legacy artifacts gone.
+        for gone in [
+            "IMG_0341.MOV.m3u8",
+            "IMG_0341.MOV_000.ts",
+            "clip.MP4.m3u8.unsupported",
+            "partial.m3u8.tmp",
+        ] {
+            assert!(!root.join(gone).exists());
+        }
+        // Hash dir untouched.
+        for kept in ["playlist.m3u8", "segment_000.ts"] {
+            assert!(hash_dir.join(kept).exists());
+        }
+        // Unrelated file untouched.
+        assert!(root.join("README.md").exists());
+    }
+
+    #[test]
+    fn retire_is_idempotent() {
+        let tmp = tempdir().unwrap();
+        let root = tmp.path();
+
+        fs::write(root.join("video.mp4.m3u8"), b"#EXTM3U").unwrap();
+        fs::write(root.join("video.mp4_000.ts"), b"\x00").unwrap();
+
+        // The first pass removes both files.
+        let first = retire_legacy_hls_output(root);
+        assert_eq!(first.deleted_playlists + first.deleted_segments, 2);
+
+        // The second pass finds nothing left to do.
+        let second = retire_legacy_hls_output(root);
+        assert_eq!(second.total_deleted(), 0);
+        assert_eq!(second.errors, 0);
+    }
+
+    #[test]
+    fn retire_handles_missing_dir() {
+        // A missing root must not panic or count as an error; the
+        // function warns and returns zeroed stats.
+        let tmp = tempdir().unwrap();
+        let absent = tmp.path().join("does_not_exist");
+        let stats = retire_legacy_hls_output(&absent);
+        assert_eq!(stats.total_deleted(), 0);
+        assert_eq!(stats.errors, 0);
+    }
+}
@@ -9,6 +9,8 @@ use walkdir::WalkDir;

 pub mod actors;
 pub mod ffmpeg;
+pub mod hls_paths;
+pub mod legacy_migration;

 #[allow(dead_code)]
 pub async fn generate_video_gifs() {
@@ -22,7 +22,6 @@ use std::time::{Duration, SystemTime};
 use actix::Addr;
 use chrono::Utc;
 use log::{debug, error, info, warn};
-use walkdir::WalkDir;

 use crate::backfill;
 use crate::content_hash;
@@ -33,6 +32,7 @@ use crate::exif;
 use crate::face_watch;
 use crate::faces;
 use crate::file_types;
+use crate::hls_stats;
 use crate::libraries;
 use crate::library_maintenance;
 use crate::perceptual_hash;
@@ -40,20 +40,34 @@ use crate::tags;
 use crate::tags::SqliteTagDao;
 use crate::thumbnails;
 use crate::video;
-use crate::video::actors::{GeneratePreviewClipMessage, QueueVideosMessage, VideoPlaylistManager};
+use crate::video::actors::{
+    GeneratePreviewClipMessage, QueueVideosMessage, VideoPlaylistManager, VideoToQueue,
+};
+use crate::video::hls_paths;

-/// Clean up orphaned HLS playlists and segments whose source videos no longer exist.
+/// Clean up orphaned HLS hash directories under `$VIDEO_PATH` whose
+/// content_hash no longer appears in `image_exif`.
+///
+/// Walks `<video_path>/<shard>/<hash>/` — the layout written by the
+/// hash-keyed `PlaylistGenerator` — and deletes any hash directory whose
+/// hash isn't in the current DISTINCT set of `image_exif.content_hash`
+/// values. Empty shard parents are reaped on the same pass.
+///
+/// Legacy basename-keyed files at `$VIDEO_PATH` root (from the
+/// pre-content-hash layout) are left alone here; the one-shot startup
+/// migration is responsible for retiring those.
 ///
 /// `libs_lock` is the shared live view of the libraries table — read at the
 /// top of each cleanup pass so a PATCH /libraries/{id} that disables or
 /// re-mounts a library is picked up without a restart.
 pub fn cleanup_orphaned_playlists(
    libs_lock: Arc<RwLock<Vec<libraries::Library>>>,
-    excluded_dirs: Vec<String>,
+    _excluded_dirs: Vec<String>,
    library_health: libraries::LibraryHealthMap,
 ) {
    std::thread::spawn(move || {
-        let video_path = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
+        let video_path_str = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
+        let video_path = PathBuf::from(&video_path_str);

        // Get cleanup interval from environment (default: 24 hours)
        let cleanup_interval_secs = dotenv::var("PLAYLIST_CLEANUP_INTERVAL_SECONDS")
@@ -61,18 +75,14 @@ pub fn cleanup_orphaned_playlists(
            .and_then(|s| s.parse::<u64>().ok())
            .unwrap_or(86400); // 24 hours

-        info!("Starting orphaned playlist cleanup job");
+        info!("Starting orphaned HLS cleanup job");
        info!("  Cleanup interval: {} seconds", cleanup_interval_secs);
-        info!("  Playlist directory: {}", video_path);
-        {
-            let libs = libs_lock.read().unwrap_or_else(|e| e.into_inner());
-            for lib in libs.iter() {
-                info!(
-                    "  Checking sources under '{}' at {}",
-                    lib.name, lib.root_path
-                );
-            }
-        }
+        info!("  HLS directory: {}", video_path.display());
+
+        let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(
+            SqliteExifDao::new(),
+        )
+            as Box<dyn ExifDao>));

        loop {
            std::thread::sleep(Duration::from_secs(cleanup_interval_secs));
@@ -83,22 +93,27 @@ pub fn cleanup_orphaned_playlists(
            let libs: Vec<libraries::Library> =
                libs_lock.read().unwrap_or_else(|e| e.into_inner()).clone();

-            // Safety gate: skip the cleanup cycle if any library is
-            // stale. A missing source video on a stale library is
-            // indistinguishable from a transient unmount, and the
-            // cleanup is destructive — we'd rather leak a few playlist
-            // files for a tick than delete one whose source is briefly
-            // unreachable. The cycle re-runs on the next interval.
+            // Safety gate: skip the cleanup cycle if any (enabled)
+            // library is stale. With hash-keyed layout the orphan
+            // decision is a pure DB query, but the upstream
+            // missing-file scan that *removes* image_exif rows already
+            // pauses for stale libraries — so a stale tick can hold
+            // hashes alive that would otherwise have been GC'd. The
+            // safety is then mostly belt-and-suspenders: a hash that
+            // should have been retired is just kept one tick longer.
+            // We'd rather leak a few hash dirs for one cleanup interval
+            // (24h by default) than wipe a hash dir whose source was
+            // briefly unreachable.
            {
                let guard = library_health.read().unwrap_or_else(|e| e.into_inner());
                let stale: Vec<String> = libs
                    .iter()
+                    .filter(|lib| lib.enabled)
                    .filter(|lib| guard.get(&lib.id).map(|h| !h.is_online()).unwrap_or(false))
                    .map(|lib| lib.name.clone())
                    .collect();
                if !stale.is_empty() {
                    warn!(
-                        "Skipping orphaned-playlist cleanup: {} library(ies) stale: [{}]",
+                        "Skipping orphaned-HLS cleanup: {} library(ies) stale: [{}]",
                        stale.len(),
                        stale.join(", ")
                    );
@@ -106,116 +121,129 @@ pub fn cleanup_orphaned_playlists(
                }
            }

-            info!("Running orphaned playlist cleanup");
+            info!("Running orphaned HLS cleanup");
            let start = std::time::Instant::now();
-            let mut deleted_count = 0;
-            let mut error_count = 0;

-            // Find all .m3u8 files in VIDEO_PATH
-            let playlists: Vec<PathBuf> = WalkDir::new(&video_path)
-                .into_iter()
-                .filter_map(|e| e.ok())
-                .filter(|e| e.file_type().is_file())
-                .filter(|e| {
-                    e.path()
-                        .extension()
-                        .and_then(|s| s.to_str())
-                        .map(|ext| ext.eq_ignore_ascii_case("m3u8"))
-                        .unwrap_or(false)
-                })
-                .map(|e| e.path().to_path_buf())
-                .collect();
+            // Snapshot every live content_hash currently in image_exif.
+            // We intentionally don't filter by library here — a hash that
+            // lives in any library is alive, even if the library a given
+            // download attributed it to has since been disabled.
+            let alive_hashes: HashSet<String> = {
+                let context = opentelemetry::Context::new();
+                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
+                match dao.list_distinct_content_hashes(&context) {
+                    Ok(hashes) => hashes.into_iter().collect(),
+                    Err(e) => {
+                        error!(
+                            "Failed to load distinct content hashes; skipping HLS cleanup: {:?}",
+                            e
+                        );
+                        continue;
+                    }
+                }
+            };

-            info!("Found {} playlist files to check", playlists.len());
+            let mut deleted_count = 0usize;
+            let mut error_count = 0usize;
+            let mut inspected = 0usize;

-            for playlist_path in playlists {
-                // Extract the original video filename from playlist name
-                // Playlist format: {VIDEO_PATH}/{original_filename}.m3u8
-                if let Some(filename) = playlist_path.file_stem() {
-                    let video_filename = filename.to_string_lossy();
+            // Walk top-level entries of VIDEO_PATH. Each is either a
+            // legacy basename-keyed `.m3u8` / `.ts` (skip — migration
+            // owns those) or a 2-char shard directory.
+            let read_root = match std::fs::read_dir(&video_path) {
+                Ok(r) => r,
+                Err(e) => {
+                    error!(
+                        "HLS cleanup: failed to read VIDEO_PATH {}: {}",
+                        video_path.display(),
+                        e
+                    );
+                    continue;
+                }
+            };

-                    // Search for this video file across every configured
-                    // library, respecting EXCLUDED_DIRS so we don't
-                    // false-resurrect playlists for videos that only
-                    // exist inside an excluded subtree. As soon as one
-                    // library has a matching source, we're done — the
-                    // playlist isn't orphaned.
-                    let mut video_exists = false;
-                    'libs: for lib in &libs {
-                        let effective = lib.effective_excluded_dirs(&excluded_dirs);
-                        for entry in image_api::file_scan::walk_library_files(
-                            Path::new(&lib.root_path),
-                            &effective,
-                        ) {
-                            if let Some(entry_stem) = entry.path().file_stem()
-                                && entry_stem == filename
-                                && file_types::is_video_file(entry.path())
-                            {
-                                video_exists = true;
-                                break 'libs;
-                            }
-                        }
+            for shard_entry in read_root.flatten() {
+                let shard_path = shard_entry.path();
+                if !shard_entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
+                    continue;
+                }
+                let shard_name = match shard_path.file_name().and_then(|n| n.to_str()) {
+                    Some(n) => n.to_owned(),
+                    None => continue,
+                };
+                if !is_hash_shard(&shard_name) {
+                    continue;
+                }
+
+                // Hash dirs inside this shard.
+                let read_shard = match std::fs::read_dir(&shard_path) {
+                    Ok(r) => r,
+                    Err(e) => {
+                        warn!(
+                            "HLS cleanup: failed to read shard {}: {}",
+                            shard_path.display(),
+                            e
+                        );
+                        continue;
+                    }
+                };
+
+                let mut shard_emptied = true;
+                for hash_entry in read_shard.flatten() {
+                    let hash_path = hash_entry.path();
+                    if !hash_entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
+                        shard_emptied = false;
+                        continue;
+                    }
+                    let Some(hash_name) = hash_path
+                        .file_name()
+                        .and_then(|n| n.to_str())
+                        .map(|n| n.to_owned())
+                    else {
+                        shard_emptied = false;
+                        continue;
+                    };
+                    if !is_full_hash(&hash_name) {
+                        shard_emptied = false;
+                        continue;
+                    }
+                    inspected += 1;
+
+                    if alive_hashes.contains(&hash_name) {
+                        shard_emptied = false;
+                        continue;
                    }

-                    if !video_exists {
-                        debug!(
-                            "Source video for playlist {} no longer exists, deleting",
-                            playlist_path.display()
-                        );
-
-                        // Delete the playlist file
-                        if let Err(e) = std::fs::remove_file(&playlist_path) {
+                    debug!(
+                        "HLS cleanup: removing orphan hash dir {}",
+                        hash_path.display()
+                    );
+                    match std::fs::remove_dir_all(&hash_path) {
+                        Ok(()) => deleted_count += 1,
+                        Err(e) => {
                            warn!(
-                                "Failed to delete playlist {}: {}",
-                                playlist_path.display(),
+                                "Failed to delete orphan hash dir {}: {}",
+                                hash_path.display(),
                                e
                            );
                            error_count += 1;
-                        } else {
-                            deleted_count += 1;
-
-                            // Also try to delete associated .ts segment files
-                            // They are typically named {filename}N.ts in the same directory
-                            if let Some(parent_dir) = playlist_path.parent() {
-                                for entry in WalkDir::new(parent_dir)
-                                    .max_depth(1)
-                                    .into_iter()
-                                    .filter_map(|e| e.ok())
-                                    .filter(|e| e.file_type().is_file())
-                                {
-                                    let entry_path = entry.path();
-                                    if let Some(ext) = entry_path.extension()
-                                        && ext.eq_ignore_ascii_case("ts")
-                                    {
-                                        // Check if this .ts file belongs to our playlist
-                                        if let Some(ts_stem) = entry_path.file_stem() {
-                                            let ts_name = ts_stem.to_string_lossy();
-                                            if ts_name.starts_with(&*video_filename) {
-                                                if let Err(e) = std::fs::remove_file(entry_path) {
-                                                    debug!(
-                                                        "Failed to delete segment {}: {}",
-                                                        entry_path.display(),
-                                                        e
-                                                    );
-                                                } else {
-                                                    debug!(
-                                                        "Deleted segment: {}",
-                                                        entry_path.display()
-                                                    );
-                                                }
-                                            }
-                                        }
-                                    }
-                                }
-                            }
+                            shard_emptied = false;
                        }
                    }
                }
+
+                // If this shard now has no surviving hash dirs, reap
+                // the (empty) shard dir too. remove_dir fails if non-
+                // empty, which is the guard.
+                if shard_emptied {
+                    let _ = std::fs::remove_dir(&shard_path);
+                }
            }

            info!(
-                "Orphaned playlist cleanup completed in {:?}: deleted {} playlists, {} errors",
+                "Orphaned HLS cleanup completed in {:?}: inspected {} hash dirs, deleted {} orphans, {} errors",
                start.elapsed(),
+                inspected,
                deleted_count,
                error_count
            );
@@ -223,11 +251,24 @@ pub fn cleanup_orphaned_playlists(
    });
 }

+/// True iff `s` is a two-character ASCII-hex shard prefix (either case).
+fn is_hash_shard(s: &str) -> bool {
+    s.len() == 2 && s.bytes().all(|b| b.is_ascii_hexdigit())
+}
+
+/// True iff `s` looks like a full blake3 hex digest (64 hex chars).
+/// Be strict so we don't accidentally rm a non-HLS directory operators
+/// have stashed under VIDEO_PATH.
+fn is_full_hash(s: &str) -> bool {
+    s.len() == 64 && s.bytes().all(|b| b.is_ascii_hexdigit())
+}
+
 pub fn watch_files(
    libs_lock: Arc<RwLock<Vec<libraries::Library>>>,
    playlist_manager: Addr<VideoPlaylistManager>,
    preview_generator: Addr<video::actors::PreviewClipGenerator>,
    face_client: crate::ai::face_client::FaceClient,
+    clip_client: crate::ai::clip_client::ClipClient,
    excluded_dirs: Vec<String>,
    library_health: libraries::LibraryHealthMap,
 ) {
@@ -260,6 +301,14 @@ pub fn watch_files(
                 or APOLLO_API_BASE_URL to enable)"
            );
        }
+        if clip_client.is_enabled() {
+            info!("  CLIP semantic search: ENABLED");
+        } else {
+            info!(
+                "  CLIP semantic search: DISABLED (set APOLLO_CLIP_API_BASE_URL \
+                 or APOLLO_API_BASE_URL to enable)"
+            );
+        }
        {
            let libs = libs_lock.read().unwrap_or_else(|e| e.into_inner());
            for lib in libs.iter() {
@@ -288,7 +337,12 @@ pub fn watch_files(
        ));

        let mut last_quick_scan = SystemTime::now();
-        let mut last_full_scan = SystemTime::now();
+        // Initialize to UNIX_EPOCH so the *first* tick is treated as a
+        // full scan. That replaces the legacy startup ScanDirectoryMessage
+        // walk for HLS playlists: every library's existing media gets
+        // checked once at watcher boot, instead of waiting up to
+        // full_interval_secs (1h default) for the first natural full scan.
+        let mut last_full_scan = SystemTime::UNIX_EPOCH;
        let mut scan_count = 0u64;

        // Per-library cursor for the missing-file scan. Each tick reads
@@ -418,6 +472,21 @@ pub fn watch_files(
                    );
                }

+                // CLIP embedding backlog. Independent of face detection —
+                // drain runs whenever CLIP is enabled, even on deploys
+                // that don't have the face engine wired up. Mirrors the
+                // face drain shape (capped per tick, no-op when disabled).
+                if clip_client.is_enabled() {
+                    let context = opentelemetry::Context::new();
+                    backfill::process_clip_backlog(
+                        &context,
+                        lib,
+                        &clip_client,
+                        &exif_dao,
+                        &effective_excludes,
+                    );
+                }
+
                // Date-taken backfill: drain rows whose canonical date is
                // either unresolved or only fs_time-sourced. Independent
                // of face detection — runs even on deploys that don't
@@ -531,6 +600,16 @@ pub fn watch_files(
            }

            if is_full_scan {
+                // End-of-full-scan HLS readiness summary: log a single
+                // info line + refresh the Prometheus gauges. Skipped on
+                // quick scans because the cost is non-trivial on big
+                // libraries and the data only meaningfully changes on
+                // full passes.
+                let video_dir_str = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
+                let stats =
+                    hls_stats::compute_and_publish(&libs, &exif_dao, Path::new(&video_dir_str));
+                hls_stats::log_summary(&stats);
+
                last_full_scan = now;
            }
            last_quick_scan = now;
@@ -600,14 +679,18 @@ pub fn process_new_files(
    // Batch query: Get all EXIF data for these files in one query
    let file_paths: Vec<String> = files.iter().map(|(_, rel_path)| rel_path.clone()).collect();

-    let existing_exif_paths: HashMap<String, bool> = {
+    // Map of rel_path -> Option<content_hash>. The presence of the key
+    // tells us "row exists"; the Option value carries the hash for the
+    // HLS pipeline so video files without a hash (mid-backfill) skip
+    // this tick rather than fall back to a basename-colliding playlist.
+    let existing_exif: HashMap<String, Option<String>> = {
        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
        // Walk is per-library, so scope the lookup so a same-named file
        // in another library doesn't make this one look already-indexed.
        match dao.get_exif_batch(&context, Some(library.id), &file_paths) {
            Ok(exif_records) => exif_records
                .into_iter()
-                .map(|record| (record.file_path, true))
+                .map(|record| (record.file_path, record.content_hash))
                .collect(),
            Err(e) => {
                error!("Error batch querying EXIF data: {:?}", e);
@@ -637,7 +720,7 @@ pub fn process_new_files(
            && !bare_legacy_thumb_path.exists()
            && !thumbnails::unsupported_thumbnail_sentinel(&scoped_thumb_path).exists()
            && !thumbnails::unsupported_thumbnail_sentinel(&bare_legacy_thumb_path).exists();
-        let needs_row = !existing_exif_paths.contains_key(relative_path);
+        let needs_row = !existing_exif.contains_key(relative_path);

        if needs_thumbnail || needs_row {
            new_files_found = true;
@@ -796,28 +879,45 @@ pub fn process_new_files(
        }
    }

-    // Check for videos that need HLS playlists
+    // Check for videos that need HLS playlists. All output is keyed on
+    // `content_hash` (see `crate::video::hls_paths`), so files whose
+    // `image_exif.content_hash` is still NULL — typically mid-backfill —
+    // are skipped this tick and picked up after the unhashed backlog
+    // drain populates the hash on a subsequent tick. Skipping is the
+    // correct call: queuing without a hash would either fall back to
+    // basename keying (the bug this refactor fixes) or fabricate one.
    let video_path_base = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
-    let mut videos_needing_playlists = Vec::new();
+    let video_dir = Path::new(&video_path_base);
+    let mut videos_needing_playlists: Vec<VideoToQueue> = Vec::new();
+    let mut hashless_video_count = 0usize;

-    for (file_path, _relative_path) in &files {
-        if file_types::is_video_file(file_path) {
-            // Construct expected playlist path
-            let playlist_filename =
-                format!("{}.m3u8", file_path.file_name().unwrap().to_string_lossy());
-            let playlist_path = Path::new(&video_path_base).join(&playlist_filename);
-
-            // Check if playlist needs (re)generation
-            if playlist_needs_generation(file_path, &playlist_path) {
-                videos_needing_playlists.push(file_path.clone());
-            }
+    for (file_path, relative_path) in &files {
+        if !file_types::is_video_file(file_path) {
+            continue;
+        }
+        let Some(hash) = existing_exif.get(relative_path).and_then(|h| h.clone()) else {
+            hashless_video_count += 1;
+            continue;
+        };
+        let playlist_path = hls_paths::playlist_for_hash(video_dir, &hash);
+        if playlist_needs_generation(file_path, &playlist_path) {
+            videos_needing_playlists.push(VideoToQueue {
+                video_path: file_path.clone(),
+                content_hash: hash,
+            });
        }
    }

-    // Send queue request to playlist manager
+    if hashless_video_count > 0 {
+        debug!(
+            "Watcher tick for '{}': skipped {} video(s) with NULL content_hash (will retry after backfill)",
+            library.name, hashless_video_count
+        );
+    }
+
    if !videos_needing_playlists.is_empty() {
        playlist_manager.do_send(QueueVideosMessage {
-            video_paths: videos_needing_playlists,
+            videos: videos_needing_playlists,
        });
    }

@@ -962,6 +1062,33 @@ mod tests {
        assert!(playlist_needs_generation(&video, &playlist));
    }

+    #[test]
+    fn is_hash_shard_accepts_only_two_hex_chars() {
+        assert!(is_hash_shard("ab"));
+        assert!(is_hash_shard("00"));
+        assert!(is_hash_shard("FF")); // ASCII hexdigit covers upper-case too
+        assert!(!is_hash_shard("a"));
+        assert!(!is_hash_shard("abc"));
+        assert!(!is_hash_shard("zz"));
+        assert!(!is_hash_shard(""));
+        assert!(!is_hash_shard("a/"));
+    }
+
+    #[test]
+    fn is_full_hash_accepts_only_64_hex_chars() {
+        let h64 = "a".repeat(64);
+        assert!(is_full_hash(&h64));
+        let mixed = format!("ab{}", "0".repeat(62));
+        assert!(is_full_hash(&mixed));
+        assert!(!is_full_hash(&"a".repeat(63)));
+        assert!(!is_full_hash(&"a".repeat(65)));
+        assert!(!is_full_hash(&format!("z{}", "a".repeat(63))));
+        // Defends against operator stashing e.g. ".tmp" or "Plex" under
+        // VIDEO_PATH — neither passes the full-hash gate.
+        assert!(!is_full_hash(".tmp"));
+        assert!(!is_full_hash("Plex"));
+    }
+
    #[test]
    fn playlist_needs_generation_true_when_video_missing_metadata() {
        // Video doesn't exist; metadata fails for it. Falls through to the