auto-tag: Apollo tag client + probe binary

Adds ai::tag_client mirroring face_client for Apollo's RAM++ endpoint (APOLLO_TAG_API_BASE_URL falling back to APOLLO_API_BASE_URL), and a throwaway probe_auto_tags binary that walks image_exif and prints tags without writing the DB. Lets us eyeball RAM++ output quality + threshold before committing to a schema and per-tick drain.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 20:01:55 -04:00
95 changed files with 2447 additions and 19324 deletions
@@ -53,60 +53,11 @@ AGENTIC_CHAT_MAX_ITERATIONS=6
 # OPENROUTER_HTTP_REFERER=https://your-site.example
 # OPENROUTER_APP_TITLE=ImageApi

-# ── AI Insights — local backend switch ──────────────────────────────────
-# Picks which local LLM stack the server uses for chat, vision describe,
-# and embeddings. `ollama` (default) uses the OLLAMA_* settings above;
-# `llamacpp` uses the LLAMA_SWAP_* settings below. The switch is global
-# and applies to both `backend=local` and `backend=hybrid` (hybrid keeps
-# chat on OpenRouter but still uses this stack for the describe pass).
-# Don't flip mid-deploy without re-embedding existing index rows —
-# mixed vector spaces break similarity search.
-# LLM_BACKEND=ollama
-
-# ── AI Insights — llama.cpp / llama-swap (optional) ─────────────────────
-# Set LLAMA_SWAP_URL plus LLM_BACKEND=llamacpp to swap the local stack
-# off Ollama. Talks OpenAI-compatible /v1 to a llama-swap proxy fronting
-# per-slot llama-server instances. Chat models receive images directly
-# via content-parts (vision-capable models assumed); a separate vision
-# slot is used only by the describe_photo tool and describe-image utility.
-# LLAMA_SWAP_URL=http://localhost:9292/v1
-# LLAMA_SWAP_PRIMARY_MODEL=chat
-# Optional dedicated vision slot for describe_image. Defaults to
-# PRIMARY_MODEL so describe_photo works without extra config.
-# LLAMA_SWAP_VISION_MODEL=vision
-# LLAMA_SWAP_EMBEDDING_MODEL=embed
-# Comma-separated allowlist surfaced by /insights/models when
-# LLM_BACKEND=llamacpp. All report has_vision=true.
-# LLAMA_SWAP_ALLOWED_MODELS=chat,vision,embed
-# LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180
-
-# ── Unified search translation model (optional) ─────────────────────────
-# /photos/search/unified runs one small LLM call to translate a natural-
-# language query into structured filters + a semantic term, then CLIP-ranks.
-# That step needs an LLM AND CLIP available at once. On a tight VRAM budget a
-# large chat model can't co-reside with CLIP, so pin a small, fast model here
-# (it can stay loaded alongside CLIP and the chat model). Precedence:
-# UNIFIED_SEARCH_MODEL > the client's selected model > the configured default.
-# Use the configured backend (LLM_BACKEND); local only — no hybrid.
-# UNIFIED_SEARCH_MODEL=qwen3-0.6b
-
-# ── Text-to-speech (optional, requires LLAMA_SWAP_URL) ───────────────────
-# TTS routes through the same llama-swap proxy (a Chatterbox model id), so it
-# only needs LLAMA_SWAP_URL — it does NOT require LLM_BACKEND=llamacpp.
-# Powers POST /tts/speech and the /tts/voices* endpoints (read-aloud insights
-# + voice cloning in the mobile app).
-# LLAMA_SWAP_TTS_MODEL=chatterbox        # TTS model id in config.yaml
-# LLAMA_SWAP_TTS_VOICE=m                 # default voice when a request omits one
-# LLAMA_SWAP_TTS_REF_SECONDS=30          # max voice-clone reference clip length (s)
-# LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600   # synth timeout (long chunked text)
-
 # ── AI Insights — sibling services (optional) ───────────────────────────
-# Apollo (places, face inference, CLIP encoders). Single-Apollo deploys
-# typically set only APOLLO_API_BASE_URL and let the face + CLIP
-# clients fall back to it.
+# Apollo (places + face inference). Single Apollo deploys typically set
+# only APOLLO_API_BASE_URL and let the face client fall back to it.
 # APOLLO_API_BASE_URL=http://apollo.lan:8000
 # APOLLO_FACE_API_BASE_URL=http://apollo.lan:8000
-# APOLLO_CLIP_API_BASE_URL=http://apollo.lan:8000
 # SMS_API_URL=http://localhost:8000
 # SMS_API_TOKEN=

@@ -129,51 +80,6 @@ FACE_DETECT_TIMEOUT_SEC=60
 FACE_BACKLOG_MAX_PER_TICK=64
 FACE_HASH_BACKFILL_MAX_PER_TICK=2000

-# ── CLIP semantic photo search ──────────────────────────────────────────
-# ImageApi calls Apollo's /api/internal/clip/{encode_image,encode_text}
-# to populate per-photo embeddings during the watcher's backlog drain
-# and to encode user queries at /photos/search time. Disabled when
-# neither APOLLO_CLIP_API_BASE_URL nor APOLLO_API_BASE_URL is set.
-#
-# Per-watcher-tick cap on the encode drain. Default 32 ≈ ~1 photo/sec
-# on CPU, ~30 photos/sec on a single-GPU host (Apollo's threadpool
-# is 1 on CUDA, so concurrency is bounded server-side regardless of
-# our setting). Bump on a fresh deploy to clear the backlog faster.
-CLIP_BACKLOG_MAX_PER_TICK=32
-# Client-side parallel encode calls per drain pass. Apollo's GPU pool
-# serializes server-side; this just overlaps file-IO with inference.
-CLIP_ENCODE_CONCURRENCY=4
-# Per-encode HTTP timeout. CPU-only Apollo deploys may need higher.
-CLIP_REQUEST_TIMEOUT_SEC=60
-
 # ── RAG / search ────────────────────────────────────────────────────────
 # Set to `1` to enable cross-encoder reranking on /search results.
 SEARCH_RAG_RERANK=0
-
-# ── Nightly reel pre-generation (Phase 3+) ──────────────────────────────
-# Set to `1` to enable the scheduler. Disabled by default.
-# REEL_PREGEN_ENABLED=1
-# Hour (0-23) when the nightly batch fires. Default 3 AM.
-# REEL_PREGEN_HOUR=3
-# Day of week for weekly reels (0=Sun, 1=Mon, …). Default Monday.
-# REEL_PREGEN_WEEK_DOW=1
-# Timezone offset in minutes from UTC (e.g., -480 = PST). Defaults to
-# the server's local timezone.
-# REEL_PREGEN_TZ_OFFSET_MINUTES=
-# Fixed timezone offset — overrides auto-detect to avoid DST shifts.
-# When set, both the DB fallback and env fallback use this value.
-# REEL_PREGEN_TZ_FIXED_MINUTES=-480
-# Voice ID for narration (e.g., "grandma"). Falls back to the value
-# stored in the user_ai_prefs DB row when set.
-# REEL_PREGEN_VOICE=
-# Library filter: a library id (e.g. "1") or "all" for every library.
-# REEL_PREGEN_LIBRARY=all
-# Max agentic tool iterations for pre-gen scripter. Default 8.
-# REEL_PREGEN_MAX_TOOL_ITERS=8
-#
-# On-disk reel cache sweep (runs every 24h, independent of pre-gen). Removes
-# reel MP4s with no ledger row + no live job that are older than the max age —
-# i.e. the on-demand cache, which otherwise grows forever. Set to 0 to disable.
-# REEL_CACHE_SWEEP_ENABLED=1
-# Age (days) before an unreferenced reel MP4 is swept. Default 7.
-# REEL_CACHE_MAX_AGE_DAYS=7
@@ -1,9 +0,0 @@
-# Normalize line endings in the repo to LF. Windows checkouts can still
-# present working-copy files as CRLF; this just keeps the committed history
-# stable so contributors on any OS don't see whitespace-only diffs every
-# time someone touches a file.
-* text=auto eol=lf
-
-# Migrations and SQL must be LF — SQLite parsers don't care, but diffing
-# is much cleaner with stable endings.
-*.sql text eol=lf
@@ -5,8 +5,6 @@ database/target
 *.db-shm
 *.db-wal
 .env
-# Server-local TTS pronunciation overrides (tts_pronunciations.example.json is the template)
-/tts_pronunciations.json
 /tmp
 /docs
 /specs
@@ -473,16 +473,10 @@ GET /memories?path=...&recursive=true
 POST /insights/generate              (non-agentic single-shot)
 POST /insights/generate/agentic      (tool-calling loop; body: { file_path, backend?, model?, ... })
 GET  /insights?path=...&library=...
-GET  /insights/models                (local-backend models + capabilities; Ollama OR llama-swap based on LLM_BACKEND)
+GET  /insights/models                (local Ollama models + capabilities)
 GET  /insights/openrouter/models     (curated OpenRouter allowlist)
 POST /insights/rate                  (thumbs up/down for training data)

-// Text-to-Speech (Chatterbox via llama-swap; needs LLAMA_SWAP_URL)
-POST /tts/speech                     (read-aloud: { text, voice?, ... } -> { audio_base64, format })
-GET  /tts/voices                     (Chatterbox voice library)
-POST /tts/voices/upload              (clone a voice from an uploaded clip; multipart)
-POST /tts/voices/from-library        (clone a voice from a library audio/video file)
-
 // Insight Chat Continuation
 POST /insights/chat                  (single-turn reply, non-streaming)
 POST /insights/chat/stream           (SSE: text / tool_call / tool_result / truncated / done)
@@ -637,55 +631,8 @@ OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small  # Optional, embeddings
 OPENROUTER_HTTP_REFERER=https://your-site.example    # Optional attribution header
 OPENROUTER_APP_TITLE=ImageApi                  # Optional attribution header

-# Local LLM backend switch. `ollama` (default) keeps the OLLAMA_* settings
-# above; `llamacpp` swaps the entire local stack (chat + vision describe +
-# embeddings) over to llama-swap. The switch is global and applies to
-# `backend=local` requests and to `backend=hybrid`'s describe pass (hybrid
-# chat still goes to OpenRouter). Don't flip mid-deploy without
-# re-embedding — mixed vector spaces break similarity search.
-LLM_BACKEND=ollama
-
-# Embedding model contract. Corpus and queries must be embedded by the same
-# model with matching prefixes — after changing the embed model or any of
-# these, run `cargo run --bin reembed_embeddings` (all tables) or search is
-# garbage. Prefix values may contain a literal \n (expanded to a newline).
-EMBEDDING_DIM=768           # 768 = nomic-embed-text v1.5; 1024 = Qwen3-Embedding-0.6B
-EMBED_QUERY_PREFIX=         # nomic: "search_query: " | Qwen3: "Instruct: <task>\nQuery: "
-EMBED_DOCUMENT_PREFIX=      # nomic: "search_document: " | Qwen3: leave empty
-
-# llama.cpp / llama-swap (used when LLM_BACKEND=llamacpp). OpenAI-compatible
-# proxy hosting one or more llama-server processes. Chat models receive
-# images directly via content-parts (all models assumed vision-capable).
-LLAMA_SWAP_URL=http://localhost:9292/v1         # Required when LLM_BACKEND=llamacpp
-LLAMA_SWAP_PRIMARY_MODEL=chat                   # Chat slot id (matches config.yaml)
-LLAMA_SWAP_VISION_MODEL=                        # Dedicated vision slot for describe_image / describe_photo
-                                                # tool. Defaults to PRIMARY_MODEL when unset.
-LLAMA_SWAP_EMBEDDING_MODEL=embed                # Embedding slot id
-LLAMA_SWAP_ALLOWED_MODELS=chat,coder            # Curated allowlist surfaced by GET /insights/models
-                                                # when LLM_BACKEND=llamacpp. All report has_vision=true.
-                                                # Empty = picker shows only the configured primary model.
-LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS=180          # Per-request timeout; bump for slow CPU offload
-
-# Text-to-speech (Chatterbox served behind llama-swap). Only needs
-# LLAMA_SWAP_URL — independent of LLM_BACKEND. Powers /tts/speech (read-aloud)
-# and /tts/voices* (voice cloning). Reference audio is ffmpeg-normalized to WAV
-# server-side, so any source format works.
-LLAMA_SWAP_TTS_MODEL=chatterbox                # TTS model id in config.yaml (default: chatterbox)
-LLAMA_SWAP_TTS_VOICE=m                         # Default voice when /tts/speech omits one (optional)
-LLAMA_SWAP_TTS_REF_SECONDS=30                  # Max voice-clone reference clip length, seconds
-                                               # (Chatterbox is zero-shot; ~10-20s clean ref is ideal)
-LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS=600     # Per-request synth timeout (long chunked insights take
-                                               # minutes); overrides the shared client timeout for /tts/speech
-TTS_PRONUNCIATIONS_PATH=tts_pronunciations.json # JSON map of pronunciation overrides applied before synth
-                                               # (see tts_pronunciations.example.json); hot-reloaded on change
-
 # Insight Chat Continuation
 AGENTIC_CHAT_MAX_ITERATIONS=6                  # Cap on tool-calling iterations per chat turn (default 6)
-AGENTIC_CHAT_DEFAULT_NUM_CTX=32768             # Assumed context window for the history-truncation budget
-                                               # when a chat request omits num_ctx (default 32768). Size to
-                                               # the smallest context among the chat models actually served;
-                                               # too small silently guts replayed history every turn (and
-                                               # destroys llama.cpp KV-cache prefix reuse).
 ```

 **AI Insights Fallback Behavior:**
@@ -703,50 +650,10 @@ The `OllamaClient` provides methods to query available models:

 This allows runtime verification of model availability before generating insights.

-**Local backend switch (`LLM_BACKEND`):**
-
-One env var decides which "local" stack the server runs against — `ollama`
-(default) or `llamacpp`. It's global on purpose: chat, vision, and
-embeddings all route through the same backend, so the embedding-vector
-column in SQLite stays in one vector space. Don't flip mid-deploy without
-re-embedding the affected rows — similarity search will collapse.
-
- `LLM_BACKEND=ollama`: chat, vision, and embeddings use Ollama. Vision
-  capability is probed per-model via `/api/show`.
- `LLM_BACKEND=llamacpp`: chat models receive images directly via OpenAI
-  content-parts (all models assumed vision-capable). Embeddings hit the
-  `embed` slot. A dedicated `LLAMA_SWAP_VISION_MODEL` slot (defaults to
-  the chat model) handles `describe_image` for the `describe_photo` tool.
-  Requires `LLAMA_SWAP_URL`.
-
-The per-request `backend=hybrid` override is orthogonal: it always sends
-chat to OpenRouter (text-only, images are pre-described and inlined), but
-the describe + embed passes still route through whichever `LLM_BACKEND`
-is configured.
-
-**Backend dispatch (`ResolvedBackend`):**
-
-`InsightGenerator::resolve_backend(kind, overrides)` is the single entry
-point that builds clients for a request. Returns a `ResolvedBackend` with
-two roles: `.chat()` (the agentic/chat client) and `.local()` (local-only
-utility calls: rerank, describe_image, embeddings). `BackendKind` is an
-enum (`Local` | `Hybrid`) replacing the stringly-typed `"local"` /
-`"hybrid"` labels. `SamplingOverrides` groups model/ctx/temp/top_p/top_k/
-min_p per-request overrides. All downstream code (`execute_tool`,
-`run_streaming_agentic_loop`, etc.) takes `&ResolvedBackend` rather than
-individual client references.
-
-`GET /insights/models` returns the local-backend models with capabilities
-in the same envelope shape regardless of `LLM_BACKEND`: Ollama servers
-when `ollama`, llama-swap slots (from `LLAMA_SWAP_ALLOWED_MODELS`) when
-`llamacpp`. No `/insights/llamacpp/models` — the picker reads a single
-endpoint.
-
 **Hybrid Backend (OpenRouter):**
 - Per-request opt-in via `backend=hybrid` on `POST /insights/generate/agentic`.
- Vision describe happens before the agentic loop; the description is inlined
-  into the chat prompt and the agentic loop runs on OpenRouter. Vision
-  routes through whichever `LLM_BACKEND` is configured.
+- Local Ollama still describes the image (vision); the description is inlined
+  into the chat prompt and the agentic loop runs on OpenRouter.
 - `request.model` (if provided) overrides `OPENROUTER_DEFAULT_MODEL` for that
  call. The mobile picker reads from `OPENROUTER_ALLOWED_MODELS`.
 - No live capability precheck — the operator-curated allowlist is trusted.
@@ -754,15 +661,6 @@ endpoint.
 - `GET /insights/openrouter/models` returns `{ models, default_model, configured }`
  for client picker UIs.

-**Cross-replay matrix (chat continuation):**
- `local → local` allowed (whether served by Ollama or llama-swap; that's
-  a deploy-time decision, not a request-time one).
- `hybrid → hybrid` allowed.
- `hybrid → local` allowed (the inlined description replays as text).
- `local → hybrid` rejected — the stored transcript has raw images in the
-  first user message and OpenRouter providers don't accept that shape
-  consistently. Regenerate the insight in hybrid mode instead.
-
 **Insight Chat Continuation:**

 After an agentic insight is generated, the full `Vec<ChatMessage>` transcript is
@@ -809,17 +707,14 @@ Per-`(library_id, file_path)` async mutex (`AppState.insight_chat.chat_locks`)
 serialises concurrent turns on the same insight so the JSON blob doesn't race.

 Context management is a soft bound: if the serialized history exceeds
-`num_ctx - 2048` tokens (cheap 4-byte/token heuristic; `num_ctx` defaults
-to `AGENTIC_CHAT_DEFAULT_NUM_CTX`, 32768, when the request omits it), the
-oldest assistant-tool_call + tool_result pairs are dropped until under budget. The
+`num_ctx - 2048` tokens (cheap 4-byte/token heuristic), the oldest
+assistant-tool_call + tool_result pairs are dropped until under budget. The
 initial user message (with any images) and system prompt are always preserved.
 The `truncated` event / flag is surfaced to the client when a drop occurred.

 Configurable env:
 - `AGENTIC_CHAT_MAX_ITERATIONS` — cap on tool-calling iterations per turn
  (default 6). Per-request `max_iterations` is clamped to this cap.
- `AGENTIC_CHAT_DEFAULT_NUM_CTX` — assumed context window for the truncation
-  budget when the request omits `num_ctx` (default 32768).

 **Apollo Places integration (optional):**

@@ -2051,7 +2051,7 @@ dependencies = [

 [[package]]
 name = "image-api"
-version = "1.4.0"
+version = "1.1.0"
 dependencies = [
 "actix",
 "actix-cors",
@@ -2104,7 +2104,6 @@ dependencies = [
 "tokio",
 "tokio-util",
 "urlencoding",
- "uuid",
 "walkdir",
 "zerocopy",
 ]
@@ -4392,9 +4391,7 @@ version = "1.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
 dependencies = [
- "getrandom 0.4.2",
 "js-sys",
- "serde_core",
 "wasm-bindgen",
 ]

@@ -1,6 +1,6 @@
 [package]
 name = "image-api"
-version = "1.4.0"
+version = "1.1.0"
 authors = ["Cameron Cordes <cameronc.dev@gmail.com>"]
 edition = "2024"

@@ -66,7 +66,6 @@ image_hasher = "3.0"
 bk-tree = "0.5"
 async-trait = "0.1"
 indicatif = "0.17"
-uuid = { version = "1.10", features = ["v4", "serde"] }

 # Windows lacks system sqlite3, so re-enable the bundled C build there.
 # Linux/macOS use the system library (faster builds, smaller binary).
@@ -147,56 +147,6 @@ so you can rewrite the saved summary from within chat.
 - `AGENTIC_CHAT_MAX_ITERATIONS` - Cap on tool-calling iterations per chat turn [default: `6`]
  - Per-request `max_iterations` (when sent by the client) is clamped to this cap

-#### Text-to-Speech (Optional)
-Reads insights aloud and manages cloned voices via a Chatterbox model served
-behind the same llama-swap proxy. Only requires `LLAMA_SWAP_URL` (the TTS client
-is built whenever that's set — independent of `LLM_BACKEND`). Endpoints:
- `POST /tts/speech` — body `{ text, voice?, format?, exaggeration?, cfg_weight?,
-  temperature? }`; returns `{ audio_base64, format }`. Input is cleaned
-  server-side (markdown + emoji stripped, then pronunciation overrides applied —
-  see below) and the generation knobs are clamped
-  to Chatterbox's ranges. Synthesis is serialized (one at a time — the upstream
-  has no GPU lock of its own); a concurrent request gets a fast `429`.
- `POST /tts/speech/jobs` — durable variant for long syntheses: same body as
-  `/tts/speech`, returns `202 { job_id, status }` immediately. Jobs queue on the
-  GPU permit instead of fast-failing `429`.
- `GET /tts/speech/jobs/{id}` — poll a job: `{ job_id, status, format,
-  audio_base64?, error? }` with status `queued|running|done|error|cancelled`.
-  Results are kept in memory ~10 min after completion, then the job 404s.
- `DELETE /tts/speech/jobs/{id}` — cancel a queued/running job.
- `GET /tts/voices` — list the voice library. Served from an in-memory cache
-  (so the listing doesn't make llama-swap spin up the TTS model and evict the
-  resident LLM); pass `?refresh=1` to force an upstream re-query. The cache is
-  invalidated by voice create/delete.
- `POST /tts/voices/upload` — multipart `voice_name` + `voice_file`; clone a
-  voice from an uploaded clip (≤25 MB).
- `POST /tts/voices/from-library` — body `{ voice_name, path, library? }`; clone
-  from a library file (audio forwarded as-is; video has its audio extracted via
-  ffmpeg).
- `DELETE /tts/voices/{name}` — remove a cloned voice from the library.
-
-Created voice names are tagged with the ref-clip cap in effect (e.g.
-`grandma-30s`) so the library shows which reference length produced each clone.
-
-Words the model mispronounces (place names, initialisms) can be rewritten
-before synthesis via a JSON map — copy `tts_pronunciations.example.json` to
-`tts_pronunciations.json` and edit; changes apply without a restart. Full
-matching rules are documented in `src/ai/pronunciation.rs`.
-
-Env:
- `TTS_PRONUNCIATIONS_PATH` - pronunciation-override JSON file
-  [default: `tts_pronunciations.json` in the working directory]
- `LLAMA_SWAP_TTS_MODEL` - TTS model id in llama-swap's `config.yaml` [default: `chatterbox`]
- `LLAMA_SWAP_TTS_VOICE` - default voice used when a `/tts/speech` request omits `voice` (optional)
- `LLAMA_SWAP_TTS_REF_SECONDS` - max voice-clone reference clip length in seconds
-  [default: `30`]. Reference audio is ffmpeg-normalized to mono 24 kHz WAV (so any
-  source format works); Chatterbox is zero-shot, so a clean ~10–20s sample is the
-  sweet spot — more rarely helps.
- `LLAMA_SWAP_TTS_REQUEST_TIMEOUT_SECONDS` - per-request synthesis timeout in
-  seconds [default: `600`]. Long insights are chunked + synthesized server-side
-  and can take minutes; this is separate from (and overrides, for `/tts/speech`)
-  the shared `LLAMA_SWAP_REQUEST_TIMEOUT_SECONDS`.
-
 #### Fallback Behavior
 - Primary server is tried first with 5-second connection timeout
 - On failure, automatically falls back to secondary server (if configured)
@@ -1,3 +0,0 @@
-DROP INDEX IF EXISTS idx_image_exif_clip_backfill;
-ALTER TABLE image_exif DROP COLUMN clip_model_version;
-ALTER TABLE image_exif DROP COLUMN clip_embedding;
@@ -1,27 +0,0 @@
-- CLIP semantic photo search: store a per-photo image embedding so
-- text queries can rerank against the live library via cosine
-- similarity. Apollo encodes the bytes via its CLIP service; ImageApi
-- writes the resulting blob here.
--
-- `clip_embedding` is the raw little-endian float32 buffer of an
-- L2-normalized vector (dim depends on the model — 768 bytes×4 for
-- ViT-L/14, 512 bytes×4 for ViT-B/32). Apollo always returns the
-- normalized form so the search-time dot product reduces to a plain
-- cosine similarity.
--
-- `clip_model_version` echoes the upstream `APOLLO_CLIP_MODEL` (e.g.
-- "ViT-L/14"). A model swap shouldn't silently mix geometries — the
-- backfill drain will re-eligibilize rows whose stored model_version
-- differs from the live engine's, and the search route refuses to
-- mix rows from two model_versions in the same response.
-ALTER TABLE image_exif ADD COLUMN clip_embedding BLOB;
-ALTER TABLE image_exif ADD COLUMN clip_model_version TEXT;
-
-- Partial index for the backfill drain. Mirrors the shape of
-- `idx_image_exif_date_backfill`: candidate rows are those with a
-- known content_hash (so we don't race the unhashed backlog) but no
-- embedding yet. SELECT cost stays O(missing rows) instead of full
-- table scan once the column is mostly populated.
-CREATE INDEX IF NOT EXISTS idx_image_exif_clip_backfill
-    ON image_exif (id)
-    WHERE clip_embedding IS NULL AND content_hash IS NOT NULL;
@@ -1,3 +0,0 @@
-DROP INDEX IF EXISTS idx_insight_gen_jobs_status_cleanup;
-DROP INDEX IF EXISTS idx_insight_gen_jobs_file;
-DROP TABLE IF EXISTS insight_generation_jobs;
@@ -1,23 +0,0 @@
-- Track async insight generation jobs so the client can poll for
-- completion after the server returns 202 Accepted. Each generation
-- creates a new row; the application layer cancels prior running
-- jobs before inserting.
-CREATE TABLE insight_generation_jobs (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
-    library_id INTEGER NOT NULL DEFAULT 1,
-    file_path TEXT NOT NULL,
-    generation_type TEXT NOT NULL,
-    status TEXT NOT NULL DEFAULT 'running',
-    started_at INTEGER NOT NULL,
-    completed_at INTEGER,
-    result_insight_id INTEGER,
-    error_message TEXT
-);
-
-- For the status endpoint: fast lookup by (library_id, file_path)
-CREATE INDEX idx_insight_gen_jobs_file
-    ON insight_generation_jobs(library_id, file_path);
-
-- For startup cleanup (future): prune old completed/failed jobs
-CREATE INDEX idx_insight_gen_jobs_status_cleanup
-    ON insight_generation_jobs(status, started_at);
@@ -1,28 +0,0 @@
-- Restore UNIQUE constraint
-
-CREATE TABLE insight_generation_jobs_new (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
-    library_id INTEGER NOT NULL DEFAULT 1,
-    file_path TEXT NOT NULL,
-    generation_type TEXT NOT NULL,
-    status TEXT NOT NULL DEFAULT 'running',
-    started_at INTEGER NOT NULL,
-    completed_at INTEGER,
-    result_insight_id INTEGER,
-    error_message TEXT,
-    UNIQUE(library_id, file_path, generation_type)
-);
-
-INSERT INTO insight_generation_jobs_new
-    SELECT id, library_id, file_path, generation_type, status, started_at, completed_at, result_insight_id, error_message
-    FROM insight_generation_jobs;
-
-DROP TABLE insight_generation_jobs;
-
-ALTER TABLE insight_generation_jobs_new RENAME TO insight_generation_jobs;
-
-CREATE INDEX idx_insight_gen_jobs_file
-    ON insight_generation_jobs(library_id, file_path);
-
-CREATE INDEX idx_insight_gen_jobs_status_cleanup
-    ON insight_generation_jobs(status, started_at);
@@ -1,30 +0,0 @@
-- Remove UNIQUE(library_id, file_path, generation_type) constraint to allow
-- multiple job rows per file. This enables proper cancel/regenerate semantics:
-- a new job is always inserted on regenerate, and the old job is cancelled
-- independently. The application layer prevents concurrent running jobs.
-
-CREATE TABLE insight_generation_jobs_new (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
-    library_id INTEGER NOT NULL DEFAULT 1,
-    file_path TEXT NOT NULL,
-    generation_type TEXT NOT NULL,
-    status TEXT NOT NULL DEFAULT 'running',
-    started_at INTEGER NOT NULL,
-    completed_at INTEGER,
-    result_insight_id INTEGER,
-    error_message TEXT
-);
-
-INSERT INTO insight_generation_jobs_new
-    SELECT id, library_id, file_path, generation_type, status, started_at, completed_at, result_insight_id, error_message
-    FROM insight_generation_jobs;
-
-DROP TABLE insight_generation_jobs;
-
-ALTER TABLE insight_generation_jobs_new RENAME TO insight_generation_jobs;
-
-CREATE INDEX idx_insight_gen_jobs_file
-    ON insight_generation_jobs(library_id, file_path);
-
-CREATE INDEX idx_insight_gen_jobs_status_cleanup
-    ON insight_generation_jobs(status, started_at);
@@ -1,11 +0,0 @@
-- SQLite doesn't support DROP COLUMN before 3.35.0; recreate the table
-- without the new columns. This is only needed for rollback.
-CREATE TABLE photo_insights_old AS
-    SELECT id, library_id, rel_path, title, summary, generated_at,
-           model_version, is_current, training_messages, approved,
-           backend, fewshot_source_ids, content_hash
-    FROM photo_insights;
-
-DROP TABLE photo_insights;
-
-ALTER TABLE photo_insights_old RENAME TO photo_insights;
@@ -1,8 +0,0 @@
-- Persist generation parameters on each insight row for auditing.
-ALTER TABLE photo_insights ADD COLUMN num_ctx INTEGER;
-ALTER TABLE photo_insights ADD COLUMN temperature REAL;
-ALTER TABLE photo_insights ADD COLUMN top_p REAL;
-ALTER TABLE photo_insights ADD COLUMN top_k INTEGER;
-ALTER TABLE photo_insights ADD COLUMN min_p REAL;
-ALTER TABLE photo_insights ADD COLUMN system_prompt TEXT;
-ALTER TABLE photo_insights ADD COLUMN persona_id TEXT;
@@ -1,13 +0,0 @@
-- SQLite doesn't support DROP COLUMN before 3.35.0; recreate the table
-- without the token-count columns. This is only needed for rollback.
-CREATE TABLE photo_insights_old AS
-    SELECT id, library_id, rel_path, title, summary, generated_at,
-           model_version, is_current, training_messages, approved,
-           backend, fewshot_source_ids, content_hash,
-           num_ctx, temperature, top_p, top_k, min_p,
-           system_prompt, persona_id
-    FROM photo_insights;
-
-DROP TABLE photo_insights;
-
-ALTER TABLE photo_insights_old RENAME TO photo_insights;
@@ -1,6 +0,0 @@
-- Persist token usage on each insight row. Split from
-- 2026-05-27-000002_add_insight_generation_params because that
-- migration was already applied on some environments before these
-- columns were added.
-ALTER TABLE photo_insights ADD COLUMN prompt_eval_count INTEGER;
-ALTER TABLE photo_insights ADD COLUMN eval_count INTEGER;
@@ -1,2 +0,0 @@
-DROP INDEX IF EXISTS idx_precomputed_reels_span_library;
-DROP TABLE IF EXISTS precomputed_reels;
@@ -1,14 +0,0 @@
-CREATE TABLE precomputed_reels (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
-    span TEXT NOT NULL,
-    library_key TEXT NOT NULL,
-    cache_key TEXT NOT NULL,
-    output_path TEXT NOT NULL,
-    title TEXT NOT NULL,
-    media_count INT NOT NULL,
-    render_version INT NOT NULL DEFAULT 1,
-    tz_offset_minutes INT NOT NULL,
-    voice TEXT,
-    generated_at BIGINT NOT NULL
-);
-CREATE INDEX idx_precomputed_reels_span_library ON precomputed_reels(span, library_key, generated_at DESC);
@@ -1 +0,0 @@
-DROP TABLE IF EXISTS user_ai_prefs;
@@ -1,7 +0,0 @@
-CREATE TABLE user_ai_prefs (
-    id INTEGER PRIMARY KEY CHECK(id=1),
-    voice TEXT,
-    tz_offset_minutes INTEGER,
-    library TEXT,
-    updated_at BIGINT NOT NULL
-);
@@ -1,146 +0,0 @@
-use anyhow::{Result, anyhow};
-
-use crate::ai::llm_client::LlmClient;
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum BackendKind {
-    Local,
-    Hybrid,
-}
-
-impl BackendKind {
-    pub fn parse(s: &str) -> Result<Self> {
-        match s.trim().to_lowercase().as_str() {
-            "local" | "" => Ok(Self::Local),
-            "hybrid" => Ok(Self::Hybrid),
-            other => Err(anyhow!(
-                "unknown backend '{}'; expected 'local' or 'hybrid'",
-                other
-            )),
-        }
-    }
-
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            Self::Local => "local",
-            Self::Hybrid => "hybrid",
-        }
-    }
-}
-
-impl std::fmt::Display for BackendKind {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.as_str())
-    }
-}
-
-pub struct SamplingOverrides {
-    pub model: Option<String>,
-    pub num_ctx: Option<i32>,
-    pub temperature: Option<f32>,
-    pub top_p: Option<f32>,
-    pub top_k: Option<i32>,
-    pub min_p: Option<f32>,
-    /// Reasoning toggle. Only the llama.cpp backend honors it (forwarded as
-    /// `chat_template_kwargs.enable_thinking`); other backends ignore it.
-    /// `None` leaves the model/template default in place.
-    pub enable_thinking: Option<bool>,
-}
-
-impl SamplingOverrides {
-    pub fn has_sampling(&self) -> bool {
-        self.temperature.is_some()
-            || self.top_p.is_some()
-            || self.top_k.is_some()
-            || self.min_p.is_some()
-    }
-}
-
-pub struct ResolvedBackend {
-    chat: Box<dyn LlmClient>,
-    local: Box<dyn LlmClient>,
-    pub kind: BackendKind,
-    /// `true` when the chat model receives images directly (Ollama with
-    /// vision, or llamacpp). `false` for hybrid where we describe-then-inline.
-    pub images_inline: bool,
-}
-
-impl ResolvedBackend {
-    pub fn new(
-        chat: Box<dyn LlmClient>,
-        local: Box<dyn LlmClient>,
-        kind: BackendKind,
-        images_inline: bool,
-    ) -> Self {
-        Self {
-            chat,
-            local,
-            kind,
-            images_inline,
-        }
-    }
-
-    pub fn chat(&self) -> &dyn LlmClient {
-        self.chat.as_ref()
-    }
-
-    pub fn local(&self) -> &dyn LlmClient {
-        self.local.as_ref()
-    }
-
-    pub fn model(&self) -> &str {
-        self.chat.primary_model()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn parse_backend_kind() {
-        assert_eq!(BackendKind::parse("local").unwrap(), BackendKind::Local);
-        assert_eq!(BackendKind::parse("hybrid").unwrap(), BackendKind::Hybrid);
-        assert_eq!(BackendKind::parse("  Local ").unwrap(), BackendKind::Local);
-        assert_eq!(BackendKind::parse("HYBRID").unwrap(), BackendKind::Hybrid);
-        assert_eq!(BackendKind::parse("").unwrap(), BackendKind::Local);
-        assert!(BackendKind::parse("vllm").is_err());
-    }
-
-    #[test]
-    fn backend_kind_as_str_roundtrips() {
-        assert_eq!(
-            BackendKind::parse(BackendKind::Local.as_str()).unwrap(),
-            BackendKind::Local
-        );
-        assert_eq!(
-            BackendKind::parse(BackendKind::Hybrid.as_str()).unwrap(),
-            BackendKind::Hybrid
-        );
-    }
-
-    #[test]
-    fn sampling_overrides_has_sampling() {
-        let empty = SamplingOverrides {
-            model: None,
-            num_ctx: None,
-            temperature: None,
-            top_p: None,
-            top_k: None,
-            min_p: None,
-            enable_thinking: None,
-        };
-        assert!(!empty.has_sampling());
-
-        let with_temp = SamplingOverrides {
-            model: None,
-            num_ctx: Some(4096),
-            temperature: Some(0.7),
-            top_p: None,
-            top_k: None,
-            min_p: None,
-            enable_thinking: None,
-        };
-        assert!(with_temp.has_sampling());
-    }
-}
@@ -1,395 +0,0 @@
-//! Thin async HTTP client for Apollo's `/api/internal/clip/*` endpoints.
-//!
-//! Apollo hosts the OpenAI CLIP inference service (ViT-L/14 by default,
-//! configurable via `APOLLO_CLIP_MODEL`). This client is the ImageApi side
-//! of the contract: shove image bytes through `/encode_image` to populate
-//! `image_exif.clip_embedding` during backfill, and call `/encode_text` to
-//! encode a user's natural-language query at search time. The actual
-//! cosine-similarity rerank runs locally in ImageApi.
-//!
-//! Mirrors `face_client.rs` / `tag_client.rs` shape: optional base URL
-//! (None = disabled — feature off, drain and search no-op), reqwest
-//! client with a generous timeout because GPU inference under a backlog
-//! can queue server-side (Apollo's threadpool is bounded to 1 worker on
-//! CUDA).
-//!
-//! Configured via `APOLLO_CLIP_API_BASE_URL`, falling back to
-//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo
-//! deploys are the common case).
-//!
-//! Wire format:
-//! - `/encode_image`: multipart/form-data with `file=<bytes>` and
-//!   `meta=<json>` (content_hash / library_id / rel_path for logging).
-//! - `/encode_text`: JSON `{"text": "<query>"}`.
-//!
-//! Both return `{model_version, embedding_dim, duration_ms, embedding}`
-//! where `embedding` is base64 of `dim×4` little-endian float32 bytes,
-//! L2-normalized so the rerank reduces to a plain dot product.
-//!
-//! Error mapping (reflected in [`ClipError`]):
-//! - 422 `decode_failed` / `empty_text` → permanent: ImageApi marks the
-//!   row failed or surfaces the empty-query error to the search caller.
-//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker.
-//! - Any other 5xx / network error → defer.
-
-use anyhow::{Context, Result};
-use base64::Engine;
-use reqwest::Client;
-use serde::{Deserialize, Serialize};
-use std::time::Duration;
-
-#[derive(Debug, Clone, Serialize)]
-pub struct EncodeImageMeta {
-    pub content_hash: String,
-    pub library_id: i32,
-    pub rel_path: String,
-}
-
-#[derive(Debug, Clone, Deserialize)]
-#[allow(dead_code)] // duration_ms logged by the backfill drain
-pub struct EncodeResponse {
-    pub model_version: String,
-    pub embedding_dim: i32,
-    pub duration_ms: i64,
-    /// base64 of `embedding_dim * 4` bytes (LE float32). ImageApi stores
-    /// the decoded bytes verbatim as a BLOB.
-    pub embedding: String,
-}
-
-impl EncodeResponse {
-    /// Decode the wire-format embedding back into raw bytes for storage.
-    /// Validates the buffer is `embedding_dim * 4` bytes long so a
-    /// malformed response surfaces here rather than as a downstream
-    /// silent length mismatch.
-    pub fn decode_embedding(&self) -> Result<Vec<u8>> {
-        let bytes = base64::engine::general_purpose::STANDARD
-            .decode(self.embedding.as_bytes())
-            .context("clip embedding base64 decode")?;
-        let expected = (self.embedding_dim as usize) * 4;
-        if bytes.len() != expected {
-            anyhow::bail!(
-                "clip embedding wrong size: got {} bytes, expected {} ({} * 4)",
-                bytes.len(),
-                expected,
-                self.embedding_dim
-            );
-        }
-        Ok(bytes)
-    }
-}
-
-#[derive(Debug, Clone, Deserialize)]
-#[allow(dead_code)] // load_error consumed by future health probe
-pub struct ClipHealth {
-    pub loaded: bool,
-    pub device: String,
-    pub model_version: String,
-    pub embedding_dim: i32,
-    #[serde(default)]
-    pub load_error: Option<String>,
-}
-
-#[derive(Debug)]
-pub enum ClipError {
-    /// Apollo refused for a reason that won't change on retry (decode
-    /// failure on /encode_image, empty text on /encode_text).
-    Permanent(anyhow::Error),
-    /// Apollo couldn't process this turn but might next time (CUDA OOM,
-    /// engine not loaded, network hiccup).
-    Transient(anyhow::Error),
-    /// Feature is disabled (no `APOLLO_CLIP_API_BASE_URL` /
-    /// `APOLLO_API_BASE_URL`).
-    Disabled,
-}
-
-impl std::fmt::Display for ClipError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            ClipError::Permanent(e) => write!(f, "permanent: {e}"),
-            ClipError::Transient(e) => write!(f, "transient: {e}"),
-            ClipError::Disabled => write!(f, "clip client disabled"),
-        }
-    }
-}
-
-impl std::error::Error for ClipError {}
-
-#[derive(Clone)]
-pub struct ClipClient {
-    client: Client,
-    base_url: Option<String>,
-}
-
-impl ClipClient {
-    pub fn new(base_url: Option<String>) -> Self {
-        let timeout_secs = std::env::var("CLIP_REQUEST_TIMEOUT_SEC")
-            .ok()
-            .and_then(|s| s.parse::<u64>().ok())
-            .unwrap_or(60);
-        let client = Client::builder()
-            .timeout(Duration::from_secs(timeout_secs))
-            .build()
-            .expect("reqwest client build");
-        Self {
-            client,
-            base_url: base_url.map(|u| u.trim_end_matches('/').to_string()),
-        }
-    }
-
-    /// Read both standard env vars. `APOLLO_CLIP_API_BASE_URL` wins;
-    /// fallback to `APOLLO_API_BASE_URL`. Both unset → disabled.
-    pub fn from_env() -> Self {
-        let base = std::env::var("APOLLO_CLIP_API_BASE_URL")
-            .ok()
-            .filter(|s| !s.trim().is_empty())
-            .or_else(|| {
-                std::env::var("APOLLO_API_BASE_URL")
-                    .ok()
-                    .filter(|s| !s.trim().is_empty())
-            });
-        Self::new(base)
-    }
-
-    pub fn is_enabled(&self) -> bool {
-        self.base_url.is_some()
-    }
-
-    /// Encode an image to a 768-d (ViT-L/14) or 512-d (ViT-B/32)
-    /// L2-normalized embedding. Used by the backfill drain.
-    pub async fn encode_image(
-        &self,
-        bytes: Vec<u8>,
-        meta: EncodeImageMeta,
-    ) -> std::result::Result<EncodeResponse, ClipError> {
-        let Some(base) = self.base_url.as_deref() else {
-            return Err(ClipError::Disabled);
-        };
-        let url = format!("{}/api/internal/clip/encode_image", base);
-        let meta_json = serde_json::to_string(&meta)
-            .map_err(|e| ClipError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?;
-        let form = reqwest::multipart::Form::new()
-            .text("meta", meta_json)
-            .part(
-                "file",
-                reqwest::multipart::Part::bytes(bytes)
-                    .file_name(meta.rel_path.clone())
-                    .mime_str("application/octet-stream")
-                    .unwrap_or_else(|_| reqwest::multipart::Part::bytes(Vec::new())),
-            );
-        self.send_multipart(&url, form).await
-    }
-
-    /// Encode a natural-language query to an embedding. Used by the
-    /// search route to rank stored image embeddings by cosine sim.
-    pub async fn encode_text(&self, text: &str) -> std::result::Result<EncodeResponse, ClipError> {
-        let Some(base) = self.base_url.as_deref() else {
-            return Err(ClipError::Disabled);
-        };
-        let url = format!("{}/api/internal/clip/encode_text", base);
-        let body = serde_json::json!({ "text": text });
-
-        let resp = match self.client.post(&url).json(&body).send().await {
-            Ok(r) => r,
-            Err(e) if e.is_timeout() || e.is_connect() => {
-                log::warn!("clip encode_text network error to {url}: {e}");
-                return Err(ClipError::Transient(anyhow::anyhow!(
-                    "clip client network: {e}"
-                )));
-            }
-            Err(e) => {
-                log::warn!("clip encode_text request error to {url}: {e}");
-                return Err(ClipError::Transient(anyhow::anyhow!(
-                    "clip client request: {e}"
-                )));
-            }
-        };
-        let status = resp.status();
-        if status.is_success() {
-            let body: EncodeResponse = resp
-                .json()
-                .await
-                .map_err(|e| ClipError::Transient(anyhow::anyhow!("clip response decode: {e}")))?;
-            return Ok(body);
-        }
-        let body_text = resp.text().await.unwrap_or_default();
-        log::warn!("clip encode_text HTTP {status} from {url}: {body_text}");
-        Err(classify_error_response(status.as_u16(), &body_text))
-    }
-
-    /// Engine reachability + device/model report. Used as a startup
-    /// sanity check from the probe binary and (later) the backlog drain.
-    #[allow(dead_code)] // consumed by probe + drain
-    pub async fn health(&self) -> Result<ClipHealth> {
-        let base = self.base_url.as_deref().context("clip client disabled")?;
-        let url = format!("{}/api/internal/clip/health", base);
-        let resp = self.client.get(&url).send().await?.error_for_status()?;
-        let body: ClipHealth = resp.json().await?;
-        Ok(body)
-    }
-
-    async fn send_multipart(
-        &self,
-        url: &str,
-        form: reqwest::multipart::Form,
-    ) -> std::result::Result<EncodeResponse, ClipError> {
-        let resp = match self.client.post(url).multipart(form).send().await {
-            Ok(r) => r,
-            Err(e) if e.is_timeout() || e.is_connect() => {
-                return Err(ClipError::Transient(anyhow::anyhow!(
-                    "clip client network: {e}"
-                )));
-            }
-            Err(e) => {
-                return Err(ClipError::Transient(anyhow::anyhow!(
-                    "clip client request: {e}"
-                )));
-            }
-        };
-        let status = resp.status();
-        if status.is_success() {
-            let body: EncodeResponse = resp
-                .json()
-                .await
-                .map_err(|e| ClipError::Transient(anyhow::anyhow!("clip response decode: {e}")))?;
-            return Ok(body);
-        }
-        let body_text = resp.text().await.unwrap_or_default();
-        Err(classify_error_response(status.as_u16(), &body_text))
-    }
-}
-
-/// Pulled out as a pure function so the marker-row contract is unit-
-/// testable without spinning up an HTTP server. Matches the shape used
-/// by face_client::classify_error_response so future retry policies
-/// can share code.
-fn classify_error_response(status: u16, body_text: &str) -> ClipError {
-    let detail_code = serde_json::from_str::<serde_json::Value>(body_text)
-        .ok()
-        .and_then(|v| {
-            v.get("detail")
-                .and_then(|d| d.as_str().map(str::to_string))
-                .or_else(|| {
-                    v.get("detail")
-                        .and_then(|d| d.get("code"))
-                        .and_then(|c| c.as_str())
-                        .map(str::to_string)
-                })
-        })
-        .unwrap_or_default();
-
-    if status == 422 {
-        return ClipError::Permanent(anyhow::anyhow!(
-            "clip {} {}: {}",
-            status,
-            detail_code,
-            body_text
-        ));
-    }
-    if status == 503 {
-        return ClipError::Transient(anyhow::anyhow!(
-            "clip {} {}: {}",
-            status,
-            detail_code,
-            body_text
-        ));
-    }
-    // 408 / 413 / 429 are operator-fixable infra issues; defer.
-    if matches!(status, 408 | 413 | 429) {
-        return ClipError::Transient(anyhow::anyhow!(
-            "clip {} {}: {}",
-            status,
-            detail_code,
-            body_text
-        ));
-    }
-    if (400..500).contains(&status) {
-        ClipError::Permanent(anyhow::anyhow!(
-            "clip {} {}: {}",
-            status,
-            detail_code,
-            body_text
-        ))
-    } else {
-        ClipError::Transient(anyhow::anyhow!(
-            "clip {} {}: {}",
-            status,
-            detail_code,
-            body_text
-        ))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn is_permanent(e: &ClipError) -> bool {
-        matches!(e, ClipError::Permanent(_))
-    }
-    fn is_transient(e: &ClipError) -> bool {
-        matches!(e, ClipError::Transient(_))
-    }
-
-    #[test]
-    fn classify_422_decode_failed_is_permanent() {
-        assert!(is_permanent(&classify_error_response(
-            422,
-            r#"{"detail":"decode_failed: bad bytes"}"#
-        )));
-    }
-
-    #[test]
-    fn classify_422_empty_text_is_permanent() {
-        assert!(is_permanent(&classify_error_response(
-            422,
-            r#"{"detail":"empty_text"}"#
-        )));
-    }
-
-    #[test]
-    fn classify_503_cuda_oom_is_transient() {
-        assert!(is_transient(&classify_error_response(
-            503,
-            r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#,
-        )));
-    }
-
-    #[test]
-    fn classify_5xx_is_transient_other_4xx_is_permanent() {
-        assert!(is_transient(&classify_error_response(500, "")));
-        assert!(is_permanent(&classify_error_response(404, "{}")));
-    }
-
-    #[test]
-    fn classify_infra_4xx_is_transient() {
-        assert!(is_transient(&classify_error_response(408, "")));
-        assert!(is_transient(&classify_error_response(413, "<html>")));
-        assert!(is_transient(&classify_error_response(429, "{}")));
-    }
-
-    #[test]
-    fn decode_embedding_size_mismatch_errors() {
-        // dim=4 says we expect 16 bytes (4 floats × 4 bytes). Encode 8.
-        use base64::Engine;
-        let resp = EncodeResponse {
-            model_version: "ViT-L/14".into(),
-            embedding_dim: 4,
-            duration_ms: 0,
-            embedding: base64::engine::general_purpose::STANDARD.encode([0u8; 8]),
-        };
-        assert!(resp.decode_embedding().is_err());
-    }
-
-    #[test]
-    fn decode_embedding_round_trip() {
-        use base64::Engine;
-        let bytes: Vec<u8> = (0..16).collect();
-        let resp = EncodeResponse {
-            model_version: "ViT-L/14".into(),
-            embedding_dim: 4,
-            duration_ms: 0,
-            embedding: base64::engine::general_purpose::STANDARD.encode(&bytes),
-        };
-        assert_eq!(resp.decode_embedding().unwrap(), bytes);
-    }
-}
@@ -1,88 +0,0 @@
-// GPU lease — in-process coordination for llama-swap model contention.
-//
-// llama-swap runs the heavyweight models (chat / vision / Chatterbox TTS) as
-// a mutually-exclusive set on one GPU (matrix DSL `(q27 | … | tts) & e`): a
-// request for a non-resident model is HELD by llama-swap until the resident
-// model's in-flight requests drain, then the models swap. That hold counts
-// against the *holder's* reqwest timeout — measured live: a queued TTS burned
-// 77s of its budget behind a single LLM turn, and an LLM request behind a
-// running synthesis waited the entire remaining synth. Uncoordinated
-// cross-model traffic therefore times out instead of queueing.
-//
-// The lease moves that wait into this process, BEFORE the HTTP request is
-// sent and before its timeout starts:
-// - chat/vision requests (the LLM-side slots) share the READ lease;
-// - TTS synthesis and voice-library ops (anything that spins Chatterbox up
-//   and evicts the LLM) take the WRITE lease;
-// - embeddings take NO lease: the `embed` slot is in llama-swap's
-//   always-resident group (the `& e` term) and never participates in a swap,
-//   so leasing it would only stall searches behind a queued synthesis.
-//
-// tokio's RwLock is fair (FIFO, write-preferring): a queued TTS gets the GPU
-// right after the current LLM request drains, and later LLM requests queue
-// behind it — bounded waits in both directions, no starvation, no timeout
-// budget burned while waiting.
-//
-// RULES: hold a lease for exactly one HTTP request (for streaming, the
-// stream's lifetime) and NEVER acquire one while already holding one — once a
-// writer is queued, new read acquisitions block, so nested acquisition can
-// deadlock.
-
-use std::sync::LazyLock;
-use std::time::Instant;
-use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
-
-static GPU_LEASE: LazyLock<RwLock<()>> = LazyLock::new(|| RwLock::new(()));
-
-/// Waits longer than this are logged — they mean a cross-model swap was
-/// avoided and quantify what the request *would* have burned of its timeout.
-const SLOW_WAIT_LOG_SECS: f64 = 2.0;
-
-/// Shared lease for LLM-side requests (chat / vision slots).
-pub async fn llm_lease() -> RwLockReadGuard<'static, ()> {
-    let started = Instant::now();
-    let guard = GPU_LEASE.read().await;
-    log_slow_wait("llm", started);
-    guard
-}
-
-/// Exclusive lease for TTS-side requests (speech synthesis + voice-library
-/// ops that spin up Chatterbox).
-pub async fn tts_lease() -> RwLockWriteGuard<'static, ()> {
-    let started = Instant::now();
-    let guard = GPU_LEASE.write().await;
-    log_slow_wait("tts", started);
-    guard
-}
-
-fn log_slow_wait(kind: &str, started: Instant) {
-    let waited = started.elapsed().as_secs_f64();
-    if waited > SLOW_WAIT_LOG_SECS {
-        log::info!("GPU lease ({kind}): waited {waited:.1}s for the other model class to drain");
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // One sequential test, not several: the lease is a single global, so
-    // parallel tests interleaving reads and writes on it can hit the very
-    // nested-acquisition deadlock the module comment warns about.
-    #[tokio::test]
-    async fn write_lease_excludes_readers_then_reads_share() {
-        let w = tts_lease().await;
-        // A reader must not acquire while the writer is held.
-        let pending = tokio::spawn(async { drop(llm_lease().await) });
-        tokio::task::yield_now().await;
-        assert!(!pending.is_finished());
-        drop(w);
-        pending.await.expect("reader acquires after writer drops");
-
-        // With no writer queued, read leases are shared.
-        let a = llm_lease().await;
-        let b = llm_lease().await;
-        drop(a);
-        drop(b);
-    }
-}
@@ -170,55 +170,3 @@ pub struct ModelCapabilities {
    pub has_vision: bool,
    pub has_tool_calling: bool,
 }
-
-/// Strip a leading `<think>…</think>` reasoning block from model output.
-///
-/// Thinking models sometimes emit chain-of-thought inside think tags before
-/// the real answer. Everything after the first `</think>` is the answer;
-/// when no tag is present — or the text after it is empty — the trimmed
-/// input is returned unchanged. Mirrors the behavior Ollama's
-/// `extract_final_answer` has applied to single-shot generation; shared here
-/// so the tool-calling final-content paths (agentic generation + chat) can
-/// apply the identical cleanup before parsing / persisting.
-pub fn strip_think_blocks(response: &str) -> String {
-    let response = response.trim();
-
-    if let Some(pos) = response.find("</think>") {
-        let answer = response[pos + "</think>".len()..].trim();
-        if !answer.is_empty() {
-            return answer.to_string();
-        }
-    }
-
-    response.to_string()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn strip_think_blocks_removes_leading_think_block() {
-        let raw = "<think>\nLet me reason about this.\n</think>\n\nTitle: A Day Out\n\nThe body.";
-        assert_eq!(strip_think_blocks(raw), "Title: A Day Out\n\nThe body.");
-    }
-
-    #[test]
-    fn strip_think_blocks_passes_through_plain_content() {
-        assert_eq!(strip_think_blocks("  just an answer  "), "just an answer");
-    }
-
-    #[test]
-    fn strip_think_blocks_keeps_content_when_answer_after_tag_is_empty() {
-        // A think block with nothing after it: better to return the trimmed
-        // original than an empty string (matches Ollama's fallback).
-        let raw = "<think>only thoughts</think>";
-        assert_eq!(strip_think_blocks(raw), raw);
-    }
-
-    #[test]
-    fn strip_think_blocks_handles_unclosed_tag() {
-        let raw = "<think>thinking forever";
-        assert_eq!(strip_think_blocks(raw), raw);
-    }
-}
@@ -1,88 +0,0 @@
-//! Bundle of the local LLM pair (Ollama + optional llama-swap) with the
-//! `LLM_BACKEND` dispatch baked in.
-//!
-//! Exists because passing the pair around as loose values invited the same
-//! bug three times: import/backfill tooling embedded corpora via
-//! `OllamaClient` directly while the query side dispatched through
-//! `embed_one`, so flipping `LLM_BACKEND=llamacpp` silently split queries
-//! and corpus into different vector spaces. Anything that writes or reads
-//! embeddings should go through this type (or `embed_one`/`embed_many`),
-//! never a concrete client.
-//!
-//! Deliberately knows nothing about chat policy — hybrid/OpenRouter routing
-//! is request-scoped and stays in `ResolvedBackend`. This is only the
-//! local stack: embeddings and offline single-shot generation.
-
-// Constructed by binaries, not the server — dead code from main.rs's view.
-#![allow(dead_code)]
-
-use std::sync::Arc;
-
-use anyhow::Result;
-
-use super::llamacpp::LlamaCppClient;
-use super::llm_client::LlmClient;
-use super::ollama::{EMBEDDING_MODEL, OllamaClient};
-
-#[derive(Clone)]
-pub struct LocalLlm {
-    ollama: OllamaClient,
-    llamacpp: Option<Arc<LlamaCppClient>>,
-}
-
-impl LocalLlm {
-    pub fn new(ollama: OllamaClient, llamacpp: Option<Arc<LlamaCppClient>>) -> Self {
-        Self { ollama, llamacpp }
-    }
-
-    /// Construct from the canonical env wiring shared with `AppState`.
-    pub fn from_env() -> Self {
-        Self::new(
-            crate::state::build_ollama_from_env(),
-            crate::state::build_llamacpp_from_env(),
-        )
-    }
-
-    /// Embed a search query (applies `EMBED_QUERY_PREFIX`). Callers must
-    /// pick query vs document — retrieval models treat the two sides
-    /// differently and an unmarked embed invites prefix-mismatch bugs.
-    pub async fn embed_query(&self, text: &str) -> Result<Vec<f32>> {
-        super::embed_query(&self.ollama, self.llamacpp.as_deref(), text).await
-    }
-
-    /// Embed corpus text (applies `EMBED_DOCUMENT_PREFIX`).
-    pub async fn embed_document(&self, text: &str) -> Result<Vec<f32>> {
-        super::embed_document(&self.ollama, self.llamacpp.as_deref(), text).await
-    }
-
-    /// Single-shot local text generation via the `LLM_BACKEND`-selected
-    /// client (offline tooling; chat turns belong to `ResolvedBackend`).
-    pub async fn generate(&self, prompt: &str, system: Option<&str>) -> Result<String> {
-        if super::local_backend_is_llamacpp() {
-            if let Some(lc) = self.llamacpp.as_deref() {
-                return <LlamaCppClient as LlmClient>::generate(lc, prompt, system, None).await;
-            }
-            anyhow::bail!(
-                "LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — \
-                 set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
-            );
-        }
-        self.ollama.generate(prompt, system).await
-    }
-
-    /// Label identifying which backend + model produces embeddings right
-    /// now. Store it alongside vectors (`model_version` columns) so a
-    /// backend flip is detectable in the data, not just in env history.
-    pub fn embedding_model_version(&self) -> String {
-        if super::local_backend_is_llamacpp() {
-            let slot = self
-                .llamacpp
-                .as_deref()
-                .map(|c| c.embedding_model.as_str())
-                .unwrap_or("embed");
-            format!("llama-swap:{}", slot)
-        } else {
-            EMBEDDING_MODEL.to_string()
-        }
-    }
-}
@@ -1,22 +1,14 @@
 pub mod apollo_client;
-pub mod backend;
-pub mod clip_client;
 pub mod daily_summary_job;
 pub mod face_client;
-pub mod gpu;
 pub mod handlers;
 pub mod insight_chat;
 pub mod insight_generator;
-pub mod llamacpp;
 pub mod llm_client;
-pub mod local_llm;
-pub mod nl_query;
 pub mod ollama;
 pub mod openrouter;
-pub mod pronunciation;
 pub mod sms_client;
-pub mod tts;
-pub mod turn_registry;
+pub mod tag_client;

 // strip_summary_boilerplate is used by binaries (test_daily_summary), not the library
 #[allow(unused_imports)]
@@ -25,29 +17,18 @@ pub use daily_summary_job::{
    generate_daily_summaries, strip_summary_boilerplate,
 };
 pub use handlers::{
-    cancel_generation_handler, cancel_turn_handler, chat_history_handler, chat_rewind_handler,
-    chat_stream_handler, chat_turn_handler, delete_insight_handler, export_training_data_handler,
-    generate_agentic_insight_handler, generate_insight_handler, generation_status_handler,
-    get_all_insights_handler, get_available_models_handler, get_insight_handler,
-    get_insight_history_handler, get_openrouter_models_handler, rate_insight_handler,
-    turn_async_handler, turn_replay_handler,
+    chat_history_handler, chat_rewind_handler, chat_stream_handler, chat_turn_handler,
+    delete_insight_handler, export_training_data_handler, generate_agentic_insight_handler,
+    generate_insight_handler, get_all_insights_handler, get_available_models_handler,
+    get_insight_handler, get_openrouter_models_handler, rate_insight_handler,
 };
 pub use insight_generator::InsightGenerator;
-pub use llamacpp::LlamaCppClient;
 #[allow(unused_imports)]
 pub use llm_client::{
    ChatMessage, LlmClient, ModelCapabilities, Tool, ToolCall, ToolCallFunction, ToolFunction,
 };
-// LocalLlm is constructed by binaries (reembed_embeddings, importers), not the server
-#[allow(unused_imports)]
-pub use local_llm::LocalLlm;
 pub use ollama::{EMBEDDING_MODEL, OllamaClient};
 pub use sms_client::{SmsApiClient, SmsMessage};
-pub use tts::{
-    cancel_speech_job_handler, create_speech_job_handler, create_voice_from_library_handler,
-    create_voice_upload_handler, delete_voice_handler, list_voices_handler,
-    speech_job_status_handler, tts_speech_handler,
-};

 /// Display name used for the user in message transcripts and first-person
 /// prompt text. Reads the `USER_NAME` env var; defaults to `"Me"`. Models
@@ -57,153 +38,3 @@ pub use tts::{
 pub fn user_display_name() -> String {
    std::env::var("USER_NAME").unwrap_or_else(|_| "Me".to_string())
 }
-
-/// One switch for the "local" LLM stack: when `LLM_BACKEND=llamacpp` is
-/// set, chat / vision describe / embeddings all route through llama-swap
-/// instead of Ollama. Any other value (including unset, the default) is
-/// Ollama. This is intentionally global — embeddings must be drawn from
-/// a single source or similarity search across the index breaks (mixed
-/// vector spaces, possibly mixed dims). The `backend=hybrid` per-request
-/// override remains orthogonal: it always sends chat to OpenRouter, and
-/// uses `LLM_BACKEND` for the describe-then-inline vision pass.
-pub fn local_backend_is_llamacpp() -> bool {
-    matches!(
-        std::env::var("LLM_BACKEND")
-            .ok()
-            .as_deref()
-            .map(|s| s.trim().to_lowercase())
-            .as_deref(),
-        Some("llamacpp")
-    )
-}
-
-/// Expected embedding dimensionality, env-overridable via `EMBEDDING_DIM`
-/// (default 768, nomic-embed-text). Every store/query dim check reads this —
-/// swapping to a different-dim model (e.g. Qwen3-Embedding-0.6B at 1024) is
-/// then a config flip plus a `reembed_embeddings` run, not a code change.
-/// Cached for the process lifetime; a flip requires a restart anyway since
-/// the corpus must be re-embedded with it.
-pub fn embedding_dim() -> usize {
-    static DIM: std::sync::OnceLock<usize> = std::sync::OnceLock::new();
-    *DIM.get_or_init(|| {
-        std::env::var("EMBEDDING_DIM")
-            .ok()
-            .and_then(|v| v.parse().ok())
-            .unwrap_or(768)
-    })
-}
-
-/// Read an embedding prefix from the environment. `.env` values can't hold
-/// real newlines, so a literal `\n` in the value is expanded — Qwen3-style
-/// query instructions need one ("Instruct: ...\nQuery: ").
-fn embed_prefix(key: &str) -> String {
-    std::env::var(key)
-        .map(|v| v.replace("\\n", "\n"))
-        .unwrap_or_default()
-}
-
-/// Embed a search query. Applies `EMBED_QUERY_PREFIX` (default empty) —
-/// retrieval models distinguish query-side from document-side text:
-/// nomic v1.5 wants `search_query: `, Qwen3-Embedding wants
-/// `Instruct: <task>\nQuery: `. Must pair with the document prefix the
-/// corpus was embedded with or similarity degrades.
-pub async fn embed_query(
-    ollama: &OllamaClient,
-    llamacpp: Option<&LlamaCppClient>,
-    text: &str,
-) -> anyhow::Result<Vec<f32>> {
-    let prefixed = format!("{}{}", embed_prefix("EMBED_QUERY_PREFIX"), text);
-    embed_one(ollama, llamacpp, &prefixed).await
-}
-
-/// Embed corpus text (the stored side of retrieval). Applies
-/// `EMBED_DOCUMENT_PREFIX` (default empty; nomic v1.5 wants
-/// `search_document: `, Qwen3-Embedding wants none).
-pub async fn embed_document(
-    ollama: &OllamaClient,
-    llamacpp: Option<&LlamaCppClient>,
-    text: &str,
-) -> anyhow::Result<Vec<f32>> {
-    let prefixed = format!("{}{}", embed_prefix("EMBED_DOCUMENT_PREFIX"), text);
-    embed_one(ollama, llamacpp, &prefixed).await
-}
-
-/// Embed a batch of strings via the configured local backend. Routes
-/// through llama-swap when `LLM_BACKEND=llamacpp` (and a client is
-/// configured), else Ollama. See [`local_backend_is_llamacpp`] for the
-/// rationale on consistency.
-pub async fn embed_many(
-    ollama: &OllamaClient,
-    llamacpp: Option<&LlamaCppClient>,
-    texts: &[&str],
-) -> anyhow::Result<Vec<Vec<f32>>> {
-    if local_backend_is_llamacpp() {
-        if let Some(lc) = llamacpp {
-            return <LlamaCppClient as LlmClient>::generate_embeddings(lc, texts).await;
-        }
-        anyhow::bail!(
-            "LLM_BACKEND=llamacpp but LlamaCppClient is unconfigured — \
-             set LLAMA_SWAP_URL or switch to LLM_BACKEND=ollama"
-        );
-    }
-    ollama.generate_embeddings(texts).await
-}
-
-/// Embed one string via the configured local backend. Single-text
-/// convenience over [`embed_many`].
-pub async fn embed_one(
-    ollama: &OllamaClient,
-    llamacpp: Option<&LlamaCppClient>,
-    text: &str,
-) -> anyhow::Result<Vec<f32>> {
-    let mut vecs = embed_many(ollama, llamacpp, &[text]).await?;
-    vecs.pop()
-        .ok_or_else(|| anyhow::anyhow!("embedding backend returned no embeddings"))
-}
-
-#[cfg(test)]
-mod env_dispatch_tests {
-    use super::*;
-
-    /// Env vars are process-global, and the test harness runs in parallel —
-    /// without this lock the `LLM_BACKEND` tests race each other and flake.
-    static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
-
-    fn with_env<F: FnOnce()>(key: &str, val: Option<&str>, f: F) {
-        let _guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
-        let prev = std::env::var(key).ok();
-        match val {
-            Some(v) => unsafe { std::env::set_var(key, v) },
-            None => unsafe { std::env::remove_var(key) },
-        }
-        f();
-        match prev {
-            Some(v) => unsafe { std::env::set_var(key, v) },
-            None => unsafe { std::env::remove_var(key) },
-        }
-    }
-
-    #[test]
-    fn llm_backend_defaults_to_ollama() {
-        with_env("LLM_BACKEND", None, || {
-            assert!(!local_backend_is_llamacpp());
-        });
-    }
-
-    #[test]
-    fn llm_backend_llamacpp_case_insensitive() {
-        with_env("LLM_BACKEND", Some("LlamaCpp"), || {
-            assert!(local_backend_is_llamacpp());
-        });
-        with_env("LLM_BACKEND", Some("  llamacpp "), || {
-            assert!(local_backend_is_llamacpp());
-        });
-    }
-
-    #[test]
-    fn llm_backend_unknown_value_is_ollama() {
-        with_env("LLM_BACKEND", Some("vllm"), || {
-            assert!(!local_backend_is_llamacpp());
-        });
-    }
-}
@@ -1,408 +0,0 @@
-//! Natural-language → structured-query translation for unified photo search.
-//!
-//! The unified search endpoint (`/photos/search/unified`, Phase 2) needs to
-//! turn a free-text query like *"sunset photos in Italy from last summer"*
-//! into the structured filter the existing `/photos` engine understands plus
-//! a semantic term for CLIP ranking. That translation is a single grounded
-//! LLM call, isolated here so it can be unit-tested without a network or the
-//! full `InsightGenerator`.
-//!
-//! Two-stage design:
-//!  1. The LLM emits a [`RawNlQuery`] — references are by *name* (tags) and
-//!     dates as ISO strings, never numeric ids it could hallucinate.
-//!  2. [`resolve_raw_query`] maps names against the real tag vocabulary and
-//!     converts ISO dates to unix seconds, producing a [`StructuredQuery`].
-//!     A tag the model invents that isn't in the vocab is surfaced in
-//!     `unmatched_tags` (the caller folds it back into the semantic term)
-//!     rather than silently dropped — this is the anti-noise guard.
-//!
-//! Geocoding of `place` and person filtering are intentionally *not* handled
-//! here: `place` stays as text for the caller to forward-geocode (async, see
-//! `geo::forward_geocode`), and person filtering is deferred until a
-//! person→photos resolver exists.
-
-use crate::ai::llm_client::{ChatMessage, LlmClient, Tool, strip_think_blocks};
-use anyhow::{Result, anyhow};
-use serde::{Deserialize, Serialize};
-
-/// Raw query object as emitted by the LLM. Tag references are by name
-/// (resolved against the real vocab in Rust); dates are ISO `YYYY-MM-DD`.
-/// Every field is optional so a partial / minimal model response still
-/// deserializes.
-#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
-pub struct RawNlQuery {
-    /// Visual/scene description handed to CLIP for ranking. The descriptive
-    /// remainder after structured filters are peeled off.
-    #[serde(default)]
-    pub semantic: Option<String>,
-    /// Tag names the photos must have. Matched case-insensitively against
-    /// the supplied vocabulary; non-matches land in `unmatched_tags`.
-    #[serde(default)]
-    pub tags: Vec<String>,
-    /// Tag names the photos must NOT have.
-    #[serde(default)]
-    pub exclude_tags: Vec<String>,
-    #[serde(default)]
-    pub camera_make: Option<String>,
-    #[serde(default)]
-    pub camera_model: Option<String>,
-    #[serde(default)]
-    pub lens_model: Option<String>,
-    /// Free-text place/location name to forward-geocode (e.g. "Italy").
-    #[serde(default)]
-    pub place: Option<String>,
-    /// Inclusive start date, ISO `YYYY-MM-DD`.
-    #[serde(default)]
-    pub date_from: Option<String>,
-    /// Inclusive end date, ISO `YYYY-MM-DD`.
-    #[serde(default)]
-    pub date_to: Option<String>,
-    /// "photo" | "video" — normalized in [`resolve_raw_query`].
-    #[serde(default)]
-    pub media_type: Option<String>,
-}
-
-/// Resolved structured query: tag names mapped to ids against the real
-/// vocab, ISO dates converted to unix seconds. `place` stays as text for the
-/// caller to forward-geocode into a gps circle. Serializable so the endpoint
-/// can echo it back to the client as "this is how I read your query"
-/// (editable filter chips).
-#[derive(Debug, Clone, Default, PartialEq, Serialize)]
-pub struct StructuredQuery {
-    pub semantic: Option<String>,
-    pub tag_ids: Vec<i32>,
-    pub exclude_tag_ids: Vec<i32>,
-    /// Tag names the model produced that don't exist in the vocabulary.
-    /// The caller folds these back into the semantic term so the concept
-    /// isn't lost — and surfacing them keeps a hallucinated tag from
-    /// silently filtering the whole library to nothing.
-    pub unmatched_tags: Vec<String>,
-    pub camera_make: Option<String>,
-    pub camera_model: Option<String>,
-    pub lens_model: Option<String>,
-    /// Raw place name awaiting forward-geocoding by the caller.
-    pub place: Option<String>,
-    pub date_from: Option<i64>,
-    pub date_to: Option<i64>,
-    /// Normalized to "photo" | "video"; `None` means no media-type filter.
-    pub media_type: Option<String>,
-}
-
-/// Convert an ISO `YYYY-MM-DD` date to a unix timestamp (seconds). With
-/// `end_of_day`, returns 23:59:59 of that day so a `date_to` filter is
-/// inclusive of the whole day; otherwise 00:00:00. Returns `None` for any
-/// unparseable input (the filter is simply omitted rather than erroring).
-pub fn iso_to_unix(date: &str, end_of_day: bool) -> Option<i64> {
-    let d = chrono::NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()?;
-    let time = if end_of_day {
-        chrono::NaiveTime::from_hms_opt(23, 59, 59)?
-    } else {
-        chrono::NaiveTime::from_hms_opt(0, 0, 0)?
-    };
-    Some(d.and_time(time).and_utc().timestamp())
-}
-
-/// Normalize a free-form media-type string to the engine's vocabulary.
-/// Anything that isn't clearly photo or video (including "all") yields
-/// `None` — no filter.
-fn normalize_media_type(raw: &str) -> Option<String> {
-    match raw.trim().to_lowercase().as_str() {
-        "photo" | "photos" | "image" | "images" | "picture" | "pictures" => {
-            Some("photo".to_string())
-        }
-        "video" | "videos" | "movie" | "movies" | "clip" | "clips" => Some("video".to_string()),
-        _ => None,
-    }
-}
-
-/// Resolve a raw LLM query against the real tag vocabulary, producing the
-/// structured filter. Pure — no network, no LLM — so it carries the
-/// correctness-critical mapping logic under unit test.
-///
-/// `tag_vocab` is `(tag_id, tag_name)` pairs (the shape `TagDao::get_all_tags`
-/// yields once the count is dropped). Matching is case-insensitive and exact
-/// on the trimmed name.
-pub fn resolve_raw_query(raw: RawNlQuery, tag_vocab: &[(i32, String)]) -> StructuredQuery {
-    // Case-insensitive name → id lookup. Built once per call.
-    let lookup: std::collections::HashMap<String, i32> = tag_vocab
-        .iter()
-        .map(|(id, name)| (name.trim().to_lowercase(), *id))
-        .collect();
-
-    let resolve_names = |names: &[String], ids: &mut Vec<i32>, unmatched: &mut Vec<String>| {
-        for name in names {
-            let key = name.trim().to_lowercase();
-            if key.is_empty() {
-                continue;
-            }
-            match lookup.get(&key) {
-                Some(id) if !ids.contains(id) => ids.push(*id),
-                Some(_) => {} // duplicate, already collected
-                None => {
-                    if !unmatched.iter().any(|u| u.eq_ignore_ascii_case(name)) {
-                        unmatched.push(name.trim().to_string());
-                    }
-                }
-            }
-        }
-    };
-
-    let mut tag_ids = Vec::new();
-    let mut unmatched_tags = Vec::new();
-    resolve_names(&raw.tags, &mut tag_ids, &mut unmatched_tags);
-
-    // Excluded tags that don't match a real tag are simply ignored — you
-    // can't exclude a tag that doesn't exist, and folding them into
-    // `semantic` would make no sense.
-    let mut exclude_tag_ids = Vec::new();
-    let mut exclude_unmatched = Vec::new();
-    resolve_names(
-        &raw.exclude_tags,
-        &mut exclude_tag_ids,
-        &mut exclude_unmatched,
-    );
-
-    let clean = |s: Option<String>| s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty());
-
-    StructuredQuery {
-        semantic: clean(raw.semantic),
-        tag_ids,
-        exclude_tag_ids,
-        unmatched_tags,
-        camera_make: clean(raw.camera_make),
-        camera_model: clean(raw.camera_model),
-        lens_model: clean(raw.lens_model),
-        place: clean(raw.place),
-        date_from: raw.date_from.as_deref().and_then(|d| iso_to_unix(d, false)),
-        date_to: raw.date_to.as_deref().and_then(|d| iso_to_unix(d, true)),
-        media_type: raw.media_type.as_deref().and_then(normalize_media_type),
-    }
-}
-
-/// Build the grounded system prompt. The model is told the current date (so
-/// "last summer" resolves) and the exact tag vocabulary (so it uses real
-/// tags or routes the concept to `semantic` instead of inventing one).
-fn build_system_prompt(tag_vocab: &[(i32, String)], today: chrono::NaiveDate) -> String {
-    // Cap the vocab dump so a huge library doesn't blow the context window;
-    // the most-used tags are the ones a query is likely to reference.
-    const MAX_TAGS: usize = 400;
-    let mut names: Vec<&str> = tag_vocab.iter().map(|(_, n)| n.as_str()).collect();
-    names.sort_unstable();
-    names.dedup();
-    let shown = names.len().min(MAX_TAGS);
-    let vocab = names[..shown].join(", ");
-    let truncation = if names.len() > MAX_TAGS {
-        format!(" (showing {MAX_TAGS} of {} tags)", names.len())
-    } else {
-        String::new()
-    };
-
-    format!(
-        "You translate a user's natural-language photo-search request into a JSON \
-filter. Today's date is {today}. Respond with ONLY a JSON object, no prose, no \
-code fences.\n\n\
-Schema (all fields optional):\n\
-{{\n  \
-\"semantic\": string|null,        // visual scene/subject for image similarity search\n  \
-\"tags\": string[],               // ONLY names from the tag list below\n  \
-\"exclude_tags\": string[],       // ONLY names from the tag list below\n  \
-\"camera_make\": string|null,\n  \
-\"camera_model\": string|null,\n  \
-\"lens_model\": string|null,\n  \
-\"place\": string|null,           // a location name to look up (city, country, landmark)\n  \
-\"date_from\": \"YYYY-MM-DD\"|null,  // inclusive\n  \
-\"date_to\": \"YYYY-MM-DD\"|null,    // inclusive\n  \
-\"media_type\": \"photo\"|\"video\"|null\n\
-}}\n\n\
-Rules:\n\
- Put descriptive/visual concepts (\"sunset\", \"crowded beach\", \"red car\") in \"semantic\".\n\
- Only use \"tags\"/\"exclude_tags\" values that appear EXACTLY in the tag list. If a \
-concept isn't a listed tag, put it in \"semantic\" instead — never invent a tag.\n\
- Resolve relative dates against today's date (\"last summer\", \"2023\", \"last month\").\n\
- Put place/location names in \"place\" (not \"semantic\").\n\
- Omit (use null / empty array) anything the request doesn't mention.\n\n\
-Available tags{truncation}: {vocab}"
-    )
-}
-
-/// Extract the JSON object from a model response that may include a leading
-/// `<think>` block, code fences, or trailing prose. Strips the think block
-/// first (so reasoning that mentions braces can't fool the scan), then
-/// returns the substring from the first `{` to the last `}` inclusive — or
-/// the trimmed text if no braces are found (which then fails to parse with a
-/// clear error).
-fn extract_json(raw: &str) -> String {
-    let s = strip_think_blocks(raw);
-    let start = s.find('{');
-    let end = s.rfind('}');
-    match (start, end) {
-        (Some(a), Some(b)) if b >= a => s[a..=b].to_string(),
-        _ => s.trim().to_string(),
-    }
-}
-
-/// Parse a model response string into a [`StructuredQuery`], resolving names
-/// against the vocab. Separated from the LLM call so it's unit-testable.
-pub fn parse_response(response: &str, tag_vocab: &[(i32, String)]) -> Result<StructuredQuery> {
-    let json = extract_json(response);
-    let raw: RawNlQuery = serde_json::from_str(&json)
-        .map_err(|e| anyhow!("failed to parse NL query JSON: {e}; raw response: {response:?}"))?;
-    Ok(resolve_raw_query(raw, tag_vocab))
-}
-
-/// Translate a natural-language query into a [`StructuredQuery`] via one
-/// grounded LLM call. The `client` is any configured backend (the unified
-/// endpoint passes the resolved chat backend); `tag_vocab` grounds the tag
-/// mapping; `today` anchors relative-date resolution.
-pub async fn translate_nl_query(
-    client: &dyn LlmClient,
-    nl: &str,
-    tag_vocab: &[(i32, String)],
-    today: chrono::NaiveDate,
-) -> Result<StructuredQuery> {
-    let system = build_system_prompt(tag_vocab, today);
-    let messages = vec![ChatMessage::system(system), ChatMessage::user(nl)];
-    let (msg, _, _) = client.chat_with_tools(messages, Vec::<Tool>::new()).await?;
-    parse_response(&msg.content, tag_vocab)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn vocab() -> Vec<(i32, String)> {
-        vec![
-            (1, "beach".to_string()),
-            (2, "Sunset".to_string()), // mixed case to exercise case-insensitivity
-            (3, "family".to_string()),
-        ]
-    }
-
-    #[test]
-    fn iso_to_unix_start_and_end_of_day() {
-        // 2023-01-01 UTC midnight = 1672531200.
-        assert_eq!(iso_to_unix("2023-01-01", false), Some(1_672_531_200));
-        // End of that day is 86399 seconds later.
-        assert_eq!(
-            iso_to_unix("2023-01-01", true),
-            Some(1_672_531_200 + 86_399)
-        );
-    }
-
-    #[test]
-    fn iso_to_unix_rejects_garbage() {
-        assert_eq!(iso_to_unix("last summer", false), None);
-        assert_eq!(iso_to_unix("2023-13-99", false), None);
-        assert_eq!(iso_to_unix("", false), None);
-    }
-
-    #[test]
-    fn resolve_matches_tags_case_insensitively() {
-        let raw = RawNlQuery {
-            tags: vec!["BEACH".to_string(), "sunset".to_string()],
-            ..Default::default()
-        };
-        let q = resolve_raw_query(raw, &vocab());
-        assert_eq!(q.tag_ids, vec![1, 2]);
-        assert!(q.unmatched_tags.is_empty());
-    }
-
-    #[test]
-    fn resolve_surfaces_unmatched_tags_not_silently_dropped() {
-        // A hallucinated / non-vocab tag must be surfaced so the caller can
-        // fold it into semantic — never silently used as a hard filter.
-        let raw = RawNlQuery {
-            tags: vec!["beach".to_string(), "golden hour".to_string()],
-            ..Default::default()
-        };
-        let q = resolve_raw_query(raw, &vocab());
-        assert_eq!(q.tag_ids, vec![1]);
-        assert_eq!(q.unmatched_tags, vec!["golden hour".to_string()]);
-    }
-
-    #[test]
-    fn resolve_dedups_repeated_tags() {
-        let raw = RawNlQuery {
-            tags: vec![
-                "beach".to_string(),
-                "Beach".to_string(),
-                "beach".to_string(),
-            ],
-            ..Default::default()
-        };
-        let q = resolve_raw_query(raw, &vocab());
-        assert_eq!(q.tag_ids, vec![1]);
-    }
-
-    #[test]
-    fn resolve_normalizes_media_type_and_dates() {
-        let raw = RawNlQuery {
-            media_type: Some("Videos".to_string()),
-            date_from: Some("2023-06-01".to_string()),
-            date_to: Some("2023-06-30".to_string()),
-            ..Default::default()
-        };
-        let q = resolve_raw_query(raw, &vocab());
-        assert_eq!(q.media_type.as_deref(), Some("video"));
-        assert_eq!(q.date_from, iso_to_unix("2023-06-01", false));
-        assert_eq!(q.date_to, iso_to_unix("2023-06-30", true));
-    }
-
-    #[test]
-    fn resolve_media_type_all_is_no_filter() {
-        let raw = RawNlQuery {
-            media_type: Some("all".to_string()),
-            ..Default::default()
-        };
-        assert_eq!(resolve_raw_query(raw, &vocab()).media_type, None);
-    }
-
-    #[test]
-    fn resolve_trims_and_empties_to_none() {
-        let raw = RawNlQuery {
-            semantic: Some("   ".to_string()),
-            camera_make: Some("  Fujifilm  ".to_string()),
-            place: Some("".to_string()),
-            ..Default::default()
-        };
-        let q = resolve_raw_query(raw, &vocab());
-        assert_eq!(q.semantic, None);
-        assert_eq!(q.camera_make.as_deref(), Some("Fujifilm"));
-        assert_eq!(q.place, None);
-    }
-
-    #[test]
-    fn parse_response_handles_code_fences_and_prose() {
-        let resp = "Here is the filter:\n```json\n{\"semantic\":\"sunset\",\"tags\":[\"beach\"]}\n```\nDone.";
-        let q = parse_response(resp, &vocab()).expect("parse");
-        assert_eq!(q.semantic.as_deref(), Some("sunset"));
-        assert_eq!(q.tag_ids, vec![1]);
-    }
-
-    #[test]
-    fn parse_response_handles_think_block_then_json() {
-        let resp = "<think>user wants beach sunsets</think>{\"tags\":[\"beach\",\"sunset\"]}";
-        let q = parse_response(resp, &vocab()).expect("parse");
-        assert_eq!(q.tag_ids, vec![1, 2]);
-    }
-
-    #[test]
-    fn parse_response_errors_on_non_json() {
-        assert!(parse_response("I cannot help with that.", &vocab()).is_err());
-    }
-
-    #[test]
-    fn build_system_prompt_includes_date_and_vocab() {
-        let today = chrono::NaiveDate::from_ymd_opt(2026, 6, 14).unwrap();
-        let prompt = build_system_prompt(&vocab(), today);
-        assert!(
-            prompt.contains("2026-06-14"),
-            "prompt should state today's date"
-        );
-        assert!(prompt.contains("beach"), "prompt should list the vocab");
-        assert!(
-            prompt.contains("never invent a tag"),
-            "prompt should warn against inventing tags"
-        );
-    }
-}
@@ -360,7 +360,18 @@ impl OllamaClient {
    /// Extract final answer from thinking model output
    /// Handles <think>...</think> tags and takes everything after
    fn extract_final_answer(&self, response: &str) -> String {
-        crate::ai::llm_client::strip_think_blocks(response)
+        let response = response.trim();
+
+        // Look for </think> tag and take everything after it
+        if let Some(pos) = response.find("</think>") {
+            let answer = response[pos + 8..].trim();
+            if !answer.is_empty() {
+                return answer.to_string();
+            }
+        }
+
+        // Fallback: return the whole response trimmed
+        response.to_string()
    }

    async fn try_generate(
@@ -413,7 +424,10 @@ impl OllamaClient {
        self.generate_with_images(prompt, system, None).await
    }

-    #[allow(dead_code)]
+    /// Variant of `generate` that sets Ollama's top-level `think: false`.
+    /// Used by latency-sensitive callers like the rerank pass, where the
+    /// task has nothing to reason about and chain-of-thought tokens are
+    /// wasted wall time. Server-side no-op on non-reasoning models.
    pub async fn generate_no_think(&self, prompt: &str, system: Option<&str>) -> Result<String> {
        self.generate_with_options(prompt, system, None, Some(false))
            .await
@@ -548,16 +562,7 @@ Capture the key moment or theme. Return ONLY the title, nothing else."#,
        let title = self
            .generate_with_images(&prompt, Some(system), None)
            .await?;
-        // Models decorate despite "Return ONLY the title": quotes, bold
-        // markers, sometimes a "Title:" label.
-        use crate::ai::insight_generator::strip_title_markdown;
-        let cleaned = strip_title_markdown(title.trim());
-        let cleaned = cleaned
-            .strip_prefix("Title:")
-            .or_else(|| cleaned.strip_prefix("title:"))
-            .map(strip_title_markdown)
-            .unwrap_or(cleaned);
-        Ok(cleaned.to_string())
+        Ok(title.trim().trim_matches('"').to_string())
    }

    /// Generate a summary for a single photo based on its context
@@ -844,14 +849,11 @@ Analyze the image and use specific details from both the visual content and the
                            if !chunk.message.role.is_empty() {
                                role = chunk.message.role;
                            }
-                            // Ollama ≥0.8 can stream tool_calls incrementally
-                            // across chunks (older servers attach them all to
-                            // one chunk) — append rather than overwrite so
-                            // calls from earlier chunks survive.
+                            // Ollama only attaches tool_calls on the final chunk.
                            if let Some(tcs) = chunk.message.tool_calls
                                && !tcs.is_empty()
                            {
-                                append_streamed_tool_calls(&mut tool_calls, tcs);
+                                tool_calls = Some(tcs);
                            }
                            if chunk.done {
                                prompt_eval_count = chunk.prompt_eval_count;
@@ -1055,14 +1057,13 @@ Analyze the image and use specific details from both the visual content and the
            }
        };

-        // Validate embedding dimensions (EMBEDDING_DIM; 768 for nomic-embed-text:v1.5)
+        // Validate embedding dimensions (should be 768 for nomic-embed-text:v1.5)
        for (i, embedding) in embeddings.iter().enumerate() {
-            if embedding.len() != crate::ai::embedding_dim() {
+            if embedding.len() != 768 {
                log::warn!(
-                    "Unexpected embedding dimensions for item {}: {} (expected {})",
+                    "Unexpected embedding dimensions for item {}: {} (expected 768)",
                    i,
-                    embedding.len(),
-                    crate::ai::embedding_dim()
+                    embedding.len()
                );
            }
        }
@@ -1331,20 +1332,8 @@ struct OllamaEmbedResponse {
    embeddings: Vec<Vec<f32>>,
 }

-/// Accumulate tool calls streamed across NDJSON chunks. Ollama ≥0.8 may
-/// emit each tool call on its own chunk; replacing the accumulator on every
-/// chunk would keep only the last call, so extend instead.
-fn append_streamed_tool_calls(
-    acc: &mut Option<Vec<crate::ai::llm_client::ToolCall>>,
-    new: Vec<crate::ai::llm_client::ToolCall>,
-) {
-    acc.get_or_insert_with(Vec::new).extend(new);
-}
-
 #[cfg(test)]
 mod tests {
-    use super::append_streamed_tool_calls;
-    use crate::ai::llm_client::{ToolCall, ToolCallFunction};

    #[test]
    fn generate_photo_description_prompt_is_concise() {
@@ -1355,38 +1344,4 @@ mod tests {
                      Focus on the people, location, and activity.";
        assert!(prompt.len() < 200, "Prompt should be concise");
    }
-
-    fn call(name: &str) -> ToolCall {
-        ToolCall {
-            id: None,
-            function: ToolCallFunction {
-                name: name.to_string(),
-                arguments: serde_json::json!({}),
-            },
-        }
-    }
-
-    #[test]
-    fn streamed_tool_calls_across_chunks_accumulate() {
-        // Two tool calls arriving in two separate stream chunks must BOTH
-        // survive assembly — the old `tool_calls = Some(tcs)` kept only the
-        // last chunk's calls.
-        let mut acc: Option<Vec<ToolCall>> = None;
-        append_streamed_tool_calls(&mut acc, vec![call("get_sms_messages")]);
-        append_streamed_tool_calls(&mut acc, vec![call("reverse_geocode")]);
-
-        let calls = acc.expect("tool calls accumulated");
-        assert_eq!(calls.len(), 2);
-        assert_eq!(calls[0].function.name, "get_sms_messages");
-        assert_eq!(calls[1].function.name, "reverse_geocode");
-    }
-
-    #[test]
-    fn streamed_tool_calls_single_chunk_batch_kept_intact() {
-        // Older Ollama servers attach all calls to one chunk — unchanged.
-        let mut acc: Option<Vec<ToolCall>> = None;
-        append_streamed_tool_calls(&mut acc, vec![call("a"), call("b")]);
-        let calls = acc.expect("tool calls accumulated");
-        assert_eq!(calls.len(), 2);
-    }
 }
@@ -1,282 +0,0 @@
-// User-configurable pronunciation overrides for TTS. Chatterbox mispronounces
-// place names ("Worcester"), initialisms ("WSL"), and clipped abbreviations
-// ("blvd"), so we rewrite them to phonetic spellings before synthesis.
-//
-// The map lives in a JSON file on the server — a flat object of
-// `"written form": "spoken form"` pairs, e.g.:
-//
-//   {
-//     "Worcester": "Wuster",
-//     "WSL": "W S L",
-//     "blvd": "boulevard",
-//     "Dr.": "Doctor"
-//   }
-//
-// Path comes from `TTS_PRONUNCIATIONS_PATH` (default `tts_pronunciations.json`
-// in the working directory). A missing file simply disables the feature. The
-// file is re-read whenever its mtime changes, so edits apply to the next
-// synthesis without a restart; a malformed edit keeps the last good map and
-// logs the parse error instead of silently dropping all overrides.
-//
-// Matching rules:
-// - Whole words only — `cat` never rewrites `category`. (Boundaries are only
-//   asserted next to word characters, so keys like `Dr.` still work.)
-// - Smartcase: an all-lowercase key matches case-insensitively; a key with
-//   any uppercase matches exactly. That lets `worcester` catch every casing
-//   while `US` (the country) leaves the pronoun `us` alone.
-// - Longer keys win over shorter ones (`New York Times` before `New York`).
-
-use regex::Regex;
-use std::collections::HashMap;
-use std::path::Path;
-use std::sync::{Arc, LazyLock, Mutex as StdMutex};
-use std::time::SystemTime;
-
-/// A compiled pronunciation map: one alternation regex over every key plus
-/// the lookup tables the replacement closure resolves matches against.
-#[derive(Default)]
-struct CompiledMap {
-    /// `None` when the map is empty — apply() is then a no-op.
-    regex: Option<Regex>,
-    /// Case-sensitive entries, keyed verbatim.
-    exact: HashMap<String, String>,
-    /// Case-insensitive entries, keyed lowercased.
-    folded: HashMap<String, String>,
-}
-
-impl CompiledMap {
-    fn from_entries(entries: &HashMap<String, String>) -> Self {
-        let mut keys: Vec<&str> = entries
-            .keys()
-            .map(|k| k.as_str())
-            .filter(|k| !k.trim().is_empty())
-            .collect();
-        if keys.is_empty() {
-            return Self::default();
-        }
-        // Longest key first so overlapping entries prefer the more specific
-        // one (regex alternation is first-match-wins, not longest-match).
-        keys.sort_by(|a, b| b.len().cmp(&a.len()).then(a.cmp(b)));
-
-        let mut exact = HashMap::new();
-        let mut folded = HashMap::new();
-        let alternatives: Vec<String> = keys
-            .iter()
-            .map(|key| {
-                let escaped = regex::escape(key);
-                // Only assert a word boundary where the key edge is a word
-                // character — `\b` adjacent to punctuation (e.g. the dot in
-                // `Dr.`) would otherwise never match.
-                let lead = if key
-                    .chars()
-                    .next()
-                    .is_some_and(|c| c.is_alphanumeric() || c == '_')
-                {
-                    r"\b"
-                } else {
-                    ""
-                };
-                let trail = if key
-                    .chars()
-                    .last()
-                    .is_some_and(|c| c.is_alphanumeric() || c == '_')
-                {
-                    r"\b"
-                } else {
-                    ""
-                };
-                let case_sensitive = key.chars().any(|c| c.is_uppercase());
-                if case_sensitive {
-                    exact.insert(key.to_string(), entries[*key].clone());
-                    format!("{lead}{escaped}{trail}")
-                } else {
-                    folded.insert(key.to_lowercase(), entries[*key].clone());
-                    format!("{lead}(?i:{escaped}){trail}")
-                }
-            })
-            .collect();
-
-        // Escaped fixed strings can't produce an invalid pattern; if one ever
-        // does, treat the whole map as empty rather than panicking a handler.
-        let pattern = alternatives.join("|");
-        let regex = match Regex::new(&pattern) {
-            Ok(r) => Some(r),
-            Err(e) => {
-                log::error!("pronunciation map failed to compile: {e}");
-                None
-            }
-        };
-        Self {
-            regex,
-            exact,
-            folded,
-        }
-    }
-
-    fn apply(&self, text: &str) -> String {
-        let Some(re) = &self.regex else {
-            return text.to_string();
-        };
-        re.replace_all(text, |caps: &regex::Captures| {
-            let m = &caps[0];
-            self.exact
-                .get(m)
-                .or_else(|| self.folded.get(&m.to_lowercase()))
-                .cloned()
-                // Unreachable in practice — every alternative came from one
-                // of the two maps — but never drop the user's text.
-                .unwrap_or_else(|| m.to_string())
-        })
-        .into_owned()
-    }
-}
-
-struct CacheEntry {
-    mtime: Option<SystemTime>,
-    compiled: Arc<CompiledMap>,
-}
-
-static CACHE: LazyLock<StdMutex<Option<CacheEntry>>> = LazyLock::new(|| StdMutex::new(None));
-
-fn config_path() -> String {
-    std::env::var("TTS_PRONUNCIATIONS_PATH")
-        .ok()
-        .map(|s| s.trim().to_string())
-        .filter(|s| !s.is_empty())
-        .unwrap_or_else(|| "tts_pronunciations.json".to_string())
-}
-
-/// Load the compiled map, re-reading the file only when its mtime changed
-/// since the last call (or it appeared/disappeared). Synthesis is serialized
-/// on a single GPU permit, so a stat per call is noise.
-fn current_map() -> Arc<CompiledMap> {
-    let path_s = config_path();
-    let path = Path::new(&path_s);
-    let mtime = std::fs::metadata(path).and_then(|m| m.modified()).ok();
-
-    let mut cache = CACHE.lock().unwrap();
-    if let Some(entry) = cache.as_ref()
-        && entry.mtime == mtime
-    {
-        return entry.compiled.clone();
-    }
-
-    let compiled = match mtime {
-        None => Arc::new(CompiledMap::default()), // no file → no overrides
-        Some(_) => match std::fs::read_to_string(path)
-            .map_err(anyhow::Error::from)
-            .and_then(|s| Ok(serde_json::from_str::<HashMap<String, String>>(&s)?))
-        {
-            Ok(entries) => {
-                log::info!(
-                    "loaded {} pronunciation override(s) from {path_s}",
-                    entries.len()
-                );
-                Arc::new(CompiledMap::from_entries(&entries))
-            }
-            Err(e) => {
-                log::error!("failed to load pronunciation map {path_s}: {e}");
-                // Keep serving the previous map rather than regressing to
-                // none mid-edit; still record the new mtime so the error
-                // logs once per bad save, not once per synthesis.
-                cache
-                    .as_ref()
-                    .map(|c| c.compiled.clone())
-                    .unwrap_or_default()
-            }
-        },
-    };
-    *cache = Some(CacheEntry {
-        mtime,
-        compiled: compiled.clone(),
-    });
-    compiled
-}
-
-/// Rewrite configured words/abbreviations to their phonetic spellings.
-/// Call on cleaned (post-markdown-strip) text, right before synthesis.
-pub fn apply_pronunciations(text: &str) -> String {
-    current_map().apply(text)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn compile(pairs: &[(&str, &str)]) -> CompiledMap {
-        let entries = pairs
-            .iter()
-            .map(|(k, v)| (k.to_string(), v.to_string()))
-            .collect();
-        CompiledMap::from_entries(&entries)
-    }
-
-    #[test]
-    fn empty_map_is_a_noop() {
-        let m = compile(&[]);
-        assert_eq!(m.apply("nothing changes"), "nothing changes");
-    }
-
-    #[test]
-    fn replaces_whole_words_only() {
-        let m = compile(&[("cat", "kitty")]);
-        assert_eq!(m.apply("the cat sat"), "the kitty sat");
-        // No substring rewrites.
-        assert_eq!(m.apply("the category"), "the category");
-        assert_eq!(m.apply("concatenate"), "concatenate");
-    }
-
-    #[test]
-    fn lowercase_keys_match_any_casing() {
-        let m = compile(&[("worcester", "Wuster")]);
-        assert_eq!(m.apply("Worcester is nice"), "Wuster is nice");
-        assert_eq!(m.apply("in WORCESTER today"), "in Wuster today");
-        assert_eq!(m.apply("worcester sauce"), "Wuster sauce");
-    }
-
-    #[test]
-    fn uppercase_keys_match_case_sensitively() {
-        let m = compile(&[("US", "U S")]);
-        assert_eq!(m.apply("the US economy"), "the U S economy");
-        // The pronoun survives.
-        assert_eq!(m.apply("join us today"), "join us today");
-    }
-
-    #[test]
-    fn keys_with_punctuation_work() {
-        // `\b` is only asserted next to word characters, so the trailing dot
-        // doesn't break matching.
-        let m = compile(&[("Dr.", "Doctor"), ("blvd", "boulevard")]);
-        assert_eq!(
-            m.apply("Dr. Smith on Sunset blvd"),
-            "Doctor Smith on Sunset boulevard"
-        );
-    }
-
-    #[test]
-    fn longer_keys_win_over_shorter() {
-        let m = compile(&[("new york", "Noo York"), ("new york times", "the Times")]);
-        assert_eq!(m.apply("read the new york times"), "read the the Times");
-        assert_eq!(m.apply("visit new york soon"), "visit Noo York soon");
-    }
-
-    #[test]
-    fn multiple_occurrences_all_rewrite() {
-        let m = compile(&[("wsl", "W S L")]);
-        assert_eq!(m.apply("WSL and wsl and Wsl"), "W S L and W S L and W S L");
-    }
-
-    #[test]
-    fn replacement_text_is_verbatim() {
-        // Replacements aren't re-scanned — a value containing another key
-        // doesn't cascade.
-        let m = compile(&[("a1", "b2"), ("b2", "c3")]);
-        assert_eq!(m.apply("a1"), "b2");
-    }
-
-    #[test]
-    fn blank_keys_are_ignored() {
-        let m = compile(&[("", "x"), ("  ", "y"), ("ok", "fine")]);
-        assert_eq!(m.apply("ok then"), "fine then");
-    }
-}
@@ -281,9 +281,6 @@ impl SmsApiClient {
        if let Some(cid) = params.contact_id {
            url.push_str(&format!("&contact_id={}", cid));
        }
-        if let Some(ref c) = params.contact {
-            url.push_str(&format!("&contact={}", urlencoding::encode(c)));
-        }
        if let Some(off) = params.offset {
            url.push_str(&format!("&offset={}", off));
        }
@@ -416,9 +413,6 @@ pub struct SmsSearchParams<'a> {
    pub mode: &'a str,
    pub limit: usize,
    pub contact_id: Option<i64>,
-    /// Contact name (case-insensitive). Resolved to a numeric ID by the
-    /// SMS-API server when `contact_id` is not set.
-    pub contact: Option<String>,
    /// Unix-seconds inclusive lower bound on `date`.
    pub date_from: Option<i64>,
    /// Unix-seconds inclusive upper bound on `date`.
@@ -0,0 +1,319 @@
+//! Thin async HTTP client for Apollo's `/api/internal/tags/*` endpoints.
+//!
+//! Apollo hosts the RAM++ auto-tag inference service alongside insightface.
+//! This client is the ImageApi side — shove image bytes through `/auto` and
+//! get back a list of `(name, confidence)` predictions over RAM++'s
+//! ~4585-tag vocabulary.
+//!
+//! Mirrors `face_client.rs` shape: optional base URL (None = disabled), one
+//! reqwest client with a generous timeout because GPU inference under a
+//! backlog can queue server-side (Apollo's threadpool is bounded to 1
+//! worker on CUDA).
+//!
+//! Configured via `APOLLO_TAG_API_BASE_URL`, falling back to
+//! `APOLLO_API_BASE_URL` when the dedicated var is unset (single-Apollo
+//! deploys are the common case). Both unset → `is_enabled()` returns false
+//! and the probe binary / future backlog drain no-op.
+//!
+//! Wire format: multipart/form-data with `file=<bytes>` and `meta=<json>`.
+//! `meta` carries `{content_hash, library_id, rel_path, threshold?}` —
+//! Apollo logs the path/lib for traceability and reads `threshold` to
+//! override the engine default for that call (the probe binary uses this
+//! to sweep without restarting Apollo).
+//!
+//! Error mapping (reflected in [`TagDetectError`]):
+//! - 422 `decode_failed` → permanent: ImageApi marks `status='failed'` and
+//!   doesn't retry until a manual rerun.
+//! - 200 with `tags:[]` → `status='no_tags'` marker (success-with-zero).
+//! - 503 `cuda_oom` / `engine_unavailable` → defer-and-retry: no marker
+//!   written.
+//! - Any other 5xx / network error → defer.
+
+use anyhow::{Context, Result};
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+/// Request metadata sent with every tagging call. Serialized to JSON and
+/// attached as the `meta` text field of the multipart form.
+#[derive(Debug, Clone, Serialize)]
+pub struct TagMeta {
+    pub content_hash: String,
+    pub library_id: i32,
+    /// Also reused as the file name of the uploaded `file` part.
+    pub rel_path: String,
+    /// Per-call threshold override. Apollo's engine default (0.68 for
+    /// ram_plus_swin_large_14m) is used when unset. The probe binary
+    /// uses this to sweep without restarting Apollo.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub threshold: Option<f32>,
+}
+
+/// One entry of the `tags` array in a [`TagResponse`].
+#[derive(Debug, Clone, Deserialize)]
+pub struct TagPrediction {
+    /// Tag label. NOTE(review): presumably drawn from the RAM++ vocabulary — confirm.
+    pub name: String,
+    /// Score Apollo reported for this tag. NOTE(review): range assumed 0.0–1.0 — confirm.
+    pub confidence: f32,
+}
+
+/// JSON body deserialized from a successful (2xx) reply to
+/// `/api/internal/tags/auto`.
+#[derive(Debug, Clone, Deserialize)]
+pub struct TagResponse {
+    pub model_version: String,
+    pub duration_ms: i64,
+    /// NOTE(review): presumably the threshold Apollo actually applied to this
+    /// call (override or engine default) — confirm against the server.
+    pub threshold: f32,
+    /// Predicted tags. An empty list is the no-tags signal, not an error.
+    pub tags: Vec<TagPrediction>,
+}
+
+/// JSON body deserialized from `/api/internal/tags/health`.
+#[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)] // Reported by Apollo; load_error consumed by future health probe
+pub struct TagHealth {
+    pub loaded: bool,
+    pub device: String,
+    pub model_version: String,
+    pub image_size: i32,
+    pub threshold: f32,
+    /// A payload without this key deserializes to `None`.
+    #[serde(default)]
+    pub load_error: Option<String>,
+}
+
+/// Distinguishes permanent failures (don't retry) from transient ones
+/// (defer and retry on next scan tick). Mirrors `FaceDetectError` so the
+/// future backlog drain can use the same marker-row decision tree.
+///
+/// The wrapped `anyhow::Error` carries the human-readable detail; `Display`
+/// renders it after a `permanent: ` / `transient: ` prefix.
+#[derive(Debug)]
+pub enum TagDetectError {
+    /// Apollo refused the bytes for a reason that won't change on retry
+    /// (decode failure, zero-dim image). Mark `status='failed'`.
+    Permanent(anyhow::Error),
+    /// Apollo couldn't process this turn but might next time (CUDA OOM,
+    /// engine not loaded yet, network hiccup). Don't mark anything.
+    Transient(anyhow::Error),
+    /// Feature is disabled (no APOLLO_TAG_API_BASE_URL / APOLLO_API_BASE_URL).
+    /// Caller should silently no-op.
+    Disabled,
+}
+
+/// Renders `permanent: <cause>` / `transient: <cause>` for the two wrapping
+/// variants and a fixed message for `Disabled`.
+impl std::fmt::Display for TagDetectError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let (label, cause) = match self {
+            Self::Permanent(cause) => ("permanent", cause),
+            Self::Transient(cause) => ("transient", cause),
+            // No wrapped cause to print for the disabled case.
+            Self::Disabled => return f.write_str("tag client disabled"),
+        };
+        write!(f, "{label}: {cause}")
+    }
+}
+
+// Empty impl: relies on the trait's default methods, so `source()` is not
+// overridden and the wrapped `anyhow::Error` is only surfaced via `Display`.
+impl std::error::Error for TagDetectError {}
+
+/// Handle for Apollo's tag endpoints: one reqwest client plus the optional
+/// base URL that decides whether the feature is enabled.
+#[derive(Clone)]
+pub struct TagClient {
+    /// HTTP client built once in `new` with the configured request timeout.
+    client: Client,
+    /// `None` → disabled. Trailing slash trimmed at construction so url
+    /// building doesn't double up.
+    base_url: Option<String>,
+}
+
+impl TagClient {
+    /// Build a client for the given Apollo base URL.
+    ///
+    /// `base_url` is normalized before it is stored: surrounding whitespace
+    /// and trailing slashes are stripped, and a value that is empty after
+    /// normalization is treated as `None` (client disabled). Without this, a
+    /// whitespace-padded or empty value would leave the client "enabled"
+    /// with a base that cannot form a valid request URL, so every call would
+    /// come back as a transient error and be retried indefinitely.
+    ///
+    /// The request timeout defaults to 60 s and can be overridden with
+    /// `TAG_DETECT_TIMEOUT_SEC`. Unparsable or zero values fall back to the
+    /// default.
+    ///
+    /// # Panics
+    /// Panics if the underlying reqwest client cannot be constructed.
+    pub fn new(base_url: Option<String>) -> Self {
+        // 60 s timeout: GPU inference is fast (~50–150 ms on RTX-class
+        // hardware) but Apollo's 1-worker threadpool means a backlog drain
+        // queues server-side. 60 s is enough headroom for a small queue
+        // depth without surfacing a false transient.
+        let timeout_secs = std::env::var("TAG_DETECT_TIMEOUT_SEC")
+            .ok()
+            .and_then(|s| s.trim().parse::<u64>().ok())
+            // A zero-length timeout would expire before any response can
+            // arrive, so treat it like an unset value.
+            .filter(|&secs| secs > 0)
+            .unwrap_or(60);
+        let client = Client::builder()
+            .timeout(Duration::from_secs(timeout_secs))
+            .build()
+            .expect("reqwest client build");
+        let base_url = base_url
+            .map(|u| u.trim().trim_end_matches('/').to_string())
+            .filter(|u| !u.is_empty());
+        Self { client, base_url }
+    }
+
+    /// Build a client from the environment: `APOLLO_TAG_API_BASE_URL` takes
+    /// precedence, `APOLLO_API_BASE_URL` is the fallback, and a variable that
+    /// is unset or blank counts as absent. Neither set → disabled client.
+    pub fn from_env() -> Self {
+        // Read one variable, discarding values that are only whitespace.
+        let non_blank = |key: &str| match std::env::var(key) {
+            Ok(value) if !value.trim().is_empty() => Some(value),
+            _ => None,
+        };
+        let base =
+            non_blank("APOLLO_TAG_API_BASE_URL").or_else(|| non_blank("APOLLO_API_BASE_URL"));
+        Self::new(base)
+    }
+
+    /// `true` when a base URL is configured, i.e. `auto_tag` will send a
+    /// request instead of returning `TagDetectError::Disabled`.
+    pub fn is_enabled(&self) -> bool {
+        self.base_url.is_some()
+    }
+
+    /// Send `bytes` to Apollo's `/api/internal/tags/auto` endpoint together
+    /// with `meta`. A response with an empty `tags` list is the no-tags
+    /// signal — the caller writes a marker row in the persistence phase.
+    ///
+    /// # Errors
+    /// `TagDetectError::Disabled` when no base URL is configured; otherwise
+    /// whatever `post_multipart` reports.
+    pub async fn auto_tag(
+        &self,
+        bytes: Vec<u8>,
+        meta: TagMeta,
+    ) -> std::result::Result<TagResponse, TagDetectError> {
+        let base = self
+            .base_url
+            .as_deref()
+            .ok_or(TagDetectError::Disabled)?;
+        let endpoint = format!("{base}/api/internal/tags/auto");
+        self.post_multipart(&endpoint, bytes, &meta).await
+    }
+
+    /// Fetch `/api/internal/tags/health` and decode it into [`TagHealth`].
+    ///
+    /// # Errors
+    /// Fails when the client is disabled, the request cannot be sent, the
+    /// status is not a success, or the body does not decode.
+    #[allow(dead_code)] // consumed by future startup probe
+    pub async fn health(&self) -> Result<TagHealth> {
+        let base = self.base_url.as_deref().context("tag client disabled")?;
+        let endpoint = format!("{base}/api/internal/tags/health");
+        let response = self
+            .client
+            .get(&endpoint)
+            .send()
+            .await?
+            .error_for_status()?;
+        Ok(response.json::<TagHealth>().await?)
+    }
+
+    /// POST `bytes` and the JSON-encoded `meta` to `url` as
+    /// multipart/form-data (`meta` text field + `file` part named after
+    /// `meta.rel_path`) and map the outcome onto the retry contract.
+    ///
+    /// # Errors
+    /// - `Permanent` when `meta` cannot be serialized, or Apollo answers with
+    ///   a status that `classify_error_response` treats as permanent.
+    /// - `Transient` when the file part cannot be built, the request fails
+    ///   to send, a 2xx body does not decode, or the status is classified as
+    ///   transient.
+    async fn post_multipart(
+        &self,
+        url: &str,
+        bytes: Vec<u8>,
+        meta: &TagMeta,
+    ) -> std::result::Result<TagResponse, TagDetectError> {
+        let meta_json = serde_json::to_string(meta)
+            .map_err(|e| TagDetectError::Permanent(anyhow::anyhow!("meta serialize: {e}")))?;
+
+        // `mime_str` consumes the part, so a "fallback" part can only be an
+        // empty one: the image bytes and file name are already gone. Sending
+        // that would have Apollo judge an empty payload, so fail the call
+        // instead. Transient because nothing about the image itself was
+        // rejected, hence no `failed` marker should be written for it.
+        let file_part = reqwest::multipart::Part::bytes(bytes)
+            .file_name(meta.rel_path.clone())
+            .mime_str("application/octet-stream")
+            .map_err(|e| {
+                TagDetectError::Transient(anyhow::anyhow!("tag client file part: {e}"))
+            })?;
+        let form = reqwest::multipart::Form::new()
+            .text("meta", meta_json)
+            .part("file", file_part);
+
+        // Every send failure is deferred; the two arms only differ in the
+        // message so logs can tell timeouts/connect errors from the rest.
+        let resp = match self.client.post(url).multipart(form).send().await {
+            Ok(r) => r,
+            Err(e) if e.is_timeout() || e.is_connect() => {
+                return Err(TagDetectError::Transient(anyhow::anyhow!(
+                    "tag client network: {e}"
+                )));
+            }
+            Err(e) => {
+                return Err(TagDetectError::Transient(anyhow::anyhow!(
+                    "tag client request: {e}"
+                )));
+            }
+        };
+
+        let status = resp.status();
+        if status.is_success() {
+            let body: TagResponse = resp.json().await.map_err(|e| {
+                TagDetectError::Transient(anyhow::anyhow!("tag response decode: {e}"))
+            })?;
+            return Ok(body);
+        }
+
+        // Best effort: an unreadable error body is classified as if empty.
+        let body_text = resp.text().await.unwrap_or_default();
+        Err(classify_error_response(status.as_u16(), &body_text))
+    }
+}
+
+/// Map a non-2xx Apollo reply onto the permanent/transient retry contract.
+///
+/// Kept free of I/O so the marker-row contract can be unit-tested without an
+/// HTTP server; intended to follow the same policy as face_client's
+/// classifier so the future backlog drain can share it. The rules:
+/// - 408 / 413 / 429 / 503 → transient
+/// - every other 4xx (422 included) → permanent
+/// - anything else → transient
+///
+/// The message always reads `tag detect <status> <detail>: <body>`.
+fn classify_error_response(status: u16, body_text: &str) -> TagDetectError {
+    // `detail` is either a plain string or an object carrying a `code`
+    // string; anything else (including a non-JSON body) yields "".
+    let detail_code = serde_json::from_str::<serde_json::Value>(body_text)
+        .ok()
+        .and_then(|json| {
+            let detail = json.get("detail")?;
+            detail
+                .as_str()
+                .or_else(|| detail.get("code").and_then(|code| code.as_str()))
+                .map(str::to_string)
+        })
+        .unwrap_or_default();
+
+    let cause = anyhow::anyhow!("tag detect {} {}: {}", status, detail_code, body_text);
+
+    // 408 / 413 / 429 are operator-fixable infra issues — defer so the
+    // next pass retries naturally once the proxy is fixed (see
+    // face_client::classify_error_response for the cautionary tale).
+    let retry_later = matches!(status, 408 | 413 | 429 | 503) || !(400..500).contains(&status);
+    if retry_later {
+        TagDetectError::Transient(cause)
+    } else {
+        TagDetectError::Permanent(cause)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Classify `(status, body)`, assert the verdict is permanent, and hand
+    /// back the rendered message for further checks.
+    fn permanent_message(status: u16, body: &str) -> String {
+        let verdict = classify_error_response(status, body);
+        assert!(matches!(verdict, TagDetectError::Permanent(_)));
+        verdict.to_string()
+    }
+
+    /// Same as `permanent_message`, but for the transient verdict.
+    fn transient_message(status: u16, body: &str) -> String {
+        let verdict = classify_error_response(status, body);
+        assert!(matches!(verdict, TagDetectError::Transient(_)));
+        verdict.to_string()
+    }
+
+    #[test]
+    fn classify_422_decode_failed_is_permanent() {
+        // String-shaped `detail` must show up in the message.
+        let rendered = permanent_message(422, r#"{"detail":"decode_failed: bad bytes"}"#);
+        assert!(rendered.contains("decode_failed"));
+    }
+
+    #[test]
+    fn classify_503_cuda_oom_is_transient() {
+        // Object-shaped `detail` contributes its `code`.
+        let rendered = transient_message(
+            503,
+            r#"{"detail":{"code":"cuda_oom","error":"out of memory"}}"#,
+        );
+        assert!(rendered.contains("cuda_oom"));
+    }
+
+    #[test]
+    fn classify_5xx_is_transient_other_4xx_is_permanent() {
+        transient_message(500, "");
+        permanent_message(400, "{}");
+        permanent_message(404, "{}");
+    }
+
+    #[test]
+    fn classify_infra_4xx_is_transient() {
+        for (status, body) in [(408, ""), (413, "<html>"), (429, "{}")] {
+            transient_message(status, body);
+        }
+    }
+}
@@ -1,748 +0,0 @@
-use crate::ai::insight_chat::ChatStreamEvent;
-use std::collections::HashMap;
-use std::sync::Arc;
-use std::sync::Mutex as StdMutex;
-use std::sync::atomic::{AtomicU32, Ordering};
-use std::time::Instant;
-use tokio::sync::{Mutex, Notify};
-use tokio::task::AbortHandle;
-
-/// Maximum number of events buffered per turn. Agentic turns typically
-/// produce ~120 events; 500 provides 4× headroom. When exceeded, oldest
-/// events are evicted from the front.
-const MAX_BUFFERED_EVENTS: usize = 500;
-
-/// Turn status codes used by `TurnEntry::status`.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum TurnStatus {
-    Running = 0,
-    Done = 1,
-    Error = 2,
-    Cancelled = 3,
-}
-
-impl From<u32> for TurnStatus {
-    fn from(v: u32) -> Self {
-        match v {
-            0 => TurnStatus::Running,
-            1 => TurnStatus::Done,
-            2 => TurnStatus::Error,
-            3 => TurnStatus::Cancelled,
-            _ => TurnStatus::Running,
-        }
-    }
-}
-
-impl TurnStatus {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            TurnStatus::Running => "running",
-            TurnStatus::Done => "done",
-            TurnStatus::Error => "error",
-            TurnStatus::Cancelled => "cancelled",
-        }
-    }
-}
-
-/// Shared metadata about a turn, read by the SSE replay handler to emit
-/// the initial `turn_info` event and to decide whether to wait for new
-/// events or close immediately.
-#[derive(Debug, Clone)]
-pub struct TurnInfo {
-    pub turn_id: String,
-    pub file_path: String,
-    pub library_id: i32,
-    pub status: TurnStatus,
-    pub total_events_pushed: u32,
-    pub buffered_count: u32,
-}
-
-/// Result of reading events at or after an absolute `skip_before` index.
-#[derive(Debug)]
-pub enum ReplayOutcome {
-    /// New events are available. `next_skip` is the absolute index to pass
-    /// on the next read (i.e. one past the last event returned).
-    Events {
-        events: Vec<ChatStreamEvent>,
-        next_skip: u32,
-    },
-    /// The reader is caught up to the live edge — no events past `skip_before`
-    /// yet. `next_skip` is the current high-water mark.
-    CaughtUp { next_skip: u32 },
-    /// `skip_before` points below the buffer's base index: the requested
-    /// events were evicted. Maps to HTTP 410 Gone.
-    Gone,
-}
-
-/// Per-turn state shared between the agentic loop (writer) and all SSE
-/// replay connections (readers).
-pub struct TurnEntry {
-    pub turn_id: String,
-    pub file_path: String,
-    pub library_id: i32,
-    /// Shared event buffer — multiple SSE connections can read independently.
-    /// Each connection tracks its own `skip_before` offset.
-    events: Mutex<Vec<ChatStreamEvent>>,
-    /// Monotonic counter: total events pushed (may exceed events.len()
-    /// due to eviction). Used for skip_before indexing.
-    total_events_pushed: AtomicU32,
-    /// The event index that this entry started with. Adjusts on eviction
-    /// so that `skip_before` stays absolute across connections.
-    base_index: AtomicU32,
-    pub status: AtomicU32,
-    /// Abort handle for the spawned agentic task, set once after spawn.
-    /// Behind a std `Mutex` because the entry is shared via `Arc` and the
-    /// handle is installed after the entry is already in the registry.
-    abort_handle: StdMutex<Option<AbortHandle>>,
-    pub created_at: Instant,
-    notify: Arc<Notify>,
-}
-
-impl TurnEntry {
-    pub fn new(turn_id: String, file_path: String, library_id: i32) -> Self {
-        Self {
-            turn_id,
-            file_path,
-            library_id,
-            events: Mutex::new(Vec::new()),
-            total_events_pushed: AtomicU32::new(0),
-            base_index: AtomicU32::new(0),
-            status: AtomicU32::new(TurnStatus::Running as u32),
-            abort_handle: StdMutex::new(None),
-            created_at: Instant::now(),
-            notify: Arc::new(Notify::new()),
-        }
-    }
-
-    /// Install the abort handle for the spawned agentic task. Called once,
-    /// right after the task is spawned.
-    pub fn set_abort_handle(&self, handle: AbortHandle) {
-        *self.abort_handle.lock().expect("abort_handle poisoned") = Some(handle);
-    }
-
-    /// Abort the spawned agentic task, if a handle was installed. Returns
-    /// `true` if a task was aborted.
-    pub fn abort(&self) -> bool {
-        if let Some(handle) = self
-            .abort_handle
-            .lock()
-            .expect("abort_handle poisoned")
-            .take()
-        {
-            handle.abort();
-            true
-        } else {
-            false
-        }
-    }
-
-    /// Push an event into the buffer. Evicts oldest events if the buffer
-    /// exceeds `MAX_BUFFERED_EVENTS`. Notifies all waiting SSE connections.
-    pub async fn push_event(&self, event: ChatStreamEvent) {
-        {
-            let mut events = self.events.lock().await;
-
-            // Evict oldest events if we've hit the cap.
-            if events.len() >= MAX_BUFFERED_EVENTS {
-                // Drop the oldest event to make room and advance the base
-                // index so skip_before stays absolute across connections.
-                events.remove(0);
-                self.base_index.fetch_add(1, Ordering::Relaxed);
-            }
-
-            events.push(event);
-            // Increment while holding the buffer lock so the counter stays in
-            // lock-step with the buffer even if multiple writers ever exist.
-            self.total_events_pushed.fetch_add(1, Ordering::Relaxed);
-        }
-
-        self.notify.notify_waiters();
-    }
-
-    /// Get a snapshot of turn metadata for the `turn_info` SSE event.
-    pub async fn info(&self) -> TurnInfo {
-        let events = self.events.lock().await;
-        let buffered = events.len() as u32;
-        let total = self.total_events_pushed.load(Ordering::Relaxed);
-        drop(events);
-
-        TurnInfo {
-            turn_id: self.turn_id.clone(),
-            file_path: self.file_path.clone(),
-            library_id: self.library_id,
-            status: self.status.load(Ordering::Relaxed).into(),
-            total_events_pushed: total,
-            buffered_count: buffered,
-        }
-    }
-
-    /// Set the terminal status and notify all waiters.
-    pub fn set_terminal_status(&self, status: TurnStatus) {
-        self.status.store(status as u32, Ordering::Relaxed);
-        self.notify.notify_waiters();
-    }
-
-    /// Read buffered events at or after absolute index `skip_before` without
-    /// waiting. Distinguishes "evicted" (Gone) from "caught up" (no new
-    /// events yet) — the previous boolean/`Option` API conflated the two.
-    pub async fn replay_from(&self, skip_before: u32) -> ReplayOutcome {
-        let events = self.events.lock().await;
-        let base = self.base_index.load(Ordering::Relaxed);
-
-        // The buffer holds absolute indices [base, base + len). A request
-        // below `base` asked for events that have been evicted.
-        if skip_before < base {
-            return ReplayOutcome::Gone;
-        }
-
-        let offset = (skip_before - base) as usize;
-        let next_skip = base + events.len() as u32;
-        if offset >= events.len() {
-            // Caught up to (or past) the live edge — nothing new yet.
-            return ReplayOutcome::CaughtUp { next_skip };
-        }
-
-        ReplayOutcome::Events {
-            events: events[offset..].to_vec(),
-            next_skip,
-        }
-    }
-
-    /// Wait for the next batch of events past `skip_before`, the turn to
-    /// finish, or eviction. Returns:
-    /// - `Events` when new events are available (drained before any terminal
-    ///   signal so the final `Done`/`Error` is never dropped),
-    /// - `CaughtUp` only when the turn has reached a terminal status and the
-    ///   reader is fully drained (the caller should close the stream),
-    /// - `Gone` when `skip_before` points into evicted territory.
-    pub async fn next_batch(&self, skip_before: u32) -> ReplayOutcome {
-        loop {
-            // Register interest BEFORE inspecting state so a push/terminal that
-            // races between our read and our await can't be lost (Notify's
-            // `notify_waiters` does not store a permit).
-            let notified = self.notify.notified();
-            tokio::pin!(notified);
-            notified.as_mut().enable();
-
-            match self.replay_from(skip_before).await {
-                ReplayOutcome::CaughtUp { next_skip } => {
-                    // No new events. If the turn is finished, every event
-                    // (including the terminal one) has already been drained
-                    // above on a prior call, so signal the caller to close.
-                    if !self.is_running() {
-                        return ReplayOutcome::CaughtUp { next_skip };
-                    }
-                    // Still running — wait for the next push or terminal.
-                }
-                other => return other, // Events or Gone
-            }
-
-            notified.await;
-        }
-    }
-
-    /// Check if this turn is still running.
-    pub fn is_running(&self) -> bool {
-        self.status.load(Ordering::Relaxed) == TurnStatus::Running as u32
-    }
-}
-
-/// In-memory registry of all active chat turns. Injected into `AppState`
-/// and shared across all handlers.
-pub struct TurnRegistry {
-    entries: Mutex<HashMap<String, Arc<TurnEntry>>>,
-    timeout_secs: u64,
-}
-
-impl TurnRegistry {
-    pub fn new(timeout_secs: u64) -> Self {
-        Self {
-            entries: Mutex::new(HashMap::new()),
-            timeout_secs,
-        }
-    }
-
-    /// Returns the cleanup timeout in seconds.
-    pub fn timeout_secs(&self) -> u64 {
-        self.timeout_secs
-    }
-
-    /// Insert a new turn entry. Returns the turn_id.
-    pub async fn insert(&self, entry: Arc<TurnEntry>) -> String {
-        let turn_id = entry.turn_id.clone();
-        let mut entries = self.entries.lock().await;
-        entries.insert(turn_id.clone(), entry);
-        turn_id
-    }
-
-    /// Look up a turn by id. Returns None if not found or expired.
-    pub async fn get(&self, turn_id: &str) -> Option<Arc<TurnEntry>> {
-        let entries = self.entries.lock().await;
-        entries.get(turn_id).cloned()
-    }
-
-    /// Clean up stale entries older than the timeout. Returns the count of
-    /// entries removed.
-    pub async fn cleanup_stale(&self) -> usize {
-        let mut entries = self.entries.lock().await;
-        let _now = Instant::now();
-        let stale: Vec<String> = entries
-            .iter()
-            .filter(|(_, entry)| entry.created_at.elapsed().as_secs() > self.timeout_secs)
-            .map(|(id, _)| id.clone())
-            .collect();
-
-        for id in &stale {
-            entries.remove(id);
-        }
-
-        if !stale.is_empty() {
-            log::info!(
-                "TurnRegistry: cleaned up {} stale entries (timeout={}s)",
-                stale.len(),
-                self.timeout_secs
-            );
-        }
-
-        stale.len()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::ai::insight_chat::ChatStreamEvent;
-    use std::time::Duration;
-
-    /// Unwrap the events from a `ReplayOutcome::Events`, panicking otherwise.
-    fn events_of(outcome: ReplayOutcome) -> Vec<ChatStreamEvent> {
-        match outcome {
-            ReplayOutcome::Events { events, .. } => events,
-            other => panic!("expected Events, got {other:?}"),
-        }
-    }
-
-    // ── TurnStatus ──────────────────────────────────────────────────
-
-    #[test]
-    fn turn_status_from_u32_valid_values() {
-        assert_eq!(TurnStatus::from(0), TurnStatus::Running);
-        assert_eq!(TurnStatus::from(1), TurnStatus::Done);
-        assert_eq!(TurnStatus::from(2), TurnStatus::Error);
-        assert_eq!(TurnStatus::from(3), TurnStatus::Cancelled);
-    }
-
-    #[test]
-    fn turn_status_from_u32_unknown_defaults_to_running() {
-        assert_eq!(TurnStatus::from(4), TurnStatus::Running);
-        assert_eq!(TurnStatus::from(u32::MAX), TurnStatus::Running);
-    }
-
-    #[test]
-    fn turn_status_as_str() {
-        assert_eq!(TurnStatus::Running.as_str(), "running");
-        assert_eq!(TurnStatus::Done.as_str(), "done");
-        assert_eq!(TurnStatus::Error.as_str(), "error");
-        assert_eq!(TurnStatus::Cancelled.as_str(), "cancelled");
-    }
-
-    // ── TurnEntry ───────────────────────────────────────────────────
-
-    #[tokio::test]
-    async fn turn_entry_push_and_replay() {
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            1,
-        ));
-
-        entry
-            .push_event(ChatStreamEvent::TextDelta("hello".to_string()))
-            .await;
-        entry
-            .push_event(ChatStreamEvent::TextDelta(" world".to_string()))
-            .await;
-
-        let events = events_of(entry.replay_from(0).await);
-        assert_eq!(events.len(), 2);
-    }
-
-    #[tokio::test]
-    async fn turn_entry_replay_with_skip() {
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            1,
-        ));
-
-        for i in 0..5 {
-            entry
-                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
-                .await;
-        }
-
-        // skip_before=0 → all 5 events
-        let all = events_of(entry.replay_from(0).await);
-        assert_eq!(all.len(), 5);
-
-        // skip_before=2 → events 2,3,4 (3 events)
-        let skipped = events_of(entry.replay_from(2).await);
-        assert_eq!(skipped.len(), 3);
-
-        // skip_before=5 → caught up to the live edge (not Gone).
-        assert!(matches!(
-            entry.replay_from(5).await,
-            ReplayOutcome::CaughtUp { next_skip: 5 }
-        ));
-    }
-
-    #[tokio::test]
-    async fn turn_entry_replay_empty_by_default() {
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            1,
-        ));
-        // Empty buffer with skip_before=0 → caught up (nothing to replay yet).
-        assert!(matches!(
-            entry.replay_from(0).await,
-            ReplayOutcome::CaughtUp { next_skip: 0 }
-        ));
-    }
-
-    #[tokio::test]
-    async fn turn_entry_is_running_initially() {
-        let entry = TurnEntry::new("t1".to_string(), "/photo.jpg".to_string(), 1);
-        assert!(entry.is_running());
-    }
-
-    #[tokio::test]
-    async fn turn_entry_set_terminal_status() {
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            1,
-        ));
-        assert!(entry.is_running());
-        entry.set_terminal_status(TurnStatus::Done);
-        assert!(!entry.is_running());
-    }
-
-    #[tokio::test]
-    async fn turn_entry_info() {
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            42,
-        ));
-
-        entry
-            .push_event(ChatStreamEvent::TextDelta("x".to_string()))
-            .await;
-        entry.set_terminal_status(TurnStatus::Done);
-
-        let info = entry.info().await;
-        assert_eq!(info.turn_id, "t1");
-        assert_eq!(info.file_path, "/photo.jpg");
-        assert_eq!(info.library_id, 42);
-        assert_eq!(info.status, TurnStatus::Done);
-        assert_eq!(info.total_events_pushed, 1);
-        assert_eq!(info.buffered_count, 1);
-    }
-
-    #[tokio::test]
-    async fn turn_entry_eviction_caps_buffer() {
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            1,
-        ));
-
-        // Push MAX_BUFFERED_EVENTS + 10 events.
-        for i in 0..(MAX_BUFFERED_EVENTS + 10) {
-            entry
-                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
-                .await;
-        }
-
-        // Asking from absolute 0 after eviction is Gone (0-9 were dropped).
-        assert!(matches!(entry.replay_from(0).await, ReplayOutcome::Gone));
-
-        // Reading from the new base (10) returns the full capped buffer.
-        let events = events_of(entry.replay_from(10).await);
-        assert_eq!(events.len(), MAX_BUFFERED_EVENTS);
-
-        // First event should be at index 10 (0-9 were evicted).
-        if let ChatStreamEvent::TextDelta(s) = &events[0] {
-            assert_eq!(s, "e10");
-        } else {
-            panic!("expected TextDelta");
-        }
-
-        // Last event should be at index MAX_BUFFERED_EVENTS + 9.
-        if let ChatStreamEvent::TextDelta(s) = &events[events.len() - 1] {
-            assert_eq!(s, &format!("e{}", MAX_BUFFERED_EVENTS + 9));
-        } else {
-            panic!("expected TextDelta");
-        }
-    }
-
-    #[tokio::test]
-    async fn turn_entry_replay_evicted_index_is_gone() {
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            1,
-        ));
-
-        // Push one past the cap so exactly one event (index 0) is evicted.
-        for i in 0..=MAX_BUFFERED_EVENTS {
-            entry
-                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
-                .await;
-        }
-
-        // Base is now 1; asking from absolute 0 is evicted territory → Gone.
-        assert!(matches!(entry.replay_from(0).await, ReplayOutcome::Gone));
-
-        // skip_before = MAX_BUFFERED_EVENTS → last event only (index valid).
-        let last = events_of(entry.replay_from(MAX_BUFFERED_EVENTS as u32).await);
-        assert_eq!(last.len(), 1);
-
-        // skip_before = MAX_BUFFERED_EVENTS + 1 → caught up to the live edge.
-        assert!(matches!(
-            entry.replay_from((MAX_BUFFERED_EVENTS + 1) as u32).await,
-            ReplayOutcome::CaughtUp { .. }
-        ));
-    }
-
-    // ── TurnRegistry ────────────────────────────────────────────────
-
-    #[tokio::test]
-    async fn turn_registry_insert_and_get() {
-        let registry = TurnRegistry::new(300);
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            1,
-        ));
-        let id = registry.insert(entry).await;
-        assert_eq!(id, "t1");
-
-        let retrieved = registry.get("t1").await;
-        assert!(retrieved.is_some());
-        assert_eq!(retrieved.unwrap().turn_id, "t1");
-    }
-
-    #[tokio::test]
-    async fn turn_registry_get_nonexistent_returns_none() {
-        let registry = TurnRegistry::new(300);
-        assert!(registry.get("nonexistent").await.is_none());
-    }
-
-    #[tokio::test]
-    async fn turn_registry_cleanup_stale_removes_old_entries() {
-        let registry = TurnRegistry::new(0);
-        let mut entry = TurnEntry::new("t1".to_string(), "/photo.jpg".to_string(), 1);
-        entry.created_at = Instant::now() - Duration::from_secs(1);
-        registry.insert(Arc::new(entry)).await;
-
-        let cleaned = registry.cleanup_stale().await;
-        assert_eq!(cleaned, 1);
-        assert!(registry.get("t1").await.is_none());
-    }
-
-    #[tokio::test]
-    async fn turn_registry_cleanup_stale_preserves_recent() {
-        let registry = TurnRegistry::new(3600); // 1 hour
-        let entry = Arc::new(TurnEntry::new(
-            "t1".to_string(),
-            "/photo.jpg".to_string(),
-            1,
-        ));
-        registry.insert(entry).await;
-
-        let cleaned = registry.cleanup_stale().await;
-        assert_eq!(cleaned, 0);
-        assert!(registry.get("t1").await.is_some());
-    }
-
-    #[tokio::test]
-    async fn turn_registry_cleanup_stale_multiple() {
-        let registry = TurnRegistry::new(0);
-
-        for i in 0..5 {
-            let mut entry = TurnEntry::new(format!("t{i}"), "/photo.jpg".to_string(), 1);
-            entry.created_at = Instant::now() - Duration::from_secs(1);
-            registry.insert(Arc::new(entry)).await;
-        }
-
-        let cleaned = registry.cleanup_stale().await;
-        assert_eq!(cleaned, 5);
-    }
-
-    #[tokio::test]
-    async fn turn_registry_timeout_secs() {
-        let registry = TurnRegistry::new(600);
-        assert_eq!(registry.timeout_secs(), 600);
-    }
-
-    // ── next_batch / live replay ────────────────────────────────────
-
-    /// Drain a turn the way the SSE replay handler does: pull batches via
-    /// `next_batch` until the turn is finished and fully drained.
-    async fn drain_to_end(entry: Arc<TurnEntry>) -> Vec<ChatStreamEvent> {
-        let mut out = Vec::new();
-        let mut skip = 0u32;
-        while let ReplayOutcome::Events { events, next_skip } = entry.next_batch(skip).await {
-            out.extend(events);
-            skip = next_skip;
-        }
-        out
-    }
-
-    fn is_terminal(ev: &ChatStreamEvent) -> bool {
-        matches!(ev, ChatStreamEvent::Done { .. } | ChatStreamEvent::Error(_))
-    }
-
-    /// The core guarantee behind the replay rewrite: a reader waiting on
-    /// `next_batch` always receives the terminal event, even though the
-    /// writer flips status to terminal immediately after pushing it.
-    #[tokio::test]
-    async fn next_batch_always_delivers_terminal_event() {
-        for _ in 0..50 {
-            let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
-
-            let writer = entry.clone();
-            let w = tokio::spawn(async move {
-                writer
-                    .push_event(ChatStreamEvent::IterationStart { n: 1, max: 6 })
-                    .await;
-                writer
-                    .push_event(ChatStreamEvent::TextDelta("hi".into()))
-                    .await;
-                // Push terminal then flip status with no await between — the
-                // race that previously dropped the Done on the reader side.
-                writer
-                    .push_event(ChatStreamEvent::Done {
-                        tool_calls_made: 0,
-                        iterations_used: 1,
-                        truncated: false,
-                        prompt_tokens: None,
-                        eval_tokens: None,
-                        num_ctx: None,
-                        amended_insight_id: None,
-                        backend_used: "local".into(),
-                        model_used: "m".into(),
-                        cancelled: false,
-                    })
-                    .await;
-                writer.set_terminal_status(TurnStatus::Done);
-            });
-
-            let events = drain_to_end(entry).await;
-            w.await.unwrap();
-
-            assert!(
-                events.last().is_some_and(is_terminal),
-                "terminal event missing; got {} events",
-                events.len()
-            );
-            assert_eq!(events.len(), 3, "expected IterationStart, TextDelta, Done");
-        }
-    }
-
-    /// A reader that connects before any event is pushed blocks in
-    /// `next_batch` and then receives events as the writer produces them.
-    #[tokio::test]
-    async fn next_batch_waits_for_late_events() {
-        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
-
-        let writer = entry.clone();
-        tokio::spawn(async move {
-            tokio::task::yield_now().await;
-            writer
-                .push_event(ChatStreamEvent::TextDelta("late".into()))
-                .await;
-            writer.set_terminal_status(TurnStatus::Done);
-        });
-
-        // First call blocks until the writer pushes, rather than returning
-        // CaughtUp on the empty buffer of a running turn.
-        match entry.next_batch(0).await {
-            ReplayOutcome::Events { events, next_skip } => {
-                assert_eq!(events.len(), 1);
-                assert_eq!(next_skip, 1);
-            }
-            other => panic!("expected Events, got {other:?}"),
-        }
-    }
-
-    #[tokio::test]
-    async fn next_batch_closes_on_terminal_when_caught_up() {
-        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
-        entry
-            .push_event(ChatStreamEvent::TextDelta("x".into()))
-            .await;
-        entry.set_terminal_status(TurnStatus::Done);
-
-        // Caught up (skip past the one buffered event) on a finished turn →
-        // CaughtUp so the handler closes the stream rather than hanging.
-        assert!(matches!(
-            entry.next_batch(1).await,
-            ReplayOutcome::CaughtUp { .. }
-        ));
-    }
-
-    #[tokio::test]
-    async fn next_batch_reports_gone_for_evicted_index() {
-        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
-        for i in 0..=MAX_BUFFERED_EVENTS {
-            entry
-                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
-                .await;
-        }
-        // Index 0 was evicted (base advanced to 1).
-        assert!(matches!(entry.next_batch(0).await, ReplayOutcome::Gone));
-    }
-
-    // ── abort handle (#1 cancellation) ──────────────────────────────
-
-    #[tokio::test]
-    async fn abort_handle_aborts_task_once() {
-        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
-
-        // No handle installed yet → abort is a no-op.
-        assert!(!entry.abort());
-
-        let handle = tokio::spawn(async {
-            // Long-lived task that only ends via abort.
-            futures::future::pending::<()>().await;
-        });
-        entry.set_abort_handle(handle.abort_handle());
-
-        assert!(entry.abort(), "first abort should fire");
-        assert!(!entry.abort(), "handle is taken; second abort is a no-op");
-
-        // The aborted task resolves to a cancellation JoinError.
-        let join = handle.await;
-        assert!(join.unwrap_err().is_cancelled());
-    }
-
-    #[tokio::test]
-    async fn base_index_tracks_eviction() {
-        let entry = Arc::new(TurnEntry::new("t".into(), "/p.jpg".into(), 1));
-        for i in 0..(MAX_BUFFERED_EVENTS + 5) {
-            entry
-                .push_event(ChatStreamEvent::TextDelta(format!("e{i}")))
-                .await;
-        }
-        let info = entry.info().await;
-        // 5 events evicted; total keeps climbing, buffer stays capped.
-        assert_eq!(info.total_events_pushed, (MAX_BUFFERED_EVENTS + 5) as u32);
-        assert_eq!(info.buffered_count, MAX_BUFFERED_EVENTS as u32);
-        // First live index is 5: reading from there yields the full buffer.
-        let from_base = events_of(entry.replay_from(5).await);
-        assert_eq!(from_base.len(), MAX_BUFFERED_EVENTS);
-    }
-}
@@ -220,76 +220,6 @@ pub fn backfill_missing_date_taken(
 /// unscanned image_exif rows directly via the FaceDao anti-join and
 /// hands them to the existing detection pass. Runs on every tick (not
 /// just full scans) so the backlog moves at quick-scan cadence.
-/// Per-tick CLIP encoding drain. Mirrors `process_face_backlog`: pull
-/// up to `CLIP_BACKLOG_MAX_PER_TICK` candidates with a known
-/// `content_hash` but no `clip_embedding`, hand them to
-/// `clip_watch::run_clip_encoding_pass` for parallel fan-out, and let
-/// that module write the result back via `backfill_clip_embedding`.
-///
-/// Idempotent — a row stays in the candidate set until its embedding
-/// lands, so a transient failure (Apollo unreachable, CUDA OOM) just
-/// defers to the next tick. Permanent failures (un-decodable bytes)
-/// retry every tick at this point; future Branch may add a status
-/// column like face_detections has.
-pub fn process_clip_backlog(
-    context: &opentelemetry::Context,
-    library: &libraries::Library,
-    clip_client: &crate::ai::clip_client::ClipClient,
-    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
-    excluded_dirs: &[String],
-) {
-    if !clip_client.is_enabled() {
-        return;
-    }
-    let cap: i64 = dotenv::var("CLIP_BACKLOG_MAX_PER_TICK")
-        .ok()
-        .and_then(|s| s.parse().ok())
-        .filter(|n: &i64| *n > 0)
-        .unwrap_or(32);
-
-    let rows: Vec<(String, String)> = {
-        let mut dao = exif_dao.lock().expect("exif dao");
-        match dao.list_clip_unencoded_candidates(context, library.id, cap) {
-            Ok(r) => r,
-            Err(e) => {
-                warn!(
-                    "clip_watch: list_clip_unencoded_candidates failed for library '{}': {:?}",
-                    library.name, e
-                );
-                return;
-            }
-        }
-    };
-    if rows.is_empty() {
-        return;
-    }
-
-    info!(
-        "clip_watch: backlog drain — encoding {} candidate(s) for library '{}' (cap={})",
-        rows.len(),
-        library.name,
-        cap
-    );
-
-    let candidates: Vec<crate::clip_watch::ClipCandidate> = rows
-        .into_iter()
-        .map(
-            |(rel_path, content_hash)| crate::clip_watch::ClipCandidate {
-                rel_path,
-                content_hash,
-            },
-        )
-        .collect();
-
-    crate::clip_watch::run_clip_encoding_pass(
-        library,
-        excluded_dirs,
-        clip_client,
-        Arc::clone(exif_dao),
-        candidates,
-    );
-}
-
 pub fn process_face_backlog(
    context: &opentelemetry::Context,
    library: &libraries::Library,
@@ -529,21 +459,16 @@ mod tests {
        opentelemetry::Context::new()
    }

-    /// Everything `setup` hands back to a test: tempdir, library, shared
-    /// connection, and the two DAOs. Aliased to keep clippy's
-    /// type-complexity lint satisfied.
-    type SetupFixture = (
+    /// Build a tempdir-backed library + DAOs sharing a single in-memory
+    /// SQLite connection (so cross-table joins like
+    /// `list_unscanned_candidates` see consistent state).
+    fn setup() -> (
        TempDir,
        Library,
        Arc<Mutex<diesel::SqliteConnection>>,
        Arc<Mutex<Box<dyn ExifDao>>>,
        Arc<Mutex<Box<dyn FaceDao>>>,
-    );
-
-    /// Build a tempdir-backed library + DAOs sharing a single in-memory
-    /// SQLite connection (so cross-table joins like
-    /// `list_unscanned_candidates` see consistent state).
-    fn setup() -> SetupFixture {
+    ) {
        let tmp = TempDir::new().expect("tempdir");
        let mut conn = in_memory_db_connection();
        // Migration seeds library id=1 with a placeholder root; rewrite it
@@ -1,7 +1,7 @@
 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Parser;
-use image_api::ai::LocalLlm;
+use image_api::ai::ollama::OllamaClient;
 use image_api::bin_progress;
 use image_api::database::calendar_dao::{InsertCalendarEvent, SqliteCalendarEventDao};
 use image_api::parsers::ical_parser::parse_ics_file;
@@ -44,10 +44,22 @@ async fn main() -> Result<()> {

    let context = opentelemetry::Context::current();

-    // LocalLlm dispatches per LLM_BACKEND, so embeddings written here land
-    // in the same vector space the query side searches.
-    let llm = if args.generate_embeddings {
-        Some(LocalLlm::from_env())
+    let ollama = if args.generate_embeddings {
+        let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
+            .or_else(|_| dotenv::var("OLLAMA_URL"))
+            .unwrap_or_else(|_| "http://localhost:11434".to_string());
+        let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
+        let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
+            .or_else(|_| dotenv::var("OLLAMA_MODEL"))
+            .unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
+        let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
+
+        Some(OllamaClient::new(
+            primary_url,
+            fallback_url,
+            primary_model,
+            fallback_model,
+        ))
    } else {
        None
    };
@@ -78,7 +90,7 @@ async fn main() -> Result<()> {
        }

        // Generate embedding if requested (blocking call)
-        let embedding = if let Some(ref llm) = llm {
+        let embedding = if let Some(ref ollama_client) = ollama {
            let text = format!(
                "{} {} {}",
                event.summary,
@@ -88,7 +100,7 @@ async fn main() -> Result<()> {

            match tokio::task::block_in_place(|| {
                tokio::runtime::Handle::current()
-                    .block_on(async { llm.embed_document(&text).await })
+                    .block_on(async { ollama_client.generate_embedding(&text).await })
            }) {
                Ok(emb) => Some(emb),
                Err(e) => {
@@ -1,7 +1,7 @@
 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Parser;
-use image_api::ai::LocalLlm;
+use image_api::ai::ollama::OllamaClient;
 use image_api::bin_progress;
 use image_api::database::search_dao::{InsertSearchRecord, SqliteSearchHistoryDao};
 use image_api::parsers::search_html_parser::parse_search_html;
@@ -38,9 +38,16 @@ async fn main() -> Result<()> {

    info!("Found {} search records", searches.len());

-    // LocalLlm dispatches per LLM_BACKEND, so embeddings written here land
-    // in the same vector space the query side searches.
-    let llm = LocalLlm::from_env();
+    let primary_url = dotenv::var("OLLAMA_PRIMARY_URL")
+        .or_else(|_| dotenv::var("OLLAMA_URL"))
+        .unwrap_or_else(|_| "http://localhost:11434".to_string());
+    let fallback_url = dotenv::var("OLLAMA_FALLBACK_URL").ok();
+    let primary_model = dotenv::var("OLLAMA_PRIMARY_MODEL")
+        .or_else(|_| dotenv::var("OLLAMA_MODEL"))
+        .unwrap_or_else(|_| "nomic-embed-text:v1.5".to_string());
+    let fallback_model = dotenv::var("OLLAMA_FALLBACK_MODEL").ok();
+
+    let ollama = OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model);
    let context = opentelemetry::Context::current();

    let mut inserted_count = 0usize;
@@ -60,11 +67,12 @@ async fn main() -> Result<()> {

        let pb_for_warn = pb.clone();
        let embeddings_result = tokio::task::spawn({
-            let llm = llm.clone();
+            let ollama_client = ollama.clone();
            async move {
+                // Generate embeddings for the batch one query at a time
                let mut embeddings = Vec::new();
                for query in &queries {
-                    match llm.embed_document(query).await {
+                    match ollama_client.generate_embedding(query).await {
                        Ok(emb) => embeddings.push(Some(emb)),
                        Err(e) => {
                            pb_for_warn.println(format!("embedding failed for '{}': {}", query, e));
@@ -195,7 +195,6 @@ async fn main() -> anyhow::Result<()> {
    let generator = InsightGenerator::new(
        ollama,
        None,
-        None,
        sms_client,
        apollo_client,
        insight_dao.clone(),
@@ -336,7 +335,6 @@ async fn main() -> anyhow::Result<()> {
                args.top_p,
                args.top_k,
                args.min_p,
-                None, // enable_thinking: leave model/template default
                args.max_iterations,
                None,
                Vec::new(),
@@ -0,0 +1,250 @@
+//! Probe binary for RAM++ auto-tagging.
+//!
+//! No DB writes. Walks a library's `image_exif` rows, sends a sample
+//! through Apollo's `/api/internal/tags/auto`, and prints `(path, tags)`
+//! to stdout so the operator can eyeball whether the model's vocabulary
+//! and threshold defaults are appropriate for this library before
+//! committing to the persistence phase (new table, per-tick drain, UI).
+//!
+//! Usage:
+//!     cargo run --release --bin probe_auto_tags -- \
+//!         --library 1 --limit 50 --threshold 0.7
+//!
+//! Env: standard ImageApi `.env`. Requires either
+//! `APOLLO_TAG_API_BASE_URL` or `APOLLO_API_BASE_URL` to be set
+//! (otherwise the client is disabled and the probe bails).
+
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Mutex};
+use std::time::Instant;
+
+use clap::Parser;
+use log::{info, warn};
+
+use image_api::ai::tag_client::{TagClient, TagDetectError, TagMeta};
+use image_api::database::{ExifDao, SqliteExifDao, connect};
+use image_api::exif;
+use image_api::file_types;
+use image_api::libraries::{self, Library};
+
+#[derive(Parser, Debug)]
+#[command(name = "probe_auto_tags")]
+#[command(about = "Print RAM++ auto-tags for a sample of image_exif rows")]
+struct Args {
+    /// Library id to sample from.
+    #[arg(long)]
+    library: i32,
+
+    /// Max files to probe. The binary scans more rows internally because
+    /// non-image rows (videos, junk) are skipped client-side.
+    #[arg(long, default_value_t = 25)]
+    limit: usize,
+
+    /// Per-call threshold sent to Apollo. Overrides the engine default.
+    /// Lower = more tags per photo, more noise. 0.5–0.75 is the useful
+    /// sweep range for ram_plus_swin_large_14m.
+    #[arg(long, default_value_t = 0.65)]
+    threshold: f32,
+
+    /// Offset into the library's rel_path listing (sorted by id ASC).
+    /// Bump on re-runs to sample a different slice. Must be >= 0.
+    #[arg(long, default_value_t = 0, value_parser = clap::value_parser!(i64).range(0..))]
+    offset: i64,
+
+    /// How many DB rows to scan before giving up on hitting the limit.
+    /// Useful when a library is mostly videos. Must be >= 0.
+    #[arg(long, default_value_t = 2000, value_parser = clap::value_parser!(i64).range(0..))]
+    max_scan: i64,
+}
+
+/// Load the bytes to submit for tagging. When
+/// `file_types::needs_ffmpeg_thumbnail` flags the path and an embedded JPEG
+/// preview can be extracted, that preview is returned; otherwise the file
+/// is read from disk as-is. Inlined copy of the pub(crate)
+/// `face_watch::read_image_bytes_for_detect`, unreachable from a bin target.
+fn read_image_bytes(path: &Path) -> std::io::Result<Vec<u8>> {
+    let embedded = file_types::needs_ffmpeg_thumbnail(path)
+        .then(|| exif::extract_embedded_jpeg_preview(path))
+        .flatten();
+    embedded.map_or_else(|| std::fs::read(path), Ok)
+}
+
+/// Probe entry point: sample one library's `image_exif` rows, send each
+/// on-disk image through `TagClient::auto_tag`, and print tags plus a summary.
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    // Load `.env` before the logger so a RUST_LOG set there takes effect.
+    dotenv::dotenv().ok();
+    env_logger::init();
+
+    let args = Args::parse();
+
+    let client = TagClient::from_env();
+    if !client.is_enabled() {
+        anyhow::bail!(
+            "TagClient disabled: set APOLLO_TAG_API_BASE_URL or APOLLO_API_BASE_URL in .env"
+        );
+    }
+
+    // Best-effort health probe: log the engine state up front. A failure
+    // is only warned about; the run continues regardless.
+    match client.health().await {
+        Ok(h) => info!(
+            "tag engine: loaded={} device={} model={} threshold_default={}",
+            h.loaded, h.device, h.model_version, h.threshold
+        ),
+        Err(e) => warn!("health probe failed (continuing): {e}"),
+    }
+
+    let mut seed_conn = connect();
+    if let Some(base) = dotenv::var("BASE_PATH").ok().as_deref() {
+        libraries::seed_or_patch_from_env(&mut seed_conn, base);
+    }
+    let libs = libraries::load_all(&mut seed_conn);
+    drop(seed_conn);
+    let lib: Library = libs
+        .into_iter()
+        .find(|l| l.id == args.library)
+        .ok_or_else(|| anyhow::anyhow!("library id {} not found", args.library))?;
+    info!("probing library #{} ({}) at {}", lib.id, lib.name, lib.root_path);
+
+    let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
+    let ctx = opentelemetry::Context::new();
+
+    // Page through (id, rel_path) for this library, keep images on disk,
+    // stop once `limit` files are probed (checked per row and per page).
+    // Page size is tuned so a video-heavy library doesn't slam the DB.
+    const PAGE: i64 = 500;
+    let mut offset = args.offset;
+    let mut scanned: i64 = 0;
+    let mut probed = 0usize;
+    let mut ok_count = 0usize;
+    let mut empty_count = 0usize;
+    let mut perm_fail = 0usize;
+    let mut transient_fail = 0usize;
+    let started = Instant::now();
+    let root = PathBuf::from(&lib.root_path);
+
+    'outer: while probed < args.limit {
+        if scanned >= args.max_scan {
+            warn!(
+                "scan cap ({}) reached before hitting limit ({}); bump --max-scan to scan deeper",
+                args.max_scan, args.limit
+            );
+            break;
+        }
+        let rows = {
+            let mut guard = dao.lock().expect("dao lock");
+            guard
+                .list_rel_paths_for_library_page(&ctx, lib.id, PAGE, offset)
+                .map_err(|e| anyhow::anyhow!("list rel_paths: {:?}", e))?
+        };
+        if rows.is_empty() {
+            info!("no more rows after offset {}", offset);
+            break;
+        }
+        offset += rows.len() as i64;
+        scanned += rows.len() as i64;
+
+        for (_id, rel_path) in rows {
+            if probed >= args.limit {
+                break 'outer;
+            }
+            let abs = root.join(&rel_path);
+            // Skip non-images and videos at the path level — same logic
+            // the face backlog drain uses, just inlined.
+            if !file_types::is_image_file(&abs) || !abs.exists() {
+                continue;
+            }
+            let bytes = match read_image_bytes(&abs) {
+                Ok(b) => b,
+                Err(e) => {
+                    warn!("read {rel_path}: {e}");
+                    continue;
+                }
+            };
+            // The probe doesn't need a real content_hash — Apollo only
+            // logs it. Pass an empty marker so we don't trip on no-hash
+            // image_exif rows.
+            let meta = TagMeta {
+                content_hash: String::new(),
+                library_id: lib.id,
+                rel_path: rel_path.clone(),
+                threshold: Some(args.threshold),
+            };
+
+            let call_start = Instant::now();
+            match client.auto_tag(bytes, meta).await {
+                Ok(resp) => {
+                    probed += 1;
+                    if resp.tags.is_empty() {
+                        empty_count += 1;
+                        println!(
+                            "[{:>3}] (no tags) {}ms  {}",
+                            probed, resp.duration_ms, rel_path
+                        );
+                    } else {
+                        ok_count += 1;
+                        let preview = resp
+                            .tags
+                            .iter()
+                            .map(|t| format!("{}({:.2})", t.name, t.confidence))
+                            .collect::<Vec<_>>()
+                            .join(", ");
+                        println!(
+                            "[{:>3}] {} tags {}ms  {}\n      {}",
+                            probed,
+                            resp.tags.len(),
+                            resp.duration_ms,
+                            rel_path,
+                            preview
+                        );
+                    }
+                }
+                Err(TagDetectError::Permanent(e)) => {
+                    probed += 1;
+                    perm_fail += 1;
+                    println!(
+                        "[{:>3}] PERMANENT FAIL ({:>4}ms) {}\n      {}",
+                        probed,
+                        call_start.elapsed().as_millis(),
+                        rel_path,
+                        e
+                    );
+                }
+                Err(TagDetectError::Transient(e)) => {
+                    probed += 1;
+                    transient_fail += 1;
+                    println!(
+                        "[{:>3}] TRANSIENT FAIL ({:>4}ms) {}\n      {}",
+                        probed,
+                        call_start.elapsed().as_millis(),
+                        rel_path,
+                        e
+                    );
+                }
+                Err(TagDetectError::Disabled) => {
+                    anyhow::bail!("tag client became disabled mid-run; impossible");
+                }
+            }
+        }
+    }
+
+    let elapsed = started.elapsed();
+    println!();
+    println!("── summary ───────────────────────────────────────");
+    println!("scanned rows         : {scanned}");
+    println!("probed files         : {probed}");
+    println!("  with tags          : {ok_count}");
+    println!("  empty (no tags)    : {empty_count}");
+    println!("  permanent failures : {perm_fail}");
+    println!("  transient failures : {transient_fail}");
+    println!("elapsed              : {:.1}s", elapsed.as_secs_f32());
+    if probed > 0 {
+        println!(
+            "throughput           : {:.2} photos/s",
+            probed as f32 / elapsed.as_secs_f32().max(0.001)
+        );
+    }
+    Ok(())
+}
@@ -1,273 +0,0 @@
-//! Probe binary for CLIP semantic search.
-//!
-//! No DB writes. Walks a library's `image_exif` rows, encodes a sample
-//! via Apollo's `/encode_image`, encodes the user's --query via
-//! `/encode_text`, and prints the top-K most similar photos by cosine
-//! similarity so the operator can eyeball quality before committing to
-//! the persistence phase (column populated by backlog drain, search
-//! endpoint, UI).
-//!
-//! Usage:
-//!     cargo run --release --bin probe_clip_search -- \
-//!         --library 1 --limit 200 --query "a beach at sunset" --top 10
-//!
-//! Env: standard ImageApi `.env`. Requires either
-//! `APOLLO_CLIP_API_BASE_URL` or `APOLLO_API_BASE_URL` to be set.
-
-use std::path::{Path, PathBuf};
-use std::sync::{Arc, Mutex};
-use std::time::Instant;
-
-use clap::Parser;
-use log::{info, warn};
-
-use image_api::ai::clip_client::{ClipClient, ClipError, EncodeImageMeta};
-use image_api::database::{ExifDao, SqliteExifDao, connect};
-use image_api::exif;
-use image_api::file_types;
-use image_api::libraries::{self, Library};
-
-#[derive(Parser, Debug)]
-#[command(name = "probe_clip_search")]
-#[command(about = "Top-K CLIP semantic search over a sample of image_exif rows")]
-struct Args {
-    /// Library id to sample from.
-    #[arg(long)]
-    library: i32,
-
-    /// Max files to encode. CPU inference is slow (~1-3 s per photo at
-    /// ViT-L/14); start small and grow once GPU is sorted.
-    #[arg(long, default_value_t = 50)]
-    limit: usize,
-
-    /// Natural-language query. Empty triggers an error from Apollo.
-    #[arg(long)]
-    query: String,
-
-    /// How many top results to print.
-    #[arg(long, default_value_t = 10)]
-    top: usize,
-
-    /// Offset into the library's rel_path listing.
-    #[arg(long, default_value_t = 0)]
-    offset: i64,
-
-    /// How many DB rows to scan before giving up on hitting the limit.
-    #[arg(long, default_value_t = 5000)]
-    max_scan: i64,
-}
-
-/// Same as `face_watch::read_image_bytes_for_detect` (which is pub(crate)).
-/// Inlined for the throwaway probe.
-fn read_image_bytes(path: &Path) -> std::io::Result<Vec<u8>> {
-    if file_types::needs_ffmpeg_thumbnail(path)
-        && let Some(preview) = exif::extract_embedded_jpeg_preview(path)
-    {
-        return Ok(preview);
-    }
-    std::fs::read(path)
-}
-
-/// Decode a base64'd LE float32 vector to a `Vec<f32>`.
-fn decode_f32_vec(b64: &str) -> anyhow::Result<Vec<f32>> {
-    use base64::Engine;
-    let bytes = base64::engine::general_purpose::STANDARD.decode(b64.as_bytes())?;
-    if bytes.len() % 4 != 0 {
-        anyhow::bail!("embedding byte length {} not divisible by 4", bytes.len());
-    }
-    let mut out = Vec::with_capacity(bytes.len() / 4);
-    for chunk in bytes.chunks_exact(4) {
-        out.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
-    }
-    Ok(out)
-}
-
-/// Plain dot product. Apollo L2-normalizes both sides, so this is cosine sim.
-fn dot(a: &[f32], b: &[f32]) -> f32 {
-    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
-}
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    env_logger::init();
-    dotenv::dotenv().ok();
-
-    let args = Args::parse();
-    if args.query.trim().is_empty() {
-        anyhow::bail!("--query must not be empty");
-    }
-
-    let client = ClipClient::from_env();
-    if !client.is_enabled() {
-        anyhow::bail!(
-            "ClipClient disabled: set APOLLO_CLIP_API_BASE_URL or APOLLO_API_BASE_URL in .env"
-        );
-    }
-
-    match client.health().await {
-        Ok(h) => info!(
-            "clip engine: loaded={} device={} model={} dim={}",
-            h.loaded, h.device, h.model_version, h.embedding_dim
-        ),
-        Err(e) => warn!("health probe failed (continuing): {e}"),
-    }
-
-    let mut seed_conn = connect();
-    if let Some(base) = dotenv::var("BASE_PATH").ok().as_deref() {
-        libraries::seed_or_patch_from_env(&mut seed_conn, base);
-    }
-    let libs = libraries::load_all(&mut seed_conn);
-    drop(seed_conn);
-    let lib: Library = libs
-        .into_iter()
-        .find(|l| l.id == args.library)
-        .ok_or_else(|| anyhow::anyhow!("library id {} not found", args.library))?;
-    info!(
-        "probing library #{} ({}) at {}",
-        lib.id, lib.name, lib.root_path
-    );
-
-    let dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(SqliteExifDao::new())));
-    let ctx = opentelemetry::Context::new();
-
-    // Encode the query up-front so the long image-encode loop doesn't
-    // race a slow query encode. Fails fast on a misspelled query.
-    let query_resp = client
-        .encode_text(&args.query)
-        .await
-        .map_err(|e| anyhow::anyhow!("encode_text: {e}"))?;
-    let query_vec = decode_f32_vec(&query_resp.embedding)?;
-    info!(
-        "query encoded ({}d, {}ms): {:?}",
-        query_resp.embedding_dim, query_resp.duration_ms, args.query
-    );
-
-    // Page through (id, rel_path), filter to images on disk, encode up
-    // to `limit`. Each encoded photo gets scored against the query and
-    // kept in a top-K heap.
-    const PAGE: i64 = 500;
-    let mut offset = args.offset;
-    let mut scanned: i64 = 0;
-    let mut encoded = 0usize;
-    let mut perm_fail = 0usize;
-    let mut transient_fail = 0usize;
-    let root = PathBuf::from(&lib.root_path);
-    let started = Instant::now();
-    // (similarity, rel_path) — we keep all scored results and sort at
-    // the end. With limit≤few-hundred this is trivial.
-    let mut scores: Vec<(f32, String)> = Vec::with_capacity(args.limit);
-
-    'outer: loop {
-        if scanned >= args.max_scan {
-            warn!(
-                "scan cap ({}) reached before hitting limit ({}); bump --max-scan to scan deeper",
-                args.max_scan, args.limit
-            );
-            break;
-        }
-        let rows = {
-            let mut guard = dao.lock().expect("dao lock");
-            guard
-                .list_rel_paths_for_library_page(&ctx, lib.id, PAGE, offset)
-                .map_err(|e| anyhow::anyhow!("list rel_paths: {:?}", e))?
-        };
-        if rows.is_empty() {
-            info!("no more rows after offset {}", offset);
-            break;
-        }
-        offset += rows.len() as i64;
-        scanned += rows.len() as i64;
-
-        for (_id, rel_path) in rows {
-            if encoded >= args.limit {
-                break 'outer;
-            }
-            let abs = root.join(&rel_path);
-            if !file_types::is_image_file(&abs) || !abs.exists() {
-                continue;
-            }
-            let bytes = match read_image_bytes(&abs) {
-                Ok(b) => b,
-                Err(e) => {
-                    warn!("read {rel_path}: {e}");
-                    continue;
-                }
-            };
-            let meta = EncodeImageMeta {
-                content_hash: String::new(),
-                library_id: lib.id,
-                rel_path: rel_path.clone(),
-            };
-            let call_start = Instant::now();
-            match client.encode_image(bytes, meta).await {
-                Ok(resp) => {
-                    encoded += 1;
-                    let vec = match decode_f32_vec(&resp.embedding) {
-                        Ok(v) => v,
-                        Err(e) => {
-                            warn!("decode {rel_path}: {e}");
-                            continue;
-                        }
-                    };
-                    if vec.len() != query_vec.len() {
-                        warn!(
-                            "dim mismatch for {rel_path}: image={} query={}",
-                            vec.len(),
-                            query_vec.len()
-                        );
-                        continue;
-                    }
-                    let sim = dot(&vec, &query_vec);
-                    scores.push((sim, rel_path.clone()));
-                    if encoded.is_multiple_of(10) {
-                        info!(
-                            "progress: {} encoded, {:.1}s elapsed",
-                            encoded,
-                            started.elapsed().as_secs_f32()
-                        );
-                    }
-                    let _ = call_start;
-                }
-                Err(ClipError::Permanent(e)) => {
-                    perm_fail += 1;
-                    warn!("permanent encode failure for {rel_path}: {e}");
-                }
-                Err(ClipError::Transient(e)) => {
-                    transient_fail += 1;
-                    warn!("transient encode failure for {rel_path}: {e}");
-                }
-                Err(ClipError::Disabled) => {
-                    anyhow::bail!("clip client became disabled mid-run; impossible");
-                }
-            }
-        }
-    }
-
-    scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
-    let elapsed = started.elapsed();
-    println!();
-    println!(
-        "── top {} for query: {:?} ──",
-        args.top.min(scores.len()),
-        args.query
-    );
-    for (i, (sim, path)) in scores.iter().take(args.top).enumerate() {
-        println!("[{:>2}] sim={:.3}  {}", i + 1, sim, path);
-    }
-    println!();
-    println!("── summary ─────────────────────────────────────");
-    println!("query                : {:?}", args.query);
-    println!("scanned rows         : {scanned}");
-    println!("encoded photos       : {encoded}");
-    println!("permanent failures   : {perm_fail}");
-    println!("transient failures   : {transient_fail}");
-    println!("elapsed              : {:.1}s", elapsed.as_secs_f32());
-    if encoded > 0 {
-        println!(
-            "throughput           : {:.2} photos/s ({:.0}ms/photo avg)",
-            encoded as f32 / elapsed.as_secs_f32().max(0.001),
-            elapsed.as_millis() as f32 / encoded as f32
-        );
-    }
-    Ok(())
-}
@@ -1,465 +0,0 @@
-//! Re-embed stored corpora through `LocalLlm`, i.e. the same
-//! `LLM_BACKEND` dispatch the query side uses. The original import /
-//! backfill tools always embedded via Ollama, so a deploy running
-//! `LLM_BACKEND=llamacpp` queries vector spaces the corpora may not live
-//! in. Four tables share the problem and are all covered here:
-//!
-//! - `daily_conversation_summaries` — re-embeds
-//!   `strip_summary_boilerplate(summary)` (what the original job fed the
-//!   embedder); also rewrites `model_version`.
-//! - `calendar_events` — re-embeds "summary description location" exactly
-//!   as `import_calendar` does; rows without an embedding are skipped (the
-//!   import only embeds under `--generate-embeddings`).
-//! - `search_history` — re-embeds the raw query text.
-//! - `entities` (knowledge graph) — re-embeds "name description" exactly as
-//!   `tool_store_entity` does; embedding-less rows are skipped (embedding
-//!   is best-effort at store time).
-//!
-//! Source text is untouched — only vectors are rewritten. The old↔new
-//! cosine report doubles as a diagnostic: ~1.0 means both backends already
-//! shared a space (re-embedding was a no-op); low values confirm the
-//! mismatch this tool exists to fix.
-
-use anyhow::{Context, Result};
-use clap::Parser;
-use diesel::prelude::*;
-use diesel::sql_query;
-use diesel::sqlite::SqliteConnection;
-use image_api::ai::{LocalLlm, strip_summary_boilerplate};
-use image_api::bin_progress;
-use std::env;
-
-// Command-line options for the re-embed tool, parsed once at the top of
-// `main`. NOTE: the `///` comments on the fields are consumed by clap's
-// derive as `--help` text, so they are user-facing strings — reword them
-// deliberately, not as ordinary code comments.
-#[derive(Parser, Debug)]
-#[command(author, version, about = "Re-embed stored corpora via the configured LLM_BACKEND", long_about = None)]
-struct Args {
-    /// Comma-separated tables to process: summaries, calendar, search, entities
-    // Split on ',' and trimmed in `main`; any other name aborts the run.
-    #[arg(long, default_value = "summaries,calendar,search,entities")]
-    tables: String,
-
-    /// Only process the first N rows per table (smoke test)
-    // Applied with `Vec::truncate` after the full table has been loaded.
-    #[arg(long)]
-    limit: Option<usize>,
-
-    /// Compute embeddings and report old↔new similarity without writing
-    // When set, `reembed_table` never invokes its `update` callback.
-    #[arg(long, default_value_t = false)]
-    dry_run: bool,
-}
-
-/// One row of `daily_conversation_summaries`, shaped to match the raw
-/// `SELECT id, summary, embedding, model_version` query issued in `main`.
-#[derive(QueryableByName)]
-struct SummaryRow {
-    /// Primary key; used as the `WHERE id = ?` key of the write-back.
-    #[diesel(sql_type = diesel::sql_types::Integer)]
-    id: i32,
-    /// Raw summary text; passed through `strip_summary_boilerplate` before
-    /// being embedded.
-    #[diesel(sql_type = diesel::sql_types::Text)]
-    summary: String,
-    /// Currently stored vector bytes, kept for the old↔new cosine report.
-    #[diesel(sql_type = diesel::sql_types::Binary)]
-    embedding: Vec<u8>,
-    /// Model label stored with the row; only the first row's value is
-    /// printed (as the "previous model_version") before re-embedding.
-    #[diesel(sql_type = diesel::sql_types::Text)]
-    model_version: String,
-}
-
-/// One row of `calendar_events`, shaped to match the raw query in `main`.
-/// That query filters on `embedding IS NOT NULL`, which is why `embedding`
-/// is mapped as a non-nullable `Binary` here.
-#[derive(QueryableByName)]
-struct CalendarRow {
-    /// Primary key; used as the `WHERE id = ?` key of the write-back.
-    #[diesel(sql_type = diesel::sql_types::Integer)]
-    id: i32,
-    /// Event title — first component of the embedded text.
-    #[diesel(sql_type = diesel::sql_types::Text)]
-    summary: String,
-    /// Optional description; contributes an empty string when NULL.
-    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
-    description: Option<String>,
-    /// Optional location; contributes an empty string when NULL.
-    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
-    location: Option<String>,
-    /// Currently stored vector bytes, kept for the old↔new cosine report.
-    #[diesel(sql_type = diesel::sql_types::Binary)]
-    embedding: Vec<u8>,
-}
-
-/// One row of `search_history`, shaped to match the raw query in `main`.
-#[derive(QueryableByName)]
-struct SearchRow {
-    /// SQLite `rowid` (selected as `rowid AS id`), hence `BigInt` / `i64`.
-    #[diesel(sql_type = diesel::sql_types::BigInt)]
-    id: i64,
-    /// Raw query text; embedded as-is.
-    #[diesel(sql_type = diesel::sql_types::Text)]
-    query: String,
-    /// Currently stored vector bytes, kept for the old↔new cosine report.
-    #[diesel(sql_type = diesel::sql_types::Binary)]
-    embedding: Vec<u8>,
-}
-
-/// One row of `entities`, shaped to match the raw query in `main`. That
-/// query filters on `embedding IS NOT NULL`, so `embedding` is mapped as a
-/// non-nullable `Binary`.
-#[derive(QueryableByName)]
-struct EntityRow {
-    /// Primary key; used as the `WHERE id = ?` key of the write-back.
-    #[diesel(sql_type = diesel::sql_types::Integer)]
-    id: i32,
-    /// Entity name — first component of the embedded "name description" text.
-    #[diesel(sql_type = diesel::sql_types::Text)]
-    name: String,
-    /// Entity description — second component of the embedded text.
-    // NOTE(review): mapped as non-nullable Text; confirm the column cannot
-    // be NULL, otherwise the load in `main` would fail.
-    #[diesel(sql_type = diesel::sql_types::Text)]
-    description: String,
-    /// Currently stored vector bytes, kept for the old↔new cosine report.
-    #[diesel(sql_type = diesel::sql_types::Binary)]
-    embedding: Vec<u8>,
-}
-
-/// One unit of re-embed work, normalized across tables.
-///
-/// `main` builds these from each table's row struct and hands them to
-/// `reembed_table`, which is the only consumer.
-struct WorkItem {
-    /// Row key, as i64 so both i32 ids and rowids fit.
-    id: i64,
-    /// Text fed to the embedder — must match what the original writer used.
-    text: String,
-    /// Existing vector bytes, for the old↔new similarity report.
-    /// Never written back; only decoded via `deserialize_vector`.
-    old_embedding: Vec<u8>,
-}
-
-/// Decode a BLOB of little-endian `f32`s into a vector.
-///
-/// Returns `None` when the byte length is not a whole number of 4-byte
-/// floats. An empty slice decodes to an empty vector.
-fn deserialize_vector(bytes: &[u8]) -> Option<Vec<f32>> {
-    if !bytes.len().is_multiple_of(4) {
-        return None;
-    }
-    let mut floats = Vec::with_capacity(bytes.len() / 4);
-    for quad in bytes.chunks_exact(4) {
-        let mut word = [0u8; 4];
-        word.copy_from_slice(quad);
-        floats.push(f32::from_le_bytes(word));
-    }
-    Some(floats)
-}
-
-/// Encode a vector as a flat BLOB of little-endian `f32`s (4 bytes per
-/// element) — the inverse of `deserialize_vector`.
-fn serialize_vector(vec: &[f32]) -> Vec<u8> {
-    let mut bytes = Vec::with_capacity(vec.len() * 4);
-    for value in vec {
-        bytes.extend_from_slice(&value.to_le_bytes());
-    }
-    bytes
-}
-
-/// Cosine similarity of two vectors.
-///
-/// Returns `0.0` when the lengths differ or when either vector has zero
-/// magnitude, so callers never see a division by zero.
-fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
-    if a.len() != b.len() {
-        return 0.0;
-    }
-    let magnitude = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
-    let (mag_a, mag_b) = (magnitude(a), magnitude(b));
-    if mag_a == 0.0 || mag_b == 0.0 {
-        return 0.0;
-    }
-    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
-    dot / (mag_a * mag_b)
-}
-
-/// Embed `text`, retrying with the first half of its characters whenever
-/// the backend reports the input as "too large to process".
-///
-/// Halving stops once the text is 64 characters or shorter; at that point
-/// (or on any other error) the backend's error is returned unchanged. This
-/// stands in for the silent truncation Ollama applied when these corpora
-/// were first embedded — llama-server answers with a 500 instead — but the
-/// truncation is reported to the caller.
-///
-/// Returns `(embedding, truncated)`, where `truncated` is `true` if at
-/// least one halving was needed.
-async fn embed_with_truncation(llm: &LocalLlm, text: &str) -> Result<(Vec<f32>, bool)> {
-    let mut current = text.to_owned();
-    let mut was_truncated = false;
-    loop {
-        let err = match llm.embed_document(&current).await {
-            Ok(embedding) => return Ok((embedding, was_truncated)),
-            Err(err) => err,
-        };
-
-        // Only the size-limit error is retryable, and only while there is
-        // still a meaningful amount of text left to cut.
-        let char_len = current.chars().count();
-        let can_halve = err.to_string().contains("too large to process") && char_len > 64;
-        if !can_halve {
-            return Err(err);
-        }
-
-        // Cut on character boundaries, never on raw bytes.
-        current = current.chars().take(char_len / 2).collect();
-        was_truncated = true;
-    }
-}
-
-/// Re-embed `items`, writing each new vector via `update`. Returns the
-/// old↔new cosines for the similarity report.
-///
-/// * `conn` — connection handed through to `update` for the write-back.
-/// * `llm` — embedding backend; called once per item (more if truncation
-///   retries are needed).
-/// * `label` — table name used only in progress / log output.
-/// * `items` — the rows to process, in order.
-/// * `dry_run` — when `true`, embeddings are computed and compared but
-///   `update` is never called.
-/// * `update` — persists `(id, serialized_vector)` for one row.
-///
-/// # Errors
-///
-/// A failed embedding for a single row is logged and skipped. The whole
-/// pass aborts (returning `Err`) when the backend returns a vector whose
-/// length differs from `embedding_dim()`, or when `update` fails.
-async fn reembed_table(
-    conn: &mut SqliteConnection,
-    llm: &LocalLlm,
-    label: &str,
-    items: Vec<WorkItem>,
-    dry_run: bool,
-    update: impl Fn(&mut SqliteConnection, i64, Vec<u8>) -> Result<()>,
-) -> Result<Vec<f32>> {
-    println!("\n[{}] re-embedding {} rows...", label, items.len());
-    let pb = bin_progress::determinate(items.len() as u64, format!("re-embedding {}", label));
-
-    let mut sims: Vec<f32> = Vec::with_capacity(items.len());
-    // `updated` counts rows that were embedded successfully — in a dry run
-    // that includes rows that were not actually written.
-    let mut updated = 0usize;
-    let mut failed = 0usize;
-    let mut truncated_count = 0usize;
-
-    for item in &items {
-        let new_emb = match embed_with_truncation(llm, &item.text).await {
-            Ok((e, truncated)) => {
-                if truncated {
-                    truncated_count += 1;
-                    pb.println(format!(
-                        "⚠ {} id={}: input exceeded the embed server's batch size, \
-                         truncated before embedding",
-                        label, item.id
-                    ));
-                }
-                e
-            }
-            Err(e) => {
-                // Per-row failure: advance the bar, count it, move on.
-                pb.inc(1);
-                failed += 1;
-                eprintln!("✗ {} id={}: {}", label, item.id, e);
-                continue;
-            }
-        };
-
-        // The whole pipeline (DAO checks, stored corpora) assumes
-        // EMBEDDING_DIM dims. A mismatch means the active embed slot is not
-        // serving the configured model — stop rather than corrupt the table.
-        anyhow::ensure!(
-            new_emb.len() == image_api::ai::embedding_dim(),
-            "backend returned {}-dim embedding (expected {}) — '{}' does not \
-             match the configured EMBEDDING_DIM",
-            new_emb.len(),
-            image_api::ai::embedding_dim(),
-            llm.embedding_model_version()
-        );
-
-        // Old bytes that are not a whole number of f32s contribute no
-        // similarity sample; a length mismatch between old and new vectors
-        // is recorded as 0.0 by `cosine_similarity`.
-        if let Some(old_emb) = deserialize_vector(&item.old_embedding) {
-            sims.push(cosine_similarity(&old_emb, &new_emb));
-        }
-
-        if !dry_run {
-            update(conn, item.id, serialize_vector(&new_emb))
-                .with_context(|| format!("updating {} id={}", label, item.id))?;
-        }
-        updated += 1;
-        pb.inc(1);
-    }
-    pb.finish_and_clear();
-
-    println!(
-        "[{}] {} re-embedded ({} truncated), {} failed",
-        label, updated, truncated_count, failed
-    );
-    Ok(sims)
-}
-
-/// Print min / median / mean / max of the old↔new cosines for `label`,
-/// followed by a one-line interpretation keyed on the median.
-///
-/// The median is the element at index `len / 2` of the sorted samples
-/// (the upper middle for an even count). Prints a short notice and
-/// returns when there are no samples.
-fn report_similarity(label: &str, mut sims: Vec<f32>) {
-    let count = sims.len();
-    if count == 0 {
-        println!("[{}] no old↔new pairs to compare", label);
-        return;
-    }
-
-    sims.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
-    let (lowest, highest) = (sims[0], sims[count - 1]);
-    let median = sims[count / 2];
-    let mean: f32 = sims.iter().sum::<f32>() / count as f32;
-    println!(
-        "[{}] old↔new cosine over identical text: min={:.3} median={:.3} mean={:.3} max={:.3}",
-        label, lowest, median, mean, highest
-    );
-
-    // Verdict bands: > 0.98 same space, > 0.9 same family, otherwise mismatch.
-    match median {
-        m if m > 0.98 => println!(
-            "[{}] → old and new backends agree (~same vector space); poor search \
-             results are coming from something else (prefixes, thresholds, corpus).",
-            label
-        ),
-        m if m > 0.9 => println!(
-            "[{}] → same model family but measurably different vectors \
-             (quantization / runtime drift); re-embedding was worthwhile.",
-            label
-        ),
-        _ => println!(
-            "[{}] → vector-space mismatch confirmed — queries were searching a \
-             different space than the corpus. This re-embed should fix it.",
-            label
-        ),
-    }
-}
-
-/// Entry point: parse arguments, open the SQLite database, then re-embed
-/// each requested table in turn (summaries → calendar → search → entities)
-/// and print an old↔new similarity report per table.
-///
-/// Any error from loading rows, from a dimension mismatch, or from a
-/// write-back aborts the run; tables processed before the failure keep
-/// whatever was already written.
-#[tokio::main]
-async fn main() -> Result<()> {
-    dotenv::dotenv().ok();
-    env_logger::init();
-    let args = Args::parse();
-
-    // Validate table names up front so a typo fails before any DB or
-    // embedding work is done.
-    let tables: Vec<&str> = args.tables.split(',').map(|t| t.trim()).collect();
-    for t in &tables {
-        anyhow::ensure!(
-            matches!(*t, "summaries" | "calendar" | "search" | "entities"),
-            "unknown table '{}' — expected summaries, calendar, search, entities",
-            t
-        );
-    }
-
-    // Falls back to `auth.db` in the working directory when DATABASE_URL
-    // is unset.
-    let database_url = env::var("DATABASE_URL").unwrap_or_else(|_| "auth.db".to_string());
-    println!("Database: {}", database_url);
-
-    let mut conn = SqliteConnection::establish(&database_url)
-        .with_context(|| format!("connecting to {}", database_url))?;
-
-    let llm = LocalLlm::from_env();
-    let model_version = llm.embedding_model_version();
-    println!("Embedding via '{}'", model_version);
-    if args.dry_run {
-        println!("DRY RUN — no rows will be written");
-    }
-
-    // ── daily_conversation_summaries ────────────────────────────────────
-    if tables.contains(&"summaries") {
-        // The whole table is loaded first; `--limit` only trims the
-        // in-memory list afterwards.
-        let mut rows: Vec<SummaryRow> = sql_query(
-            "SELECT id, summary, embedding, model_version
-             FROM daily_conversation_summaries ORDER BY date",
-        )
-        .load(&mut conn)
-        .context("loading daily summaries")?;
-        if let Some(limit) = args.limit {
-            rows.truncate(limit);
-        }
-        // Only the first row's stored model_version is shown; rows are not
-        // checked for agreeing with each other.
-        if let Some(first) = rows.first() {
-            println!(
-                "\n[summaries] previous model_version '{}' → '{}'",
-                first.model_version, model_version
-            );
-        }
-        let items = rows
-            .into_iter()
-            .map(|r| WorkItem {
-                id: r.id as i64,
-                text: strip_summary_boilerplate(&r.summary),
-                old_embedding: r.embedding,
-            })
-            .collect();
-        // Cloned so the `move` closure below can own its copy.
-        let mv = model_version.clone();
-        let sims = reembed_table(
-            &mut conn,
-            &llm,
-            "summaries",
-            items,
-            args.dry_run,
-            move |conn, id, emb| {
-                // Summaries are the only table that also records which
-                // model produced the vector.
-                sql_query(
-                    "UPDATE daily_conversation_summaries
-                     SET embedding = ?1, model_version = ?2 WHERE id = ?3",
-                )
-                .bind::<diesel::sql_types::Binary, _>(emb)
-                .bind::<diesel::sql_types::Text, _>(&mv)
-                // `id` originated as an i32 above, so narrowing is lossless.
-                .bind::<diesel::sql_types::Integer, _>(id as i32)
-                .execute(conn)?;
-                Ok(())
-            },
-        )
-        .await?;
-        report_similarity("summaries", sims);
-    }
-
-    // ── calendar_events ─────────────────────────────────────────────────
-    if tables.contains(&"calendar") {
-        // Rows without an embedding are left alone (see the SQL filter).
-        let mut rows: Vec<CalendarRow> = sql_query(
-            "SELECT id, summary, description, location, embedding
-             FROM calendar_events WHERE embedding IS NOT NULL ORDER BY id",
-        )
-        .load(&mut conn)
-        .context("loading calendar events")?;
-        if let Some(limit) = args.limit {
-            rows.truncate(limit);
-        }
-        let items = rows
-            .into_iter()
-            .map(|r| WorkItem {
-                id: r.id as i64,
-                // Same text construction as import_calendar.
-                // Missing description / location become empty strings, so
-                // the separating spaces are always present.
-                text: format!(
-                    "{} {} {}",
-                    r.summary,
-                    r.description.as_deref().unwrap_or(""),
-                    r.location.as_deref().unwrap_or("")
-                ),
-                old_embedding: r.embedding,
-            })
-            .collect();
-        let sims = reembed_table(
-            &mut conn,
-            &llm,
-            "calendar",
-            items,
-            args.dry_run,
-            |conn, id, emb| {
-                sql_query("UPDATE calendar_events SET embedding = ?1 WHERE id = ?2")
-                    .bind::<diesel::sql_types::Binary, _>(emb)
-                    // `id` originated as an i32 above, so narrowing is lossless.
-                    .bind::<diesel::sql_types::Integer, _>(id as i32)
-                    .execute(conn)?;
-                Ok(())
-            },
-        )
-        .await?;
-        report_similarity("calendar", sims);
-    }
-
-    // ── search_history ──────────────────────────────────────────────────
-    if tables.contains(&"search") {
-        // Keyed on SQLite's implicit rowid rather than a declared id column.
-        let mut rows: Vec<SearchRow> = sql_query(
-            "SELECT rowid AS id, query, embedding
-             FROM search_history ORDER BY rowid",
-        )
-        .load(&mut conn)
-        .context("loading search history")?;
-        if let Some(limit) = args.limit {
-            rows.truncate(limit);
-        }
-        let items = rows
-            .into_iter()
-            .map(|r| WorkItem {
-                id: r.id,
-                text: r.query,
-                old_embedding: r.embedding,
-            })
-            .collect();
-        let sims = reembed_table(
-            &mut conn,
-            &llm,
-            "search",
-            items,
-            args.dry_run,
-            |conn, id, emb| {
-                sql_query("UPDATE search_history SET embedding = ?1 WHERE rowid = ?2")
-                    .bind::<diesel::sql_types::Binary, _>(emb)
-                    .bind::<diesel::sql_types::BigInt, _>(id)
-                    .execute(conn)?;
-                Ok(())
-            },
-        )
-        .await?;
-        report_similarity("search", sims);
-    }
-
-    // ── entities (knowledge graph) ──────────────────────────────────────
-    if tables.contains(&"entities") {
-        // Rows without an embedding are left alone (see the SQL filter).
-        let mut rows: Vec<EntityRow> = sql_query(
-            "SELECT id, name, description, embedding
-             FROM entities WHERE embedding IS NOT NULL ORDER BY id",
-        )
-        .load(&mut conn)
-        .context("loading knowledge entities")?;
-        if let Some(limit) = args.limit {
-            rows.truncate(limit);
-        }
-        let items = rows
-            .into_iter()
-            .map(|r| WorkItem {
-                id: r.id as i64,
-                // Same text construction as tool_store_entity.
-                text: format!("{} {}", r.name, r.description),
-                old_embedding: r.embedding,
-            })
-            .collect();
-        let sims = reembed_table(
-            &mut conn,
-            &llm,
-            "entities",
-            items,
-            args.dry_run,
-            |conn, id, emb| {
-                sql_query("UPDATE entities SET embedding = ?1 WHERE id = ?2")
-                    .bind::<diesel::sql_types::Binary, _>(emb)
-                    // `id` originated as an i32 above, so narrowing is lossless.
-                    .bind::<diesel::sql_types::Integer, _>(id as i32)
-                    .execute(conn)?;
-                Ok(())
-            },
-        )
-        .await?;
-        report_similarity("entities", sims);
-    }
-
-    println!(
-        "\n{}",
-        if args.dry_run {
-            "Dry run complete"
-        } else {
-            "Done"
-        }
-    );
-    Ok(())
-}
@@ -1,382 +0,0 @@
-//! `/photos/search?q=<text>` — CLIP semantic photo search.
-//!
-//! The route lives outside `files.rs` to keep that 1500+ line module
-//! focused on EXIF / tag listing. The flow is:
-//!
-//! 1. Parse query params (`q`, `limit`, `threshold`, optional `library`).
-//! 2. Call Apollo's `/api/internal/clip/encode_text` to get the query
-//!    vector (L2-normalized 768-d f32 for ViT-L/14).
-//! 3. Load every `(content_hash, clip_embedding)` for the scope from
-//!    `image_exif` via `ExifDao::list_clip_index`. ~28–43 MB for a 14k
-//!    library at ViT-L/14; loaded fresh per request — fast enough for
-//!    v1, optimize via an AppState cache later if needed.
-//! 4. Dot product (= cosine since both sides are L2-normalized), filter
-//!    above `threshold`, top-K by score.
-//! 5. Resolve each surviving hash back to a `(library_id, rel_path)` so
-//!    the frontend can render the photo / hand off to the carousel.
-//!
-//! Response shape is intentionally minimal — paths + score — so the
-//! frontend can reuse existing PhotoGrid rendering by joining against
-//! `/api/photos/match` (or calling `/image/metadata` lazily). Don't
-//! bake camera/EXIF metadata into this route; it would force a fan-out
-//! per result and balloon the response.
-
-use crate::AppState;
-use crate::ai::clip_client::ClipError;
-use crate::database::ExifDao;
-use actix_web::{HttpResponse, Result as ActixResult, web};
-use base64::Engine;
-use serde::{Deserialize, Serialize};
-use std::sync::Mutex;
-
-/// Query-string parameters accepted by `search_photos`, deserialized via
-/// `web::Query<SearchQuery>`.
-#[derive(Debug, Deserialize)]
-pub struct SearchQuery {
-    /// Natural-language query. Required; empty (after trimming) triggers 400.
-    pub q: String,
-    /// Max results to return in this page. Clamped to 1..=200 server-side.
-    /// Defaults to 20. Pair with `offset` for pagination.
-    #[serde(default = "default_limit")]
-    pub limit: usize,
-    /// Zero-based offset into the sorted-and-filtered result set. The
-    /// scoring loop still runs over the full embedding matrix on every
-    /// page (cheap at personal-library scale — sub-100ms — and avoids
-    /// stateful pagination cursors). Defaults to 0.
-    #[serde(default)]
-    pub offset: usize,
-    /// Cosine-similarity floor below which results are dropped.
-    /// 0.20 is the rough "this is plausibly relevant" line for OpenAI
-    /// CLIP; tunable per call when sweeping. Defaults to 0.20 and is
-    /// clamped to [-1, 1] by `search_photos`.
-    #[serde(default = "default_threshold")]
-    pub threshold: f32,
-    /// Optional single-library scope. Legacy param — new clients pass
-    /// `library_ids` instead so multi-select scopes (Apollo's HUD library
-    /// chips, FileViewer-React's library picker) actually filter. Kept
-    /// for back-compat; `library_ids` wins when both are supplied.
-    pub library: Option<i32>,
-    /// Optional multi-library scope, comma-separated id list
-    /// (`?library_ids=1,3`). Empty / omitted = every enabled library
-    /// (the historical default). Apollo and FileViewer-React both send
-    /// this when 2+ libraries are selected; the single-library case
-    /// works through either param interchangeably.
-    pub library_ids: Option<String>,
-    /// Optional model-version filter. When omitted, the model version
-    /// reported in the text-encode response is used instead. Forces a
-    /// strict join so mid-flight model swaps can't mix geometries in a
-    /// single response.
-    #[serde(default)]
-    pub model_version: Option<String>,
-}
-
-/// Serde default for `SearchQuery::limit`: page size used when the client
-/// omits `limit`.
-fn default_limit() -> usize {
-    const DEFAULT_PAGE_SIZE: usize = 20;
-    DEFAULT_PAGE_SIZE
-}
-
-/// Serde default for `SearchQuery::threshold`: similarity floor used when
-/// the client omits `threshold`.
-fn default_threshold() -> f32 {
-    const DEFAULT_SIMILARITY_FLOOR: f32 = 0.20;
-    DEFAULT_SIMILARITY_FLOOR
-}
-
-/// One search result: where the photo lives plus how well it matched.
-/// Produced by `resolve_hits` and serialized inside `SearchResponse`.
-#[derive(Debug, Serialize)]
-pub struct SearchHit {
-    /// Library of the row returned by `find_by_content_hash` for this hash.
-    pub library_id: i32,
-    /// First path reported by `get_rel_paths_for_hashes` for this hash,
-    /// falling back to the row's `file_path`.
-    // NOTE(review): `library_id` and `rel_path` can come from different
-    // lookups — confirm they always agree when a hash exists in more than
-    // one library.
-    pub rel_path: String,
-    /// Content hash the embedding was matched on.
-    pub content_hash: String,
-    /// Cosine similarity in [-1, 1]. In practice OpenAI CLIP returns
-    /// 0.10–0.40 for the typical photo library.
-    pub score: f32,
-}
-
-/// JSON body returned by `search_photos` on success.
-#[derive(Debug, Serialize)]
-pub struct SearchResponse {
-    /// The trimmed query text that was encoded.
-    pub query: String,
-    /// Model version reported by the text encoder for this query.
-    pub model_version: String,
-    /// The similarity floor actually applied (after clamping to [-1, 1]).
-    pub threshold: f32,
-    /// Number of index rows loaded for the scope. Counted before rows
-    /// with malformed or wrong-dimension embeddings are skipped, so it can
-    /// exceed the number actually scored. Same value across pages of the
-    /// same query.
-    pub considered: usize,
-    /// Count of results above threshold, before pagination. Lets the
-    /// client decide whether a "Load more" button is meaningful and
-    /// stop fetching when `offset + results.len() >= total_matching`.
-    pub total_matching: usize,
-    /// Echo of the requested zero-based offset.
-    pub offset: usize,
-    /// The resolved page of hits. May be shorter than the page slice when
-    /// a hash no longer resolves to a row.
-    pub results: Vec<SearchHit>,
-}
-
-/// JSON error body (`{"error": "..."}`) used by every failure response in
-/// this module.
-#[derive(Debug, Serialize)]
-struct SearchError {
-    /// Human-readable description of what went wrong.
-    error: String,
-}
-
-/// Turn a stored `clip_embedding` BLOB (little-endian `f32`s) back into a
-/// `Vec<f32>`.
-///
-/// Yields `None` for an empty BLOB or one whose length is not a multiple
-/// of four bytes; callers skip such rows instead of failing the query.
-fn decode_embedding(bytes: &[u8]) -> Option<Vec<f32>> {
-    let well_formed = !bytes.is_empty() && bytes.len().is_multiple_of(4);
-    well_formed.then(|| {
-        bytes
-            .chunks_exact(4)
-            .map(|quad| f32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]))
-            .collect()
-    })
-}
-
-/// Dot product of two slices. Extra elements of the longer slice are
-/// ignored, so callers compare lengths beforehand.
-#[inline]
-fn dot(a: &[f32], b: &[f32]) -> f32 {
-    std::iter::zip(a, b).map(|(lhs, rhs)| lhs * rhs).sum()
-}
-
-/// Failure modes of [`score_photos`]. Carries enough to let each caller pick
-/// an appropriate HTTP status (the CLIP service being down is a 502, a
-/// disabled feature is a 503, a rejected query is a 400, a DB failure 500).
-///
-/// Derives `Debug` so a `Result<_, ScoreError>` can be logged with `{:?}`
-/// and used with `unwrap` / `expect` in tests.
-#[derive(Debug)]
-pub enum ScoreError {
-    /// CLIP search isn't configured at all (no Apollo endpoint).
-    Disabled,
-    /// The query was rejected by the encoder (client error).
-    Rejected(String),
-    /// The CLIP service is transiently unavailable (upstream error).
-    Unavailable(String),
-    /// The encoder returned an embedding we couldn't decode.
-    MalformedEmbedding,
-    /// A database / index load failure.
-    Internal(String),
-}
-
-/// Result of scoring the whole library against a query embedding: the
-/// resolved model version, how many embeddings were considered, and every
-/// `(score, content_hash)` above threshold, sorted by descending score.
-/// Pagination and path resolution are the caller's job (see [`resolve_hits`])
-/// so this core can be reused for both the plain search endpoint and the
-/// unified endpoint (which filters by hash before paginating).
-///
-/// Derives `Debug` and `Clone` so results can be logged and reused by
-/// callers without hand-copying the fields.
-#[derive(Debug, Clone)]
-pub struct ScoredPhotos {
-    /// Model version reported by the text encoder for the query.
-    pub model_version: String,
-    /// Number of index rows loaded for the scope (before any row is
-    /// skipped as undecodable).
-    pub considered: usize,
-    /// `(cosine_score, content_hash)` pairs, descending by score.
-    pub hits: Vec<(f32, String)>,
-}
-
-/// Encode `q_text` with CLIP and rank every stored embedding in the given
-/// library scope against it.
-///
-/// Returns all `(score, content_hash)` pairs whose similarity is not below
-/// `threshold`, highest first, together with the encoder's model version
-/// and the number of index rows loaded. Free of HTTP concerns so that
-/// `search_photos` and the unified search endpoint can share it.
-///
-/// # Errors
-///
-/// * [`ScoreError::Disabled`] — the CLIP client is not enabled.
-/// * [`ScoreError::Rejected`] / [`ScoreError::Unavailable`] — the encoder
-///   failed permanently / transiently.
-/// * [`ScoreError::MalformedEmbedding`] — the query embedding could not be
-///   base64-decoded into a non-empty `f32` vector.
-/// * [`ScoreError::Internal`] — loading the index from the DAO failed.
-pub async fn score_photos(
-    state: &AppState,
-    exif_dao: &Mutex<Box<dyn ExifDao>>,
-    q_text: &str,
-    library_ids: &[i32],
-    threshold: f32,
-    model_version: Option<&str>,
-) -> Result<ScoredPhotos, ScoreError> {
-    if !state.clip_client.is_enabled() {
-        return Err(ScoreError::Disabled);
-    }
-
-    // Step 1: query text → embedding (Apollo's text encoder, ~50ms on CPU).
-    let encoded = state
-        .clip_client
-        .encode_text(q_text)
-        .await
-        .map_err(|err| match err {
-            ClipError::Permanent(e) => ScoreError::Rejected(e.to_string()),
-            ClipError::Transient(e) => ScoreError::Unavailable(e.to_string()),
-            ClipError::Disabled => ScoreError::Disabled,
-        })?;
-
-    // The wire format is base64; a decode failure yields empty bytes, which
-    // `decode_embedding` then rejects.
-    let raw_query = base64::engine::general_purpose::STANDARD
-        .decode(encoded.embedding.as_bytes())
-        .unwrap_or_default();
-    let Some(query_vec) = decode_embedding(&raw_query) else {
-        return Err(ScoreError::MalformedEmbedding);
-    };
-
-    // Step 2: load the (hash, embedding) matrix while holding the DAO lock,
-    // and let the guard drop before any scoring happens. Filtering on the
-    // caller's `model_version` (else the encoder's) keeps a mid-flight model
-    // swap from mixing geometries.
-    let ctx = opentelemetry::Context::current();
-    let index_rows: Vec<(String, Vec<u8>)> = {
-        let mut guard = exif_dao.lock().expect("exif dao");
-        let loaded = guard.list_clip_index(
-            &ctx,
-            library_ids,
-            model_version.or(Some(&encoded.model_version)),
-        );
-        match loaded {
-            Ok(rows) => rows,
-            Err(e) => {
-                log::warn!("clip_search: list_clip_index failed: {:?}", e);
-                return Err(ScoreError::Internal("failed to load search index".into()));
-            }
-        }
-    };
-    let considered = index_rows.len();
-
-    // Step 3: score everything, then sort once (~microseconds at 14k rows).
-    // Undecodable or wrong-dimension rows are dropped silently.
-    let mut hits: Vec<(f32, String)> = Vec::with_capacity(considered);
-    hits.extend(index_rows.into_iter().filter_map(|(hash, blob)| {
-        let emb = decode_embedding(&blob)?;
-        if emb.len() != query_vec.len() {
-            return None;
-        }
-        let sim = dot(&emb, &query_vec);
-        if sim < threshold {
-            None
-        } else {
-            Some((sim, hash))
-        }
-    }));
-    hits.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
-
-    Ok(ScoredPhotos {
-        model_version: encoded.model_version,
-        considered,
-        hits,
-    })
-}
-
-/// Turn a page of `(score, content_hash)` pairs into [`SearchHit`]s that
-/// carry `library_id` and `rel_path`.
-///
-/// Hashes with no matching row — or whose lookup fails — are dropped, so
-/// the result may be shorter than `scored`. Input order is preserved.
-/// Shared by both search endpoints.
-pub fn resolve_hits(
-    exif_dao: &Mutex<Box<dyn ExifDao>>,
-    scored: &[(f32, String)],
-) -> Vec<SearchHit> {
-    if scored.is_empty() {
-        return Vec::new();
-    }
-
-    let ctx = opentelemetry::Context::current();
-    let hashes: Vec<String> = scored.iter().map(|(_, hash)| hash.to_owned()).collect();
-
-    // The lock is held for the batch path lookup and every per-hash lookup.
-    let mut dao = exif_dao.lock().expect("exif dao");
-    let path_map = match dao.get_rel_paths_for_hashes(&ctx, &hashes) {
-        Ok(map) => map,
-        Err(e) => {
-            // Best effort: fall back to each row's own file_path below.
-            log::warn!("clip_search: get_rel_paths_for_hashes failed: {:?}", e);
-            std::collections::HashMap::new()
-        }
-    };
-
-    let mut results = Vec::with_capacity(scored.len());
-    for (score, hash) in scored {
-        let lookup = dao.find_by_content_hash(&ctx, hash).unwrap_or_else(|e| {
-            log::warn!("clip_search: find_by_content_hash failed for {hash}: {e:?}");
-            None
-        });
-        let Some(row) = lookup else {
-            continue;
-        };
-
-        // The first path from get_rel_paths_for_hashes wins (it shares
-        // image_exif's natural order); the ImageExif row is the fallback.
-        let preferred = path_map
-            .get(hash)
-            .and_then(|paths| paths.first().cloned());
-        let library_id = row.library_id;
-        let rel_path = preferred.unwrap_or(row.file_path);
-
-        results.push(SearchHit {
-            library_id,
-            rel_path,
-            content_hash: hash.clone(),
-            score: *score,
-        });
-    }
-    results
-}
-
-/// Build the library scope from the `library_ids` (multi) and `library`
-/// (single) query params.
-///
-/// When `library_ids` is present it wins: its comma-separated entries are
-/// trimmed, blanks are ignored, and duplicates are dropped while keeping
-/// first-seen order. Otherwise `library`, if any, becomes a one-element
-/// list. An empty result means "every enabled library". Shared so the
-/// unified endpoint scopes CLIP identically.
-///
-/// # Errors
-///
-/// Returns a message naming the offending entry when a `library_ids`
-/// piece is not a valid `i32`.
-pub fn parse_library_scope(
-    library_ids: Option<&str>,
-    library: Option<i32>,
-) -> Result<Vec<i32>, String> {
-    let Some(raw) = library_ids else {
-        return Ok(library.into_iter().collect());
-    };
-
-    let mut ids: Vec<i32> = Vec::new();
-    for trimmed in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) {
-        let id = trimmed
-            .parse::<i32>()
-            .map_err(|_| format!("invalid library_ids entry: {trimmed:?}"))?;
-        if !ids.contains(&id) {
-            ids.push(id);
-        }
-    }
-    Ok(ids)
-}
-
-/// HTTP handler for CLIP semantic photo search.
-///
-/// Validates the query string, delegates scoring to [`score_photos`],
-/// slices the sorted hits at `[offset, offset + limit)` and resolves that
-/// page to paths with [`resolve_hits`].
-///
-/// Responds 400 for a blank `q` or an invalid `library_ids` entry; scoring
-/// failures are mapped by [`score_error_response`]. On success the body is
-/// a [`SearchResponse`].
-pub async fn search_photos(
-    state: web::Data<AppState>,
-    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
-    query: web::Query<SearchQuery>,
-) -> ActixResult<HttpResponse> {
-    let q_text = query.q.trim().to_string();
-    if q_text.is_empty() {
-        let body = SearchError {
-            error: "query parameter `q` is required".into(),
-        };
-        return Ok(HttpResponse::BadRequest().json(body));
-    }
-
-    let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
-        Ok(ids) => ids,
-        Err(msg) => return Ok(HttpResponse::BadRequest().json(SearchError { error: msg })),
-    };
-
-    // Normalise the paging / threshold knobs to their supported ranges.
-    let threshold = query.threshold.clamp(-1.0, 1.0);
-    let offset = query.offset;
-    let limit = query.limit.clamp(1, 200);
-
-    let outcome = score_photos(
-        &state,
-        &exif_dao,
-        &q_text,
-        &library_ids,
-        threshold,
-        query.model_version.as_deref(),
-    )
-    .await;
-    let scored = match outcome {
-        Ok(s) => s,
-        Err(e) => return Ok(score_error_response(e)),
-    };
-
-    let total_matching = scored.hits.len();
-    // Pagination: an offset at or past the end simply yields an empty page,
-    // so "load more" stops naturally.
-    let page: Vec<(f32, String)> = scored
-        .hits
-        .iter()
-        .skip(offset)
-        .take(limit)
-        .cloned()
-        .collect();
-    let results = resolve_hits(&exif_dao, &page);
-
-    let body = SearchResponse {
-        query: q_text,
-        model_version: scored.model_version,
-        threshold,
-        considered: scored.considered,
-        total_matching,
-        offset,
-        results,
-    };
-    Ok(HttpResponse::Ok().json(body))
-}
-
-/// Translate a [`ScoreError`] into the HTTP response `search_photos` has
-/// always returned for that failure: 503 when disabled, 400 when the query
-/// is rejected, 502 when the CLIP service is unavailable or returns a
-/// malformed embedding, 500 for internal failures. The body is always a
-/// JSON `{"error": ...}` object. Reused by the unified endpoint.
-pub fn score_error_response(e: ScoreError) -> HttpResponse {
-    let (mut builder, error): (_, String) = match e {
-        ScoreError::Disabled => (
-            HttpResponse::ServiceUnavailable(),
-            "CLIP search is disabled (no Apollo CLIP endpoint configured)".into(),
-        ),
-        ScoreError::Rejected(msg) => (
-            HttpResponse::BadRequest(),
-            format!("query rejected: {msg}"),
-        ),
-        ScoreError::Unavailable(msg) => (
-            HttpResponse::BadGateway(),
-            format!("CLIP service unavailable: {msg}"),
-        ),
-        ScoreError::MalformedEmbedding => (
-            HttpResponse::BadGateway(),
-            "CLIP service returned a malformed query embedding".into(),
-        ),
-        ScoreError::Internal(msg) => (HttpResponse::InternalServerError(), msg),
-    };
-    builder.json(SearchError { error })
-}
@@ -1,246 +0,0 @@
-//! CLIP-encoding pass for the file watcher.
-//!
-//! `process_clip_backlog` in `backfill.rs` calls [`run_clip_encoding_pass`]
-//! with the page of candidates returned by
-//! `ExifDao::list_clip_unencoded_candidates`. We walk those, fan out K
-//! parallel encode calls to Apollo, and persist the resulting embeddings
-//! into `image_exif.clip_embedding` / `clip_model_version`.
-//!
-//! Unlike the face pipeline, CLIP has no marker rows — a permanent
-//! failure (un-decodable bytes) leaves the row's `clip_embedding` NULL
-//! and the drain will retry on the next tick. For personal-library
-//! scale this is fine; the per-tick cap bounds the wasted work, and
-//! `file_types::is_image_file` filters out videos / non-media client-
-//! side so most permanent failures are decoded-but-corrupt files (rare).
-//!
-//! The watcher thread isn't in any pre-existing async context, so we
-//! build a short-lived tokio runtime per pass and `block_on` the join
-//! of K encode futures. Concurrency knob: `CLIP_ENCODE_CONCURRENCY`
-//! (default 4 — lower than faces because Apollo's CLIP path doesn't
-//! release the GIL between preprocess and forward as cleanly).
-
-use crate::ai::clip_client::{ClipClient, ClipError, EncodeImageMeta};
-use crate::database::ExifDao;
-use crate::exif;
-use crate::file_types;
-use crate::libraries::Library;
-use crate::memories::PathExcluder;
-use log::{debug, info, warn};
-use std::path::Path;
-use std::sync::{Arc, Mutex};
-use tokio::sync::Semaphore;
-
-/// One file the watcher would like to CLIP-encode. Built from the DAO
-/// `list_clip_unencoded_candidates` result — needs the `content_hash`
-/// for traceability in Apollo's log lines, even though the embedding
-/// itself is keyed on `(library_id, rel_path)` for the back-write.
-#[derive(Debug, Clone)]
-pub struct ClipCandidate {
-    pub rel_path: String,
-    pub content_hash: String,
-}
-
-/// Synchronous entry point. Returns once every candidate has been
-/// processed (or definitively skipped). No-op when the client is
-/// disabled so the caller can call unconditionally.
-pub fn run_clip_encoding_pass(
-    library: &Library,
-    excluded_dirs: &[String],
-    clip_client: &ClipClient,
-    exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
-    candidates: Vec<ClipCandidate>,
-) {
-    if !clip_client.is_enabled() {
-        return;
-    }
-    if candidates.is_empty() {
-        return;
-    }
-
-    let base = Path::new(&library.root_path);
-    let filtered = filter_excluded(base, excluded_dirs, candidates, Some(&library.name));
-    if filtered.is_empty() {
-        return;
-    }
-
-    let concurrency: usize = std::env::var("CLIP_ENCODE_CONCURRENCY")
-        .ok()
-        .and_then(|s| s.parse().ok())
-        .filter(|n: &usize| *n > 0)
-        .unwrap_or(4);
-
-    info!(
-        "clip_watch: encoding {} candidate(s) for library '{}' (concurrency {})",
-        filtered.len(),
-        library.name,
-        concurrency
-    );
-
-    let rt = match tokio::runtime::Builder::new_multi_thread()
-        .worker_threads(2)
-        .enable_all()
-        .build()
-    {
-        Ok(rt) => rt,
-        Err(e) => {
-            warn!("clip_watch: failed to build tokio runtime: {e}");
-            return;
-        }
-    };
-
-    let library_id = library.id;
-    let library_root = library.root_path.clone();
-    rt.block_on(async move {
-        let sem = Arc::new(Semaphore::new(concurrency));
-        let mut handles = Vec::with_capacity(filtered.len());
-        for cand in filtered {
-            let permit_sem = sem.clone();
-            let clip_client = clip_client.clone();
-            let exif_dao = exif_dao.clone();
-            let library_root = library_root.clone();
-            handles.push(tokio::spawn(async move {
-                let _permit = permit_sem.acquire().await.expect("clip semaphore");
-                process_one(library_id, &library_root, cand, &clip_client, exif_dao).await;
-            }));
-        }
-        for h in handles {
-            let _ = h.await;
-        }
-    });
-}
-
-async fn process_one(
-    library_id: i32,
-    library_root: &str,
-    cand: ClipCandidate,
-    clip_client: &ClipClient,
-    exif_dao: Arc<Mutex<Box<dyn ExifDao>>>,
-) {
-    let abs = Path::new(library_root).join(&cand.rel_path);
-    let bytes = match read_image_bytes_for_encode(&abs) {
-        Ok(b) => b,
-        Err(e) => {
-            // Same rationale as face_watch: don't mark — the file may
-            // have been moved/renamed mid-scan; let the next pass retry.
-            warn!(
-                "clip_watch: read failed for {} (lib {}): {}",
-                cand.rel_path, library_id, e
-            );
-            return;
-        }
-    };
-
-    let meta = EncodeImageMeta {
-        content_hash: cand.content_hash.clone(),
-        library_id,
-        rel_path: cand.rel_path.clone(),
-    };
-    let ctx = opentelemetry::Context::current();
-
-    match clip_client.encode_image(bytes, meta).await {
-        Ok(resp) => {
-            let emb_bytes = match resp.decode_embedding() {
-                Ok(b) => b,
-                Err(e) => {
-                    warn!("clip_watch: bad embedding for {}: {:?}", cand.rel_path, e);
-                    return;
-                }
-            };
-            let mut dao = exif_dao.lock().expect("exif dao");
-            if let Err(e) = dao.backfill_clip_embedding(
-                &ctx,
-                library_id,
-                &cand.rel_path,
-                &emb_bytes,
-                &resp.model_version,
-            ) {
-                warn!(
-                    "clip_watch: backfill_clip_embedding failed for {}: {:?}",
-                    cand.rel_path, e
-                );
-                return;
-            }
-            debug!(
-                "clip_watch: {} → dim={} ({}ms, {})",
-                cand.rel_path, resp.embedding_dim, resp.duration_ms, resp.model_version
-            );
-        }
-        Err(ClipError::Permanent(e)) => {
-            // No marker — the row sits with NULL embedding and the drain
-            // retries next pass. For personal-library scale the cost of
-            // re-attempting permanently-broken files is bounded by the
-            // per-tick cap. If this becomes a recurring noise source,
-            // add a `clip_status` column with `failed` semantics like
-            // face_detections has.
-            warn!(
-                "clip_watch: permanent failure on {} (will retry next pass): {}",
-                cand.rel_path, e
-            );
-        }
-        Err(ClipError::Transient(e)) => {
-            debug!(
-                "clip_watch: transient on {}: {} (will retry next pass)",
-                cand.rel_path, e
-            );
-        }
-        Err(ClipError::Disabled) => {
-            // Defensive — the entry-point already checked is_enabled().
-        }
-    }
-}
-
-/// Drop candidates whose paths land in an excluded dir or whose
-/// extension isn't an image. Mirrors `face_watch::filter_excluded` so
-/// the two backlogs stay shape-consistent. Library name is passed
-/// purely for the log line that surfaces an exclusion hit.
-pub fn filter_excluded(
-    base: &Path,
-    excluded_dirs: &[String],
-    candidates: Vec<ClipCandidate>,
-    library_name: Option<&str>,
-) -> Vec<ClipCandidate> {
-    let excluder = if excluded_dirs.is_empty() {
-        None
-    } else {
-        Some(PathExcluder::new(base, excluded_dirs))
-    };
-    candidates
-        .into_iter()
-        .filter(|c| {
-            let abs = base.join(&c.rel_path);
-            if !file_types::is_image_file(&abs) {
-                debug!(
-                    "clip_watch: skipping non-image '{}' (lib {})",
-                    c.rel_path,
-                    library_name.unwrap_or("<unknown>")
-                );
-                return false;
-            }
-            if let Some(ex) = excluder.as_ref()
-                && ex.is_excluded(&abs)
-            {
-                debug!(
-                    "clip_watch: skipping excluded '{}' (lib {})",
-                    c.rel_path,
-                    library_name.unwrap_or("<unknown>")
-                );
-                return false;
-            }
-            true
-        })
-        .collect()
-}
-
-/// Read image bytes for CLIP encoding. Same logic as
-/// `face_watch::read_image_bytes_for_detect` — RAW / HEIC files don't
-/// decode in Apollo's PIL pipeline, so we pull the embedded JPEG
-/// preview the thumbnail pipeline already extracts. Plain JPEG / PNG /
-/// WebP go through a direct read.
-pub fn read_image_bytes_for_encode(path: &Path) -> std::io::Result<Vec<u8>> {
-    if file_types::needs_ffmpeg_thumbnail(path)
-        && let Some(preview) = exif::extract_embedded_jpeg_preview(path)
-    {
-        return Ok(preview);
-    }
-    std::fs::read(path)
-}
@@ -50,32 +50,14 @@ pub fn thumbnail_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
    thumbs_dir.join(shard).join(format!("{}.jpg", hash))
 }

-/// Hash-keyed large-preview path: `<thumbs_dir>/_large/<hash[..2]>/<hash>.jpg`.
-/// Kept under the same root as 200px thumbs so deployments don't need a
-/// second env var, but namespaced under `_large/` so the existing 200px
-/// shards don't collide with the larger derivative.
-pub fn large_preview_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
-    let shard = shard_prefix(hash);
-    thumbs_dir
-        .join("_large")
-        .join(shard)
-        .join(format!("{}.jpg", hash))
-}
-
-/// Hash-keyed xlarge-preview path: `<thumbs_dir>/_xlarge/<hash[..2]>/<hash>.jpg`.
-pub fn xlarge_preview_path(thumbs_dir: &Path, hash: &str) -> PathBuf {
-    let shard = shard_prefix(hash);
-    thumbs_dir
-        .join("_xlarge")
-        .join(shard)
-        .join(format!("{}.jpg", hash))
-}
-
 /// Hash-keyed HLS output directory: `<video_dir>/<hash[..2]>/<hash>/`.
 /// The playlist lives at `playlist.m3u8` inside this directory and its
-/// segments are co-located so HLS relative references Just Work. See
-/// [`crate::video::hls_paths`] for the filename constants and the
-/// per-file helpers built on this dir.
+/// segments are co-located so HLS relative references Just Work.
+///
+/// Allow-dead until Branch B/C rewires the HLS pipeline to use it; the
+/// helper lives here today so Branch A's path layout decisions stay
+/// adjacent to thumbnail/legacy ones.
+#[allow(dead_code)]
 pub fn hls_dir(video_dir: &Path, hash: &str) -> PathBuf {
    let shard = shard_prefix(hash);
    video_dir.join(shard).join(hash)
@@ -141,9 +123,6 @@ mod tests {
        let p = thumbnail_path(thumbs, "abcdef0123");
        assert_eq!(p, PathBuf::from("/tmp/thumbs/ab/abcdef0123.jpg"));

-        let l = large_preview_path(thumbs, "abcdef0123");
-        assert_eq!(l, PathBuf::from("/tmp/thumbs/_large/ab/abcdef0123.jpg"));
-
        let video = Path::new("/tmp/video");
        let d = hls_dir(video, "1234deadbeef");
        assert_eq!(d, PathBuf::from("/tmp/video/12/1234deadbeef"));
@@ -194,8 +194,6 @@ pub enum MediaType {
 #[serde(rename_all = "lowercase")]
 pub enum PhotoSize {
    Full,
-    XLarge,
-    Large,
    Thumb,
 }

@@ -222,12 +222,11 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            // Validate embedding dimensions if provided
            if let Some(ref emb) = event.embedding
-                && emb.len() != crate::ai::embedding_dim()
+                && emb.len() != 768
            {
                return Err(anyhow::anyhow!(
-                    "Invalid embedding dimensions: {} (expected {})",
-                    emb.len(),
-                    crate::ai::embedding_dim()
+                    "Invalid embedding dimensions: {} (expected 768)",
+                    emb.len()
                ));
            }

@@ -275,7 +274,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
                source_file: event.source_file,
            })
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn store_events_batch(
@@ -294,7 +293,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
                for event in events {
                    // Validate embedding if provided
                    if let Some(ref emb) = event.embedding
-                        && emb.len() != crate::ai::embedding_dim()
+                        && emb.len() != 768
                    {
                        log::warn!(
                            "Skipping event with invalid embedding dimensions: {}",
@@ -349,7 +348,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            Ok(inserted)
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn find_events_in_range(
@@ -374,7 +373,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
            .map(|rows| rows.into_iter().map(|r| r.to_calendar_event()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_similar_events(
@@ -386,11 +385,10 @@ impl CalendarEventDao for SqliteCalendarEventDao {
        trace_db_call(context, "query", "find_similar_events", |_span| {
            let mut conn = self.connection.lock().expect("Unable to get CalendarEventDao");

-            if query_embedding.len() != crate::ai::embedding_dim() {
+            if query_embedding.len() != 768 {
                return Err(anyhow::anyhow!(
-                    "Invalid query embedding dimensions: {} (expected {})",
-                    query_embedding.len(),
-                    crate::ai::embedding_dim()
+                    "Invalid query embedding dimensions: {} (expected 768)",
+                    query_embedding.len()
                ));
            }

@@ -431,7 +429,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            Ok(scored_events.into_iter().take(limit).map(|(_, event)| event).collect())
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_relevant_events_hybrid(
@@ -463,11 +461,10 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            // Step 2: If query embedding provided, rank by semantic similarity
            if let Some(query_emb) = query_embedding {
-                if query_emb.len() != crate::ai::embedding_dim() {
+                if query_emb.len() != 768 {
                    return Err(anyhow::anyhow!(
-                        "Invalid query embedding dimensions: {} (expected {})",
-                        query_emb.len(),
-                        crate::ai::embedding_dim()
+                        "Invalid query embedding dimensions: {} (expected 768)",
+                        query_emb.len()
                    ));
                }

@@ -503,7 +500,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {
                Ok(events_in_range.into_iter().take(limit).map(|r| r.to_calendar_event()).collect())
            }
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn event_exists(
@@ -531,7 +528,7 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            Ok(result.count > 0)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_event_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -554,6 +551,6 @@ impl CalendarEventDao for SqliteCalendarEventDao {

            Ok(result.count)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
 }
@@ -150,11 +150,10 @@ impl DailySummaryDao for SqliteDailySummaryDao {
                .expect("Unable to get DailySummaryDao");

            // Validate embedding dimensions
-            if summary.embedding.len() != crate::ai::embedding_dim() {
+            if summary.embedding.len() != 768 {
                return Err(anyhow::anyhow!(
-                    "Invalid embedding dimensions: {} (expected {})",
-                    summary.embedding.len(),
-                    crate::ai::embedding_dim()
+                    "Invalid embedding dimensions: {} (expected 768)",
+                    summary.embedding.len()
                ));
            }

@@ -191,7 +190,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
                model_version: summary.model_version,
            })
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn find_similar_summaries(
@@ -203,11 +202,10 @@ impl DailySummaryDao for SqliteDailySummaryDao {
        trace_db_call(context, "query", "find_similar_summaries", |_span| {
            let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");

-            if query_embedding.len() != crate::ai::embedding_dim() {
+            if query_embedding.len() != 768 {
                return Err(anyhow::anyhow!(
-                    "Invalid query embedding dimensions: {} (expected {})",
-                    query_embedding.len(),
-                    crate::ai::embedding_dim()
+                    "Invalid query embedding dimensions: {} (expected 768)",
+                    query_embedding.len()
                ));
            }

@@ -288,7 +286,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {

            Ok(top_results)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_similar_summaries_with_time_weight(
@@ -301,11 +299,10 @@ impl DailySummaryDao for SqliteDailySummaryDao {
        trace_db_call(context, "query", "find_similar_summaries_with_time_weight", |_span| {
            let mut conn = self.connection.lock().expect("Unable to get DailySummaryDao");

-            if query_embedding.len() != crate::ai::embedding_dim() {
+            if query_embedding.len() != 768 {
                return Err(anyhow::anyhow!(
-                    "Invalid query embedding dimensions: {} (expected {})",
-                    query_embedding.len(),
-                    crate::ai::embedding_dim()
+                    "Invalid query embedding dimensions: {} (expected 768)",
+                    query_embedding.len()
                ));
            }

@@ -411,7 +408,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {

            Ok(top_results)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn summary_exists(
@@ -438,7 +435,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {

            Ok(count > 0)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_summary_count(
@@ -460,7 +457,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {
            .map(|r| r.count)
            .map_err(|e| anyhow::anyhow!("Count query error: {:?}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn has_any_summaries(&mut self, context: &opentelemetry::Context) -> Result<bool, DbError> {
@@ -484,7 +481,7 @@ impl DailySummaryDao for SqliteDailySummaryDao {

            Ok(!rows.is_empty())
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
 }

@@ -1,681 +0,0 @@
-use diesel::prelude::*;
-use diesel::sqlite::SqliteConnection;
-use std::ops::DerefMut;
-use std::sync::{Arc, Mutex};
-
-use crate::database::models::{
-    InsertInsightGenerationJob, InsightGenerationJob, InsightGenerationType, InsightJobStatus,
-};
-use crate::database::schema;
-use crate::database::{DbError, DbErrorKind, connect};
-use crate::otel::trace_db_call;
-
-/// Tracks async insight generation jobs. Each call to `create_job` inserts
-/// a new row; the application layer prevents concurrent running jobs by
-/// cancelling the old one before creating a new one.
-pub trait InsightGenerationJobDao: Sync + Send {
-    /// Insert a new running job. Always creates a new row (no upsert).
-    /// Cleans up terminal-state rows for the same key first.
-    fn create_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-        file_path: &str,
-        generation_type: InsightGenerationType,
-    ) -> Result<i32, DbError>;
-
-    /// Mark a job as completed with the resulting insight id. Only updates
-    /// if the job is still in "running" status (prevents overwriting a
-    /// cancelled job with a late-completing task).
-    fn complete_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        job_id: i32,
-        insight_id: i32,
-    ) -> Result<(), DbError>;
-
-    /// Mark a job as failed with an error message. Only updates if the job
-    /// is still in "running" status.
-    fn fail_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        job_id: i32,
-        error_message: &str,
-    ) -> Result<(), DbError>;
-
-    /// Cancel a specific job by id. Only updates if the job is still
-    /// in "running" status. Returns true if a row was updated.
-    fn cancel_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        job_id: i32,
-    ) -> Result<bool, DbError>;
-
-    /// Cancel all running jobs for a given file. Returns the number of
-    /// jobs cancelled.
-    fn cancel_active_jobs(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-        file_path: &str,
-    ) -> Result<usize, DbError>;
-
-    /// Find the latest running job for a given file. Returns None if no
-    /// running job exists.
-    fn get_active_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-        file_path: &str,
-    ) -> Result<Option<InsightGenerationJob>, DbError>;
-
-    /// Find any job by id regardless of status.
-    fn get_job_by_id(
-        &mut self,
-        context: &opentelemetry::Context,
-        job_id: i32,
-    ) -> Result<Option<InsightGenerationJob>, DbError>;
-
-    /// Mark all jobs still in "running" status as "failed" with a recovery
-    /// error message. Returns the number of jobs recovered.
-    fn recover_orphaned_jobs(&mut self, context: &opentelemetry::Context)
-    -> Result<usize, DbError>;
-}
-
-pub struct SqliteInsightGenerationJobDao {
-    connection: Arc<Mutex<SqliteConnection>>,
-}
-
-impl Default for SqliteInsightGenerationJobDao {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl SqliteInsightGenerationJobDao {
-    pub fn new() -> Self {
-        Self {
-            connection: Arc::new(Mutex::new(connect())),
-        }
-    }
-
-    #[cfg(test)]
-    pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
-        Self { connection: conn }
-    }
-}
-
-impl InsightGenerationJobDao for SqliteInsightGenerationJobDao {
-    fn create_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-        file_path: &str,
-        generation_type: InsightGenerationType,
-    ) -> Result<i32, DbError> {
-        trace_db_call(context, "insert", "create_job", |_span| {
-            use schema::insight_generation_jobs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock InsightGenerationJobDao");
-
-            let now = std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .expect("Time went backwards")
-                .as_secs() as i64;
-
-            let new_job = InsertInsightGenerationJob {
-                library_id,
-                path: file_path.to_string(),
-                gen_type: generation_type.to_string(),
-                status: InsightJobStatus::Running.to_string(),
-                started_at: now,
-            };
-
-            diesel::insert_into(dsl::insight_generation_jobs)
-                .values(&new_job)
-                .execute(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to insert job: {}", e))?;
-
-            dsl::insight_generation_jobs
-                .filter(
-                    dsl::library_id
-                        .eq(library_id)
-                        .and(dsl::file_path.eq(file_path))
-                        .and(dsl::generation_type.eq(generation_type.as_str()))
-                        .and(dsl::status.eq(InsightJobStatus::Running.as_str())),
-                )
-                .select(dsl::id)
-                .order(dsl::id.desc())
-                .first::<i32>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to get job id: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
-    fn complete_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        job_id: i32,
-        insight_id: i32,
-    ) -> Result<(), DbError> {
-        trace_db_call(context, "update", "complete_job", |_span| {
-            use schema::insight_generation_jobs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock InsightGenerationJobDao");
-
-            let now = std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .expect("Time went backwards")
-                .as_secs() as i64;
-
-            // Only update if still running — prevents cancelled job from
-            // being overwritten by a late-completing task.
-            diesel::update(
-                dsl::insight_generation_jobs.filter(
-                    dsl::id
-                        .eq(job_id)
-                        .and(dsl::status.eq(InsightJobStatus::Running.as_str())),
-                ),
-            )
-            .set((
-                dsl::status.eq(InsightJobStatus::Completed.as_str()),
-                dsl::completed_at.eq(Some(now)),
-                dsl::result_insight_id.eq(Some(insight_id)),
-            ))
-            .execute(connection.deref_mut())
-            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Failed to complete job: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-
-    fn fail_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        job_id: i32,
-        error_message: &str,
-    ) -> Result<(), DbError> {
-        trace_db_call(context, "update", "fail_job", |_span| {
-            use schema::insight_generation_jobs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock InsightGenerationJobDao");
-
-            let now = std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .expect("Time went backwards")
-                .as_secs() as i64;
-
-            // Only update if still running.
-            diesel::update(
-                dsl::insight_generation_jobs.filter(
-                    dsl::id
-                        .eq(job_id)
-                        .and(dsl::status.eq(InsightJobStatus::Running.as_str())),
-                ),
-            )
-            .set((
-                dsl::status.eq(InsightJobStatus::Failed.as_str()),
-                dsl::completed_at.eq(Some(now)),
-                dsl::error_message.eq(Some(error_message.to_string())),
-            ))
-            .execute(connection.deref_mut())
-            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Failed to fail job: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-
-    fn cancel_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        job_id: i32,
-    ) -> Result<bool, DbError> {
-        trace_db_call(context, "update", "cancel_job", |_span| {
-            use schema::insight_generation_jobs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock InsightGenerationJobDao");
-
-            let now = std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .expect("Time went backwards")
-                .as_secs() as i64;
-
-            let rows = diesel::update(
-                dsl::insight_generation_jobs.filter(
-                    dsl::id
-                        .eq(job_id)
-                        .and(dsl::status.eq(InsightJobStatus::Running.as_str())),
-                ),
-            )
-            .set((
-                dsl::status.eq(InsightJobStatus::Cancelled.as_str()),
-                dsl::completed_at.eq(Some(now)),
-                dsl::error_message.eq(Some("cancelled by user".to_string())),
-            ))
-            .execute(connection.deref_mut())
-            .map_err(|e| anyhow::anyhow!("Failed to cancel job: {}", e))?;
-
-            Ok(rows > 0)
-        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-
-    fn cancel_active_jobs(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-        file_path: &str,
-    ) -> Result<usize, DbError> {
-        trace_db_call(context, "update", "cancel_active_jobs", |_span| {
-            use schema::insight_generation_jobs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock InsightGenerationJobDao");
-
-            let now = std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .expect("Time went backwards")
-                .as_secs() as i64;
-
-            let rows = diesel::update(
-                dsl::insight_generation_jobs.filter(
-                    dsl::library_id
-                        .eq(library_id)
-                        .and(dsl::file_path.eq(file_path))
-                        .and(dsl::status.eq(InsightJobStatus::Running.as_str())),
-                ),
-            )
-            .set((
-                dsl::status.eq(InsightJobStatus::Cancelled.as_str()),
-                dsl::completed_at.eq(Some(now)),
-                dsl::error_message.eq(Some("cancelled by newer request".to_string())),
-            ))
-            .execute(connection.deref_mut())
-            .map_err(|e| anyhow::anyhow!("Failed to cancel active jobs: {}", e))?;
-
-            Ok(rows)
-        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-
-    fn get_active_job(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-        file_path: &str,
-    ) -> Result<Option<InsightGenerationJob>, DbError> {
-        trace_db_call(context, "query", "get_active_job", |_span| {
-            use schema::insight_generation_jobs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock InsightGenerationJobDao");
-
-            dsl::insight_generation_jobs
-                .filter(
-                    dsl::library_id
-                        .eq(library_id)
-                        .and(dsl::file_path.eq(file_path))
-                        .and(dsl::status.eq(InsightJobStatus::Running.as_str())),
-                )
-                .order(dsl::id.desc())
-                .first::<InsightGenerationJob>(connection.deref_mut())
-                .optional()
-                .map_err(|e| anyhow::anyhow!("Failed to get active job: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
-    fn get_job_by_id(
-        &mut self,
-        context: &opentelemetry::Context,
-        job_id: i32,
-    ) -> Result<Option<InsightGenerationJob>, DbError> {
-        trace_db_call(context, "query", "get_job_by_id", |_span| {
-            use schema::insight_generation_jobs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock InsightGenerationJobDao");
-
-            dsl::insight_generation_jobs
-                .filter(dsl::id.eq(job_id))
-                .first::<InsightGenerationJob>(connection.deref_mut())
-                .optional()
-                .map_err(|e| anyhow::anyhow!("Failed to get job: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
-    fn recover_orphaned_jobs(
-        &mut self,
-        context: &opentelemetry::Context,
-    ) -> Result<usize, DbError> {
-        trace_db_call(context, "update", "recover_orphaned_jobs", |_span| {
-            use schema::insight_generation_jobs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock InsightGenerationJobDao");
-
-            let now = std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .expect("Time went backwards")
-                .as_secs() as i64;
-
-            let rows = diesel::update(
-                dsl::insight_generation_jobs
-                    .filter(dsl::status.eq(InsightJobStatus::Running.as_str())),
-            )
-            .set((
-                dsl::status.eq(InsightJobStatus::Failed.as_str()),
-                dsl::completed_at.eq(Some(now)),
-                dsl::error_message.eq(Some("server crashed while running".to_string())),
-            ))
-            .execute(connection.deref_mut())
-            .map_err(|e| anyhow::anyhow!("Failed to recover orphaned jobs: {}", e))?;
-
-            Ok(rows)
-        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use diesel::Connection;
-    use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
-
-    const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
-
-    fn setup_dao() -> SqliteInsightGenerationJobDao {
-        let mut conn = SqliteConnection::establish(":memory:")
-            .expect("Unable to create in-memory db connection");
-        conn.run_pending_migrations(DB_MIGRATIONS)
-            .expect("Failure running DB migrations");
-        SqliteInsightGenerationJobDao::from_connection(Arc::new(Mutex::new(conn)))
-    }
-
-    fn ctx() -> opentelemetry::Context {
-        opentelemetry::Context::new()
-    }
-
-    #[test]
-    fn create_job_inserts_new_row() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id_1 = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        let job_id_2 = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        assert_ne!(job_id_1, job_id_2, "each create_job call inserts a new row");
-    }
-
-    #[test]
-    fn complete_job_sets_result() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        dao.complete_job(&ctx, job_id, 42).unwrap();
-
-        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
-        assert_eq!(job.status, InsightJobStatus::Completed.as_str());
-        assert_eq!(job.result_insight_id, Some(42));
-        assert!(job.completed_at.is_some());
-    }
-
-    #[test]
-    fn fail_job_sets_error() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
-            .unwrap();
-
-        dao.fail_job(&ctx, job_id, "model timeout").unwrap();
-
-        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
-        assert_eq!(job.status, InsightJobStatus::Failed.as_str());
-        assert_eq!(job.error_message.as_deref(), Some("model timeout"));
-        assert!(job.completed_at.is_some());
-    }
-
-    #[test]
-    fn get_active_job_returns_none_when_completed() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        // Job is running
-        let active = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
-        assert!(active.is_some());
-        assert_eq!(active.unwrap().id, job_id);
-
-        // Complete it
-        dao.complete_job(&ctx, job_id, 1).unwrap();
-
-        // No longer active
-        let active = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
-        assert!(active.is_none());
-    }
-
-    #[test]
-    fn cancel_active_jobs() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        let cancelled = dao.cancel_active_jobs(&ctx, 1, "photos/test.jpg").unwrap();
-        assert_eq!(cancelled, 1, "should cancel 1 running job");
-
-        // Job is no longer active
-        let active = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
-        assert!(active.is_none());
-
-        // Job exists with cancelled status
-        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
-        assert_eq!(job.status, InsightJobStatus::Cancelled.as_str());
-
-        // Cancelling again returns 0 (nothing to cancel)
-        let cancelled2 = dao.cancel_active_jobs(&ctx, 1, "photos/test.jpg").unwrap();
-        assert_eq!(cancelled2, 0, "should return 0 when no running job");
-    }
-
-    #[test]
-    fn get_active_job_scoped_by_library() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id_1 = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        let job_id_2 = dao
-            .create_job(&ctx, 2, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        assert_ne!(
-            job_id_1, job_id_2,
-            "different libraries should have separate jobs"
-        );
-
-        // Complete lib1's job
-        dao.complete_job(&ctx, job_id_1, 1).unwrap();
-
-        // lib1 has no active job
-        let active1 = dao.get_active_job(&ctx, 1, "photos/test.jpg").unwrap();
-        assert!(active1.is_none());
-
-        // lib2 still has active job
-        let active2 = dao.get_active_job(&ctx, 2, "photos/test.jpg").unwrap();
-        assert!(active2.is_some());
-        assert_eq!(active2.unwrap().id, job_id_2);
-    }
-
-    #[test]
-    fn get_job_by_id_finds_any_status() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        // Find while running
-        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
-        assert_eq!(job.status, InsightJobStatus::Running.as_str());
-
-        // Complete it
-        dao.complete_job(&ctx, job_id, 99).unwrap();
-
-        // Still findable
-        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
-        assert_eq!(job.status, InsightJobStatus::Completed.as_str());
-        assert_eq!(job.result_insight_id, Some(99));
-    }
-
-    #[test]
-    fn recover_orphaned_jobs() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        // Create two running jobs
-        let job_id_1 = dao
-            .create_job(&ctx, 1, "photos/a.jpg", InsightGenerationType::Standard)
-            .unwrap();
-        let job_id_2 = dao
-            .create_job(&ctx, 1, "photos/b.jpg", InsightGenerationType::Agentic)
-            .unwrap();
-
-        // Complete one
-        dao.complete_job(&ctx, job_id_1, 1).unwrap();
-
-        // Recover should only affect the running job
-        let recovered = dao.recover_orphaned_jobs(&ctx).unwrap();
-        assert_eq!(recovered, 1, "should recover exactly 1 running job");
-
-        // job_id_1 is still completed
-        let job1 = dao.get_job_by_id(&ctx, job_id_1).unwrap().unwrap();
-        assert_eq!(job1.status, InsightJobStatus::Completed.as_str());
-
-        // job_id_2 is now failed with recovery message
-        let job2 = dao.get_job_by_id(&ctx, job_id_2).unwrap().unwrap();
-        assert_eq!(job2.status, InsightJobStatus::Failed.as_str());
-        assert_eq!(
-            job2.error_message.as_deref(),
-            Some("server crashed while running")
-        );
-
-        // Second recovery is a no-op
-        let recovered2 = dao.recover_orphaned_jobs(&ctx).unwrap();
-        assert_eq!(recovered2, 0, "no running jobs remain");
-    }
-
-    #[test]
-    fn complete_job_noop_when_cancelled() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        dao.cancel_job(&ctx, job_id).unwrap();
-
-        // Late-completing task tries to mark as completed — should be a no-op
-        dao.complete_job(&ctx, job_id, 42).unwrap();
-
-        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
-        assert_eq!(
-            job.status,
-            InsightJobStatus::Cancelled.as_str(),
-            "cancelled status must not be overwritten by late complete"
-        );
-        assert_eq!(
-            job.result_insight_id, None,
-            "insight_id must stay None when complete is a no-op"
-        );
-    }
-
-    #[test]
-    fn fail_job_noop_when_cancelled() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Agentic)
-            .unwrap();
-
-        dao.cancel_job(&ctx, job_id).unwrap();
-
-        // Late-failing task tries to mark as failed — should be a no-op
-        dao.fail_job(&ctx, job_id, "timeout after 120s").unwrap();
-
-        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
-        assert_eq!(
-            job.status,
-            InsightJobStatus::Cancelled.as_str(),
-            "cancelled status must not be overwritten by late fail"
-        );
-        assert_eq!(
-            job.error_message.as_deref(),
-            Some("cancelled by user"),
-            "error_message must reflect the cancel, not the late fail"
-        );
-    }
-
-    #[test]
-    fn cancel_job_by_id() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let job_id = dao
-            .create_job(&ctx, 1, "photos/test.jpg", InsightGenerationType::Standard)
-            .unwrap();
-
-        let cancelled = dao.cancel_job(&ctx, job_id).unwrap();
-        assert!(cancelled, "should cancel running job");
-
-        let job = dao.get_job_by_id(&ctx, job_id).unwrap().unwrap();
-        assert_eq!(job.status, InsightJobStatus::Cancelled.as_str());
-        assert!(job.completed_at.is_some());
-
-        // Cancelling again is a no-op
-        let cancelled2 = dao.cancel_job(&ctx, job_id).unwrap();
-        assert!(!cancelled2, "already cancelled job should return false");
-    }
-}
@@ -47,6 +47,7 @@ pub trait InsightDao: Sync + Send {
        paths: &[String],
    ) -> Result<Option<PhotoInsight>, DbError>;

+    #[allow(dead_code)]
    fn get_insight_history(
        &mut self,
        context: &opentelemetry::Context,
@@ -81,17 +82,6 @@ pub trait InsightDao: Sync + Send {
        approved: bool,
    ) -> Result<(), DbError>;

-    /// Rate a specific insight version by primary key, regardless of
-    /// `is_current`. Used by the per-file history view to approve/reject
-    /// previously generated (superseded) versions, which the path-based
-    /// `rate_insight` (current row only) cannot reach.
-    fn rate_insight_by_id(
-        &mut self,
-        context: &opentelemetry::Context,
-        insight_id: i32,
-        approved: bool,
-    ) -> Result<(), DbError>;
-
    fn get_approved_insights(
        &mut self,
        context: &opentelemetry::Context,
@@ -100,15 +90,13 @@ pub trait InsightDao: Sync + Send {
    /// Replace the `training_messages` JSON blob on the current row for
    /// `(library_id, rel_path)`. Used by chat-turn append mode to persist
    /// the extended conversation without inserting a new insight version.
-    /// Returns the number of rows affected (0 if no current row matched,
-    /// indicating a concurrent regenerate/reconcile flipped `is_current`).
    fn update_training_messages(
        &mut self,
        context: &opentelemetry::Context,
        library_id: i32,
        file_path: &str,
        training_messages_json: &str,
-    ) -> Result<usize, DbError>;
+    ) -> Result<(), DbError>;
 }

 pub struct SqliteInsightDao {
@@ -171,13 +159,13 @@ impl InsightDao for SqliteInsightDao {
            )
            .set(is_current.eq(false))
            .execute(connection.deref_mut())
-            .map_err(|e| anyhow::anyhow!("Failed to flip is_current: {}", e))?;
+            .map_err(|_| anyhow::anyhow!("Update is_current error"))?;

            // Insert the new insight as current
            diesel::insert_into(photo_insights)
                .values(&insight)
                .execute(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to insert insight: {}", e))?;
+                .map_err(|_| anyhow::anyhow!("Insert error"))?;

            // Retrieve the inserted record (is_current = true)
            photo_insights
@@ -185,12 +173,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(rel_path.eq(&insight.file_path))
                .filter(is_current.eq(true))
                .first::<PhotoInsight>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to retrieve inserted insight: {}", e))
-        })
-        .map_err(|e| {
-            log::error!("store_insight failed: {}", e);
-            DbError::new(DbErrorKind::InsertError)
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn get_insight(
@@ -208,9 +193,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(is_current.eq(true))
                .first::<PhotoInsight>(connection.deref_mut())
                .optional()
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_current_insight_for_library(
@@ -234,10 +219,10 @@ impl InsightDao for SqliteInsightDao {
                    .filter(is_current.eq(true))
                    .first::<PhotoInsight>(connection.deref_mut())
                    .optional()
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                    .map_err(|_| anyhow::anyhow!("Query error"))
            },
        )
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_insight_for_paths(
@@ -259,9 +244,9 @@ impl InsightDao for SqliteInsightDao {
                .order(generated_at.desc())
                .first::<PhotoInsight>(connection.deref_mut())
                .optional()
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_insight_history(
@@ -278,9 +263,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(rel_path.eq(path))
                .order(generated_at.desc())
                .load::<PhotoInsight>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_insight_by_id(
@@ -297,9 +282,9 @@ impl InsightDao for SqliteInsightDao {
                .find(insight_id)
                .first::<PhotoInsight>(connection.deref_mut())
                .optional()
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn delete_insight(
@@ -315,9 +300,9 @@ impl InsightDao for SqliteInsightDao {
            diesel::delete(photo_insights.filter(rel_path.eq(path)))
                .execute(connection.deref_mut())
                .map(|_| ())
-                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Delete error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_all_insights(
@@ -333,9 +318,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(is_current.eq(true))
                .order(generated_at.desc())
                .load::<PhotoInsight>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn rate_insight(
@@ -357,29 +342,9 @@ impl InsightDao for SqliteInsightDao {
            .set(approved.eq(Some(is_approved)))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
+            .map_err(|_| anyhow::anyhow!("Update error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-
-    fn rate_insight_by_id(
-        &mut self,
-        context: &opentelemetry::Context,
-        target_id: i32,
-        is_approved: bool,
-    ) -> Result<(), DbError> {
-        trace_db_call(context, "update", "rate_insight_by_id", |_span| {
-            use schema::photo_insights::dsl::*;
-
-            let mut connection = self.connection.lock().expect("Unable to get InsightDao");
-
-            diesel::update(photo_insights.find(target_id))
-                .set(approved.eq(Some(is_approved)))
-                .execute(connection.deref_mut())
-                .map(|_| ())
-                .map_err(|e| anyhow::anyhow!("Update error: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn get_approved_insights(
@@ -396,9 +361,9 @@ impl InsightDao for SqliteInsightDao {
                .filter(training_messages.is_not_null())
                .order(generated_at.desc())
                .load::<PhotoInsight>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn update_training_messages(
@@ -407,7 +372,7 @@ impl InsightDao for SqliteInsightDao {
        lib_id: i32,
        path: &str,
        training_messages_json: &str,
-    ) -> Result<usize, DbError> {
+    ) -> Result<(), DbError> {
        trace_db_call(context, "update", "update_training_messages", |_span| {
            use schema::photo_insights::dsl::*;

@@ -421,95 +386,9 @@ impl InsightDao for SqliteInsightDao {
            )
            .set(training_messages.eq(Some(training_messages_json.to_string())))
            .execute(connection.deref_mut())
-            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
+            .map(|_| ())
+            .map_err(|_| anyhow::anyhow!("Update error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::database::test::in_memory_db_connection;
-
-    fn dao() -> SqliteInsightDao {
-        let conn = Arc::new(Mutex::new(in_memory_db_connection()));
-        SqliteInsightDao::from_connection(conn)
-    }
-
-    /// Build an insight insert with sensible defaults; tests override the
-    /// fields they care about (path, generated_at, model).
-    fn insert(path: &str, generated_at: i64, model: &str) -> InsertPhotoInsight {
-        InsertPhotoInsight {
-            library_id: 1,
-            file_path: path.to_string(),
-            title: format!("title for {model}"),
-            summary: "summary".to_string(),
-            generated_at,
-            model_version: model.to_string(),
-            is_current: true,
-            training_messages: None,
-            backend: "local".to_string(),
-            fewshot_source_ids: None,
-            content_hash: None,
-            num_ctx: None,
-            temperature: None,
-            top_p: None,
-            top_k: None,
-            min_p: None,
-            system_prompt: None,
-            persona_id: None,
-            prompt_eval_count: None,
-            eval_count: None,
-        }
-    }
-
-    #[test]
-    fn get_insight_history_returns_all_versions_newest_first() {
-        let cx = opentelemetry::Context::new();
-        let mut dao = dao();
-
-        // store_insight flips prior rows to is_current=false, so three
-        // generations for the same path leave a 3-row history.
-        dao.store_insight(&cx, insert("a.jpg", 100, "m1")).unwrap();
-        dao.store_insight(&cx, insert("a.jpg", 200, "m2")).unwrap();
-        dao.store_insight(&cx, insert("a.jpg", 300, "m3")).unwrap();
-        // A different path must not leak into the history.
-        dao.store_insight(&cx, insert("b.jpg", 250, "other"))
-            .unwrap();
-
-        let history = dao.get_insight_history(&cx, "a.jpg").unwrap();
-        assert_eq!(history.len(), 3);
-        assert_eq!(
-            history.iter().map(|i| i.generated_at).collect::<Vec<_>>(),
-            vec![300, 200, 100],
-            "history should be newest-first"
-        );
-        // Exactly one version is current (the latest generation).
-        let current: Vec<_> = history.iter().filter(|i| i.is_current).collect();
-        assert_eq!(current.len(), 1);
-        assert_eq!(current[0].generated_at, 300);
-    }
-
-    #[test]
-    fn rate_insight_by_id_rates_only_the_targeted_version() {
-        let cx = opentelemetry::Context::new();
-        let mut dao = dao();
-
-        dao.store_insight(&cx, insert("a.jpg", 100, "m1")).unwrap();
-        dao.store_insight(&cx, insert("a.jpg", 200, "m2")).unwrap();
-
-        // History is newest-first: [200 (current), 100 (superseded)].
-        let history = dao.get_insight_history(&cx, "a.jpg").unwrap();
-        let old_version = history.iter().find(|i| i.generated_at == 100).unwrap();
-        assert!(!old_version.is_current);
-
-        dao.rate_insight_by_id(&cx, old_version.id, true).unwrap();
-
-        let history = dao.get_insight_history(&cx, "a.jpg").unwrap();
-        let old = history.iter().find(|i| i.generated_at == 100).unwrap();
-        let current = history.iter().find(|i| i.generated_at == 200).unwrap();
-        assert_eq!(old.approved, Some(true), "targeted version is rated");
-        assert_eq!(current.approved, None, "current version is untouched");
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }
 }
@@ -235,7 +235,6 @@ pub trait KnowledgeDao: Sync + Send {
    ///   - entity_type: optional, restricts nodes to one type
    ///   - node_limit: caps the number of nodes; lower-fact-count
    ///     entities drop first
-    ///
    /// Edges between dropped entities are pruned. Persona scoping
    /// affects fact_count + edge inclusion (rejected / superseded
    /// excluded; All vs Single mirrors the existing pattern).
@@ -582,7 +581,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
            }
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn get_entity_by_id(
@@ -599,7 +598,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_entity_by_name(
@@ -624,7 +623,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .load::<Entity>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_entities_with_embeddings(
@@ -649,7 +648,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .load::<Entity>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn list_entities(
@@ -706,7 +705,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {

            Ok((results, total))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn list_entities_with_fact_counts(
@@ -894,7 +893,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {

            Ok((pairs, total))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_predicate_stats(
@@ -938,10 +937,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            let mut conn = self.connection.lock().expect("KnowledgeDao lock");
            let mut q = sql_query(sql).into_boxed();
            match persona {
-                PersonaFilter::Single {
-                    user_id,
-                    persona_id,
-                } => {
+                PersonaFilter::Single { user_id, persona_id } => {
                    q = q
                        .bind::<Integer, _>(*user_id)
                        .bind::<Text, _>(persona_id.clone());
@@ -957,7 +953,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
            Ok(rows.into_iter().map(|r| (r.predicate, r.cnt)).collect())
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn bulk_reject_facts_by_predicate(
@@ -981,10 +977,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            // rows flip — REVIEWED survives so the curator can preserve
            // a hand-approved exception under the same predicate.
            let touched = match persona {
-                PersonaFilter::Single {
-                    user_id: uid,
-                    persona_id: pid,
-                } => diesel::update(
+                PersonaFilter::Single { user_id: uid, persona_id: pid } => diesel::update(
                    entity_facts
                        .filter(predicate.eq(target_predicate))
                        .filter(user_id.eq(*uid))
@@ -1016,7 +1009,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            };
            Ok(touched)
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn build_entity_graph(
@@ -1194,7 +1187,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {

            Ok(EntityGraph { nodes, edges })
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_consolidation_proposals(
@@ -1289,7 +1282,8 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                        Some(v) => v,
                        None => continue,
                    };
-                    for &ib in &indices[a + 1..] {
+                    for b in (a + 1)..indices.len() {
+                        let ib = indices[b];
                        let vb = match &decoded[ib] {
                            Some(v) => v,
                            None => continue,
@@ -1349,7 +1343,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            result.truncate(max_groups);
            Ok(result)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_persona_breakdowns_for_entities(
@@ -1411,7 +1405,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            }
            Ok(out)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn update_entity_status(
@@ -1429,7 +1423,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn update_entity(
@@ -1475,7 +1469,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn delete_entity(
@@ -1565,7 +1559,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            })
            .map_err(|e| anyhow::anyhow!("Merge transaction error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    // -----------------------------------------------------------------------
@@ -1636,7 +1630,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                Ok((inserted, true)) // true = newly created
            }
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn get_facts_for_entity(
@@ -1662,7 +1656,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            q.load::<EntityFact>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn list_facts(
@@ -1719,7 +1713,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {

            Ok((results, total))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn update_fact(
@@ -1801,7 +1795,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn update_facts_insight_id(
@@ -1823,7 +1817,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            .map(|_| ())
            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn delete_fact(&mut self, cx: &opentelemetry::Context, fact_id: i32) -> Result<(), DbError> {
@@ -2015,7 +2009,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
            .map(|_| ())
            .map_err(|e| anyhow::anyhow!("Insert error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn delete_photo_links_for_file(
@@ -2031,7 +2025,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_links_for_photo(
@@ -2047,7 +2041,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .load::<EntityPhotoLink>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_links_for_entity(
@@ -2063,7 +2057,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                .load::<EntityPhotoLink>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    // -----------------------------------------------------------------------
@@ -2111,7 +2105,7 @@ impl KnowledgeDao for SqliteKnowledgeDao {
                facts: recent_facts,
            })
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
 }

@@ -216,12 +216,11 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            // Validate embedding dimensions if provided (rare for location data)
            if let Some(ref emb) = location.embedding
-                && emb.len() != crate::ai::embedding_dim()
+                && emb.len() != 768
            {
                return Err(anyhow::anyhow!(
-                    "Invalid embedding dimensions: {} (expected {})",
-                    emb.len(),
-                    crate::ai::embedding_dim()
+                    "Invalid embedding dimensions: {} (expected 768)",
+                    emb.len()
                ));
            }

@@ -274,7 +273,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
                source_file: location.source_file,
            })
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn store_locations_batch(
@@ -293,7 +292,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
                for location in locations {
                    // Validate embedding if provided (rare)
                    if let Some(ref emb) = location.embedding
-                        && emb.len() != crate::ai::embedding_dim()
+                        && emb.len() != 768
                    {
                        log::warn!(
                            "Skipping location with invalid embedding dimensions: {}",
@@ -351,7 +350,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(inserted)
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn find_nearest_location(
@@ -386,7 +385,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(results.into_iter().next().map(|r| r.to_location_record()))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_locations_in_range(
@@ -414,7 +413,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {
            .map(|rows| rows.into_iter().map(|r| r.to_location_record()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_locations_near_point(
@@ -469,7 +468,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(filtered)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn location_exists(
@@ -503,7 +502,7 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(result.count > 0)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_location_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -526,6 +525,6 @@ impl LocationHistoryDao for SqliteLocationHistoryDao {

            Ok(result.count)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
 }
@@ -45,22 +45,18 @@ pub struct DuplicateRow {

 pub mod calendar_dao;
 pub mod daily_summary_dao;
-pub mod insight_generation_job_dao;
 pub mod insights_dao;
 pub mod knowledge_dao;
 pub mod location_dao;
 pub mod models;
 pub mod persona_dao;
-pub mod precomputed_reel_dao;
 pub mod preview_dao;
 pub mod reconcile;
 pub mod schema;
 pub mod search_dao;
-pub mod user_ai_prefs_dao;

 pub use calendar_dao::{CalendarEventDao, SqliteCalendarEventDao};
 pub use daily_summary_dao::{DailySummaryDao, InsertDailySummary, SqliteDailySummaryDao};
-pub use insight_generation_job_dao::{InsightGenerationJobDao, SqliteInsightGenerationJobDao};
 pub use insights_dao::{InsightDao, SqliteInsightDao};
 pub use knowledge_dao::{
    ConsolidationGroup, EntityFilter, EntityGraph, EntityPatch, EntitySort, FactFilter, FactPatch,
@@ -68,10 +64,8 @@ pub use knowledge_dao::{
 };
 pub use location_dao::{LocationHistoryDao, SqliteLocationHistoryDao};
 pub use persona_dao::{ImportPersona, PersonaDao, PersonaPatch, SqlitePersonaDao};
-pub use precomputed_reel_dao::{PrecomputedReelDao, SqlitePrecomputedReelDao};
 pub use preview_dao::{PreviewDao, SqlitePreviewDao};
 pub use search_dao::{SearchHistoryDao, SqliteSearchHistoryDao};
-pub use user_ai_prefs_dao::{SqliteUserAiPrefsDao, UserAiPrefsDao};

 pub trait UserDao {
    fn create_user(&mut self, user: &str, password: &str) -> Option<User>;
@@ -197,26 +191,14 @@ pub fn connect() -> SqliteConnection {
    conn
 }

+#[derive(Debug)]
 pub struct DbError {
    pub kind: DbErrorKind,
-    pub source: Option<String>,
 }

 impl DbError {
    fn new(kind: DbErrorKind) -> Self {
-        DbError { kind, source: None }
-    }
-
-    /// Capture the source error message AND log it. Callers should use
-    /// this from `map_err` closures so the underlying Diesel/SQLite
-    /// error survives the conversion to `DbError`.
-    fn log(kind: DbErrorKind, source: impl std::fmt::Display) -> Self {
-        let msg = source.to_string();
-        log::error!("DB {:?}: {}", kind, msg);
-        DbError {
-            kind,
-            source: Some(msg),
-        }
+        DbError { kind }
    }

    fn exists() -> Self {
@@ -224,26 +206,6 @@ impl DbError {
    }
 }

-impl std::fmt::Debug for DbError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match &self.source {
-            Some(s) => write!(f, "DbError {{ kind: {:?}, source: {} }}", self.kind, s),
-            None => write!(f, "DbError {{ kind: {:?} }}", self.kind),
-        }
-    }
-}
-
-impl std::fmt::Display for DbError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match &self.source {
-            Some(s) => write!(f, "{:?}: {}", self.kind, s),
-            None => write!(f, "{:?}", self.kind),
-        }
-    }
-}
-
-impl std::error::Error for DbError {}
-
 #[derive(Debug, PartialEq)]
 pub enum DbErrorKind {
    AlreadyExists,
@@ -298,7 +260,7 @@ impl FavoriteDao for SqliteFavoriteDao {
                    path: favorite_path,
                })
                .execute(connection.deref_mut())
-                .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+                .map_err(|_| DbError::new(DbErrorKind::InsertError))
        } else {
            Err(DbError::exists())
        }
@@ -319,7 +281,7 @@ impl FavoriteDao for SqliteFavoriteDao {
        favorites
            .filter(userid.eq(user_id))
            .load::<Favorite>(self.connection.lock().unwrap().deref_mut())
-            .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+            .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn update_path(&mut self, old_path: &str, new_path: &str) -> Result<(), DbError> {
@@ -328,7 +290,7 @@ impl FavoriteDao for SqliteFavoriteDao {
        diesel::update(favorites.filter(rel_path.eq(old_path)))
            .set(rel_path.eq(new_path))
            .execute(self.connection.lock().unwrap().deref_mut())
-            .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))?;
+            .map_err(|_| DbError::new(DbErrorKind::UpdateError))?;
        Ok(())
    }

@@ -339,7 +301,7 @@ impl FavoriteDao for SqliteFavoriteDao {
            .select(rel_path)
            .distinct()
            .load(self.connection.lock().unwrap().deref_mut())
-            .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+            .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
 }

@@ -452,27 +414,6 @@ pub trait ExifDao: Sync + Send {
        size_bytes: i64,
    ) -> Result<(), DbError>;

-    /// Every distinct non-NULL `content_hash` across all libraries. Used
-    /// by HLS orphan cleanup to identify hash dirs under `$VIDEO_PATH`
-    /// whose source video no longer exists. Cheap query (single column,
-    /// indexed) but unbounded in size — the result is a HashSet membership
-    /// check, so a 100k-photo library produces ~100k strings.
-    fn list_distinct_content_hashes(
-        &mut self,
-        context: &opentelemetry::Context,
-    ) -> Result<Vec<String>, DbError>;
-
-    /// Every row in `image_exif` for `library_id`, as
-    /// `(rel_path, content_hash)`. The hash is Option because rows
-    /// mid-backfill carry NULL. Used by HLS readiness stats; callers
-    /// filter by extension client-side because the DB schema doesn't
-    /// carry media type.
-    fn list_paths_and_hashes_for_library(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-    ) -> Result<Vec<(String, Option<String>)>, DbError>;
-
    /// Return image_exif rows that need their `date_taken` resolved by the
    /// canonical-date waterfall (see `crate::date_resolver`): `date_taken
    /// IS NULL`. Returns `(library_id, rel_path)`. The caller filters to
@@ -508,61 +449,6 @@ pub trait ExifDao: Sync + Send {
        source: &str,
    ) -> Result<(), DbError>;

-    /// Find image_exif rows needing a CLIP embedding for semantic search:
-    /// `clip_embedding IS NULL AND content_hash IS NOT NULL`, ordered by id
-    /// ASC, limited. Hash-less rows wait for `backfill_unhashed_backlog` to
-    /// hash them first — embedding a row we can't key on bytes is wasted
-    /// work that the next library/move detection would invalidate. Backed
-    /// by the partial index `idx_image_exif_clip_backfill`.
-    ///
-    /// Returns `(rel_path, content_hash)` for the given library only. Video
-    /// rows are returned too (the underlying anti-join is shape-uniform);
-    /// the caller filters them out via `file_types::is_image_file` before
-    /// sending to Apollo, mirroring `face_watch::filter_excluded`.
-    ///
-    /// **Model upgrades** (re-encoding everything on a new
-    /// `APOLLO_CLIP_MODEL`) are handled out-of-band — run
-    /// `UPDATE image_exif SET clip_embedding = NULL
-    ///  WHERE clip_model_version != '<new model>';`
-    /// and the drain picks up the freshly-nulled rows on the next tick.
-    /// Mixing in-flight model versions in a single query is intentionally
-    /// not the drain's problem.
-    fn list_clip_unencoded_candidates(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-        limit: i64,
-    ) -> Result<Vec<(String, String)>, DbError>;
-
-    /// Persist a CLIP embedding for an existing row. Touches
-    /// `clip_embedding` and `clip_model_version` only — leaves every
-    /// other column alone so the drain can't accidentally clobber EXIF /
-    /// hash / date-resolver state that other paths have written.
-    fn backfill_clip_embedding(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id: i32,
-        rel_path: &str,
-        embedding: &[u8],
-        model_version: &str,
-    ) -> Result<(), DbError>;
-
-    /// Load every `(content_hash, clip_embedding)` pair from the live
-    /// image_exif rows for the given libraries, optionally filtered to a
-    /// single `model_version` (cosine sim across mixed geometries is
-    /// meaningless). Used by `/photos/search` to rerank against the query
-    /// embedding in-memory.
-    ///
-    /// Returns one pair per content_hash. If a hash appears under more
-    /// than one library, the first row wins (Diesel's natural ORDER BY id
-    /// ASC). Hash-less and embedding-less rows are filtered server-side.
-    fn list_clip_index(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_ids: &[i32],
-        model_version: Option<&str>,
-    ) -> Result<Vec<(String, Vec<u8>)>, DbError>;
-
    /// Operator-driven date_taken override (POST /image/exif/date). Snapshots
    /// the prior `(date_taken, date_taken_source)` into the `original_*`
    /// pair on first override, then writes the new value with
@@ -595,9 +481,9 @@ pub trait ExifDao: Sync + Send {
    /// whose calendar position matches the request's span:
    ///   - `"day"`   — same month + day-of-month (any year)
    ///   - `"week"`  — same week-of-year (SQLite `%W`, Monday-anchored —
-    ///     close to but not exactly ISO week 8601; the boundary cases
-    ///     at year-start/end can shift by ±1 vs the prior request-time
-    ///     `iso_week()` filter)
+    ///                 close to but not exactly ISO week 8601; the
+    ///                 boundary cases at year-start/end can shift by ±1
+    ///                 vs the prior request-time `iso_week()` filter)
    ///   - `"month"` — same month (any year)
    ///
    /// `tz_offset_minutes` is applied to both sides of the strftime
@@ -959,7 +845,7 @@ impl ExifDao for SqliteExifDao {
                .first::<ImageExif>(connection.deref_mut())
                .map_err(|e| anyhow::anyhow!("Post-insert lookup failed: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn get_exif(
@@ -986,7 +872,7 @@ impl ExifDao for SqliteExifDao {
                Err(_) => Err(anyhow::anyhow!("Query error")),
            }
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn update_exif(
@@ -1023,15 +909,15 @@ impl ExifDao for SqliteExifDao {
                last_modified.eq(&exif_data.last_modified),
            ))
            .execute(connection.deref_mut())
-            .map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
+            .map_err(|_| anyhow::anyhow!("Update error"))?;

            image_exif
                .filter(library_id.eq(exif_data.library_id))
                .filter(rel_path.eq(&exif_data.file_path))
                .first::<ImageExif>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn delete_exif(&mut self, context: &opentelemetry::Context, path: &str) -> Result<(), DbError> {
@@ -1041,9 +927,9 @@ impl ExifDao for SqliteExifDao {
            diesel::delete(image_exif.filter(rel_path.eq(path)))
                .execute(self.connection.lock().unwrap().deref_mut())
                .map(|_| ())
-                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Delete error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_all_with_date_taken(
@@ -1074,9 +960,9 @@ impl ExifDao for SqliteExifDao {
                        .filter_map(|(path, dt)| dt.map(|ts| (path, ts)))
                        .collect()
                })
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_exif_batch(
@@ -1100,9 +986,9 @@ impl ExifDao for SqliteExifDao {
            query
                .filter(rel_path.eq_any(file_paths))
                .load::<ImageExif>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn query_by_exif(
@@ -1161,9 +1047,9 @@ impl ExifDao for SqliteExifDao {

            query
                .load::<ImageExif>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_camera_makes(
@@ -1188,9 +1074,9 @@ impl ExifDao for SqliteExifDao {
                        .filter_map(|(make, cnt)| make.map(|m| (m, cnt)))
                        .collect()
                })
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn update_file_path(
@@ -1207,10 +1093,10 @@ impl ExifDao for SqliteExifDao {
            diesel::update(image_exif.filter(rel_path.eq(old_path)))
                .set(rel_path.eq(new_path))
                .execute(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Update error: {}", e))?;
+                .map_err(|_| anyhow::anyhow!("Update error"))?;
            Ok(())
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn get_all_file_paths(
@@ -1225,9 +1111,9 @@ impl ExifDao for SqliteExifDao {
            image_exif
                .select(rel_path)
                .load(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_all_with_gps(
@@ -1295,7 +1181,7 @@ impl ExifDao for SqliteExifDao {

            Ok(filtered)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_rows_missing_hash(
@@ -1314,9 +1200,9 @@ impl ExifDao for SqliteExifDao {
                .order(id.asc())
                .limit(limit)
                .load::<(i32, String)>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn backfill_content_hash(
@@ -1340,53 +1226,9 @@ impl ExifDao for SqliteExifDao {
            .set((content_hash.eq(hash), size_bytes.eq(size_val)))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
+            .map_err(|_| anyhow::anyhow!("Update error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-
-    fn list_distinct_content_hashes(
-        &mut self,
-        context: &opentelemetry::Context,
-    ) -> Result<Vec<String>, DbError> {
-        trace_db_call(context, "query", "list_distinct_content_hashes", |_span| {
-            use schema::image_exif::dsl::*;
-
-            let mut connection = self.connection.lock().expect("Unable to get ExifDao");
-
-            image_exif
-                .filter(content_hash.is_not_null())
-                .select(content_hash)
-                .distinct()
-                .load::<Option<String>>(connection.deref_mut())
-                .map(|rows| rows.into_iter().flatten().collect())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
-    fn list_paths_and_hashes_for_library(
-        &mut self,
-        context: &opentelemetry::Context,
-        lib_id: i32,
-    ) -> Result<Vec<(String, Option<String>)>, DbError> {
-        trace_db_call(
-            context,
-            "query",
-            "list_paths_and_hashes_for_library",
-            |_span| {
-                use schema::image_exif::dsl::*;
-
-                let mut connection = self.connection.lock().expect("Unable to get ExifDao");
-
-                image_exif
-                    .filter(library_id.eq(lib_id))
-                    .select((rel_path, content_hash))
-                    .load::<(String, Option<String>)>(connection.deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
-            },
-        )
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn get_rows_needing_date_backfill(
@@ -1413,10 +1255,10 @@ impl ExifDao for SqliteExifDao {
                    .order(id.asc())
                    .limit(limit)
                    .load::<(i32, String)>(connection.deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                    .map_err(|_| anyhow::anyhow!("Query error"))
            },
        )
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn backfill_date_taken(
@@ -1480,146 +1322,6 @@ impl ExifDao for SqliteExifDao {
        })
    }

-    fn list_clip_unencoded_candidates(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id_val: i32,
-        limit: i64,
-    ) -> Result<Vec<(String, String)>, DbError> {
-        trace_db_call(
-            context,
-            "query",
-            "list_clip_unencoded_candidates",
-            |_span| {
-                use schema::image_exif::dsl::*;
-
-                let mut connection = self.connection.lock().expect("Unable to get ExifDao");
-
-                // Partial index `idx_image_exif_clip_backfill` covers the
-                // (clip_embedding IS NULL AND content_hash IS NOT NULL)
-                // filter; the planner hits it directly. ORDER BY id ASC
-                // keeps drain progress monotone across ticks.
-                image_exif
-                    .filter(library_id.eq(library_id_val))
-                    .filter(clip_embedding.is_null())
-                    .filter(content_hash.is_not_null())
-                    .select((rel_path, content_hash.assume_not_null()))
-                    .order(id.asc())
-                    .limit(limit)
-                    .load::<(String, String)>(connection.deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
-            },
-        )
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
-    fn backfill_clip_embedding(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_id_val: i32,
-        rel_path_val: &str,
-        embedding: &[u8],
-        model_version: &str,
-    ) -> Result<(), DbError> {
-        trace_db_call(context, "update", "backfill_clip_embedding", |_span| {
-            use schema::image_exif::dsl::*;
-
-            let mut connection = self.connection.lock().expect("Unable to get ExifDao");
-
-            let result = diesel::update(
-                image_exif
-                    .filter(library_id.eq(library_id_val))
-                    .filter(rel_path.eq(rel_path_val)),
-            )
-            .set((
-                clip_embedding.eq(embedding),
-                clip_model_version.eq(model_version),
-            ))
-            .execute(connection.deref_mut());
-
-            match result {
-                Ok(rows) => {
-                    if rows == 0 {
-                        // Same race as backfill_date_taken — row vanished
-                        // between the candidate query and this write. Not
-                        // a hard error; the drain re-scans next tick.
-                        log::debug!(
-                            "backfill_clip_embedding: 0 rows matched lib={} {} \
-                             (row likely retired by missing-file scan)",
-                            library_id_val,
-                            rel_path_val
-                        );
-                    }
-                    Ok(())
-                }
-                Err(e) => Err(anyhow::anyhow!(
-                    "diesel update failed (lib={}, rel_path={}, model={}): {}",
-                    library_id_val,
-                    rel_path_val,
-                    model_version,
-                    e
-                )),
-            }
-        })
-        .map_err(|e| {
-            log::warn!("backfill_clip_embedding: {}", e);
-            DbError::new(DbErrorKind::UpdateError)
-        })
-    }
-
-    fn list_clip_index(
-        &mut self,
-        context: &opentelemetry::Context,
-        library_ids_val: &[i32],
-        model_version_filter: Option<&str>,
-    ) -> Result<Vec<(String, Vec<u8>)>, DbError> {
-        trace_db_call(context, "query", "list_clip_index", |_span| {
-            use schema::image_exif::dsl::*;
-
-            let mut connection = self.connection.lock().expect("Unable to get ExifDao");
-
-            // Build the base filter. content_hash + clip_embedding both
-            // need to be present for the row to be searchable.
-            let mut query = image_exif
-                .filter(content_hash.is_not_null())
-                .filter(clip_embedding.is_not_null())
-                .into_boxed();
-            if !library_ids_val.is_empty() {
-                query = query.filter(library_id.eq_any(library_ids_val));
-            }
-            if let Some(mv) = model_version_filter {
-                query = query.filter(clip_model_version.eq(mv));
-            }
-
-            // Order by id ASC so cross-library duplicates pick the
-            // earliest-ingested row (stable across calls; the in-memory
-            // matrix gets a deterministic row order). Group-by on
-            // content_hash via post-filter — Diesel doesn't expose a
-            // clean DISTINCT ON in this query shape.
-            let rows: Vec<(String, Vec<u8>)> = query
-                .select((
-                    content_hash.assume_not_null(),
-                    clip_embedding.assume_not_null(),
-                ))
-                .order(id.asc())
-                .load::<(String, Vec<u8>)>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
-
-            // Dedupe by hash, keeping the first occurrence. Cheap; sized
-            // to ~14k entries on this library.
-            let mut seen: std::collections::HashSet<String> =
-                std::collections::HashSet::with_capacity(rows.len());
-            let mut out = Vec::with_capacity(rows.len());
-            for (h, e) in rows {
-                if seen.insert(h.clone()) {
-                    out.push((h, e));
-                }
-            }
-            Ok(out)
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
    fn set_manual_date_taken(
        &mut self,
        context: &opentelemetry::Context,
@@ -1777,7 +1479,7 @@ impl ExifDao for SqliteExifDao {
                })
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_by_content_hash(
@@ -1794,9 +1496,9 @@ impl ExifDao for SqliteExifDao {
                .filter(content_hash.eq(hash))
                .first::<ImageExif>(connection.deref_mut())
                .optional()
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_rel_paths_sharing_content(
@@ -1819,7 +1521,7 @@ impl ExifDao for SqliteExifDao {
                .select(content_hash)
                .first::<Option<String>>(connection.deref_mut())
                .optional()
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?
+                .map_err(|_| anyhow::anyhow!("Query error"))?
                .flatten();

            let paths = match hash {
@@ -1828,13 +1530,13 @@ impl ExifDao for SqliteExifDao {
                    .select(rel_path)
                    .distinct()
                    .load::<String>(connection.deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))?,
+                    .map_err(|_| anyhow::anyhow!("Query error"))?,
                None => vec![rel_path_val.to_string()],
            };

            Ok(paths)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_rel_paths_for_library(
@@ -1851,9 +1553,9 @@ impl ExifDao for SqliteExifDao {
                .filter(library_id.eq(library_id_val))
                .select(rel_path)
                .load::<String>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_content_hash_anywhere(
@@ -1873,9 +1575,9 @@ impl ExifDao for SqliteExifDao {
                .first::<Option<String>>(connection.deref_mut())
                .optional()
                .map(|opt| opt.flatten())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_rel_paths_by_hash(
@@ -1893,9 +1595,9 @@ impl ExifDao for SqliteExifDao {
                .select(rel_path)
                .distinct()
                .load::<String>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_rel_paths_for_hashes(
@@ -1922,14 +1624,14 @@ impl ExifDao for SqliteExifDao {
                    .select((content_hash.assume_not_null(), rel_path))
                    .distinct()
                    .load::<(String, String)>(connection.deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+                    .map_err(|_| anyhow::anyhow!("Query error"))?;
                for (hash, path) in rows {
                    out.entry(hash).or_default().push(path);
                }
            }
            Ok(out)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn list_rel_paths_for_libraries(
@@ -1995,9 +1697,9 @@ impl ExifDao for SqliteExifDao {

            query
                .load::<(i32, String)>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn delete_exif_by_library(
@@ -2016,9 +1718,9 @@ impl ExifDao for SqliteExifDao {
            )
            .execute(self.connection.lock().unwrap().deref_mut())
            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Delete error: {}", e))
+            .map_err(|_| anyhow::anyhow!("Delete error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn count_for_library(
@@ -2033,9 +1735,9 @@ impl ExifDao for SqliteExifDao {
                .filter(library_id.eq(library_id_val))
                .count()
                .get_result::<i64>(self.connection.lock().unwrap().deref_mut())
-                .map_err(|e| anyhow::anyhow!("Count error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Count error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn list_rel_paths_for_library_page(
@@ -2059,10 +1761,10 @@ impl ExifDao for SqliteExifDao {
                    .limit(limit)
                    .offset(offset)
                    .load::<(i32, String)>(self.connection.lock().unwrap().deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                    .map_err(|_| anyhow::anyhow!("Query error"))
            },
        )
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_rows_missing_perceptual_hash(
@@ -2107,10 +1809,10 @@ impl ExifDao for SqliteExifDao {
                    .order(id.asc())
                    .limit(limit)
                    .load::<(i32, String)>(connection.deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                    .map_err(|_| anyhow::anyhow!("Query error"))
            },
        )
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn backfill_perceptual_hash(
@@ -2134,12 +1836,11 @@ impl ExifDao for SqliteExifDao {
            .set((phash_64.eq(phash_val), dhash_64.eq(dhash_val)))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
+            .map_err(|_| anyhow::anyhow!("Update error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

-    #[allow(clippy::type_complexity)]
    fn list_duplicates_exact(
        &mut self,
        context: &opentelemetry::Context,
@@ -2166,7 +1867,7 @@ impl ExifDao for SqliteExifDao {
                    q = q.filter(library_id.eq(lib));
                }
                q.load::<String>(connection.deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Query error: {}", e))?
+                    .map_err(|_| anyhow::anyhow!("Query error"))?
            };

            if dup_hashes.is_empty() {
@@ -2213,7 +1914,7 @@ impl ExifDao for SqliteExifDao {
                Option<i64>,
            )> = q
                .load(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+                .map_err(|_| anyhow::anyhow!("Query error"))?;

            Ok(rows
                .into_iter()
@@ -2232,10 +1933,9 @@ impl ExifDao for SqliteExifDao {
                })
                .collect())
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

-    #[allow(clippy::type_complexity)]
    fn list_perceptual_candidates(
        &mut self,
        context: &opentelemetry::Context,
@@ -2295,7 +1995,7 @@ impl ExifDao for SqliteExifDao {
                Option<i64>,
            )> = q
                .load(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))?;
+                .map_err(|_| anyhow::anyhow!("Query error"))?;

            // Dedup keyed on content_hash, keeping the first occurrence
            // (deterministic by the SQL ORDER BY: lowest library_id,
@@ -2321,7 +2021,7 @@ impl ExifDao for SqliteExifDao {
            }
            Ok(out)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn list_image_paths(
@@ -2346,9 +2046,9 @@ impl ExifDao for SqliteExifDao {
                q = q.filter(duplicate_of_hash.is_null());
            }
            q.load::<(i32, String)>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn lookup_duplicate_row(
@@ -2408,9 +2108,9 @@ impl ExifDao for SqliteExifDao {
                        duplicate_decided_at: r.10,
                    })
                })
-                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
+                .map_err(|_| anyhow::anyhow!("Query error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn set_duplicate_of(
@@ -2437,9 +2137,9 @@ impl ExifDao for SqliteExifDao {
            ))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
+            .map_err(|_| anyhow::anyhow!("Update error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn clear_duplicate_of(
@@ -2464,9 +2164,9 @@ impl ExifDao for SqliteExifDao {
            ))
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Update error: {}", e))
+            .map_err(|_| anyhow::anyhow!("Update error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn union_perceptual_tags(
@@ -2504,9 +2204,9 @@ impl ExifDao for SqliteExifDao {
            .bind::<diesel::sql_types::Text, _>(survivor_hash)
            .execute(connection.deref_mut())
            .map(|_| ())
-            .map_err(|e| anyhow::anyhow!("Tag union error: {}", e))
+            .map_err(|_| anyhow::anyhow!("Tag union error"))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }
 }

@@ -1,76 +1,9 @@
 use crate::database::schema::{
-    entities, entity_facts, entity_photo_links, favorites, image_exif, insight_generation_jobs,
-    libraries, personas, photo_insights, precomputed_reels, user_ai_prefs, users,
-    video_preview_clips,
+    entities, entity_facts, entity_photo_links, favorites, image_exif, libraries, personas,
+    photo_insights, users, video_preview_clips,
 };
 use serde::Serialize;

-/// Possible statuses for an insight generation job.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, FromSqlRow)]
-#[serde(rename_all = "snake_case")]
-pub enum InsightJobStatus {
-    Running,
-    Completed,
-    Failed,
-    Cancelled,
-}
-
-impl InsightJobStatus {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            Self::Running => "running",
-            Self::Completed => "completed",
-            Self::Failed => "failed",
-            Self::Cancelled => "cancelled",
-        }
-    }
-
-    pub fn parse(s: &str) -> Self {
-        match s {
-            "running" => Self::Running,
-            "completed" => Self::Completed,
-            "failed" => Self::Failed,
-            "cancelled" => Self::Cancelled,
-            other => {
-                log::warn!(
-                    "Unknown InsightJobStatus value: {:?}, treating as failed",
-                    other
-                );
-                Self::Failed
-            }
-        }
-    }
-}
-
-impl std::fmt::Display for InsightJobStatus {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.as_str())
-    }
-}
-
-/// Type of insight generation (standard vs agentic).
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum InsightGenerationType {
-    Standard,
-    Agentic,
-}
-
-impl InsightGenerationType {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            Self::Standard => "standard",
-            Self::Agentic => "agentic",
-        }
-    }
-}
-
-impl std::fmt::Display for InsightGenerationType {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.as_str())
-    }
-}
-
 #[derive(Insertable)]
 #[diesel(table_name = users)]
 pub struct InsertUser<'a> {
@@ -181,15 +114,6 @@ pub struct ImageExif {
    /// Snapshot of the prior `date_taken_source` taken on first manual
    /// override. NULL when no override is active.
    pub original_date_taken_source: Option<String>,
-    /// L2-normalized CLIP image embedding (raw little-endian float32 bytes;
-    /// length depends on the model — 768×4 for ViT-L/14, 512×4 for ViT-B/32).
-    /// NULL until Apollo's CLIP service has encoded this photo via the
-    /// backfill drain. Used by `/photos/search` for semantic queries.
-    pub clip_embedding: Option<Vec<u8>>,
-    /// Which CLIP model produced `clip_embedding` (e.g. `"ViT-L/14"`). A
-    /// swap of `APOLLO_CLIP_MODEL` re-eligibilizes rows whose stored
-    /// version differs so the drain rebuilds them.
-    pub clip_model_version: Option<String>,
 }

 #[derive(Insertable)]
@@ -219,15 +143,6 @@ pub struct InsertPhotoInsight {
    /// inserted before the hash is available stay null and the
    /// reconciliation pass backfills them.
    pub content_hash: Option<String>,
-    pub num_ctx: Option<i32>,
-    pub temperature: Option<f32>,
-    pub top_p: Option<f32>,
-    pub top_k: Option<i32>,
-    pub min_p: Option<f32>,
-    pub system_prompt: Option<String>,
-    pub persona_id: Option<String>,
-    pub prompt_eval_count: Option<i32>,
-    pub eval_count: Option<i32>,
 }

 #[derive(Serialize, Queryable, Clone, Debug)]
@@ -247,15 +162,6 @@ pub struct PhotoInsight {
    pub backend: String,
    pub fewshot_source_ids: Option<String>,
    pub content_hash: Option<String>,
-    pub num_ctx: Option<i32>,
-    pub temperature: Option<f32>,
-    pub top_p: Option<f32>,
-    pub top_k: Option<i32>,
-    pub min_p: Option<f32>,
-    pub system_prompt: Option<String>,
-    pub persona_id: Option<String>,
-    pub prompt_eval_count: Option<i32>,
-    pub eval_count: Option<i32>,
 }

 // --- Libraries ---
@@ -479,83 +385,3 @@ pub struct VideoPreviewClip {
    pub created_at: String,
    pub updated_at: String,
 }
-
-#[derive(Insertable)]
-#[diesel(table_name = insight_generation_jobs)]
-pub struct InsertInsightGenerationJob {
-    pub library_id: i32,
-    #[diesel(column_name = file_path)]
-    pub path: String,
-    #[diesel(column_name = generation_type)]
-    pub gen_type: String,
-    pub status: String,
-    pub started_at: i64,
-}
-
-#[derive(Queryable, Serialize, Clone, Debug)]
-pub struct InsightGenerationJob {
-    pub id: i32,
-    pub library_id: i32,
-    #[diesel(column_name = file_path)]
-    pub path: String,
-    #[diesel(column_name = generation_type)]
-    pub gen_type: String,
-    pub status: String,
-    pub started_at: i64,
-    pub completed_at: Option<i64>,
-    pub result_insight_id: Option<i32>,
-    pub error_message: Option<String>,
-}
-
-// --- Precomputed reels -------------------------------------------------------
-
-#[derive(Insertable)]
-#[diesel(table_name = precomputed_reels)]
-pub struct InsertablePrecomputedReel {
-    pub span: String,
-    pub library_key: String,
-    pub cache_key: String,
-    pub output_path: String,
-    pub title: String,
-    pub media_count: i32,
-    pub render_version: i32,
-    pub tz_offset_minutes: i32,
-    pub voice: Option<String>,
-    pub generated_at: i64,
-}
-
-#[derive(Serialize, Queryable, Clone, Debug)]
-pub struct PrecomputedReel {
-    pub id: i32,
-    pub span: String,
-    pub library_key: String,
-    pub cache_key: String,
-    pub output_path: String,
-    pub title: String,
-    pub media_count: i32,
-    pub render_version: i32,
-    pub tz_offset_minutes: i32,
-    pub voice: Option<String>,
-    pub generated_at: i64,
-}
-
-// --- User AI preferences (Section E) ----------------------------------------
-
-#[derive(Queryable, Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)]
-#[diesel(table_name = user_ai_prefs)]
-pub struct UserAiPrefs {
-    pub id: i32,
-    pub voice: Option<String>,
-    pub tz_offset_minutes: Option<i32>,
-    pub library: Option<String>,
-    pub updated_at: i64,
-}
-
-#[derive(Insertable, Debug, Clone, serde::Deserialize, serde::Serialize)]
-#[diesel(table_name = user_ai_prefs)]
-pub struct UpsertUserAiPrefs {
-    pub voice: Option<String>,
-    pub tz_offset_minutes: Option<i32>,
-    pub library: Option<String>,
-    pub updated_at: i64,
-}
@@ -119,7 +119,7 @@ impl PersonaDao for SqlitePersonaDao {
                .load::<Persona>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_persona(
@@ -138,7 +138,7 @@ impl PersonaDao for SqlitePersonaDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn create_persona(
@@ -178,7 +178,7 @@ impl PersonaDao for SqlitePersonaDao {
                .first::<Persona>(conn.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn update_persona(
@@ -241,7 +241,7 @@ impl PersonaDao for SqlitePersonaDao {
                .optional()
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn delete_persona(
@@ -258,7 +258,7 @@ impl PersonaDao for SqlitePersonaDao {
                .map_err(|e| anyhow::anyhow!("Delete error: {}", e))?;
            Ok(n > 0)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn bulk_import(
@@ -294,7 +294,7 @@ impl PersonaDao for SqlitePersonaDao {
            }
            Ok(inserted)
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }
 }

@@ -1,439 +0,0 @@
-use diesel::prelude::*;
-use diesel::sqlite::SqliteConnection;
-use std::ops::DerefMut;
-use std::sync::{Arc, Mutex};
-
-use crate::database::models::{InsertablePrecomputedReel, PrecomputedReel};
-use crate::database::schema;
-use crate::database::{DbError, DbErrorKind, connect};
-use crate::otel::trace_db_call;
-
-/// Ledger for precomputed memory reels. The nightly agentic job writes a
-/// row after each successful render; the `GET /reels/precomputed` handler
-/// reads it to gate on freshness and serve the cached MP4.
-pub trait PrecomputedReelDao: Sync + Send {
-    /// Insert a precomputed reel row. Returns the new row's id.
-    /// Written by the nightly agentic job (Section D).
-    #[allow(dead_code)]
-    fn record_reel(
-        &mut self,
-        context: &opentelemetry::Context,
-        row: &InsertablePrecomputedReel,
-    ) -> Result<i32, DbError>;
-
-    /// Find the latest precomputed reel for the given (span, library_key).
-    fn latest_for(
-        &mut self,
-        context: &opentelemetry::Context,
-        span: &str,
-        library_key: &str,
-    ) -> Result<Option<PrecomputedReel>, DbError>;
-
-    /// Return true when a fresh precomputed reel exists for the given
-    /// (span, library_key, render_version) that was generated at or after
-    /// `min_generated_at`. Used as a fast existence gate before falling
-    /// back to `latest_for` (avoids a second query path).
-    fn exists_fresh(
-        &mut self,
-        context: &opentelemetry::Context,
-        span: &str,
-        library_key: &str,
-        render_version: i32,
-        min_generated_at: i64,
-    ) -> Result<bool, DbError>;
-
-    /// Delete all but the newest `keep` rows for (span, library_key), returning
-    /// the deleted rows so the caller can unlink their output files. Used by the
-    /// nightly job to retire superseded reels (e.g. yesterday's daily).
-    #[allow(dead_code)]
-    fn prune_superseded(
-        &mut self,
-        context: &opentelemetry::Context,
-        span: &str,
-        library_key: &str,
-        keep: usize,
-    ) -> Result<Vec<PrecomputedReel>, DbError>;
-
-    /// Every cache_key currently in the ledger. Used by the on-disk cache sweep
-    /// to protect files a ledger row still points at.
-    #[allow(dead_code)]
-    fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError>;
-}
-
-pub struct SqlitePrecomputedReelDao {
-    connection: Arc<Mutex<SqliteConnection>>,
-}
-
-impl Default for SqlitePrecomputedReelDao {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl SqlitePrecomputedReelDao {
-    pub fn new() -> Self {
-        Self {
-            connection: Arc::new(Mutex::new(connect())),
-        }
-    }
-
-    #[cfg(test)]
-    pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
-        Self { connection: conn }
-    }
-}
-
-impl PrecomputedReelDao for SqlitePrecomputedReelDao {
-    fn record_reel(
-        &mut self,
-        context: &opentelemetry::Context,
-        row: &InsertablePrecomputedReel,
-    ) -> Result<i32, DbError> {
-        trace_db_call(context, "insert", "record_reel", |_span| {
-            use schema::precomputed_reels::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock PrecomputedReelDao");
-
-            diesel::insert_into(dsl::precomputed_reels)
-                .values(row)
-                .execute(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to insert reel: {}", e))?;
-
-            dsl::precomputed_reels
-                .order(dsl::id.desc())
-                .select(dsl::id)
-                .first::<i32>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to get reel id: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
-    }
-
-    fn latest_for(
-        &mut self,
-        context: &opentelemetry::Context,
-        span: &str,
-        library_key: &str,
-    ) -> Result<Option<PrecomputedReel>, DbError> {
-        trace_db_call(context, "query", "latest_for", |_span| {
-            use schema::precomputed_reels::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock PrecomputedReelDao");
-
-            dsl::precomputed_reels
-                .filter(dsl::span.eq(span))
-                .filter(dsl::library_key.eq(library_key))
-                .order(dsl::generated_at.desc())
-                .first::<PrecomputedReel>(connection.deref_mut())
-                .optional()
-                .map_err(|e| anyhow::anyhow!("Failed to get latest reel: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
-    fn exists_fresh(
-        &mut self,
-        context: &opentelemetry::Context,
-        span: &str,
-        library_key: &str,
-        render_version: i32,
-        min_generated_at: i64,
-    ) -> Result<bool, DbError> {
-        trace_db_call(context, "query", "exists_fresh", |_span| {
-            use schema::precomputed_reels::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock PrecomputedReelDao");
-
-            let count: i64 = dsl::precomputed_reels
-                .filter(dsl::span.eq(span))
-                .filter(dsl::library_key.eq(library_key))
-                .filter(dsl::render_version.eq(render_version))
-                .filter(dsl::generated_at.ge(min_generated_at))
-                .count()
-                .get_result(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to check fresh reel: {}", e))?;
-
-            Ok(count > 0)
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
-    fn prune_superseded(
-        &mut self,
-        context: &opentelemetry::Context,
-        span: &str,
-        library_key: &str,
-        keep: usize,
-    ) -> Result<Vec<PrecomputedReel>, DbError> {
-        trace_db_call(context, "delete", "prune_superseded", |_span| {
-            use schema::precomputed_reels::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock PrecomputedReelDao");
-
-            // Newest first; everything past `keep` is superseded. The table
-            // holds at most a handful of rows per (span, library), so loading
-            // and slicing in Rust is cheaper than a correlated subquery.
-            let mut rows: Vec<PrecomputedReel> = dsl::precomputed_reels
-                .filter(dsl::span.eq(span))
-                .filter(dsl::library_key.eq(library_key))
-                .order(dsl::generated_at.desc())
-                .load::<PrecomputedReel>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to load reels for prune: {}", e))?;
-
-            let stale = rows.split_off(rows.len().min(keep));
-            if !stale.is_empty() {
-                let ids: Vec<i32> = stale.iter().map(|r| r.id).collect();
-                diesel::delete(dsl::precomputed_reels.filter(dsl::id.eq_any(ids)))
-                    .execute(connection.deref_mut())
-                    .map_err(|e| anyhow::anyhow!("Failed to delete superseded reels: {}", e))?;
-            }
-            Ok(stale)
-        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
-    }
-
-    fn all_cache_keys(&mut self, context: &opentelemetry::Context) -> Result<Vec<String>, DbError> {
-        trace_db_call(context, "query", "all_cache_keys", |_span| {
-            use schema::precomputed_reels::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock PrecomputedReelDao");
-
-            dsl::precomputed_reels
-                .select(dsl::cache_key)
-                .load::<String>(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to load cache keys: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use diesel::Connection;
-    use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
-
-    const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
-
-    fn setup_dao() -> SqlitePrecomputedReelDao {
-        let mut conn = SqliteConnection::establish(":memory:")
-            .expect("Unable to create in-memory db connection");
-        conn.run_pending_migrations(DB_MIGRATIONS)
-            .expect("Failure running DB migrations");
-        SqlitePrecomputedReelDao::from_connection(Arc::new(Mutex::new(conn)))
-    }
-
-    fn ctx() -> opentelemetry::Context {
-        opentelemetry::Context::new()
-    }
-
-    fn sample_row() -> InsertablePrecomputedReel {
-        InsertablePrecomputedReel {
-            span: "day".to_string(),
-            library_key: "1".to_string(),
-            cache_key: "abc123".to_string(),
-            output_path: "/tmp/reel.mp4".to_string(),
-            title: "Test Reel".to_string(),
-            media_count: 10,
-            render_version: 1,
-            tz_offset_minutes: 0,
-            voice: Some("default".to_string()),
-            generated_at: 1_000_000,
-        }
-    }
-
-    #[test]
-    fn record_reel_inserts_and_returns_id() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-        let row = sample_row();
-
-        let id = dao.record_reel(&ctx, &row).unwrap();
-        assert!(id > 0, "should return a positive id");
-    }
-
-    #[test]
-    fn record_reel_returns_increasing_ids() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-        let row = sample_row();
-
-        let id1 = dao.record_reel(&ctx, &row).unwrap();
-        let id2 = dao.record_reel(&ctx, &row).unwrap();
-        assert!(id2 > id1, "each insert should get a higher id");
-    }
-
-    #[test]
-    fn latest_for_returns_latest() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let row1 = InsertablePrecomputedReel {
-            generated_at: 1_000_000,
-            ..sample_row()
-        };
-        let row2 = InsertablePrecomputedReel {
-            generated_at: 2_000_000,
-            ..sample_row()
-        };
-
-        dao.record_reel(&ctx, &row1).unwrap();
-        dao.record_reel(&ctx, &row2).unwrap();
-
-        let latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap();
-        assert_eq!(latest.generated_at, 2_000_000);
-    }
-
-    #[test]
-    fn latest_for_scoped_by_span_and_library() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let day_row = InsertablePrecomputedReel {
-            span: "day".to_string(),
-            library_key: "1".to_string(),
-            generated_at: 1_000_000,
-            ..sample_row()
-        };
-        let week_row = InsertablePrecomputedReel {
-            span: "week".to_string(),
-            library_key: "1".to_string(),
-            generated_at: 2_000_000,
-            ..sample_row()
-        };
-
-        dao.record_reel(&ctx, &day_row).unwrap();
-        dao.record_reel(&ctx, &week_row).unwrap();
-
-        let day_latest = dao.latest_for(&ctx, "day", "1").unwrap().unwrap();
-        assert_eq!(day_latest.span, "day");
-
-        let week_latest = dao.latest_for(&ctx, "week", "1").unwrap().unwrap();
-        assert_eq!(week_latest.span, "week");
-
-        // Different library returns None
-        let missing = dao.latest_for(&ctx, "day", "99").unwrap();
-        assert!(missing.is_none());
-    }
-
-    #[test]
-    fn latest_for_returns_none_when_no_rows() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let result = dao.latest_for(&ctx, "day", "1").unwrap();
-        assert!(result.is_none());
-    }
-
-    #[test]
-    fn exists_fresh_returns_true_when_present() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        dao.record_reel(&ctx, &sample_row()).unwrap();
-
-        let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap();
-        assert!(exists, "should find the row we just inserted");
-    }
-
-    #[test]
-    fn exists_fresh_returns_false_when_missing() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let exists = dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap();
-        assert!(!exists, "should not find anything in empty table");
-    }
-
-    #[test]
-    fn exists_fresh_respects_min_generated_at() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        dao.record_reel(&ctx, &sample_row()).unwrap();
-
-        // Below the threshold — should exist
-        let exists = dao.exists_fresh(&ctx, "day", "1", 1, 500_000).unwrap();
-        assert!(exists);
-
-        // Above the threshold — should not exist
-        let exists = dao.exists_fresh(&ctx, "day", "1", 1, 2_000_000).unwrap();
-        assert!(!exists);
-    }
-
-    #[test]
-    fn exists_fresh_respects_render_version() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-
-        let row_v1 = InsertablePrecomputedReel {
-            render_version: 1,
-            ..sample_row()
-        };
-        dao.record_reel(&ctx, &row_v1).unwrap();
-
-        assert!(dao.exists_fresh(&ctx, "day", "1", 1, 900_000).unwrap());
-        assert!(!dao.exists_fresh(&ctx, "day", "1", 2, 900_000).unwrap());
-    }
-
-    #[test]
-    fn prune_superseded_keeps_newest_and_returns_deleted() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-        // Three day/lib1 reels at increasing timestamps, plus an unrelated one.
-        for (i, key) in ["k1", "k2", "k3"].iter().enumerate() {
-            dao.record_reel(
-                &ctx,
-                &InsertablePrecomputedReel {
-                    cache_key: key.to_string(),
-                    generated_at: 1_000_000 + i as i64 * 1000,
-                    ..sample_row()
-                },
-            )
-            .unwrap();
-        }
-        let other = InsertablePrecomputedReel {
-            library_key: "2".to_string(),
-            cache_key: "other".to_string(),
-            ..sample_row()
-        };
-        dao.record_reel(&ctx, &other).unwrap();
-
-        // Keep the newest 2 of (day, "1"); k1 (oldest) is superseded.
-        let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
-        assert_eq!(deleted.len(), 1);
-        assert_eq!(deleted[0].cache_key, "k1");
-
-        // The newest 2 survive; the other-library row is untouched.
-        let keys = dao.all_cache_keys(&ctx).unwrap();
-        assert_eq!(keys.len(), 3);
-        assert!(keys.contains(&"k2".to_string()));
-        assert!(keys.contains(&"k3".to_string()));
-        assert!(keys.contains(&"other".to_string()));
-        assert!(!keys.contains(&"k1".to_string()));
-    }
-
-    #[test]
-    fn prune_superseded_noop_when_within_keep() {
-        let mut dao = setup_dao();
-        let ctx = ctx();
-        dao.record_reel(&ctx, &sample_row()).unwrap();
-        let deleted = dao.prune_superseded(&ctx, "day", "1", 2).unwrap();
-        assert!(deleted.is_empty());
-        assert_eq!(dao.all_cache_keys(&ctx).unwrap().len(), 1);
-    }
-}
@@ -96,7 +96,7 @@ impl PreviewDao for SqlitePreviewDao {
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!("Insert error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn update_status(
@@ -126,7 +126,7 @@ impl PreviewDao for SqlitePreviewDao {
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!("Update error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::UpdateError, e))
+        .map_err(|_| DbError::new(DbErrorKind::UpdateError))
    }

    fn get_preview(
@@ -148,7 +148,7 @@ impl PreviewDao for SqlitePreviewDao {
                Err(e) => Err(anyhow::anyhow!("Query error: {}", e)),
            }
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_previews_batch(
@@ -170,7 +170,7 @@ impl PreviewDao for SqlitePreviewDao {
                .load::<VideoPreviewClip>(connection.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_by_status(
@@ -188,7 +188,7 @@ impl PreviewDao for SqlitePreviewDao {
                .load::<VideoPreviewClip>(connection.deref_mut())
                .map_err(|e| anyhow::anyhow!("Query error: {}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
 }

@@ -57,28 +57,30 @@ impl ReconcileStats {
 /// watcher tick. Errors are logged but never propagated; reconciliation
 /// is best-effort and a transient DB hiccup must not stall the watcher.
 pub fn run(conn: &mut SqliteConnection) -> ReconcileStats {
-    let stats = ReconcileStats {
-        tagged_photo_hashes_filled: match backfill_tagged_photo_hashes(conn) {
-            Ok(n) => n,
-            Err(e) => {
-                warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
-                0
-            }
-        },
-        photo_insights_hashes_filled: match backfill_photo_insights_hashes(conn) {
-            Ok(n) => n,
-            Err(e) => {
-                warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
-                0
-            }
-        },
-        photo_insights_demoted: match collapse_insight_currents(conn) {
-            Ok(n) => n,
-            Err(e) => {
-                warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
-                0
-            }
-        },
+    let mut stats = ReconcileStats::default();
+
+    stats.tagged_photo_hashes_filled = match backfill_tagged_photo_hashes(conn) {
+        Ok(n) => n,
+        Err(e) => {
+            warn!("reconcile: tagged_photo hash backfill failed: {:?}", e);
+            0
+        }
+    };
+
+    stats.photo_insights_hashes_filled = match backfill_photo_insights_hashes(conn) {
+        Ok(n) => n,
+        Err(e) => {
+            warn!("reconcile: photo_insights hash backfill failed: {:?}", e);
+            0
+        }
+    };
+
+    stats.photo_insights_demoted = match collapse_insight_currents(conn) {
+        Ok(n) => n,
+        Err(e) => {
+            warn!("reconcile: photo_insights scalar merge failed: {:?}", e);
+            0
+        }
    };

    if stats.changed() {
@@ -138,8 +138,6 @@ diesel::table! {
        date_taken_source -> Nullable<Text>,
        original_date_taken -> Nullable<BigInt>,
        original_date_taken_source -> Nullable<Text>,
-        clip_embedding -> Nullable<Binary>,
-        clip_model_version -> Nullable<Text>,
    }
 }

@@ -216,15 +214,6 @@ diesel::table! {
        backend -> Text,
        fewshot_source_ids -> Nullable<Text>,
        content_hash -> Nullable<Text>,
-        num_ctx -> Nullable<Integer>,
-        temperature -> Nullable<Float>,
-        top_p -> Nullable<Float>,
-        top_k -> Nullable<Integer>,
-        min_p -> Nullable<Float>,
-        system_prompt -> Nullable<Text>,
-        persona_id -> Nullable<Text>,
-        prompt_eval_count -> Nullable<Integer>,
-        eval_count -> Nullable<Integer>,
    }
 }

@@ -266,16 +255,6 @@ diesel::table! {
    }
 }

-diesel::table! {
-    user_ai_prefs (id) {
-        id -> Integer,
-        voice -> Nullable<Text>,
-        tz_offset_minutes -> Nullable<Integer>,
-        library -> Nullable<Text>,
-        updated_at -> BigInt,
-    }
-}
-
 diesel::table! {
    video_preview_clips (id) {
        id -> Integer,
@@ -290,43 +269,12 @@ diesel::table! {
    }
 }

-diesel::table! {
-    insight_generation_jobs (id) {
-        id -> Integer,
-        library_id -> Integer,
-        file_path -> Text,
-        generation_type -> Text,
-        status -> Text,
-        started_at -> BigInt,
-        completed_at -> Nullable<BigInt>,
-        result_insight_id -> Nullable<Integer>,
-        error_message -> Nullable<Text>,
-    }
-}
-
-diesel::table! {
-    precomputed_reels (id) {
-        id -> Integer,
-        span -> Text,
-        library_key -> Text,
-        cache_key -> Text,
-        output_path -> Text,
-        title -> Text,
-        media_count -> Integer,
-        render_version -> Integer,
-        tz_offset_minutes -> Integer,
-        voice -> Nullable<Text>,
-        generated_at -> BigInt,
-    }
-}
-
 diesel::joinable!(entity_facts -> photo_insights (source_insight_id));
 diesel::joinable!(entity_photo_links -> entities (entity_id));
 diesel::joinable!(entity_photo_links -> libraries (library_id));
 diesel::joinable!(face_detections -> libraries (library_id));
 diesel::joinable!(face_detections -> persons (person_id));
 diesel::joinable!(image_exif -> libraries (library_id));
-diesel::joinable!(insight_generation_jobs -> libraries (library_id));
 diesel::joinable!(personas -> users (user_id));
 diesel::joinable!(persons -> entities (entity_id));
 diesel::joinable!(photo_insights -> libraries (library_id));
@@ -342,17 +290,14 @@ diesel::allow_tables_to_appear_in_same_query!(
    face_detections,
    favorites,
    image_exif,
-    insight_generation_jobs,
    libraries,
    location_history,
    personas,
    persons,
    photo_insights,
-    precomputed_reels,
    search_history,
    tagged_photo,
    tags,
-    user_ai_prefs,
    users,
    video_preview_clips,
 );
@@ -189,11 +189,10 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
                .expect("Unable to get SearchHistoryDao");

            // Validate embedding dimensions (REQUIRED for searches)
-            if search.embedding.len() != crate::ai::embedding_dim() {
+            if search.embedding.len() != 768 {
                return Err(anyhow::anyhow!(
-                    "Invalid embedding dimensions: {} (expected {})",
-                    search.embedding.len(),
-                    crate::ai::embedding_dim()
+                    "Invalid embedding dimensions: {} (expected 768)",
+                    search.embedding.len()
                ));
            }

@@ -228,7 +227,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
                source_file: search.source_file,
            })
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn store_searches_batch(
@@ -246,7 +245,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
            conn.transaction::<_, anyhow::Error, _>(|conn| {
                for search in searches {
                    // Validate embedding (REQUIRED)
-                    if search.embedding.len() != crate::ai::embedding_dim() {
+                    if search.embedding.len() != 768 {
                        log::warn!(
                            "Skipping search with invalid embedding dimensions: {}",
                            search.embedding.len()
@@ -284,7 +283,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {

            Ok(inserted)
        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
+        .map_err(|_| DbError::new(DbErrorKind::InsertError))
    }

    fn find_searches_in_range(
@@ -311,7 +310,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
            .map(|rows| rows.into_iter().map(|r| r.to_search_record()).collect())
            .map_err(|e| anyhow::anyhow!("Query error: {:?}", e))
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_similar_searches(
@@ -326,11 +325,10 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
                .lock()
                .expect("Unable to get SearchHistoryDao");

-            if query_embedding.len() != crate::ai::embedding_dim() {
+            if query_embedding.len() != 768 {
                return Err(anyhow::anyhow!(
-                    "Invalid query embedding dimensions: {} (expected {})",
-                    query_embedding.len(),
-                    crate::ai::embedding_dim()
+                    "Invalid query embedding dimensions: {} (expected 768)",
+                    query_embedding.len()
                ));
            }

@@ -374,7 +372,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
                .map(|(_, search)| search)
                .collect())
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn find_relevant_searches_hybrid(
@@ -408,11 +406,10 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {

            // Step 2: If query embedding provided, rank by semantic similarity
            if let Some(query_emb) = query_embedding {
-                if query_emb.len() != crate::ai::embedding_dim() {
+                if query_emb.len() != 768 {
                    return Err(anyhow::anyhow!(
-                        "Invalid query embedding dimensions: {} (expected {})",
-                        query_emb.len(),
-                        crate::ai::embedding_dim()
+                        "Invalid query embedding dimensions: {} (expected 768)",
+                        query_emb.len()
                    ));
                }

@@ -462,7 +459,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {
                    .collect())
            }
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn search_exists(
@@ -493,7 +490,7 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {

            Ok(result.count > 0)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }

    fn get_search_count(&mut self, context: &opentelemetry::Context) -> Result<i64, DbError> {
@@ -516,6 +513,6 @@ impl SearchHistoryDao for SqliteSearchHistoryDao {

            Ok(result.count)
        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
+        .map_err(|_| DbError::new(DbErrorKind::QueryError))
    }
 }
@@ -1,206 +0,0 @@
-use diesel::prelude::*;
-use diesel::sqlite::SqliteConnection;
-use std::ops::DerefMut;
-use std::sync::{Arc, Mutex};
-
-use crate::database::models::{UpsertUserAiPrefs, UserAiPrefs};
-use crate::database::schema;
-use crate::database::{DbError, DbErrorKind, connect};
-use crate::otel::trace_db_call;
-
-/// Generic single-row table that passively mirrors the latest client AI
-/// request parameters (voice, timezone, library). Read by the nightly
-/// pre-generation scheduler (Section D) to pick up user preferences.
-pub trait UserAiPrefsDao: Sync + Send {
-    /// Read the single row; `None` when it hasn't been populated yet.
-    fn get_prefs(
-        &mut self,
-        context: &opentelemetry::Context,
-    ) -> Result<Option<UserAiPrefs>, DbError>;
-
-    /// Upsert the single row (id is always 1).
-    #[allow(dead_code)]
-    fn upsert_prefs(
-        &mut self,
-        context: &opentelemetry::Context,
-        prefs: &UpsertUserAiPrefs,
-    ) -> Result<(), DbError>;
-}
-
-pub struct SqliteUserAiPrefsDao {
-    connection: Arc<Mutex<SqliteConnection>>,
-}
-
-impl Default for SqliteUserAiPrefsDao {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl SqliteUserAiPrefsDao {
-    pub fn new() -> Self {
-        Self {
-            connection: Arc::new(Mutex::new(connect())),
-        }
-    }
-
-    #[cfg(test)]
-    pub fn from_connection(conn: Arc<Mutex<SqliteConnection>>) -> Self {
-        Self { connection: conn }
-    }
-}
-
-impl UserAiPrefsDao for SqliteUserAiPrefsDao {
-    fn get_prefs(
-        &mut self,
-        context: &opentelemetry::Context,
-    ) -> Result<Option<UserAiPrefs>, DbError> {
-        trace_db_call(context, "query", "get_prefs", |_span| {
-            use schema::user_ai_prefs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock UserAiPrefsDao");
-
-            dsl::user_ai_prefs
-                .first::<UserAiPrefs>(connection.deref_mut())
-                .optional()
-                .map_err(|e| anyhow::anyhow!("Failed to get prefs: {}", e))
-        })
-        .map_err(|e| DbError::log(DbErrorKind::QueryError, e))
-    }
-
-    fn upsert_prefs(
-        &mut self,
-        context: &opentelemetry::Context,
-        prefs: &UpsertUserAiPrefs,
-    ) -> Result<(), DbError> {
-        trace_db_call(context, "upsert", "upsert_prefs", |_span| {
-            use schema::user_ai_prefs::dsl;
-
-            let mut connection = self
-                .connection
-                .lock()
-                .expect("Unable to lock UserAiPrefsDao");
-
-            // Single-row table (id=1): one atomic upsert. The explicit id=1
-            // makes the conflict target deterministic so the second call
-            // updates in place rather than tripping the CHECK(id=1) constraint,
-            // and real insert errors surface instead of being swallowed into a
-            // separate update branch. The columns are set explicitly (rather
-            // than via AsChangeset) so a None field overwrites to NULL — the
-            // row mirrors the latest request exactly, not a merge of past ones.
-            diesel::insert_into(dsl::user_ai_prefs)
-                .values((dsl::id.eq(1), prefs))
-                .on_conflict(dsl::id)
-                .do_update()
-                .set((
-                    dsl::voice.eq(&prefs.voice),
-                    dsl::tz_offset_minutes.eq(&prefs.tz_offset_minutes),
-                    dsl::library.eq(&prefs.library),
-                    dsl::updated_at.eq(&prefs.updated_at),
-                ))
-                .execute(connection.deref_mut())
-                .map_err(|e| anyhow::anyhow!("Failed to upsert prefs: {}", e))?;
-            Ok(())
-        })
-        .map_err(|e| DbError::log(DbErrorKind::InsertError, e))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use diesel::Connection;
-    use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
-
-    const DB_MIGRATIONS: EmbeddedMigrations = embed_migrations!();
-
-    fn setup_dao() -> SqliteUserAiPrefsDao {
-        let mut conn = SqliteConnection::establish(":memory:")
-            .expect("Unable to create in-memory db connection");
-        conn.run_pending_migrations(DB_MIGRATIONS)
-            .expect("Failure running DB migrations");
-        SqliteUserAiPrefsDao::from_connection(Arc::new(Mutex::new(conn)))
-    }
-
-    fn ctx() -> opentelemetry::Context {
-        opentelemetry::Context::new()
-    }
-
-    #[test]
-    fn get_prefs_returns_none_when_empty() {
-        let mut dao = setup_dao();
-        let result = dao.get_prefs(&ctx()).unwrap();
-        assert!(result.is_none());
-    }
-
-    #[test]
-    fn upsert_prefs_inserts_row() {
-        let mut dao = setup_dao();
-        let now = 1_700_000_000i64;
-        let prefs = UpsertUserAiPrefs {
-            voice: Some("grandma".to_string()),
-            tz_offset_minutes: Some(-480),
-            library: Some("1".to_string()),
-            updated_at: now,
-        };
-        dao.upsert_prefs(&ctx(), &prefs).unwrap();
-
-        let row = dao.get_prefs(&ctx()).unwrap().unwrap();
-        assert_eq!(row.id, 1);
-        assert_eq!(row.voice, Some("grandma".to_string()));
-        assert_eq!(row.tz_offset_minutes, Some(-480));
-        assert_eq!(row.library, Some("1".to_string()));
-        assert_eq!(row.updated_at, now);
-    }
-
-    #[test]
-    fn upsert_prefs_replaces_existing() {
-        let mut dao = setup_dao();
-        let now1 = 1_700_000_000i64;
-        let now2 = 1_800_000_000i64;
-
-        let prefs1 = UpsertUserAiPrefs {
-            voice: Some("grandma".to_string()),
-            tz_offset_minutes: Some(-480),
-            library: Some("1".to_string()),
-            updated_at: now1,
-        };
-        dao.upsert_prefs(&ctx(), &prefs1).unwrap();
-
-        let prefs2 = UpsertUserAiPrefs {
-            voice: Some("dad".to_string()),
-            tz_offset_minutes: Some(-300),
-            library: None,
-            updated_at: now2,
-        };
-        dao.upsert_prefs(&ctx(), &prefs2).unwrap();
-
-        let row = dao.get_prefs(&ctx()).unwrap().unwrap();
-        assert_eq!(row.voice, Some("dad".to_string()));
-        assert_eq!(row.tz_offset_minutes, Some(-300));
-        assert!(row.library.is_none());
-        assert_eq!(row.updated_at, now2);
-    }
-
-    #[test]
-    fn upsert_partial_fields() {
-        let mut dao = setup_dao();
-        let now = 1_700_000_000i64;
-
-        let prefs = UpsertUserAiPrefs {
-            voice: None,
-            tz_offset_minutes: Some(-480),
-            library: None,
-            updated_at: now,
-        };
-        dao.upsert_prefs(&ctx(), &prefs).unwrap();
-
-        let row = dao.get_prefs(&ctx()).unwrap().unwrap();
-        assert_eq!(row.tz_offset_minutes, Some(-480));
-        assert!(row.voice.is_none());
-        assert!(row.library.is_none());
-    }
-}
@@ -234,7 +234,7 @@ async fn list_exact_handler(
    let span = global_tracer().start_with_context("duplicates.list_exact", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

-    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|l| l.id);
@@ -265,7 +265,7 @@ async fn list_perceptual_handler(
    let span = global_tracer().start_with_context("duplicates.list_perceptual", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

-    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|l| l.id);
@@ -449,7 +449,7 @@ async fn list_folder_pairs_handler(
    let span = global_tracer().start_with_context("duplicates.list_folder_pairs", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

-    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|l| l.id);
@@ -1024,14 +1024,9 @@ impl FaceDao for SqliteFaceDao {
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
-                q.select(
-                    #[allow(deprecated)]
-                    {
-                        diesel::dsl::count_distinct(face_detections::content_hash)
-                    },
-                )
-                .first(conn.deref_mut())
-                .with_context(|| "stats: scanned")?
+                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
+                    .first(conn.deref_mut())
+                    .with_context(|| "stats: scanned")?
            };
            let with_faces: i64 = {
                let mut q = face_detections::table
@@ -1040,14 +1035,9 @@ impl FaceDao for SqliteFaceDao {
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
-                q.select(
-                    #[allow(deprecated)]
-                    {
-                        diesel::dsl::count_distinct(face_detections::content_hash)
-                    },
-                )
-                .first(conn.deref_mut())
-                .with_context(|| "stats: with_faces")?
+                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
+                    .first(conn.deref_mut())
+                    .with_context(|| "stats: with_faces")?
            };
            let no_faces: i64 = {
                let mut q = face_detections::table
@@ -1056,14 +1046,9 @@ impl FaceDao for SqliteFaceDao {
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
-                q.select(
-                    #[allow(deprecated)]
-                    {
-                        diesel::dsl::count_distinct(face_detections::content_hash)
-                    },
-                )
-                .first(conn.deref_mut())
-                .with_context(|| "stats: no_faces")?
+                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
+                    .first(conn.deref_mut())
+                    .with_context(|| "stats: no_faces")?
            };
            let failed: i64 = {
                let mut q = face_detections::table
@@ -1072,14 +1057,9 @@ impl FaceDao for SqliteFaceDao {
                if let Some(lib) = library_id {
                    q = q.filter(face_detections::library_id.eq(lib));
                }
-                q.select(
-                    #[allow(deprecated)]
-                    {
-                        diesel::dsl::count_distinct(face_detections::content_hash)
-                    },
-                )
-                .first(conn.deref_mut())
-                .with_context(|| "stats: failed")?
+                q.select(diesel::dsl::count_distinct(face_detections::content_hash))
+                    .first(conn.deref_mut())
+                    .with_context(|| "stats: failed")?
            };
            // Image-extension filter mirrors `list_unscanned_candidates` so
            // SCANNED can actually reach 100%: videos sit in `image_exif` but
@@ -1755,7 +1735,7 @@ async fn stats_handler<D: FaceDao>(
    let span = global_tracer().start_with_context("faces.stats", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

-    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|l| l.id);
@@ -1782,12 +1762,11 @@ async fn list_faces_handler<D: FaceDao>(
    let normalized_path = normalize_path(&query.path);
    // resolve_library_param returns Option<&Library>; clone so the result
    // is owned (matching the primary_library fallback's type).
-    let library: Library =
-        libraries::resolve_library_param_state(&app_state, query.library.as_deref())
-            .ok()
-            .flatten()
-            .cloned()
-            .unwrap_or_else(|| app_state.primary_library().clone());
+    let library: Library = libraries::resolve_library_param(&app_state, query.library.as_deref())
+        .ok()
+        .flatten()
+        .cloned()
+        .unwrap_or_else(|| app_state.primary_library().clone());

    let mut dao = face_dao.lock().expect("face dao lock");
    let hash = match dao.resolve_content_hash(&span_context, library.id, &normalized_path) {
@@ -1871,7 +1850,7 @@ async fn create_face_handler<D: FaceDao>(
    }

    let normalized_path = normalize_path(&body.path);
-    let library: Library = match libraries::resolve_library_param_state(
+    let library: Library = match libraries::resolve_library_param(
        &app_state,
        body.library.as_ref().map(|i| i.to_string()).as_deref(),
    ) {
@@ -2139,10 +2118,7 @@ async fn update_face_handler<D: FaceDao>(
            // the short context string we surface in the response body —
            // SQLITE_BUSY here usually means another DAO's writer held the
            // lock past `busy_timeout` (5s), which is invisible in `{}`.
-            warn!(
-                "PATCH /image/faces/{}: 500 — update_face failed: {:#}",
-                id, e
-            );
+            warn!("PATCH /image/faces/{}: 500 — update_face failed: {:#}", id, e);
            return HttpResponse::InternalServerError().body(e.to_string());
        }
    };
@@ -2193,7 +2169,7 @@ async fn list_persons_handler<D: FaceDao>(
    let span = global_tracer().start_with_context("persons.list", &context);
    let span_context = opentelemetry::Context::current_with_span(span);

-    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|l| l.id);
@@ -2346,7 +2322,7 @@ async fn person_faces_handler<D: FaceDao>(
    let context = extract_context_from_request(&request);
    let span = global_tracer().start_with_context("persons.faces", &context);
    let span_context = opentelemetry::Context::current_with_span(span);
-    let library_id = libraries::resolve_library_param_state(&app_state, query.library.as_deref())
+    let library_id = libraries::resolve_library_param(&app_state, query.library.as_deref())
        .ok()
        .flatten()
        .map(|l| l.id);
@@ -53,7 +53,6 @@ pub fn walk_library_files(base_path: &Path, excluded_dirs: &[String]) -> Vec<Dir
 /// used by the watcher's quick-scan tick to skip the long tail. Files
 /// whose metadata can't be read are kept; the caller's batch EXIF lookup
 /// dedups against existing rows.
-#[allow(dead_code)]
 pub fn enumerate_indexable_files(
    base_path: &Path,
    excluded_dirs: &[String],
@@ -22,42 +22,8 @@ pub fn needs_ffmpeg_thumbnail(path: &Path) -> bool {
 /// Supported video file extensions
 pub const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "avi", "mkv"];

-/// Audio file extensions accepted as voice-clone references (TTS). Mirrors
-/// the formats Chatterbox can decode (wav/mp3/flac/m4a/aac/ogg).
-pub const AUDIO_EXTENSIONS: &[&str] = &["wav", "mp3", "flac", "m4a", "aac", "ogg", "oga", "opus"];
-
-/// Filenames that are filesystem metadata, not real media — exact
-/// basename match. Extend if a new platform sidecar appears (Windows
-/// Thumbs.db / desktop.ini live here too if those libraries land).
-const METADATA_FILENAMES: &[&str] = &[".DS_Store"];
-
-/// True if the basename is a filesystem metadata sidecar that should be
-/// invisible to every media predicate.
-///
-/// macOS writes `._<name>` AppleDouble companions when copying to
-/// non-HFS volumes — each holds the extended attributes of `<name>`,
-/// NOT a copy of the bytes. Same extension as the real file, so a
-/// pure-extension match treats `._photo.jpg` as a JPEG, ships it to
-/// the decoder, and accumulates failed rows: face_detections
-/// `status='failed'`, clip_embedding `status='failed'`, plus a
-/// pointless `image_exif` row whose `content_hash` will be the hash
-/// of the metadata blob. The downstream noise (failed-row counts that
-/// never go to zero, 422 bursts to Apollo, evictor timer reset by
-/// those 422s) is the visible damage. `.DS_Store` is the per-directory
-/// version (Finder view state) — no extension, but cheap to guard
-/// here too in case some future predicate matches by content type.
-pub fn is_filesystem_metadata(path: &Path) -> bool {
-    let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
-        return false;
-    };
-    name.starts_with("._") || METADATA_FILENAMES.contains(&name)
-}
-
 /// Check if a path has an image extension
 pub fn is_image_file(path: &Path) -> bool {
-    if is_filesystem_metadata(path) {
-        return false;
-    }
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        let ext_lower = ext.to_lowercase();
        IMAGE_EXTENSIONS.contains(&ext_lower.as_str())
@@ -68,9 +34,6 @@ pub fn is_image_file(path: &Path) -> bool {

 /// Check if a path has a video extension
 pub fn is_video_file(path: &Path) -> bool {
-    if is_filesystem_metadata(path) {
-        return false;
-    }
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        let ext_lower = ext.to_lowercase();
        VIDEO_EXTENSIONS.contains(&ext_lower.as_str())
@@ -79,19 +42,6 @@ pub fn is_video_file(path: &Path) -> bool {
    }
 }

-/// Check if a path has an audio extension (voice-clone references)
-pub fn is_audio_file(path: &Path) -> bool {
-    if is_filesystem_metadata(path) {
-        return false;
-    }
-    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
-        let ext_lower = ext.to_lowercase();
-        AUDIO_EXTENSIONS.contains(&ext_lower.as_str())
-    } else {
-        false
-    }
-}
-
 /// Check if a path has a supported media extension (image or video)
 pub fn is_media_file(path: &Path) -> bool {
    is_image_file(path) || is_video_file(path)
@@ -148,46 +98,4 @@ mod tests {
        assert!(!is_media_file(Path::new("document.txt")));
        assert!(!is_media_file(Path::new("no_extension")));
    }
-
-    #[test]
-    fn test_apple_double_excluded_from_media() {
-        // The bug-of-record: ImageApi was shipping macOS AppleDouble
-        // sidecars to Apollo's CLIP/face decoders, accumulating failed
-        // rows and pinning Apollo's eviction timer with the 422 burst.
-        // Predicate-level guard means every downstream walker
-        // (face_watch, backfill, clip_watch, watcher) inherits the fix
-        // without touching their filters.
-        assert!(!is_image_file(Path::new("._photo.jpg")));
-        assert!(!is_image_file(Path::new("dir/._photo.JPG")));
-        assert!(!is_image_file(Path::new("a/b/._DSC_2182-S.jpg")));
-        assert!(!is_video_file(Path::new("._video.mp4")));
-        assert!(!is_media_file(Path::new("._photo.png")));
-        // A real file that merely starts with "_" (no leading dot) is
-        // not AppleDouble — must NOT be filtered.
-        assert!(is_image_file(Path::new("_photo.jpg")));
-    }
-
-    #[test]
-    fn test_ds_store_excluded() {
-        // Finder per-directory metadata. No image extension so
-        // is_image_file would already say false; the guard makes the
-        // predicate's *reason* explicit and covers a hypothetical
-        // future caller matching by basename.
-        assert!(!is_image_file(Path::new(".DS_Store")));
-        assert!(!is_video_file(Path::new(".DS_Store")));
-        assert!(!is_media_file(Path::new("some/dir/.DS_Store")));
-        assert!(is_filesystem_metadata(Path::new(".DS_Store")));
-        assert!(is_filesystem_metadata(Path::new("dir/.DS_Store")));
-    }
-
-    #[test]
-    fn test_dotfiles_other_than_apple_double_are_unaffected() {
-        // We deliberately scope to `._*` + the exact .DS_Store name —
-        // not all dotfiles — because a user could plausibly name a
-        // cover image `.cover.jpg` and we shouldn't silently drop it.
-        // If that turns out to be wrong, broaden here; for now,
-        // narrow + explicit > broad + surprising.
-        assert!(is_image_file(Path::new(".cover.jpg")));
-        assert!(!is_filesystem_metadata(Path::new(".cover.jpg")));
-    }
 }
@@ -275,14 +275,14 @@ pub async fn list_photos<TagD: TagDao, FS: FileSystemAccess>(
    // Resolve the optional library filter. Unknown values return 400. A
    // `None` result means "union across all libraries" and downstream
    // walks iterate every configured library root.
-    let library =
-        match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
-            Ok(lib) => lib,
-            Err(msg) => {
-                log::warn!("Rejecting /photos request: {}", msg);
-                return HttpResponse::BadRequest().body(msg);
-            }
-        };
+    let library = match crate::libraries::resolve_library_param(&app_state, req.library.as_deref())
+    {
+        Ok(lib) => lib,
+        Err(msg) => {
+            log::warn!("Rejecting /photos request: {}", msg);
+            return HttpResponse::BadRequest().body(msg);
+        }
+    };

    let span_context = opentelemetry::Context::current_with_span(span);

@@ -1238,7 +1238,7 @@ pub async fn list_exif_summary(
    // Resolve the library filter up front so a bad id/name 400s before we
    // ever take the DAO mutex. None == union across all libraries.
    let library_filter =
-        match crate::libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
+        match crate::libraries::resolve_library_param(&app_state, req.library.as_deref()) {
            Ok(lib) => lib.map(|l| l.id),
            Err(msg) => {
                span.set_status(Status::error(msg.clone()));
@@ -1511,8 +1511,6 @@ mod tests {
            date_taken_source,
            original_date_taken: None,
            original_date_taken_source: None,
-            clip_embedding: None,
-            clip_model_version: None,
        }
    }

@@ -1552,8 +1550,6 @@ mod tests {
                date_taken_source: data.date_taken_source.clone(),
                original_date_taken: None,
                original_date_taken_source: None,
-                clip_embedding: None,
-                clip_model_version: None,
            })
        }

@@ -1600,8 +1596,6 @@ mod tests {
                date_taken_source: data.date_taken_source.clone(),
                original_date_taken: None,
                original_date_taken_source: None,
-                clip_embedding: None,
-                clip_model_version: None,
            })
        }

@@ -1695,21 +1689,6 @@ mod tests {
            Ok(())
        }

-        fn list_distinct_content_hashes(
-            &mut self,
-            _context: &opentelemetry::Context,
-        ) -> Result<Vec<String>, DbError> {
-            Ok(Vec::new())
-        }
-
-        fn list_paths_and_hashes_for_library(
-            &mut self,
-            _context: &opentelemetry::Context,
-            _library_id: i32,
-        ) -> Result<Vec<(String, Option<String>)>, DbError> {
-            Ok(Vec::new())
-        }
-
        fn get_rows_needing_date_backfill(
            &mut self,
            _context: &opentelemetry::Context,
@@ -1938,35 +1917,6 @@ mod tests {
        ) -> Result<(), DbError> {
            Ok(())
        }
-
-        fn list_clip_unencoded_candidates(
-            &mut self,
-            _context: &opentelemetry::Context,
-            _library_id: i32,
-            _limit: i64,
-        ) -> Result<Vec<(String, String)>, DbError> {
-            Ok(Vec::new())
-        }
-
-        fn backfill_clip_embedding(
-            &mut self,
-            _context: &opentelemetry::Context,
-            _library_id: i32,
-            _rel_path: &str,
-            _embedding: &[u8],
-            _model_version: &str,
-        ) -> Result<(), DbError> {
-            Ok(())
-        }
-
-        fn list_clip_index(
-            &mut self,
-            _context: &opentelemetry::Context,
-            _library_ids: &[i32],
-            _model_version: Option<&str>,
-        ) -> Result<Vec<(String, Vec<u8>)>, DbError> {
-            Ok(Vec::new())
-        }
    }

    mod api {
@@ -1,5 +1,4 @@
 /// Geographic calculation utilities for GPS-based search
-use serde::Deserialize;
 use std::f64;

 /// Calculate distance between two GPS coordinates using the Haversine formula.
@@ -62,140 +61,6 @@ pub fn gps_bounding_box(lat: f64, lon: f64, radius_km: f64) -> (f64, f64, f64, f
    )
 }

-/// A place resolved from a free-text query via forward geocoding.
-///
-/// The filter pipeline searches a *circle* (`gps_lat`/`gps_lon`/
-/// `gps_radius_km`), but a place can be anything from a single address to
-/// a whole country. We collapse Nominatim's bounding box into the smallest
-/// circle that circumscribes it (see [`bbox_to_circle`]) so "Portland" and
-/// "Italy" both map onto the existing circle filter without a schema change.
-#[derive(Debug, Clone, PartialEq)]
-pub struct GeoPlace {
-    /// Nominatim's canonical name for the match (e.g. "Italia").
-    pub display_name: String,
-    /// Centroid latitude in decimal degrees.
-    pub lat: f64,
-    /// Centroid longitude in decimal degrees.
-    pub lon: f64,
-    /// Radius (km) of a circle centred on the centroid that covers the
-    /// matched area. Floored to [`MIN_PLACE_RADIUS_KM`] so a point result
-    /// (whose bounding box is microscopic) still yields a usable circle.
-    pub radius_km: f64,
-}
-
-/// Floor for a geocoded place's radius. Point results (a street address)
-/// come back with a near-zero bounding box; without a floor the circle
-/// filter would match nothing.
-pub const MIN_PLACE_RADIUS_KM: f64 = 0.5;
-
-/// Collapse a bounding box into the centroid + circumscribing radius.
-///
-/// Input is Nominatim's `boundingbox` order: `(south_lat, north_lat,
-/// west_lon, east_lon)`. The radius is the *largest* great-circle distance
-/// from the centroid to any of the four corners, so the resulting circle
-/// fully covers the box. (The corners aren't equidistant on a sphere —
-/// longitude lines converge toward the poles, so the equator-facing edge's
-/// corners are farthest; taking the max guarantees coverage in either
-/// hemisphere.)
-///
-/// Pure and exact (no flooring) so it can be unit-tested directly; callers
-/// apply [`MIN_PLACE_RADIUS_KM`] when turning the result into a filter.
-pub fn bbox_to_circle(south: f64, north: f64, west: f64, east: f64) -> (f64, f64, f64) {
-    let center_lat = (south + north) / 2.0;
-    let center_lon = (west + east) / 2.0;
-    let radius_km = [(south, west), (south, east), (north, west), (north, east)]
-        .iter()
-        .map(|(clat, clon)| haversine_distance(center_lat, center_lon, *clat, *clon))
-        .fold(0.0_f64, f64::max);
-    (center_lat, center_lon, radius_km)
-}
-
-/// Raw Nominatim `/search` result. `lat`/`lon` arrive as strings and
-/// `boundingbox` as a 4-element string array `[south, north, west, east]`.
-#[derive(Deserialize)]
-struct NominatimSearchResult {
-    lat: String,
-    lon: String,
-    display_name: String,
-    boundingbox: Option<[String; 4]>,
-}
-
-/// Forward-geocode a free-text place name to a [`GeoPlace`] via the public
-/// OpenStreetMap Nominatim `/search` endpoint.
-///
-/// Mirrors `InsightGenerator::reverse_geocode`'s error posture: any network,
-/// HTTP, or parse failure returns `None` rather than propagating, so a flaky
-/// geocoder degrades the query to "no location filter" instead of failing it.
-///
-/// Nominatim's usage policy requires a `User-Agent` and rate-limits to ~1
-/// request/second; callers doing this interactively should cache results.
-pub async fn forward_geocode(query: &str) -> Option<GeoPlace> {
-    let q = query.trim();
-    if q.is_empty() {
-        return None;
-    }
-
-    let client = reqwest::Client::new();
-    let response = match client
-        .get("https://nominatim.openstreetmap.org/search")
-        .query(&[("format", "json"), ("limit", "1"), ("q", q)])
-        .header("User-Agent", "ImageAPI/1.0") // Nominatim requires User-Agent
-        .send()
-        .await
-    {
-        Ok(resp) => resp,
-        Err(e) => {
-            log::warn!("Forward geocoding network error for {q:?}: {e}");
-            return None;
-        }
-    };
-
-    if !response.status().is_success() {
-        log::warn!(
-            "Forward geocoding HTTP error for {q:?}: {}",
-            response.status()
-        );
-        return None;
-    }
-
-    let results: Vec<NominatimSearchResult> = match response.json().await {
-        Ok(r) => r,
-        Err(e) => {
-            log::warn!("Forward geocoding JSON parse error for {q:?}: {e}");
-            return None;
-        }
-    };
-
-    let top = results.into_iter().next()?;
-    let lat: f64 = top.lat.parse().ok()?;
-    let lon: f64 = top.lon.parse().ok()?;
-
-    // Prefer the bounding box (handles large places); fall back to a
-    // point + floor radius when Nominatim omits it.
-    let (center_lat, center_lon, radius_km) = match &top.boundingbox {
-        Some([s, n, w, e]) => match (s.parse(), n.parse(), w.parse(), e.parse()) {
-            (Ok(s), Ok(n), Ok(w), Ok(e)) => bbox_to_circle(s, n, w, e),
-            _ => (lat, lon, 0.0),
-        },
-        None => (lat, lon, 0.0),
-    };
-
-    let place = GeoPlace {
-        display_name: top.display_name,
-        lat: center_lat,
-        lon: center_lon,
-        radius_km: radius_km.max(MIN_PLACE_RADIUS_KM),
-    };
-    log::info!(
-        "Forward geocoded {q:?} -> {} ({:.4}, {:.4}, r={:.1}km)",
-        place.display_name,
-        place.lat,
-        place.lon,
-        place.radius_km
-    );
-    Some(place)
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -253,41 +118,4 @@ mod tests {
            distance
        );
    }
-
-    #[test]
-    fn test_bbox_to_circle_centroid() {
-        // Symmetric box around (10, 20): centroid should land dead centre.
-        let (lat, lon, radius) = bbox_to_circle(9.0, 11.0, 19.0, 21.0);
-        assert!((lat - 10.0).abs() < 1e-9, "centroid lat, got {lat}");
-        assert!((lon - 20.0).abs() < 1e-9, "centroid lon, got {lon}");
-        assert!(radius > 0.0, "radius should be positive, got {radius}");
-    }
-
-    #[test]
-    fn test_bbox_to_circle_covers_corner() {
-        // The radius must reach every corner of the box. Verify the
-        // centroid-to-corner distance equals the returned radius for all
-        // four corners (they're symmetric, so all equal).
-        let (south, north, west, east) = (40.0, 42.0, -74.0, -72.0);
-        let (lat, lon, radius) = bbox_to_circle(south, north, west, east);
-        for (clat, clon) in [(south, west), (south, east), (north, west), (north, east)] {
-            let d = haversine_distance(lat, lon, clat, clon);
-            assert!(
-                d <= radius + 1e-6,
-                "corner ({clat},{clon}) at {d}km should be within radius {radius}km"
-            );
-        }
-    }
-
-    #[test]
-    fn test_bbox_to_circle_country_vs_city_scale() {
-        // A country-sized box yields a far larger radius than a city-sized
-        // one — confirming the bbox approach scales with place size.
-        let (_, _, country) = bbox_to_circle(35.5, 47.1, 6.6, 18.5); // ~Italy
-        let (_, _, city) = bbox_to_circle(45.4, 45.6, -122.8, -122.5); // ~Portland
-        assert!(
-            country > city * 10.0,
-            "country radius {country}km should dwarf city radius {city}km"
-        );
-    }
 }
@@ -53,7 +53,7 @@ pub async fn get_image(

    // Resolve library from query param; default to primary so clients that
    // don't yet send `library=` continue to work.
-    let library = match libraries::resolve_library_param_state(&app_state, req.library.as_deref()) {
+    let library = match libraries::resolve_library_param(&app_state, req.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(msg) => {
@@ -82,209 +82,6 @@ pub async fn get_image(

    if let Some((library, path)) = resolved {
        let image_size = req.size.unwrap_or(PhotoSize::Full);
-
-        // `size=large|xlarge` is only meaningful for stills — there's no
-        // useful "resized video preview" tier. Videos fall back to the
-        // existing thumb pipeline (which already handles gif/static
-        // selection). `mut` so preview branches can downgrade to `Full`
-        // after a generation failure.
-        let mut image_size = if (image_size == PhotoSize::Large || image_size == PhotoSize::XLarge)
-            && file_types::is_video_file(&path)
-        {
-            PhotoSize::Thumb
-        } else {
-            image_size
-        };
-
-        if image_size == PhotoSize::Large {
-            let relative_path = path
-                .strip_prefix(&library.root_path)
-                .expect("Error stripping library root prefix from large preview");
-            let relative_path_str = relative_path.to_string_lossy().replace('\\', "/");
-            let thumbs = Path::new(&app_state.thumbnail_path);
-            let large_dir = thumbs.join("_large");
-
-            // Lookup chain mirrors the Thumb branch — hash-keyed first so
-            // multi-library deployments share derivative bytes across
-            // libraries, then library-scoped legacy as the fallback for
-            // rows that aren't hashed yet.
-            let hash_large_path: Option<PathBuf> = {
-                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
-                match dao.get_exif(&context, &relative_path_str) {
-                    Ok(Some(row)) => row
-                        .content_hash
-                        .as_deref()
-                        .map(|h| content_hash::large_preview_path(thumbs, h)),
-                    _ => None,
-                }
-            };
-            let scoped_legacy_large_path =
-                content_hash::library_scoped_legacy_path(&large_dir, library.id, relative_path);
-
-            let existing = hash_large_path
-                .as_ref()
-                .filter(|p| p.exists())
-                .cloned()
-                .or_else(|| {
-                    if scoped_legacy_large_path.exists() {
-                        Some(scoped_legacy_large_path.clone())
-                    } else {
-                        None
-                    }
-                });
-
-            if let Some(found) = existing
-                && let Ok(file) = NamedFile::open(&found)
-            {
-                span.set_status(Status::Ok);
-                return file
-                    .use_etag(true)
-                    .use_last_modified(true)
-                    .prefer_utf8(true)
-                    .into_response(&request);
-            }
-
-            // Cache miss — generate. Resize + JPEG-encode can take 100–500ms
-            // for a 24MP source (longer for RAW), so run on the blocking pool
-            // to keep the actix worker free. Prefer the hash-keyed
-            // destination when a hash is known so the result is reusable
-            // across libraries that hold the same bytes.
-            let dest = hash_large_path
-                .clone()
-                .unwrap_or_else(|| scoped_legacy_large_path.clone());
-            let src = path.clone();
-            let dest_for_block = dest.clone();
-            let generated = web::block(move || {
-                if let Some(parent) = dest_for_block.parent() {
-                    std::fs::create_dir_all(parent)?;
-                }
-                // Write to a sibling tempfile then atomically rename so a
-                // concurrent reader never observes a half-written JPEG.
-                let tmp = dest_for_block.with_extension("jpg.tmp");
-                crate::thumbnails::generate_large_preview(&src, &tmp)?;
-                std::fs::rename(&tmp, &dest_for_block)?;
-                Ok::<(), std::io::Error>(())
-            })
-            .await;
-
-            match generated {
-                Ok(Ok(())) => {
-                    if let Ok(file) = NamedFile::open(&dest) {
-                        span.set_status(Status::Ok);
-                        return file
-                            .use_etag(true)
-                            .use_last_modified(true)
-                            .prefer_utf8(true)
-                            .into_response(&request);
-                    }
-                }
-                Ok(Err(e)) => {
-                    warn!(
-                        "Large preview generation failed for {:?}: {} — falling back to original",
-                        path, e
-                    );
-                }
-                Err(e) => {
-                    warn!(
-                        "Large preview blocking-pool error for {:?}: {} — falling back to original",
-                        path, e
-                    );
-                }
-            }
-            // Fall through to the Full branch below so the caller gets
-            // *something* useful (the original bytes — or the RAW
-            // embedded preview, which is what the Full branch returns for
-            // unrenderable RAW containers) instead of a 404.
-            image_size = PhotoSize::Full;
-        }
-
-        if image_size == PhotoSize::XLarge {
-            let relative_path = path
-                .strip_prefix(&library.root_path)
-                .expect("Error stripping library root prefix from xlarge preview");
-            let relative_path_str = relative_path.to_string_lossy().replace('\\', "/");
-            let thumbs = Path::new(&app_state.thumbnail_path);
-            let xlarge_dir = thumbs.join("_xlarge");
-
-            let hash_xlarge_path: Option<PathBuf> = {
-                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
-                match dao.get_exif(&context, &relative_path_str) {
-                    Ok(Some(row)) => row
-                        .content_hash
-                        .as_deref()
-                        .map(|h| content_hash::xlarge_preview_path(thumbs, h)),
-                    _ => None,
-                }
-            };
-            let scoped_legacy_xlarge_path =
-                content_hash::library_scoped_legacy_path(&xlarge_dir, library.id, relative_path);
-
-            let existing = hash_xlarge_path
-                .as_ref()
-                .filter(|p| p.exists())
-                .cloned()
-                .or_else(|| {
-                    if scoped_legacy_xlarge_path.exists() {
-                        Some(scoped_legacy_xlarge_path.clone())
-                    } else {
-                        None
-                    }
-                });
-
-            if let Some(found) = existing
-                && let Ok(file) = NamedFile::open(&found)
-            {
-                span.set_status(Status::Ok);
-                return file
-                    .use_etag(true)
-                    .use_last_modified(true)
-                    .prefer_utf8(true)
-                    .into_response(&request);
-            }
-
-            let dest = hash_xlarge_path
-                .clone()
-                .unwrap_or_else(|| scoped_legacy_xlarge_path.clone());
-            let src = path.clone();
-            let dest_for_block = dest.clone();
-            let generated = web::block(move || {
-                if let Some(parent) = dest_for_block.parent() {
-                    std::fs::create_dir_all(parent)?;
-                }
-                let tmp = dest_for_block.with_extension("jpg.tmp");
-                crate::thumbnails::generate_xlarge_preview(&src, &tmp)?;
-                std::fs::rename(&tmp, &dest_for_block)?;
-                Ok::<(), std::io::Error>(())
-            })
-            .await;
-
-            match generated {
-                Ok(Ok(())) => {
-                    if let Ok(file) = NamedFile::open(&dest) {
-                        span.set_status(Status::Ok);
-                        return file
-                            .use_etag(true)
-                            .use_last_modified(true)
-                            .prefer_utf8(true)
-                            .into_response(&request);
-                    }
-                }
-                Ok(Err(e)) => {
-                    warn!(
-                        "XLarge preview generation failed for {:?}: {} — falling back to original",
-                        path, e
-                    );
-                }
-                Err(e) => {
-                    warn!(
-                        "XLarge preview blocking-pool error for {:?}: {} — falling back to original",
-                        path, e
-                    );
-                }
-            }
-            image_size = PhotoSize::Full;
-        }
-
        if image_size == PhotoSize::Thumb {
            let relative_path = path
                .strip_prefix(&library.root_path)
@@ -386,15 +183,14 @@ pub async fn get_image(
        // review JPEG, ~1–2 MP). Falls through to NamedFile if no preview is
        // available, which preserves the historical behavior for callers
        // that genuinely want the original bytes.
-        if image_size == PhotoSize::Full
-            && exif::is_tiff_raw(&path)
-            && let Some(preview) = exif::extract_embedded_jpeg_preview(&path)
-        {
-            span.set_status(Status::Ok);
-            return HttpResponse::Ok()
-                .content_type("image/jpeg")
-                .insert_header(("Cache-Control", "public, max-age=3600"))
-                .body(preview);
+        if image_size == PhotoSize::Full && exif::is_tiff_raw(&path) {
+            if let Some(preview) = exif::extract_embedded_jpeg_preview(&path) {
+                span.set_status(Status::Ok);
+                return HttpResponse::Ok()
+                    .content_type("image/jpeg")
+                    .insert_header(("Cache-Control", "public, max-age=3600"))
+                    .body(preview);
+            }
        }

        if let Ok(file) = NamedFile::open(&path) {
@@ -492,7 +288,7 @@ pub async fn get_file_metadata(
    let span_context =
        opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());

-    let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref())
+    let library = libraries::resolve_library_param(&app_state, path.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());
@@ -580,7 +376,7 @@ pub async fn set_image_gps(
    let span_context =
        opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());

-    let library = libraries::resolve_library_param_state(&app_state, body.library.as_deref())
+    let library = libraries::resolve_library_param(&app_state, body.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());
@@ -746,7 +542,7 @@ pub async fn get_full_exif(
    let context = extract_context_from_request(&request);
    let mut span = tracer.start_with_context("get_full_exif", &context);

-    let library = libraries::resolve_library_param_state(&app_state, path.library.as_deref())
+    let library = libraries::resolve_library_param(&app_state, path.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());
@@ -888,8 +684,7 @@ pub async fn set_image_date(
    let span_context =
        opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());

-    let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref())
-    {
+    let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(msg) => {
@@ -911,7 +706,7 @@ pub async fn set_image_date(
        Ok(row) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(build_metadata_response_for_date_mutation(
-                library,
+                &library,
                &normalized_path,
                row,
            ))
@@ -942,8 +737,7 @@ pub async fn clear_image_date(
    let span_context =
        opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());

-    let library = match libraries::resolve_library_param_state(&app_state, body.library.as_deref())
-    {
+    let library = match libraries::resolve_library_param(&app_state, body.library.as_deref()) {
        Ok(Some(lib)) => lib,
        Ok(None) => app_state.primary_library(),
        Err(msg) => {
@@ -963,7 +757,7 @@ pub async fn clear_image_date(
        Ok(row) => {
            span.set_status(Status::Ok);
            HttpResponse::Ok().json(build_metadata_response_for_date_mutation(
-                library,
+                &library,
                &normalized_path,
                row,
            ))
@@ -1003,7 +797,7 @@ pub async fn upload_image(
    // Resolve the optional library selector. Absent → primary library
    // (backwards-compatible with clients that don't yet send `library=`).
    let target_library =
-        match libraries::resolve_library_param_state(&app_state, query.library.as_deref()) {
+        match libraries::resolve_library_param(&app_state, query.library.as_deref()) {
            Ok(Some(lib)) => lib,
            Ok(None) => app_state.primary_library(),
            Err(msg) => {
@@ -11,313 +11,190 @@ use actix_web::{
    web::{self, Data},
 };
 use log::{debug, error, info, warn};
-use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, Tracer};
-use serde::Serialize;
+use opentelemetry::{KeyValue, global};

-use crate::content_hash;
 use crate::data::{
    Claims, PreviewClipRequest, PreviewStatusItem, PreviewStatusRequest, PreviewStatusResponse,
    ThumbnailRequest,
 };
-use crate::database::{ExifDao, PreviewDao};
+use crate::database::PreviewDao;
 use crate::files::is_valid_full_path;
 use crate::libraries;
 use crate::otel::{extract_context_from_request, global_tracer};
 use crate::state::AppState;
-use crate::video::actors::{
-    GeneratePreviewClipMessage, QueueVideosMessage, VideoToQueue, probe_video_stream_meta,
-};
-use crate::video::hls_paths;
-
-/// Response body for `POST /video/generate`. Clients consume
-/// `playlist_url` (hash-keyed, stable across libraries and renames)
-/// and poll for readiness via the URL itself.
-#[derive(Serialize, Debug)]
-struct GenerateVideoResponse {
-    /// Hash-keyed URL to the HLS playlist. Resolves to
-    /// `$VIDEO_PATH/<shard>/<hash>/playlist.m3u8` server-side. Relative
-    /// segment refs inside the playlist resolve correctly because the
-    /// browser appends to this URL's path.
-    playlist_url: String,
-    /// blake3 content hash of the source video. Stable per byte content,
-    /// so duplicate uploads / archive ingests share one set of HLS
-    /// output.
-    content_hash: String,
-    /// `true` iff the playlist file is already on disk. `false` means a
-    /// transcode was queued; clients should retry the URL after a short
-    /// delay (or rely on HLS.js's own retry policy).
-    ready: bool,
-    /// Source-video frame rate in Hz, probed via ffprobe. `None` when the
-    /// probe failed or ffprobe couldn't parse either rate field — clients
-    /// fall back to their own default (typically 30) for frame stepping.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    frame_rate: Option<f32>,
-}
+use crate::video::actors::{GeneratePreviewClipMessage, ProcessMessage, create_playlist};

 #[post("/video/generate")]
 pub async fn generate_video(
    _claims: Claims,
    request: HttpRequest,
    app_state: Data<AppState>,
-    exif_dao: Data<std::sync::Mutex<Box<dyn ExifDao>>>,
    body: web::Json<ThumbnailRequest>,
 ) -> impl Responder {
    let tracer = global_tracer();
+
    let context = extract_context_from_request(&request);
    let mut span = tracer.start_with_context("generate_video", &context);

-    let preferred_library =
-        libraries::resolve_library_param_state(&app_state, body.library.as_deref())
+    let filename = PathBuf::from(&body.path);
+
+    if let Some(name) = filename.file_name() {
+        let filename = name.to_str().expect("Filename should convert to string");
+        // KNOWN ISSUE (multi-library): playlist filename is the basename
+        // alone, so two source files with the same basename — whether in
+        // different libraries or different subdirs of one library —
+        // overwrite each other's playlists while ffmpeg runs. The
+        // hash-keyed `content_hash::hls_dir` is the long-term answer
+        // (see CLAUDE.md "Multi-library data model"); rewiring the
+        // actor pipeline to use it is out of scope for this branch.
+        // The orphan-cleanup job above already walks every library so
+        // it doesn't false-delete archive playlists.
+        let playlist = format!("{}/{}.m3u8", app_state.video_path, filename);
+
+        let library = libraries::resolve_library_param(&app_state, body.library.as_deref())
            .ok()
            .flatten()
            .unwrap_or_else(|| app_state.primary_library());

-    // Try the resolved library first, then fall back to any other library
-    // that actually contains the file — handles union-mode requests where
-    // the mobile client passes no library but the file lives in a
-    // non-primary library. Track which library won so the DB lookup is
-    // scoped correctly.
-    let resolved = is_valid_full_path(&preferred_library.root_path, &body.path, false)
-        .filter(|p| p.exists())
-        .map(|p| (preferred_library.id, preferred_library.root_path.clone(), p))
-        .or_else(|| {
-            app_state.libraries.iter().find_map(|lib| {
-                if lib.id == preferred_library.id {
-                    return None;
-                }
-                is_valid_full_path(&lib.root_path, &body.path, false)
-                    .filter(|p| p.exists())
-                    .map(|p| (lib.id, lib.root_path.clone(), p))
-            })
-        });
+        // Try the resolved library first, then fall back to any other library
+        // that actually contains the file — handles union-mode requests where
+        // the mobile client passes no library but the file lives in a
+        // non-primary library.
+        let resolved = is_valid_full_path(&library.root_path, &body.path, false)
+            .filter(|p| p.exists())
+            .or_else(|| {
+                app_state.libraries.iter().find_map(|lib| {
+                    if lib.id == library.id {
+                        return None;
+                    }
+                    is_valid_full_path(&lib.root_path, &body.path, false).filter(|p| p.exists())
+                })
+            });

-    let Some((resolved_library_id, resolved_root, full_path)) = resolved else {
-        span.set_status(Status::error(format!("invalid path {:?}", &body.path)));
-        return HttpResponse::BadRequest().finish();
-    };
-
-    // Build the rel_path used to look up the row. Forward-slash normalized
-    // so the lookup matches DB rows on Windows — see `rel_path_for_lookup`.
-    let full_path_str = full_path.to_string_lossy().to_string();
-    let rel_path = rel_path_for_lookup(&full_path_str, &resolved_root);
-
-    // DB lookup first. Cheap and avoids re-reading the file off disk for
-    // already-ingested videos.
-    let hash_from_db: Option<String> = {
-        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
-        match dao.get_exif_batch(
-            &context,
-            Some(resolved_library_id),
-            std::slice::from_ref(&rel_path),
-        ) {
-            Ok(rows) => rows.into_iter().next().and_then(|r| r.content_hash),
-            Err(e) => {
-                warn!(
-                    "exif_dao.get_exif_batch failed for {} (lib {}): {:?}",
-                    rel_path, resolved_library_id, e
+        if let Some(path) = resolved {
+            if let Ok(child) = create_playlist(path.to_str().unwrap(), &playlist).await {
+                span.add_event(
+                    "playlist_created".to_string(),
+                    vec![KeyValue::new("playlist-name", filename.to_string())],
                );
-                None
+
+                span.set_status(Status::Ok);
+                app_state.stream_manager.do_send(ProcessMessage(
+                    playlist.clone(),
+                    child,
+                    // opentelemetry::Context::new().with_span(span),
+                ));
            }
+        } else {
+            span.set_status(Status::error(format!("invalid path {:?}", &body.path)));
+            return HttpResponse::BadRequest().finish();
        }
-    };

-    // Best-effort fallback: compute on-the-fly when the DB row hasn't
-    // been written or is mid-backfill. Read-only — no library mutation.
-    let content_hash_str = match hash_from_db {
-        Some(h) => h,
-        None => match content_hash::compute(&full_path) {
-            Ok(id) => id.content_hash,
-            Err(e) => {
-                error!(
-                    "Failed to compute content_hash for {}: {}",
-                    full_path.display(),
-                    e
-                );
-                span.set_status(Status::error(format!("hash compute failed: {}", e)));
-                return HttpResponse::InternalServerError().finish();
-            }
-        },
-    };
-
-    let video_dir = std::path::Path::new(&app_state.video_path);
-    let playlist_path = hls_paths::playlist_for_hash(video_dir, &content_hash_str);
-    let sentinel_path = hls_paths::sentinel_for_hash(video_dir, &content_hash_str);
-    let ready = playlist_path.exists();
-
-    if !ready && !sentinel_path.exists() {
-        // Kick off generation via the existing actor pipeline. Fire-and-
-        // forget — the playlist appears at `playlist_path` once ffmpeg
-        // + rename complete. The client polls the URL.
-        info!(
-            "/video/generate: queueing playlist for {} (hash={})",
-            full_path.display(),
-            &content_hash_str[..content_hash_str.len().min(16)]
-        );
-        app_state.playlist_manager.do_send(QueueVideosMessage {
-            videos: vec![VideoToQueue {
-                video_path: full_path.clone(),
-                content_hash: content_hash_str.clone(),
-            }],
-        });
-        span.add_event(
-            "playlist_queued",
-            vec![KeyValue::new("content_hash", content_hash_str.clone())],
-        );
-    } else if ready {
-        span.add_event(
-            "playlist_already_present",
-            vec![KeyValue::new("content_hash", content_hash_str.clone())],
-        );
+        HttpResponse::Ok().json(playlist)
    } else {
-        // Sentinel present — past transcode attempt failed. Return the
-        // URL anyway (it'll 404 / 5xx at fetch time) so the client gets
-        // a deterministic answer. Operator must delete the sentinel to
-        // force a retry.
-        warn!(
-            "/video/generate: unsupported sentinel present for {} (hash={}); not re-queueing",
-            full_path.display(),
-            &content_hash_str[..content_hash_str.len().min(16)]
-        );
+        let message = format!("Unable to get file name: {:?}", filename);
+        error!("{}", message);
+        span.set_status(Status::error(message));
+
+        HttpResponse::BadRequest().finish()
    }
-
-    let playlist_url = format!(
-        "/video/hls/{}/{}",
-        content_hash_str,
-        hls_paths::PLAYLIST_FILENAME
-    );
-
-    // Probe the source for frame rate so the mobile scrubber can step at
-    // the right interval. Cheap (~tens of ms) and only runs once per video
-    // open. Probe failures degrade silently — clients have a fallback.
-    let frame_rate = probe_video_stream_meta(&full_path.to_string_lossy())
-        .await
-        .frame_rate;
-
-    span.set_status(Status::Ok);
-    HttpResponse::Ok().json(GenerateVideoResponse {
-        playlist_url,
-        content_hash: content_hash_str,
-        ready,
-        frame_rate,
-    })
 }

-/// Serve HLS playlist or segment files under the hash-keyed layout
-/// `$VIDEO_PATH/<shard>/<hash>/<file>`. The matched `{file}` must be
-/// either `playlist.m3u8` or a `segment_NNN.ts` style segment; any other
-/// shape is 400'd to defend against operators stashing other content in
-/// the hash dir.
-#[get("/video/hls/{hash}/{file}")]
-pub async fn stream_hls_file(
+#[get("/video/stream")]
+pub async fn stream_video(
    request: HttpRequest,
    _: Claims,
-    path: web::Path<(String, String)>,
+    path: web::Query<ThumbnailRequest>,
+    app_state: Data<AppState>,
+) -> impl Responder {
+    let tracer = global::tracer("image-server");
+    let context = extract_context_from_request(&request);
+    let mut span = tracer.start_with_context("stream_video", &context);
+
+    let playlist = &path.path;
+    debug!("Playlist: {}", playlist);
+
+    // Only serve files under video_path (HLS playlists) or base_path (source videos)
+    if playlist.starts_with(&app_state.video_path)
+        || is_valid_full_path(&app_state.base_path, playlist, false).is_some()
+    {
+        match NamedFile::open(playlist) {
+            Ok(file) => {
+                span.set_status(Status::Ok);
+                file.into_response(&request)
+            }
+            _ => {
+                span.set_status(Status::error(format!("playlist not found {}", playlist)));
+                HttpResponse::NotFound().finish()
+            }
+        }
+    } else {
+        span.set_status(Status::error(format!("playlist not valid {}", playlist)));
+        HttpResponse::BadRequest().finish()
+    }
+}
+
+#[get("/video/{path}")]
+pub async fn get_video_part(
+    request: HttpRequest,
+    _: Claims,
+    path: web::Path<ThumbnailRequest>,
    app_state: Data<AppState>,
 ) -> impl Responder {
    let tracer = global_tracer();
    let context = extract_context_from_request(&request);
-    let mut span = tracer.start_with_context("stream_hls_file", &context);
+    let mut span = tracer.start_with_context("get_video_part", &context);

-    let (hash, file) = path.into_inner();
-    if !is_valid_hash(&hash) {
-        span.set_status(Status::error("invalid hash"));
-        return HttpResponse::BadRequest().body("invalid hash");
-    }
-    if !is_allowed_hls_filename(&file) {
-        span.set_status(Status::error("invalid file"));
-        return HttpResponse::BadRequest().body("invalid file");
-    }
+    let part = &path.path;
+    debug!("Video part: {}", part);

-    let shard = &hash[..2];
-    let file_path = PathBuf::from(&app_state.video_path)
-        .join(shard)
-        .join(&hash)
-        .join(&file);
+    let mut file_part = PathBuf::new();
+    file_part.push(app_state.video_path.clone());
+    file_part.push(part);

-    // Path-traversal guard: canonicalize both sides and require the file
-    // to live under `app_state.video_path`. `is_valid_hash` /
-    // `is_allowed_hls_filename` already block dangerous strings, but
-    // belt-and-suspenders here is cheap.
+    // Guard against directory traversal attacks
    let canonical_base = match std::fs::canonicalize(&app_state.video_path) {
-        Ok(p) => p,
+        Ok(path) => path,
        Err(e) => {
-            error!("Failed to canonicalize VIDEO_PATH: {:?}", e);
-            span.set_status(Status::error("VIDEO_PATH not canonicalisable"));
+            error!("Failed to canonicalize video path: {:?}", e);
+            span.set_status(Status::error("Invalid video path configuration"));
            return HttpResponse::InternalServerError().finish();
        }
    };
-    let canonical_file = match std::fs::canonicalize(&file_path) {
-        Ok(p) => p,
+
+    let canonical_file = match std::fs::canonicalize(&file_part) {
+        Ok(path) => path,
        Err(_) => {
-            debug!("HLS file not found: {}", file_path.display());
-            span.set_status(Status::error("not found"));
+            warn!("Video part not found or invalid: {:?}", file_part);
+            span.set_status(Status::error(format!("Video part not found '{}'", part)));
            return HttpResponse::NotFound().finish();
        }
    };
+
+    // Ensure the resolved path is still within the video directory
    if !canonical_file.starts_with(&canonical_base) {
-        warn!(
-            "Path traversal attempt: {} resolved outside VIDEO_PATH",
-            file_path.display()
-        );
-        span.set_status(Status::error("traversal"));
+        warn!("Directory traversal attempt detected: {:?}", part);
+        span.set_status(Status::error("Invalid video path"));
        return HttpResponse::Forbidden().finish();
    }

    match NamedFile::open(&canonical_file) {
-        Ok(f) => {
+        Ok(file) => {
            span.set_status(Status::Ok);
-            f.into_response(&request)
+            file.into_response(&request)
        }
-        Err(_) => {
-            span.set_status(Status::error("not found"));
+        _ => {
+            error!("Video part not found: {:?}", file_part);
+            span.set_status(Status::error(format!(
+                "Video part not found '{}'",
+                file_part.to_str().unwrap()
+            )));
            HttpResponse::NotFound().finish()
        }
    }
 }

-/// 64 lowercase-or-upper hex chars. Strict so we don't accept arbitrary
-/// strings that might canonicalize into trouble.
-fn is_valid_hash(s: &str) -> bool {
-    s.len() == 64 && s.bytes().all(|b| b.is_ascii_hexdigit())
-}
-
-/// Compute the forward-slash `rel_path` used to look up a video's
-/// `image_exif` row, from its absolute path string and the library root.
-///
-/// Normalizing to forward slashes is essential on Windows: `file_scan`
-/// stores rel_paths forward-slash regardless of OS, but a raw strip of a
-/// backslash Windows path (`Z:\...\pic\Melissa\clip.mp4`) yields
-/// `Melissa\clip.mp4`. `get_exif_batch` does an exact match with no
-/// normalization, so the backslash form misses and the handler falls back
-/// to re-hashing the entire file on every request.
-fn rel_path_for_lookup(full_path_str: &str, resolved_root: &str) -> String {
-    full_path_str
-        .strip_prefix(resolved_root)
-        .unwrap_or(full_path_str)
-        .trim_start_matches(['/', '\\'])
-        .replace('\\', "/")
-}
-
-/// Allowed file names inside a hash dir. `playlist.m3u8` plus segment
-/// files matching the `segment_NNN.ts` template that `PlaylistGenerator`
-/// writes via `hls_paths::SEGMENT_TEMPLATE`. Anything else (including
-/// `.tmp`, `.unsupported`, dotfiles) returns 400 — these are internal
-/// artifacts the client should never request.
-fn is_allowed_hls_filename(name: &str) -> bool {
-    if name == hls_paths::PLAYLIST_FILENAME {
-        return true;
-    }
-    if let Some(rest) = name.strip_prefix("segment_")
-        && let Some(num) = rest.strip_suffix(".ts")
-        && !num.is_empty()
-        && num.bytes().all(|b| b.is_ascii_digit())
-    {
-        return true;
-    }
-    false
-}
-
 #[get("/video/preview")]
 pub async fn get_video_preview(
    _claims: Claims,
@@ -550,98 +427,6 @@ mod tests {
    use crate::testhelpers::TestPreviewDao;
    use actix_web::App;

-    #[test]
-    fn is_valid_hash_requires_64_ascii_hex() {
-        assert!(is_valid_hash(&"a".repeat(64)));
-        assert!(is_valid_hash(&"F".repeat(64)));
-        assert!(is_valid_hash(&format!("ab{}", "0".repeat(62))));
-
-        assert!(!is_valid_hash(&"a".repeat(63)));
-        assert!(!is_valid_hash(&"a".repeat(65)));
-        // Anything outside the hex alphabet — including '/', '.', '..' —
-        // is rejected up front so the path-traversal canonicalisation
-        // never has to defend the boundary alone.
-        assert!(!is_valid_hash(&format!("/{}", "a".repeat(63))));
-        assert!(!is_valid_hash(&format!("..{}", "a".repeat(62))));
-        assert!(!is_valid_hash(&"g".repeat(64)));
-    }
-
-    #[test]
-    fn is_allowed_hls_filename_accepts_only_playlist_and_segments() {
-        assert!(is_allowed_hls_filename("playlist.m3u8"));
-        assert!(is_allowed_hls_filename("segment_000.ts"));
-        assert!(is_allowed_hls_filename("segment_999.ts"));
-        assert!(is_allowed_hls_filename("segment_0.ts"));
-
-        // Internal artifacts the client should never request.
-        assert!(!is_allowed_hls_filename("playlist.m3u8.tmp"));
-        assert!(!is_allowed_hls_filename("playlist.unsupported"));
-        // Traversal / path components — defence in depth alongside
-        // the actix path matcher itself.
-        assert!(!is_allowed_hls_filename(".."));
-        assert!(!is_allowed_hls_filename("../etc/passwd"));
-        assert!(!is_allowed_hls_filename("segment_abc.ts"));
-        assert!(!is_allowed_hls_filename("segment_.ts"));
-        assert!(!is_allowed_hls_filename(""));
-    }
-
-    #[test]
-    fn rel_path_for_lookup_normalizes_windows_separators() {
-        // Windows: backslash root + backslash full path. The stored row is
-        // forward-slash (`Melissa/clip.mp4`), so without normalization the
-        // lookup misses and the handler re-hashes the whole file.
-        assert_eq!(
-            rel_path_for_lookup(r"Z:\Media\pic\Melissa\clip.mp4", r"Z:\Media\pic"),
-            "Melissa/clip.mp4"
-        );
-    }
-
-    #[test]
-    fn rel_path_for_lookup_handles_unix_separators() {
-        assert_eq!(
-            rel_path_for_lookup("/media/pic/Melissa/clip.mp4", "/media/pic"),
-            "Melissa/clip.mp4"
-        );
-    }
-
-    #[test]
-    fn rel_path_for_lookup_file_at_root_has_no_separator() {
-        // A file directly in the library root has no internal separator, so
-        // the bug never manifested here — guard against a regression anyway.
-        assert_eq!(
-            rel_path_for_lookup(r"Z:\Media\pic\clip.mp4", r"Z:\Media\pic"),
-            "clip.mp4"
-        );
-        assert_eq!(
-            rel_path_for_lookup("/media/pic/clip.mp4", "/media/pic"),
-            "clip.mp4"
-        );
-    }
-
-    #[test]
-    fn rel_path_for_lookup_strips_leading_separators() {
-        // Both separator styles are trimmed from the front after the root
-        // is stripped, regardless of which form the join produced.
-        assert_eq!(
-            rel_path_for_lookup(r"Z:\Media\pic\sub\a.mp4", r"Z:\Media\pic"),
-            "sub/a.mp4"
-        );
-        assert_eq!(
-            rel_path_for_lookup("/media/pic//sub/a.mp4", "/media/pic"),
-            "sub/a.mp4"
-        );
-    }
-
-    #[test]
-    fn rel_path_for_lookup_falls_back_when_root_does_not_match() {
-        // If the root doesn't prefix the path (e.g. a stale mount), we keep
-        // the whole path but still normalize separators rather than panic.
-        assert_eq!(
-            rel_path_for_lookup(r"D:\other\Melissa\clip.mp4", r"Z:\Media\pic"),
-            "D:/other/Melissa/clip.mp4"
-        );
-    }
-
    fn make_token() -> String {
        let claims = Claims::valid_user("1".to_string());
        jsonwebtoken::encode(
@@ -1,409 +0,0 @@
-//! Per-library HLS readiness: Prometheus gauges + `/hls/stats` endpoint.
-//!
-//! The new hash-keyed pipeline transcodes lazily — most of a freshly
-//! mounted library is "pending" for the first hour, and operators want
-//! a live read on "how much work is left, am I CPU-bound, do I need to
-//! bump `HLS_CONCURRENCY`." This module supplies both surfaces against
-//! the same compute path:
-//!
-//! - **Prometheus gauges** `imageserver_hls_videos_total{library}`,
-//!   `..._with_playlist{library}`, `..._pending{library}`,
-//!   `..._unsupported{library}`. Updated every watcher full-scan tick
-//!   and on every `/hls/stats` request, so the freshness matches
-//!   whichever surface the operator is watching.
-//!
-//! - **`GET /hls/stats`** returns a JSON snapshot of the same counts
-//!   plus a top-level cross-library aggregate. Claims-protected
-//!   (matches every other authenticated read in this crate).
-//!
-//! Cost is O(distinct video hashes per library), each row needing a
-//! single `stat()` on the playlist file. On a 100k-video library that's
-//! noticeable; on a typical home library (few thousand) it's noise.
-//! We call from explicit triggers only — never per-request from
-//! middleware — so the cost is bounded.
-
-use std::collections::HashSet;
-use std::path::Path;
-use std::sync::{Arc, Mutex};
-
-use actix_web::{HttpResponse, Responder, get, web};
-use lazy_static::lazy_static;
-use log::{info, warn};
-use prometheus::IntGaugeVec;
-use serde::Serialize;
-
-use crate::data::Claims;
-use crate::database::ExifDao;
-use crate::file_types;
-use crate::libraries::Library;
-use crate::state::AppState;
-use crate::video::hls_paths;
-
-lazy_static! {
-    pub static ref HLS_VIDEOS_TOTAL: IntGaugeVec = IntGaugeVec::new(
-        prometheus::Opts::new(
-            "imageserver_hls_videos_total",
-            "Distinct video content hashes per library known to image_exif",
-        ),
-        &["library"],
-    )
-    .expect("HLS_VIDEOS_TOTAL");
-    pub static ref HLS_VIDEOS_WITH_PLAYLIST: IntGaugeVec = IntGaugeVec::new(
-        prometheus::Opts::new(
-            "imageserver_hls_videos_with_playlist",
-            "Videos whose hash-keyed HLS playlist is already on disk",
-        ),
-        &["library"],
-    )
-    .expect("HLS_VIDEOS_WITH_PLAYLIST");
-    pub static ref HLS_VIDEOS_PENDING: IntGaugeVec = IntGaugeVec::new(
-        prometheus::Opts::new(
-            "imageserver_hls_videos_pending",
-            "Videos whose hash-keyed HLS playlist is not yet on disk",
-        ),
-        &["library"],
-    )
-    .expect("HLS_VIDEOS_PENDING");
-    pub static ref HLS_VIDEOS_UNSUPPORTED: IntGaugeVec = IntGaugeVec::new(
-        prometheus::Opts::new(
-            "imageserver_hls_videos_unsupported",
-            "Videos with an `.unsupported` sentinel — ffmpeg refused; \
-             operator must delete to retry",
-        ),
-        &["library"],
-    )
-    .expect("HLS_VIDEOS_UNSUPPORTED");
-}
-
-/// Per-library HLS readiness snapshot.
-#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
-pub struct HlsLibraryStats {
-    pub library_id: i32,
-    pub library: String,
-    /// Distinct video content hashes (dedupes intra-library bytes-at-N-paths).
-    pub total: usize,
-    /// Of `total`, hashes whose `playlist.m3u8` is on disk.
-    pub with_playlist: usize,
-    /// Of `total`, hashes whose ffmpeg attempt left a `.unsupported`
-    /// sentinel. Counted separately because they won't progress without
-    /// operator intervention (delete the sentinel to retry).
-    pub unsupported: usize,
-    /// `total - (with_playlist + unsupported)` — videos awaiting transcode.
-    pub pending: usize,
-    /// Distinct rel_paths under this library that are video files but
-    /// whose `image_exif.content_hash` is still NULL (mid-backfill).
-    /// These don't yet count toward `total` because they're invisible
-    /// to the hash-keyed pipeline; surfaced so the operator can see
-    /// "hash backfill, then transcode" pipeline depth.
-    pub hashless_videos: usize,
-}
-
-/// JSON response body for `GET /hls/stats`.
-#[derive(Serialize, Debug)]
-pub struct HlsStatsResponse {
-    pub libraries: Vec<HlsLibraryStats>,
-    pub total: usize,
-    pub with_playlist: usize,
-    pub pending: usize,
-    pub unsupported: usize,
-    pub hashless_videos: usize,
-}
-
-/// Compute current readiness per library and publish to Prometheus.
-/// Returns the same data so callers can serialise it. The publish step
-/// is idempotent on the gauge — old values get overwritten.
-pub fn compute_and_publish(
-    libraries: &[Library],
-    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
-    video_dir: &Path,
-) -> Vec<HlsLibraryStats> {
-    let ctx = opentelemetry::Context::new();
-    let mut out = Vec::with_capacity(libraries.len());
-    for lib in libraries {
-        let stats = compute_for_library(&ctx, lib, exif_dao, video_dir);
-        publish_gauges(&stats);
-        out.push(stats);
-    }
-    out
-}
-
-fn publish_gauges(s: &HlsLibraryStats) {
-    HLS_VIDEOS_TOTAL
-        .with_label_values(&[s.library.as_str()])
-        .set(s.total as i64);
-    HLS_VIDEOS_WITH_PLAYLIST
-        .with_label_values(&[s.library.as_str()])
-        .set(s.with_playlist as i64);
-    HLS_VIDEOS_PENDING
-        .with_label_values(&[s.library.as_str()])
-        .set(s.pending as i64);
-    HLS_VIDEOS_UNSUPPORTED
-        .with_label_values(&[s.library.as_str()])
-        .set(s.unsupported as i64);
-}
-
-fn compute_for_library(
-    ctx: &opentelemetry::Context,
-    lib: &Library,
-    exif_dao: &Arc<Mutex<Box<dyn ExifDao>>>,
-    video_dir: &Path,
-) -> HlsLibraryStats {
-    let rows = {
-        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
-        match dao.list_paths_and_hashes_for_library(ctx, lib.id) {
-            Ok(r) => r,
-            Err(e) => {
-                warn!(
-                    "hls_stats: list_paths_and_hashes_for_library failed for lib {}: {:?}",
-                    lib.id, e
-                );
-                Vec::new()
-            }
-        }
-    };
-    stats_from_rows(lib, &rows, video_dir)
-}
-
-/// Pure function — same compute as [`compute_for_library`] but works
-/// on caller-supplied rows. Split out so tests don't need a full
-/// `ExifDao` mock; the integration path is exercised through
-/// `compute_and_publish` against the real SQLite DAO at runtime.
-fn stats_from_rows(
-    lib: &Library,
-    rows: &[(String, Option<String>)],
-    video_dir: &Path,
-) -> HlsLibraryStats {
-    let mut hashes: HashSet<String> = HashSet::new();
-    let mut hashless_videos = 0usize;
-    for (rel_path, hash_opt) in rows {
-        if !file_types::is_video_file(Path::new(rel_path)) {
-            continue;
-        }
-        match hash_opt {
-            Some(h) => {
-                hashes.insert(h.clone());
-            }
-            None => {
-                hashless_videos += 1;
-            }
-        }
-    }
-
-    let mut with_playlist = 0usize;
-    let mut unsupported = 0usize;
-    for h in &hashes {
-        if hls_paths::playlist_for_hash(video_dir, h).exists() {
-            with_playlist += 1;
-        } else if hls_paths::sentinel_for_hash(video_dir, h).exists() {
-            unsupported += 1;
-        }
-    }
-    let total = hashes.len();
-    let pending = total.saturating_sub(with_playlist + unsupported);
-
-    HlsLibraryStats {
-        library_id: lib.id,
-        library: lib.name.clone(),
-        total,
-        with_playlist,
-        unsupported,
-        pending,
-        hashless_videos,
-    }
-}
-
-/// Log a single info line summarising readiness across all libraries.
-/// Called by the watcher at the end of a full-scan tick so operators
-/// who tail the log see the headline number without scraping
-/// Prometheus.
-pub fn log_summary(stats: &[HlsLibraryStats]) {
-    let total: usize = stats.iter().map(|s| s.total).sum();
-    let with_playlist: usize = stats.iter().map(|s| s.with_playlist).sum();
-    let pending: usize = stats.iter().map(|s| s.pending).sum();
-    let unsupported: usize = stats.iter().map(|s| s.unsupported).sum();
-    let hashless: usize = stats.iter().map(|s| s.hashless_videos).sum();
-
-    let per_lib: Vec<String> = stats
-        .iter()
-        .map(|s| {
-            format!(
-                "{}={}/{} pending={} unsupported={} hashless={}",
-                s.library, s.with_playlist, s.total, s.pending, s.unsupported, s.hashless_videos,
-            )
-        })
-        .collect();
-
-    info!(
-        "HLS readiness: {}/{} playlists on disk, {} pending, {} unsupported, {} hashless videos | per-library: [{}]",
-        with_playlist,
-        total,
-        pending,
-        unsupported,
-        hashless,
-        per_lib.join(", "),
-    );
-}
-
-#[get("/hls/stats")]
-pub async fn hls_stats_handler(
-    _claims: Claims,
-    app_state: web::Data<AppState>,
-    exif_dao: web::Data<Mutex<Box<dyn ExifDao>>>,
-) -> impl Responder {
-    let libraries = app_state.libraries.clone();
-    let video_dir = std::path::PathBuf::from(&app_state.video_path);
-    let exif_dao = exif_dao.into_inner();
-
-    // Synchronous file IO + DB query — run on a blocking pool so the
-    // actix worker thread stays free for other requests.
-    let stats =
-        match web::block(move || compute_and_publish(&libraries, &exif_dao, &video_dir)).await {
-            Ok(s) => s,
-            Err(e) => {
-                warn!("/hls/stats: blocking task failed: {:?}", e);
-                Vec::new()
-            }
-        };
-
-    let total: usize = stats.iter().map(|s| s.total).sum();
-    let with_playlist: usize = stats.iter().map(|s| s.with_playlist).sum();
-    let pending: usize = stats.iter().map(|s| s.pending).sum();
-    let unsupported: usize = stats.iter().map(|s| s.unsupported).sum();
-    let hashless_videos: usize = stats.iter().map(|s| s.hashless_videos).sum();
-
-    HttpResponse::Ok().json(HlsStatsResponse {
-        libraries: stats,
-        total,
-        with_playlist,
-        pending,
-        unsupported,
-        hashless_videos,
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use tempfile::tempdir;
-
-    fn lib(id: i32, name: &str) -> Library {
-        Library {
-            id,
-            name: name.into(),
-            root_path: String::new(),
-            enabled: true,
-            excluded_dirs: Vec::new(),
-        }
-    }
-
-    fn rows(vs: Vec<(&str, Option<&str>)>) -> Vec<(String, Option<String>)> {
-        vs.into_iter()
-            .map(|(p, h)| (p.to_string(), h.map(|s| s.to_string())))
-            .collect()
-    }
-
-    fn touch(dir: &Path, rel: &str) {
-        let p = dir.join(rel);
-        std::fs::create_dir_all(p.parent().unwrap()).unwrap();
-        std::fs::write(p, b"").unwrap();
-    }
-
-    #[test]
-    fn videos_only_count_in_total() {
-        let tmp = tempdir().unwrap();
-        let r = rows(vec![
-            ("photos/IMG.jpg", Some(&"a".repeat(64))), // image: ignored
-            ("clip.mp4", Some(&"b".repeat(64))),
-            ("vid.mov", Some(&"c".repeat(64))),
-        ]);
-        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
-        assert_eq!(stats.total, 2);
-        assert_eq!(stats.with_playlist, 0);
-        assert_eq!(stats.pending, 2);
-        assert_eq!(stats.unsupported, 0);
-        assert_eq!(stats.hashless_videos, 0);
-    }
-
-    #[test]
-    fn hash_dedup_collapses_duplicate_rel_paths() {
-        let tmp = tempdir().unwrap();
-        let r = rows(vec![
-            ("a/clip.mp4", Some(&"a".repeat(64))),
-            ("b/clip.mp4", Some(&"a".repeat(64))), // same bytes, dup
-            ("other.mp4", Some(&"b".repeat(64))),
-        ]);
-        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
-        assert_eq!(stats.total, 2, "duplicate hashes collapse");
-    }
-
-    #[test]
-    fn playlist_existence_promotes_to_with_playlist() {
-        let tmp = tempdir().unwrap();
-        let hash = "a".repeat(64);
-        touch(tmp.path(), &format!("aa/{}/playlist.m3u8", hash));
-
-        let r = rows(vec![("clip.mp4", Some(&hash))]);
-        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
-        assert_eq!(stats.total, 1);
-        assert_eq!(stats.with_playlist, 1);
-        assert_eq!(stats.pending, 0);
-    }
-
-    #[test]
-    fn sentinel_existence_promotes_to_unsupported() {
-        let tmp = tempdir().unwrap();
-        let hash = "b".repeat(64);
-        touch(tmp.path(), &format!("bb/{}/playlist.unsupported", hash));
-
-        let r = rows(vec![("clip.mov", Some(&hash))]);
-        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
-        assert_eq!(stats.total, 1);
-        assert_eq!(stats.unsupported, 1);
-        assert_eq!(stats.with_playlist, 0);
-        assert_eq!(stats.pending, 0);
-    }
-
-    #[test]
-    fn null_hash_videos_are_hashless_not_total() {
-        let tmp = tempdir().unwrap();
-        let r = rows(vec![
-            ("clip.mp4", None),
-            ("other.mp4", Some(&"a".repeat(64))),
-        ]);
-        let stats = stats_from_rows(&lib(1, "main"), &r, tmp.path());
-        assert_eq!(stats.total, 1, "hashless row excluded from total");
-        assert_eq!(stats.hashless_videos, 1);
-    }
-
-    #[test]
-    fn publish_gauges_sets_per_library_value() {
-        let s = HlsLibraryStats {
-            library_id: 7,
-            library: "test_publish_a".into(),
-            total: 5,
-            with_playlist: 2,
-            pending: 3,
-            unsupported: 0,
-            hashless_videos: 0,
-        };
-        publish_gauges(&s);
-        assert_eq!(
-            HLS_VIDEOS_TOTAL
-                .with_label_values(&["test_publish_a"])
-                .get(),
-            5
-        );
-        assert_eq!(
-            HLS_VIDEOS_PENDING
-                .with_label_values(&["test_publish_a"])
-                .get(),
-            3
-        );
-        assert_eq!(
-            HLS_VIDEOS_WITH_PLAYLIST
-                .with_label_values(&["test_publish_a"])
-                .get(),
-            2
-        );
-    }
-}
@@ -444,7 +444,8 @@ where
            )
            .service(web::resource("/graph").route(web::get().to(get_graph::<D>)))
            .service(
-                web::resource("/predicate-stats").route(web::get().to(get_predicate_stats::<D>)),
+                web::resource("/predicate-stats")
+                    .route(web::get().to(get_predicate_stats::<D>)),
            )
            .service(
                web::resource("/predicates/{predicate}/bulk-reject")
@@ -803,36 +804,38 @@ async fn synthesize_merge<D: KnowledgeDao + 'static>(
            .json(serde_json::json!({"error": "source_id and target_id must differ"}));
    }

-    let (source, target) = {
-        let cx = opentelemetry::Context::current();
-        let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
+    let cx = opentelemetry::Context::current();
+    let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");

-        let source = match dao.get_entity_by_id(&cx, body.source_id) {
-            Ok(Some(e)) => e,
-            Ok(None) => {
-                return HttpResponse::BadRequest()
-                    .json(serde_json::json!({"error": "source entity not found"}));
-            }
-            Err(e) => {
-                log::error!("synthesize_merge source lookup: {:?}", e);
-                return HttpResponse::InternalServerError()
-                    .json(serde_json::json!({"error": "Database error"}));
-            }
-        };
-        let target = match dao.get_entity_by_id(&cx, body.target_id) {
-            Ok(Some(e)) => e,
-            Ok(None) => {
-                return HttpResponse::BadRequest()
-                    .json(serde_json::json!({"error": "target entity not found"}));
-            }
-            Err(e) => {
-                log::error!("synthesize_merge target lookup: {:?}", e);
-                return HttpResponse::InternalServerError()
-                    .json(serde_json::json!({"error": "Database error"}));
-            }
-        };
-        (source, target)
+    let source = match dao.get_entity_by_id(&cx, body.source_id) {
+        Ok(Some(e)) => e,
+        Ok(None) => {
+            return HttpResponse::BadRequest()
+                .json(serde_json::json!({"error": "source entity not found"}));
+        }
+        Err(e) => {
+            log::error!("synthesize_merge source lookup: {:?}", e);
+            return HttpResponse::InternalServerError()
+                .json(serde_json::json!({"error": "Database error"}));
+        }
    };
+    let target = match dao.get_entity_by_id(&cx, body.target_id) {
+        Ok(Some(e)) => e,
+        Ok(None) => {
+            return HttpResponse::BadRequest()
+                .json(serde_json::json!({"error": "target entity not found"}));
+        }
+        Err(e) => {
+            log::error!("synthesize_merge target lookup: {:?}", e);
+            return HttpResponse::InternalServerError()
+                .json(serde_json::json!({"error": "Database error"}));
+        }
+    };
+
+    // Drop the DAO lock before the LLM call — the generate request
+    // is the slow part (seconds) and we don't want to block other
+    // knowledge reads while it runs.
+    drop(dao);

    let source_desc = if source.description.trim().is_empty() {
        "(none)".to_string()
@@ -1258,8 +1261,12 @@ async fn bulk_reject_predicate<D: KnowledgeDao + 'static>(
    let persona = resolve_persona_filter(&req, &claims, &persona_dao);
    let cx = opentelemetry::Context::current();
    let mut dao = dao.lock().expect("Unable to lock KnowledgeDao");
-    match dao.bulk_reject_facts_by_predicate(&cx, &persona, &predicate, Some(("manual", "manual")))
-    {
+    match dao.bulk_reject_facts_by_predicate(
+        &cx,
+        &persona,
+        &predicate,
+        Some(("manual", "manual")),
+    ) {
        Ok(rejected) => HttpResponse::Ok().json(BulkRejectResponse { rejected }),
        Err(e) => {
            log::error!("bulk_reject_predicate error: {:?}", e);
@@ -7,8 +7,6 @@ pub mod ai;
 pub mod auth;
 pub mod bin_progress;
 pub mod cleanup;
-pub mod clip_search;
-pub mod clip_watch;
 pub mod content_hash;
 pub mod data;
 pub mod database;
@@ -35,7 +33,6 @@ pub mod tags;
 #[cfg(test)]
 pub mod testhelpers;
 pub mod thumbnails;
-pub mod unified_search;
 pub mod utils;
 pub mod video;

@@ -94,7 +94,7 @@ pub fn parse_excluded_dirs_column(raw: Option<&str>) -> Vec<String> {
    match raw {
        None => Vec::new(),
        Some(s) => s
-            .split([',', '\n', '\r'])
+            .split(|c: char| matches!(c, ',' | '\n' | '\r'))
            .map(str::trim)
            .filter(|s| !s.is_empty())
            .map(String::from)
@@ -148,7 +148,10 @@ pub fn validate_excluded_dirs_entry(entry: &str) -> Result<String, String> {
    if let Some(rel) = trimmed.strip_prefix('/') {
        // Path form. Reject `..` traversal — `base.join(\"../x\")` doesn't
        // canonicalise, so `path.starts_with(...)` never matches.
-        if rel.split('/').any(|seg| seg == "..") {
+        if rel
+            .split('/')
+            .any(|seg| seg == "..")
+        {
            return Err(format!(
                "'{}': '..' segments don't normalise — the prefix-match never fires",
                trimmed
@@ -291,11 +294,11 @@ pub fn seed_or_patch_from_env(conn: &mut SqliteConnection, base_path: &str) {
 }

 /// Resolve a library request parameter (accepts numeric id as string or name)
-/// against a list of libraries. Returns `Ok(None)` when the param is
+/// against the configured libraries. Returns `Ok(None)` when the param is
 /// absent, meaning "span all libraries". Returns `Err` when a value is
 /// provided but does not match any library.
 pub fn resolve_library_param<'a>(
-    libs: &'a [Library],
+    state: &'a AppState,
    param: Option<&str>,
 ) -> Result<Option<&'a Library>, String> {
    let Some(raw) = param.map(str::trim).filter(|s| !s.is_empty()) else {
@@ -303,29 +306,18 @@ pub fn resolve_library_param<'a>(
    };

    if let Ok(id) = raw.parse::<i32>() {
-        return libs
-            .iter()
-            .find(|l| l.id == id)
+        return state
+            .library_by_id(id)
            .map(Some)
            .ok_or_else(|| format!("unknown library id: {}", id));
    }

-    libs.iter()
-        .find(|l| l.name == raw)
+    state
+        .library_by_name(raw)
        .map(Some)
        .ok_or_else(|| format!("unknown library name: {}", raw))
 }

-/// Resolve a library request parameter against the AppState's libraries.
-/// Returns `Ok(None)` when the param is absent, meaning "span all libraries".
-/// Returns `Err` when a value is provided but does not match any library.
-pub fn resolve_library_param_state<'a>(
-    state: &'a AppState,
-    param: Option<&str>,
-) -> Result<Option<&'a Library>, String> {
-    resolve_library_param(&state.libraries, param)
-}
-
 /// Health of a library at a point in time. Probed at the top of each
 /// file-watcher tick. The `Stale` state is the "be conservative" signal:
 /// destructive paths (ingest writes, future move-handoff and orphan GC in
@@ -550,10 +542,7 @@ pub async fn patch_library(
        {
            Ok(n) => affected = affected.max(n),
            Err(e) => {
-                warn!(
-                    "PATCH /libraries/{}: enabled update failed: {:?}",
-                    lib_id, e
-                );
+                warn!("PATCH /libraries/{}: enabled update failed: {:?}", lib_id, e);
                return HttpResponse::InternalServerError().body(format!("{}", e));
            }
        }
@@ -611,9 +600,7 @@ pub async fn patch_library(
            );
            HttpResponse::Ok().json(lib)
        }
-        None => {
-            HttpResponse::NotFound().body(format!("library id {} not found after update", lib_id))
-        }
+        None => HttpResponse::NotFound().body(format!("library id {} not found after update", lib_id)),
    }
 }

@@ -673,6 +660,12 @@ mod tests {
        assert_eq!(abs, PathBuf::from("/tmp/media/2024/photo.jpg"));
    }

+    fn state_with_libraries(libs: Vec<Library>) -> AppState {
+        let mut state = AppState::test_state();
+        state.libraries = libs;
+        state
+    }
+
    fn sample_libraries() -> Vec<Library> {
        vec![
            Library {
@@ -692,52 +685,52 @@ mod tests {
        ]
    }

-    #[test]
-    fn resolve_library_param_absent_is_union() {
-        let libs = sample_libraries();
-        assert!(matches!(resolve_library_param(&libs, None), Ok(None)));
+    #[actix_rt::test]
+    async fn resolve_library_param_absent_is_union() {
+        let state = state_with_libraries(sample_libraries());
+        assert!(matches!(resolve_library_param(&state, None), Ok(None)));
    }

-    #[test]
-    fn resolve_library_param_empty_or_whitespace_is_union() {
-        let libs = sample_libraries();
-        assert!(matches!(resolve_library_param(&libs, Some("")), Ok(None)));
+    #[actix_rt::test]
+    async fn resolve_library_param_empty_or_whitespace_is_union() {
+        let state = state_with_libraries(sample_libraries());
+        assert!(matches!(resolve_library_param(&state, Some("")), Ok(None)));
        assert!(matches!(
-            resolve_library_param(&libs, Some("   ")),
+            resolve_library_param(&state, Some("   ")),
            Ok(None)
        ));
    }

-    #[test]
-    fn resolve_library_param_numeric_id_matches() {
-        let libs = sample_libraries();
-        let lib = resolve_library_param(&libs, Some("7"))
+    #[actix_rt::test]
+    async fn resolve_library_param_numeric_id_matches() {
+        let state = state_with_libraries(sample_libraries());
+        let lib = resolve_library_param(&state, Some("7"))
            .expect("valid id")
            .expect("some library");
        assert_eq!(lib.id, 7);
        assert_eq!(lib.name, "archive");
    }

-    #[test]
-    fn resolve_library_param_name_matches() {
-        let libs = sample_libraries();
-        let lib = resolve_library_param(&libs, Some("main"))
+    #[actix_rt::test]
+    async fn resolve_library_param_name_matches() {
+        let state = state_with_libraries(sample_libraries());
+        let lib = resolve_library_param(&state, Some("main"))
            .expect("valid name")
            .expect("some library");
        assert_eq!(lib.id, 1);
    }

-    #[test]
-    fn resolve_library_param_unknown_id_errs() {
-        let libs = sample_libraries();
-        let err = resolve_library_param(&libs, Some("999")).unwrap_err();
+    #[actix_rt::test]
+    async fn resolve_library_param_unknown_id_errs() {
+        let state = state_with_libraries(sample_libraries());
+        let err = resolve_library_param(&state, Some("999")).unwrap_err();
        assert!(err.contains("unknown library id"));
    }

-    #[test]
-    fn resolve_library_param_unknown_name_errs() {
-        let libs = sample_libraries();
-        let err = resolve_library_param(&libs, Some("missing")).unwrap_err();
+    #[actix_rt::test]
+    async fn resolve_library_param_unknown_name_errs() {
+        let state = state_with_libraries(sample_libraries());
+        let err = resolve_library_param(&state, Some("missing")).unwrap_err();
        assert!(err.contains("unknown library name"));
    }

@@ -937,7 +930,10 @@ mod tests {

    #[test]
    fn validate_strips_trailing_slash_on_path_entries() {
-        assert_eq!(validate_excluded_dirs_entry("/photos/").unwrap(), "/photos");
+        assert_eq!(
+            validate_excluded_dirs_entry("/photos/").unwrap(),
+            "/photos"
+        );
        assert_eq!(
            validate_excluded_dirs_entry("/photos//").unwrap(),
            "/photos"
@@ -1057,7 +1053,7 @@ mod tests {
            enabled: true,
            excluded_dirs: Vec::new(),
        };
-        let map = new_health_map(std::slice::from_ref(&lib));
+        let map = new_health_map(&[lib.clone()]);

        // First probe: empty dir, no prior data — Online.
        let s1 = refresh_health(&map, &lib, false);
@@ -296,7 +296,6 @@ impl GcStats {
            || self.revived > 0
    }

-    #[allow(dead_code)]
    pub fn total_deleted(&self) -> usize {
        self.deleted_face_detections + self.deleted_tagged_photo + self.deleted_photo_insights
    }
@@ -26,13 +26,12 @@ use crate::files::{RealFileSystem, move_file};
 use crate::service::ServiceBuilder;
 use crate::state::AppState;
 use crate::tags::*;
+use crate::video::actors::ScanDirectoryMessage;
 use log::{error, info};

 mod ai;
 mod auth;
 mod backfill;
-mod clip_search;
-mod clip_watch;
 mod content_hash;
 mod data;
 mod database;
@@ -47,14 +46,12 @@ mod file_types;
 mod files;
 mod geo;
 mod handlers;
-mod hls_stats;
 mod libraries;
 mod library_maintenance;
 mod perceptual_hash;
 mod state;
 mod tags;
 mod thumbnails;
-mod unified_search;
 mod utils;
 mod video;
 mod watcher;
@@ -63,7 +60,6 @@ mod knowledge;
 mod memories;
 mod otel;
 mod personas;
-mod reels;
 mod service;
 #[cfg(test)]
 mod testhelpers;
@@ -77,32 +73,6 @@ fn main() -> std::io::Result<()> {

    run_migrations(&mut connect()).expect("Failed to run migrations");

-    // Recover orphaned insight generation jobs from a previous crash.
-    {
-        use crate::database::{InsightGenerationJobDao, SqliteInsightGenerationJobDao};
-        let mut dao = SqliteInsightGenerationJobDao::new();
-        let ctx = opentelemetry::Context::new();
-        match dao.recover_orphaned_jobs(&ctx) {
-            Ok(n) if n > 0 => {
-                info!("Recovered {} orphaned insight generation jobs", n);
-            }
-            Ok(_) => {}
-            Err(e) => {
-                log::warn!("Failed to recover orphaned insight jobs: {:?}", e);
-            }
-        }
-    }
-
-    // One-shot retirement of the pre-content-hash HLS layout. Idempotent
-    // — a second boot finds nothing and reports zero deletions, so it's
-    // safe to leave wired in until the module is removed in a later
-    // release. Runs before the actor pipeline starts so we never race a
-    // PlaylistGenerator write against this rm.
-    {
-        let video_path = env::var("VIDEO_PATH").expect("VIDEO_PATH was not set in the env");
-        video::legacy_migration::retire_legacy_hls_output(std::path::Path::new(&video_path));
-    }
-
    let system = actix::System::new();
    system.block_on(async {
        // Just use basic logger when running a non-release build
@@ -147,32 +117,15 @@ fn main() -> std::io::Result<()> {
            .registry
            .register(Box::new(thumbnails::VIDEO_GAUGE.clone()))
            .unwrap();
-        // HLS readiness gauges. Updated by the watcher every full-scan
-        // tick and on every `/hls/stats` request. See `hls_stats`.
-        prometheus
-            .registry
-            .register(Box::new(hls_stats::HLS_VIDEOS_TOTAL.clone()))
-            .unwrap();
-        prometheus
-            .registry
-            .register(Box::new(hls_stats::HLS_VIDEOS_WITH_PLAYLIST.clone()))
-            .unwrap();
-        prometheus
-            .registry
-            .register(Box::new(hls_stats::HLS_VIDEOS_PENDING.clone()))
-            .unwrap();
-        prometheus
-            .registry
-            .register(Box::new(hls_stats::HLS_VIDEOS_UNSUPPORTED.clone()))
-            .unwrap();

        let app_state = app_data.clone();
+        for lib in &app_state.libraries {
+            app_state.playlist_manager.do_send(ScanDirectoryMessage {
+                directory: lib.root_path.clone(),
+            });
+        }

-        // Start file watcher with playlist manager and preview generator.
-        // The watcher's first tick is configured to be a full scan (see
-        // `watch_files`), so every library's missing HLS playlists are
-        // queued on that first iteration — no separate startup walk
-        // needed.
+        // Start file watcher with playlist manager and preview generator
        let playlist_mgr_for_watcher = app_state.playlist_manager.as_ref().clone();
        let preview_gen_for_watcher = app_state.preview_clip_generator.as_ref().clone();
        // Both background jobs read from the shared `live_libraries` lock
@@ -184,7 +137,6 @@ fn main() -> std::io::Result<()> {
            playlist_mgr_for_watcher,
            preview_gen_for_watcher,
            app_state.face_client.clone(),
-            app_state.clip_client.clone(),
            app_state.excluded_dirs.clone(),
            app_state.library_health.clone(),
        );
@@ -199,28 +151,6 @@ fn main() -> std::io::Result<()> {
            app_state.library_health.clone(),
        );

-        // Periodically clean up stale turn entries from the in-memory
-        // registry. Runs at the same interval as the configured timeout,
-        // drops entries older than that timeout.
-        {
-            let registry = app_state.turn_registry.clone();
-            let timeout_secs = registry.timeout_secs();
-            tokio::spawn(async move {
-                // Sweep at most every 5 minutes, and never less often than the
-                // timeout itself — otherwise entries could linger up to ~2× the
-                // configured timeout before being reclaimed.
-                let interval_secs = timeout_secs.clamp(1, 300);
-                let interval = tokio::time::Duration::from_secs(interval_secs);
-                loop {
-                    tokio::time::sleep(interval).await;
-                    let cleaned = registry.cleanup_stale().await;
-                    if cleaned > 0 {
-                        log::info!("TurnRegistry: cleaned up {cleaned} stale entries");
-                    }
-                }
-            });
-        }
-
        // Spawn background job to generate daily conversation summaries
        {
            use crate::ai::generate_daily_summaries;
@@ -268,11 +198,6 @@ fn main() -> std::io::Result<()> {
            }
        }

-        // Spawn the nightly pre-generation scheduler (Section D).
-        reels::spawn_pregen_scheduler(app_state.clone()).await;
-        // Spawn the on-disk reel-cache sweeper (bounds pre-gen + on-demand reels).
-        reels::spawn_reel_cache_sweeper(app_state.clone()).await;
-
        HttpServer::new(move || {
            let user_dao = SqliteUserDao::new();
            let favorites_dao = SqliteFavoriteDao::new();
@@ -328,27 +253,14 @@ fn main() -> std::io::Result<()> {
                .service(
                    web::resource("/photos/exif").route(web::get().to(files::list_exif_summary)),
                )
-                .service(
-                    // Semantic search via CLIP embeddings. See
-                    // src/clip_search.rs for the request/response shape.
-                    web::resource("/photos/search")
-                        .route(web::get().to(clip_search::search_photos)),
-                )
-                .service(
-                    // Unified natural-language search: LLM translates the
-                    // query into structured filters + a semantic term, then
-                    // filters constrain and CLIP ranks. See src/unified_search.rs.
-                    web::resource("/photos/search/unified")
-                        .route(web::get().to(unified_search::unified_search::<SqliteTagDao>)),
-                )
                .service(web::resource("/file/move").post(move_file::<RealFileSystem>))
                .service(handlers::image::get_image)
                .service(handlers::image::upload_image)
                .service(handlers::video::generate_video)
-                .service(handlers::video::stream_hls_file)
+                .service(handlers::video::stream_video)
                .service(handlers::video::get_video_preview)
                .service(handlers::video::get_preview_status)
-                .service(hls_stats::hls_stats_handler)
+                .service(handlers::video::get_video_part)
                .service(handlers::favorites::favorites)
                .service(handlers::favorites::put_add_favorite)
                .service(handlers::favorites::delete_favorite)
@@ -358,38 +270,19 @@ fn main() -> std::io::Result<()> {
                .service(handlers::image::clear_image_date)
                .service(handlers::image::get_full_exif)
                .service(memories::list_memories)
-                .service(reels::create_reel_handler)
-                .service(reels::reel_status_handler)
-                .service(reels::reel_video_handler)
-                .service(reels::precomputed_reel_handler)
-                .service(reels::precomputed_video_handler)
                .service(ai::generate_insight_handler)
                .service(ai::generate_agentic_insight_handler)
-                .service(ai::generation_status_handler)
-                .service(ai::cancel_generation_handler)
                .service(ai::get_insight_handler)
                .service(ai::delete_insight_handler)
                .service(ai::get_all_insights_handler)
-                .service(ai::get_insight_history_handler)
                .service(ai::get_available_models_handler)
                .service(ai::get_openrouter_models_handler)
                .service(ai::chat_turn_handler)
                .service(ai::chat_stream_handler)
                .service(ai::chat_history_handler)
                .service(ai::chat_rewind_handler)
-                .service(ai::turn_async_handler)
-                .service(ai::turn_replay_handler)
-                .service(ai::cancel_turn_handler)
                .service(ai::rate_insight_handler)
                .service(ai::export_training_data_handler)
-                .service(ai::tts_speech_handler)
-                .service(ai::create_speech_job_handler)
-                .service(ai::speech_job_status_handler)
-                .service(ai::cancel_speech_job_handler)
-                .service(ai::list_voices_handler)
-                .service(ai::create_voice_upload_handler)
-                .service(ai::create_voice_from_library_handler)
-                .service(ai::delete_voice_handler)
                .service(libraries::list_libraries)
                .service(libraries::patch_library)
                .add_feature(add_tag_services::<_, SqliteTagDao>)
@@ -349,6 +349,12 @@ pub async fn list_memories(
        opentelemetry::Context::new().with_remote_span_context(span.span_context().clone());

    let span_mode = q.span.unwrap_or(MemoriesSpan::Day);
+    let span_token = match span_mode {
+        MemoriesSpan::Day => "day",
+        MemoriesSpan::Week => "week",
+        MemoriesSpan::Month => "month",
+    };
+    let years_back: i32 = DEFAULT_YEARS_BACK;

    // The SQL filter expects a signed offset in minutes from UTC; default
    // 0 (UTC) when the client didn't send a hint. We also keep a chrono
@@ -360,66 +366,18 @@ pub async fn list_memories(
        .timezone_offset_minutes
        .and_then(|offset_mins| FixedOffset::east_opt(offset_mins * 60));

-    let items = match gather_memory_items(
-        &app_state,
-        &exif_dao,
-        &span_context,
-        span_mode,
-        tz_offset_minutes,
-        client_timezone,
-        q.library.as_deref(),
-    ) {
-        Ok(items) => items,
+    debug!(
+        "list_memories: span={:?} tz_offset_min={} years_back={}",
+        span_mode, tz_offset_minutes, years_back
+    );
+
+    let library = match crate::libraries::resolve_library_param(&app_state, q.library.as_deref()) {
+        Ok(lib) => lib,
        Err(msg) => {
            warn!("Rejecting /memories request: {}", msg);
            return HttpResponse::BadRequest().body(msg);
        }
    };
-
-    span.add_event(
-        "memories_scanned",
-        vec![
-            KeyValue::new("span", format!("{:?}", span_mode)),
-            KeyValue::new("years_back", DEFAULT_YEARS_BACK.to_string()),
-            KeyValue::new("result_count", items.len().to_string()),
-            KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
-            KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
-        ],
-    );
-    span.set_status(Status::Ok);
-
-    HttpResponse::Ok().json(MemoriesResponse { items })
-}
-
-/// Resolve an "on this day/week/month across past years" window into an
-/// ordered list of [`MemoryItem`]s. Shared by the `/memories` handler and the
-/// memory-reel selector so both honour the same library resolution, per-library
-/// exclusions, timezone handling, and sort order. Returns `Err(message)` only
-/// when the `library` param is invalid (callers map that to 400); per-library
-/// query/lock failures are logged and skipped, matching the handler's
-/// best-effort behaviour.
-pub fn gather_memory_items(
-    app_state: &AppState,
-    exif_dao: &Mutex<Box<dyn ExifDao>>,
-    span_context: &opentelemetry::Context,
-    span_mode: MemoriesSpan,
-    tz_offset_minutes: i32,
-    client_timezone: Option<FixedOffset>,
-    library_param: Option<&str>,
-) -> Result<Vec<MemoryItem>, String> {
-    let span_token = match span_mode {
-        MemoriesSpan::Day => "day",
-        MemoriesSpan::Week => "week",
-        MemoriesSpan::Month => "month",
-    };
-    let years_back: i32 = DEFAULT_YEARS_BACK;
-
-    debug!(
-        "gather_memory_items: span={:?} tz_offset_min={} years_back={}",
-        span_mode, tz_offset_minutes, years_back
-    );
-
-    let library = crate::libraries::resolve_library_param_state(app_state, library_param)?;
    let libraries_to_scan: Vec<&crate::libraries::Library> = match library {
        Some(lib) => vec![lib],
        None => app_state.libraries.iter().collect(),
@@ -436,7 +394,7 @@ pub fn gather_memory_items(

        let rows = match exif_dao.lock() {
            Ok(mut dao) => match dao.get_memories_in_window(
-                span_context,
+                &span_context,
                lib.id,
                span_token,
                years_back,
@@ -511,7 +469,21 @@ pub fn gather_memory_items(
        }
    }

-    Ok(memories_with_dates.into_iter().map(|(m, _)| m).collect())
+    let items: Vec<MemoryItem> = memories_with_dates.into_iter().map(|(m, _)| m).collect();
+
+    span.add_event(
+        "memories_scanned",
+        vec![
+            KeyValue::new("span", format!("{:?}", span_mode)),
+            KeyValue::new("years_back", years_back.to_string()),
+            KeyValue::new("result_count", items.len().to_string()),
+            KeyValue::new("tz_offset_minutes", tz_offset_minutes.to_string()),
+            KeyValue::new("excluded_dirs", format!("{:?}", app_state.excluded_dirs)),
+        ],
+    );
+    span.set_status(Status::Ok);
+
+    HttpResponse::Ok().json(MemoriesResponse { items })
 }

 #[cfg(test)]
@@ -1,742 +0,0 @@
-//! ffmpeg assembly for memory reels.
-//!
-//! Two-stage, per-segment design: each segment is rendered to its own
-//! normalized MP4 (identical codec/resolution/fps/timebase), then the segments
-//! are joined with the concat demuxer (stream copy, no re-encode). Rendering
-//! per segment — rather than one monster filtergraph — keeps each ffmpeg
-//! invocation simple to reason about, parallelizes naturally, and means a
-//! video-clip segment type (phase 2) slots in as just a different per-segment
-//! builder without touching the concat stage.
-//!
-//! The arg builders are pure (`Vec<String>` out) so the exact ffmpeg command
-//! is unit-testable; the runners spawn ffmpeg and surface stderr on failure.
-
-use anyhow::{Context, Result, bail};
-use std::path::Path;
-use tokio::process::Command;
-
-/// Re-exported so the reel pipeline reaches NVENC detection through this module
-/// rather than depending on `video::ffmpeg` directly.
-pub use crate::video::ffmpeg::is_nvenc_available;
-
-/// Reel canvas. Portrait, because reels are watched on a phone held upright —
-/// a landscape canvas letterboxes to a thin ~25%-height band there. Each photo
-/// is fitted sharp and centered over a blurred, zoomed copy of itself (see
-/// [`photo_filter_chain`]) so the frame is always filled regardless of the
-/// photo's orientation, without cropping the subject.
-pub const REEL_WIDTH: u32 = 1080;
-pub const REEL_HEIGHT: u32 = 1920;
-pub const REEL_FPS: u32 = 30;
-
-/// A beat's screen time is its narration length plus a short breath, with a
-/// floor so a terse line still lingers. No ceiling: the beat always covers the
-/// full narration so speech is never truncated — the scripter is asked to keep
-/// lines short instead.
-pub const MIN_SEGMENT_SECONDS: f64 = 2.5;
-const NARRATION_TAIL_SECONDS: f64 = 0.6;
-
-/// Fade durations baked into each photo. A held (single-photo) beat gets a
-/// gentle dip; burst photos get a much snappier fade so the difference between
-/// a held shot and a quick burst is obvious.
-const SINGLE_FADE_SECONDS: f64 = 0.35;
-const BURST_FADE_SECONDS: f64 = 0.12;
-
-/// Video-clip framing. Fallback cap on how much of a clip we read when the
-/// source length can't be probed; with a known length, a clip instead plays for
-/// as much of its beat as its footage allows (see [`clip_beat_plan`]). Its live
-/// audio is ducked to `CLIP_DUCK_VOLUME` under the narration.
-pub const CLIP_SECONDS: f64 = 5.0;
-const CLIP_DUCK_VOLUME: f64 = 0.35;
-
-/// Floor on how long each burst photo stays up, so a long line over many photos
-/// doesn't flash them subliminally. If the narration is too short to give every
-/// photo this much, the beat is stretched to fit.
-const MIN_BURST_PHOTO_SECONDS: f64 = 0.6;
-
-/// Base screen time for a beat given its narration length: narration + breath,
-/// floored. Used as the lower bound on a beat's total duration.
-pub fn segment_duration(narration_secs: f64) -> f64 {
-    let d = narration_secs + NARRATION_TAIL_SECONDS;
-    if d.is_finite() && d > MIN_SEGMENT_SECONDS {
-        d
-    } else {
-        MIN_SEGMENT_SECONDS
-    }
-}
-
-/// Split a beat into per-photo durations. The beat lasts at least its narration
-/// (so speech isn't cut) and at least `n × MIN_BURST_PHOTO_SECONDS` (so a fast
-/// burst stays legible); the photos share that total evenly. Returns
-/// `(total_seconds, per_photo_seconds)`.
-pub fn beat_durations(narration_secs: f64, n_photos: usize) -> (f64, Vec<f64>) {
-    let n = n_photos.max(1);
-    let base = segment_duration(narration_secs);
-    let min_total = n as f64 * MIN_BURST_PHOTO_SECONDS;
-    let total = if base > min_total { base } else { min_total };
-    let each = total / n as f64;
-    (total, vec![each; n])
-}
-
-/// Fade length to use for a beat of `n_photos` (gentle when held, snappy in a
-/// burst).
-fn fade_for(n_photos: usize) -> f64 {
-    if n_photos > 1 {
-        BURST_FADE_SECONDS
-    } else {
-        SINGLE_FADE_SECONDS
-    }
-}
-
-/// Options controlling per-segment rendering.
-#[derive(Debug, Clone, Copy)]
-pub struct SegmentOpts {
-    pub width: u32,
-    pub height: u32,
-    pub fps: u32,
-    pub nvenc: bool,
-}
-
-impl Default for SegmentOpts {
-    fn default() -> Self {
-        Self {
-            width: REEL_WIDTH,
-            height: REEL_HEIGHT,
-            fps: REEL_FPS,
-            nvenc: false,
-        }
-    }
-}
-
-/// Filter chain for one photo (input `idx`) producing the labelled output
-/// `[v{idx}]`. Splits the still into a background and foreground: the background
-/// is scaled to *cover* the canvas and heavily blurred; the foreground is
-/// scaled to *fit* and overlaid centered. This fills the portrait frame for any
-/// photo orientation — no black bars, no cropping of the subject — then a fade
-/// in/out softens the cut. Intermediate labels are suffixed with `idx` so
-/// several chains coexist in one `filter_complex`.
-///
-/// `fps` is normalized BEFORE the fades so the brightness ramp is computed on a
-/// true {fps}-frame timeline; otherwise the fade is sampled at the looped
-/// still's coarse cadence and duplicated up, which reads as a steppy dip.
-fn photo_filter_chain(idx: usize, opts: &SegmentOpts, duration: f64, fade: f64) -> String {
-    let (w, h, fps) = (opts.width, opts.height, opts.fps);
-    let fade_out_start = (duration - fade).max(0.0);
-    format!(
-        "[{idx}:v]split=2[bg{idx}][fg{idx}];\
-         [bg{idx}]scale={w}:{h}:force_original_aspect_ratio=increase,\
-         crop={w}:{h},boxblur=20:2[bgb{idx}];\
-         [fg{idx}]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs{idx}];\
-         [bgb{idx}][fgs{idx}]overlay=(W-w)/2:(H-h)/2,\
-         fps={fps},\
-         fade=t=in:st=0:d={fade},\
-         fade=t=out:st={fade_out_start:.3}:d={fade},\
-         setsar=1,format=yuv420p[v{idx}]"
-    )
-}
-
-/// Full `filter_complex` for a beat of `per_photo` durations: one chain per
-/// photo, concatenated into `[v]`, with the narration (the last input, index
-/// `per_photo.len()`) padded with trailing silence into `[a]`. A single-photo
-/// beat degenerates to one chain + `concat=n=1` (a passthrough).
-pub fn beat_filtergraph(opts: &SegmentOpts, per_photo: &[f64]) -> String {
-    let n = per_photo.len().max(1);
-    let fade = fade_for(n);
-    let chains: Vec<String> = per_photo
-        .iter()
-        .enumerate()
-        .map(|(i, &d)| photo_filter_chain(i, opts, d, fade))
-        .collect();
-    let concat_inputs: String = (0..n).map(|i| format!("[v{i}]")).collect();
-    format!(
-        "{chains};{concat_inputs}concat=n={n}:v=1:a=0[v];[{n}:a]apad[a]",
-        chains = chains.join(";")
-    )
-}
-
-fn video_encoder_args(nvenc: bool) -> Vec<String> {
-    if nvenc {
-        // p4 ≈ balanced; cq 23 ≈ libx264 crf 21. Matches the HLS transcode path.
-        [
-            "-c:v",
-            "h264_nvenc",
-            "-preset",
-            "p4",
-            "-cq",
-            "23",
-            "-pix_fmt",
-            "yuv420p",
-        ]
-    } else {
-        [
-            "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-pix_fmt", "yuv420p",
-        ]
-    }
-    .iter()
-    .map(|s| s.to_string())
-    .collect()
-}
-
-/// Build the ffmpeg args that render one beat: each photo looped for its slice
-/// of the beat (filled to the portrait canvas with a blurred backdrop), the
-/// slices concatenated, and the single narration muxed over the whole thing.
-/// `total` bounds the output (and the apad'd audio) to the beat length.
-pub fn build_beat_args(
-    image_paths: &[String],
-    audio_path: &str,
-    out_path: &str,
-    per_photo: &[f64],
-    total: f64,
-    opts: &SegmentOpts,
-) -> Vec<String> {
-    let fps = opts.fps.to_string();
-    let mut args: Vec<String> = vec!["-y".into()];
-    if opts.nvenc {
-        args.extend(["-hwaccel".into(), "cuda".into()]);
-    }
-    // One looped-still input per photo, each bounded to its slice by an input
-    // `-t`; reading at the target `-framerate` gives the fades real frames to
-    // ramp across.
-    for (path, &dur) in image_paths.iter().zip(per_photo.iter()) {
-        args.extend([
-            "-framerate".into(),
-            fps.clone(),
-            "-loop".into(),
-            "1".into(),
-            "-t".into(),
-            format!("{dur:.3}"),
-            "-i".into(),
-            path.clone(),
-        ]);
-    }
-    args.extend([
-        "-i".into(),
-        audio_path.into(),
-        "-filter_complex".into(),
-        beat_filtergraph(opts, per_photo),
-        "-map".into(),
-        "[v]".into(),
-        "-map".into(),
-        "[a]".into(),
-        "-t".into(),
-        format!("{total:.3}"),
-        // Force constant frame rate so the beat (and the concatenated reel)
-        // plays at a steady {fps} rather than a variable cadence.
-        "-r".into(),
-        fps,
-    ]);
-    args.extend(video_encoder_args(opts.nvenc));
-    args.extend(
-        ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-shortest"]
-            .iter()
-            .map(|s| s.to_string()),
-    );
-    args.push(out_path.into());
-    args
-}
-
-/// Build the concat-demuxer args that join rendered segments losslessly.
-/// `+faststart` moves the moov atom up front so the reel streams immediately
-/// on the mobile client. The output muxer is forced with `-f mp4` because we
-/// write to a `.tmp` path (atomic publish) whose extension ffmpeg can't map to
-/// a format on its own.
-pub fn build_concat_args(list_path: &str, out_path: &str) -> Vec<String> {
-    [
-        "-y",
-        "-f",
-        "concat",
-        "-safe",
-        "0",
-        "-i",
-        list_path,
-        "-c",
-        "copy",
-        "-movflags",
-        "+faststart",
-        "-f",
-        "mp4",
-        out_path,
-    ]
-    .iter()
-    .map(|s| s.to_string())
-    .collect()
-}
-
-/// Render the concat list file body. Each line points the demuxer at one
-/// segment; single quotes in paths are escaped per ffmpeg's concat syntax.
-pub fn build_concat_list(segment_paths: &[String]) -> String {
-    let mut out = String::new();
-    for p in segment_paths {
-        let escaped = p.replace('\'', r"'\''");
-        out.push_str(&format!("file '{escaped}'\n"));
-    }
-    out
-}
-
-async fn run_ffmpeg(args: &[String], what: &str) -> Result<()> {
-    let output = Command::new("ffmpeg")
-        .args(args)
-        .output()
-        .await
-        .with_context(|| format!("spawning ffmpeg for {what}"))?;
-    if !output.status.success() {
-        bail!(
-            "ffmpeg {what} failed: {}",
-            String::from_utf8_lossy(&output.stderr)
-        );
-    }
-    Ok(())
-}
-
-/// Render one beat to `out_path`: its photos shown in sequence (a held shot for
-/// one photo, a quick burst for several) under the single narration in
-/// `audio_path`, whose measured length sets the beat's pacing.
-pub async fn render_beat(
-    image_paths: &[std::path::PathBuf],
-    audio_path: &Path,
-    out_path: &Path,
-    narration_secs: f64,
-    opts: &SegmentOpts,
-) -> Result<()> {
-    if image_paths.is_empty() {
-        bail!("render_beat called with no images");
-    }
-    let (total, per_photo) = beat_durations(narration_secs, image_paths.len());
-    let paths: Vec<String> = image_paths
-        .iter()
-        .map(|p| p.to_string_lossy().to_string())
-        .collect();
-    let args = build_beat_args(
-        &paths,
-        &audio_path.to_string_lossy(),
-        &out_path.to_string_lossy(),
-        &per_photo,
-        total,
-        opts,
-    );
-    run_ffmpeg(&args, "beat render").await
-}
-
-// --- Video-clip beats --------------------------------------------------------
-
-/// Decide how long the clip plays and how long the whole beat lasts, from the
-/// source video's length (if known) and the narration length. Returns
-/// `(clip_dur, beat_total)`.
-///
-/// The beat always lasts long enough for the full narration. The clip plays for
-/// as much of that beat as its footage covers — so the motion fills the screen
-/// time rather than stopping early. We only freeze the last frame (the
-/// `beat_total - clip_dur` gap, handled by `tpad` in [`clip_video_filter`]) when
-/// the source video is genuinely shorter than the narration. Capping clip
-/// playback at a fixed length while the narration ran longer was what produced
-/// the second-or-two freeze that read as a glitchy pause before the transition.
-pub fn clip_beat_plan(source_dur: Option<f64>, narration_secs: f64) -> (f64, f64) {
-    let want = segment_duration(narration_secs);
-    let clip_dur = match source_dur {
-        // Known length: play up to the whole beat, but never past the source.
-        Some(d) if d > 0.0 => d.min(want),
-        // Unknown length: read up to the fallback cap; tpad covers any shortfall.
-        _ => want.min(CLIP_SECONDS),
-    };
-    (clip_dur, want.max(clip_dur))
-}
-
-/// Video chain for a clip beat: fill the clip to the portrait canvas (blurred
-/// backdrop, same look as photos), normalize fps, hold the last frame if the
-/// narration outlasts the clip (`tpad`), then fade. Produces `[v]`.
-fn clip_video_filter(opts: &SegmentOpts, clip_dur: f64, beat_total: f64) -> String {
-    let (w, h, fps) = (opts.width, opts.height, opts.fps);
-    let fade = SINGLE_FADE_SECONDS;
-    let hold = (beat_total - clip_dur).max(0.0);
-    let fade_out_start = (beat_total - fade).max(0.0);
-    // Freeze the final frame to cover narration that runs past the clip.
-    let tpad = if hold > 0.05 {
-        format!(",tpad=stop_mode=clone:stop_duration={hold:.3}")
-    } else {
-        String::new()
-    };
-    format!(
-        "[0:v]split=2[bg][fg];\
-         [bg]scale={w}:{h}:force_original_aspect_ratio=increase,\
-         crop={w}:{h},boxblur=20:2[bgb];\
-         [fg]scale={w}:{h}:force_original_aspect_ratio=decrease[fgs];\
-         [bgb][fgs]overlay=(W-w)/2:(H-h)/2,fps={fps}{tpad},\
-         fade=t=in:st=0:d={fade},fade=t=out:st={fade_out_start:.3}:d={fade},\
-         setsar=1,format=yuv420p[v]"
-    )
-}
-
-/// Audio chain for a clip beat. With a clip audio track, duck it under the
-/// narration and mix; without one, just the narration. Produces `[a]`.
-fn clip_audio_filter(has_audio: bool) -> String {
-    if has_audio {
-        format!(
-            "[0:a]volume={CLIP_DUCK_VOLUME}[duck];[1:a]apad[narr];\
-             [duck][narr]amix=inputs=2:duration=longest:normalize=0[a]"
-        )
-    } else {
-        "[1:a]apad[a]".to_string()
-    }
-}
-
-/// Full `filter_complex` for a clip beat (input 0 = clip, input 1 = narration).
-pub fn clip_beat_filtergraph(
-    opts: &SegmentOpts,
-    clip_dur: f64,
-    beat_total: f64,
-    has_audio: bool,
-) -> String {
-    format!(
-        "{};{}",
-        clip_video_filter(opts, clip_dur, beat_total),
-        clip_audio_filter(has_audio)
-    )
-}
-
-/// Build the ffmpeg args for a clip beat: the first `clip_dur` seconds of the
-/// source video, filled to the portrait canvas with its live audio ducked under
-/// the narration, bounded to `beat_total`.
-pub fn build_clip_beat_args(
-    clip_path: &str,
-    audio_path: &str,
-    out_path: &str,
-    clip_dur: f64,
-    beat_total: f64,
-    has_audio: bool,
-    opts: &SegmentOpts,
-) -> Vec<String> {
-    let fps = opts.fps.to_string();
-    let mut args: Vec<String> = vec!["-y".into()];
-    if opts.nvenc {
-        args.extend(["-hwaccel".into(), "cuda".into()]);
-    }
-    args.extend([
-        // Input `-t` limits the clip to its window; audio has none (apad fills).
-        "-t".into(),
-        format!("{clip_dur:.3}"),
-        "-i".into(),
-        clip_path.into(),
-        "-i".into(),
-        audio_path.into(),
-        "-filter_complex".into(),
-        clip_beat_filtergraph(opts, clip_dur, beat_total, has_audio),
-        "-map".into(),
-        "[v]".into(),
-        "-map".into(),
-        "[a]".into(),
-        "-t".into(),
-        format!("{beat_total:.3}"),
-        "-r".into(),
-        fps,
-    ]);
-    args.extend(video_encoder_args(opts.nvenc));
-    args.extend(
-        ["-c:a", "aac", "-b:a", "160k", "-ar", "48000"]
-            .iter()
-            .map(|s| s.to_string()),
-    );
-    args.push(out_path.into());
-    args
-}
-
-/// Whether a media file has at least one audio stream (so a clip beat knows
-/// whether to mix in live audio). Defaults to `false` on any probe failure.
-pub async fn has_audio_stream(path: &str) -> bool {
-    Command::new("ffprobe")
-        .args([
-            "-v",
-            "error",
-            "-select_streams",
-            "a",
-            "-show_entries",
-            "stream=index",
-            "-of",
-            "csv=p=0",
-            path,
-        ])
-        .output()
-        .await
-        .map(|out| !out.stdout.is_empty())
-        .unwrap_or(false)
-}
-
-/// Render one clip beat: a section of `clip_path` (capped at [`CLIP_SECONDS`],
-/// and to the source length) under the narration in `audio_path`. The beat
-/// lasts at least the narration, freezing the clip's last frame if needed.
-pub async fn render_clip_beat(
-    clip_path: &Path,
-    audio_path: &Path,
-    out_path: &Path,
-    narration_secs: f64,
-    opts: &SegmentOpts,
-) -> Result<()> {
-    let clip_str = clip_path.to_string_lossy().to_string();
-    // Play the clip for as much of the beat as its footage covers; freeze only
-    // when the source is genuinely shorter than the narration (see clip_beat_plan).
-    let source_dur = crate::video::ffmpeg::get_duration_seconds(&clip_str)
-        .await
-        .ok()
-        .flatten();
-    let (clip_dur, beat_total) = clip_beat_plan(source_dur, narration_secs);
-    let has_audio = has_audio_stream(&clip_str).await;
-
-    let args = build_clip_beat_args(
-        &clip_str,
-        &audio_path.to_string_lossy(),
-        &out_path.to_string_lossy(),
-        clip_dur,
-        beat_total,
-        has_audio,
-        opts,
-    );
-    run_ffmpeg(&args, "clip beat render").await
-}
-
-/// Join rendered segments into the final reel. Writes the concat list into the
-/// same directory as the output so relative paths and cleanup stay local.
-pub async fn concat_segments(segment_paths: &[String], out_path: &Path) -> Result<()> {
-    let list_path = out_path.with_extension("concat.txt");
-    let body = build_concat_list(segment_paths);
-    tokio::fs::write(&list_path, body)
-        .await
-        .context("writing concat list")?;
-    let args = build_concat_args(&list_path.to_string_lossy(), &out_path.to_string_lossy());
-    let result = run_ffmpeg(&args, "concat").await;
-    let _ = tokio::fs::remove_file(&list_path).await;
-    result
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn segment_duration_floors_short_lines() {
-        // A one-word narration still lingers at the floor.
-        assert_eq!(segment_duration(0.5), MIN_SEGMENT_SECONDS);
-        assert_eq!(segment_duration(0.0), MIN_SEGMENT_SECONDS);
-    }
-
-    #[test]
-    fn segment_duration_covers_full_narration_plus_tail() {
-        // No ceiling: a long line gets its full length so speech isn't cut.
-        assert!((segment_duration(5.0) - 5.6).abs() < 1e-9);
-        assert!((segment_duration(20.0) - 20.6).abs() < 1e-9);
-    }
-
-    #[test]
-    fn segment_duration_rejects_nonfinite() {
-        assert_eq!(segment_duration(f64::NAN), MIN_SEGMENT_SECONDS);
-        assert_eq!(segment_duration(f64::INFINITY), MIN_SEGMENT_SECONDS);
-    }
-
-    #[test]
-    fn beat_durations_single_photo_matches_base() {
-        let (total, per) = beat_durations(4.0, 1);
-        assert!((total - 4.6).abs() < 1e-9); // narration + tail
-        assert_eq!(per.len(), 1);
-        assert!((per[0] - 4.6).abs() < 1e-9);
-    }
-
-    #[test]
-    fn beat_durations_burst_splits_evenly() {
-        // 5 photos, narration 4.6s base → ~0.92s each (above the 0.6 floor).
-        let (total, per) = beat_durations(4.0, 5);
-        assert!((total - 4.6).abs() < 1e-9);
-        assert_eq!(per.len(), 5);
-        assert!((per.iter().sum::<f64>() - total).abs() < 1e-9);
-        assert!(per.iter().all(|&d| d >= MIN_BURST_PHOTO_SECONDS));
-    }
-
-    #[test]
-    fn beat_durations_stretches_when_narration_too_short_for_burst() {
-        // Floor narration (2.5s) over 10 photos would be 0.25s each — below the
-        // legibility floor, so the beat stretches to 10 × 0.6 = 6s.
-        let (total, per) = beat_durations(0.0, 10);
-        assert!((total - 6.0).abs() < 1e-9);
-        assert!(per.iter().all(|&d| (d - 0.6).abs() < 1e-9));
-    }
-
-    #[test]
-    fn beat_filtergraph_single_photo_fills_portrait_and_holds() {
-        let (_t, per) = beat_durations(4.0, 1);
-        let g = beat_filtergraph(&SegmentOpts::default(), &per);
-        assert!(g.contains("[0:v]split=2[bg0][fg0]"));
-        assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=increase"));
-        assert!(g.contains("crop=1080:1920"));
-        assert!(g.contains("scale=1080:1920:force_original_aspect_ratio=decrease"));
-        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
-        // Single photo → concat of one, gentle fade, audio is input 1.
-        assert!(g.contains("concat=n=1:v=1:a=0[v]"));
-        assert!(g.contains("d=0.35")); // SINGLE_FADE
-        assert!(g.contains("[1:a]apad[a]"));
-    }
-
-    #[test]
-    fn beat_filtergraph_burst_chains_concats_and_snappy_fade() {
-        let (_t, per) = beat_durations(4.0, 3);
-        let g = beat_filtergraph(&SegmentOpts::default(), &per);
-        // One chain per photo with index-suffixed labels.
-        assert!(g.contains("[0:v]split") && g.contains("[1:v]split") && g.contains("[2:v]split"));
-        // Concatenated in order, audio is the 4th input (index 3).
-        assert!(g.contains("[v0][v1][v2]concat=n=3:v=1:a=0[v]"));
-        assert!(g.contains("[3:a]apad[a]"));
-        // Burst uses the much snappier fade (vs 0.35 for a held shot).
-        assert!(g.contains("d=0.12"));
-        assert!(!g.contains("d=0.35"));
-    }
-
-    #[test]
-    fn beat_filtergraph_normalizes_fps_before_fading() {
-        // fps must precede the fades on every chain (else the dip looks steppy).
-        let (_t, per) = beat_durations(4.0, 1);
-        let g = beat_filtergraph(&SegmentOpts::default(), &per);
-        let fps_at = g.find("fps=30").expect("fps in graph");
-        let fade_at = g.find("fade=t=in").expect("fade in graph");
-        assert!(fps_at < fade_at);
-    }
-
-    #[test]
-    fn beat_args_one_input_per_photo_plus_audio_bound_by_total() {
-        let (total, per) = beat_durations(4.0, 2);
-        let args = build_beat_args(
-            &["/a.jpg".into(), "/b.jpg".into()],
-            "/n.wav",
-            "/out.mp4",
-            &per,
-            total,
-            &SegmentOpts::default(),
-        );
-        let joined = args.join(" ");
-        // A looped-still input per photo, each with its slice -t, then the audio.
-        assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /a.jpg"));
-        assert!(joined.contains("-framerate 30 -loop 1 -t 2.300 -i /b.jpg"));
-        assert!(joined.contains("-i /n.wav"));
-        // Output bounded to the beat total and forced CFR.
-        assert!(joined.contains("-t 4.600"));
-        assert!(joined.contains("-r 30"));
-        assert!(joined.ends_with("/out.mp4"));
-    }
-
-    #[test]
-    fn beat_args_use_nvenc_and_cuda_when_enabled() {
-        let opts = SegmentOpts {
-            nvenc: true,
-            ..SegmentOpts::default()
-        };
-        let (total, per) = beat_durations(3.0, 1);
-        let args = build_beat_args(
-            &["/img.jpg".into()],
-            "/a.wav",
-            "/out.mp4",
-            &per,
-            total,
-            &opts,
-        );
-        let joined = args.join(" ");
-        assert!(joined.contains("-hwaccel cuda"));
-        assert!(joined.contains("h264_nvenc"));
-        assert!(!joined.contains("libx264"));
-    }
-
-    #[test]
-    fn clip_filter_ducks_audio_and_holds_last_frame_when_narration_longer() {
-        // 5s clip, 7s beat → 2s freeze of the last frame, ducked-audio mix.
-        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 7.0, true);
-        assert!(g.contains("tpad=stop_mode=clone:stop_duration=2.000"));
-        assert!(g.contains("volume=0.35"));
-        assert!(g.contains("amix=inputs=2"));
-        assert!(g.contains("[1:a]apad[narr]"));
-        // Fill applied to the clip too.
-        assert!(g.contains("boxblur"));
-        assert!(g.contains("overlay=(W-w)/2:(H-h)/2"));
-    }
-
-    #[test]
-    fn clip_beat_plan_plays_clip_through_the_whole_beat_when_source_is_long() {
-        // 30s source, 4s narration → beat is narration+tail (4.6), and the clip
-        // plays that whole 4.6s of motion: no freeze (clip_dur == beat_total).
-        let (clip_dur, beat_total) = clip_beat_plan(Some(30.0), 4.0);
-        assert!((beat_total - 4.6).abs() < 1e-9);
-        assert!((clip_dur - 4.6).abs() < 1e-9);
-        assert!((beat_total - clip_dur).abs() < 1e-9); // no hold
-    }
-
-    #[test]
-    fn clip_beat_plan_freezes_only_when_source_shorter_than_narration() {
-        // 2s source under a 4s narration → play all 2s, freeze the remainder.
-        let (clip_dur, beat_total) = clip_beat_plan(Some(2.0), 4.0);
-        assert!((clip_dur - 2.0).abs() < 1e-9);
-        assert!((beat_total - 4.6).abs() < 1e-9);
-        assert!(beat_total - clip_dur > 2.0); // unavoidable freeze gap
-    }
-
-    #[test]
-    fn clip_beat_plan_caps_read_when_source_length_unknown() {
-        // Probe failed: read up to the fallback cap, beat still covers narration.
-        let (clip_dur, beat_total) = clip_beat_plan(None, 8.0);
-        assert!((clip_dur - CLIP_SECONDS).abs() < 1e-9);
-        assert!((beat_total - 8.6).abs() < 1e-9);
-    }
-
-    #[test]
-    fn clip_filter_no_tpad_when_clip_covers_the_beat() {
-        // Clip at least as long as the beat → no freeze.
-        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, true);
-        assert!(!g.contains("tpad"));
-    }
-
-    #[test]
-    fn clip_filter_narration_only_without_clip_audio() {
-        let g = clip_beat_filtergraph(&SegmentOpts::default(), 5.0, 5.0, false);
-        assert!(!g.contains("amix"));
-        assert!(!g.contains("volume="));
-        assert!(g.contains("[1:a]apad[a]"));
-    }
-
-    #[test]
-    fn clip_beat_args_bound_clip_and_output() {
-        let args = build_clip_beat_args(
-            "/v.mp4",
-            "/n.wav",
-            "/out.mp4",
-            5.0,
-            6.6,
-            true,
-            &SegmentOpts::default(),
-        );
-        let joined = args.join(" ");
-        // Input -t bounds the clip read; output -t bounds the beat.
-        assert!(joined.contains("-t 5.000 -i /v.mp4"));
-        assert!(joined.contains("-i /n.wav"));
-        assert!(joined.contains("-t 6.600"));
-        assert!(joined.contains("-r 30"));
-        assert!(joined.ends_with("/out.mp4"));
-    }
-
-    #[test]
-    fn concat_args_stream_copy_with_faststart_and_forced_muxer() {
-        // Output goes to a .tmp path, so the muxer must be forced — ffmpeg
-        // can't infer mp4 from the extension (the bug this guards against).
-        let args = build_concat_args("/tmp/list.txt", "/out.mp4.tmp");
-        let joined = args.join(" ");
-        assert!(joined.contains("-f concat -safe 0 -i /tmp/list.txt"));
-        assert!(joined.contains("-c copy"));
-        assert!(joined.contains("+faststart"));
-        assert!(joined.contains("-f mp4"));
-        // The forced muxer must come before the output path.
-        let f_mp4 = args.windows(2).position(|w| w == ["-f", "mp4"]).unwrap();
-        let out = args.iter().position(|a| a == "/out.mp4.tmp").unwrap();
-        assert!(f_mp4 < out);
-    }
-
-    #[test]
-    fn concat_list_escapes_single_quotes() {
-        let body = build_concat_list(&[
-            "/tmp/seg_000.mp4".into(),
-            "/tmp/own's dir/seg_001.mp4".into(),
-        ]);
-        assert!(body.contains("file '/tmp/seg_000.mp4'\n"));
-        // The apostrophe is closed-escaped-reopened per ffmpeg concat syntax.
-        assert!(body.contains(r"own'\''s"));
-    }
-}
@@ -1,491 +0,0 @@
-//! Narration scripting for memory reels.
-//!
-//! One LLM call turns the planned beats (each carrying its date and, where
-//! available, its cached insight) into a short first-person narration line per
-//! beat plus a title for the reel. A beat may show several photos in a quick
-//! burst, so a line narrates the *moment*, not a single frame. We reuse the
-//! cached insight summary as the richest signal rather than re-running vision
-//! at reel time — that keeps reel generation off the GPU's vision slot.
-//!
-//! The prompt builder and response parser are pure so the contract is
-//! unit-testable; `generate_script` wires them to the LLM client.
-//!
-//! The agentic scripter (pre-generation) resolves the backend through the
-//! InsightGenerator, builds a read-only tool set, and runs a tool loop to
-//! ground the narration in retrieved context before asking for the final JSON.
-
-use anyhow::{Context, Result};
-use std::sync::Arc;
-
-use super::{PlannedBeat, ReelMeta};
-use crate::ai::backend::{BackendKind, SamplingOverrides};
-use crate::ai::insight_generator::InsightGenerator;
-use crate::ai::llamacpp::LlamaCppClient;
-use crate::ai::llm_client::{LlmClient, Tool};
-use crate::ai::ollama::ChatMessage;
-
-/// The narration for a whole reel: a title and one line per beat, in order.
-///
-/// Produced by `parse_script_response`, which pads or truncates `lines` so its
-/// length always equals the number of beats it was asked for.
-#[derive(Debug, Clone, PartialEq)]
-pub struct ReelScript {
-    /// Short title for the reel; `parse_script_response` substitutes
-    /// `"Memories"` when the model supplies none.
-    pub title: String,
-    /// One narration line per beat, index-aligned with the planned beats.
-    pub lines: Vec<String>,
-}
-
-/// System prompt for the single-shot (non-agentic) scripter. Returned as the
-/// system half of the pair built by `build_script_messages`. It sets the
-/// first-person voice, forbids invented names/places/events, and asks for one
-/// or two short sentences per moment.
-const SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
-slideshow of someone's own photos set to a spoken voiceover. Write warm, \
-specific, first-person narration as if the person is gently looking back on \
-their own memories. Each line plays over one moment, which may be a quick burst \
-of several photos, so narrate the moment as a whole rather than a single frame. \
-Be concrete and grounded in the details given; never invent names, places, or \
-events that aren't supported. Keep each line to one or two short sentences that \
-can be read aloud in a few seconds. Avoid generic filler like \"what a \
-wonderful day\" — if you have little to go on, simply describe the moment \
-plainly.";
-
-/// Agentic scripter system prompt: richer version that tells the model it may
-/// call read-only tools to ground each line.
-///
-/// Used as the system message in `build_agentic_script_messages`. The nine
-/// tool names listed in the text are the same names `generate_script_agentic`
-/// keeps when it filters the tool definitions, so the two lists must be
-/// edited together.
-const AGENTIC_SYSTEM_PROMPT: &str = "You are narrating a personal memory reel — a short \
-slideshow of someone's own photos set to a spoken voiceover. Write warm, \
-specific, first-person narration as if the person is gently looking back on \
-their own memories. Each line plays over one moment, which may be a quick burst \
-of several photos, so narrate the moment as a whole rather than a single frame. \
-Be concrete and grounded in the details given; never invent names, places, or \
-events that aren't supported. Keep each line to one or two short sentences that \
-can be read aloud in a few seconds. Avoid generic filler like \"what a \
-wonderful day\" — if you have little to go on, simply describe the moment \
-plainly.\n\nYou may call read-only tools (search_rag, search_messages, \
-get_sms_messages, get_calendar_events, get_location_history, reverse_geocode, \
-get_personal_place_at, recall_entities, get_current_datetime) to ground each \
-line in real context — e.g. reverse_geocode a moment's GPS to name the place, \
-or check the calendar/messages around its date. Never invent details. Return \
-ONLY the JSON object, no prose or code fences.";
-
-/// Upper bound on agentic tool-loop iterations during pre-generation.
-///
-/// Reads `REEL_PREGEN_MAX_TOOL_ITERS`; the value is trimmed and must parse as
-/// a positive integer. An unset, unparsable, or zero value yields the default
-/// of 8.
-fn reel_pregen_max_tool_iters() -> usize {
-    const DEFAULT_ITERS: usize = 8;
-    let Ok(raw) = std::env::var("REEL_PREGEN_MAX_TOOL_ITERS") else {
-        return DEFAULT_ITERS;
-    };
-    match raw.trim().parse::<usize>() {
-        Ok(iters) if iters > 0 => iters,
-        _ => DEFAULT_ITERS,
-    }
-}
-
-/// Assemble the `(system, user)` prompt pair for the single-shot scripter.
-///
-/// The system half is `SYSTEM_PROMPT`. The user half states the moment count
-/// and span, lists the years when any are known, enumerates every beat in
-/// order (date label, clip/burst marker, cached insight context), and closes
-/// with the strict-JSON reply contract sized to `beats.len()`.
-pub fn build_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> (String, String) {
-    let moment_count = beats.len();
-    let mut prompt = format!(
-        "This reel has {} moments surfaced as memories {}.\n\n",
-        moment_count,
-        meta.span_phrase()
-    );
-
-    if !meta.years.is_empty() {
-        let listed = meta
-            .years
-            .iter()
-            .map(|year| year.to_string())
-            .collect::<Vec<String>>()
-            .join(", ");
-        prompt.push_str(&format!("They span the years: {}.\n\n", listed));
-    }
-
-    prompt.push_str("Moments, in the order they will appear:\n");
-    for (position, beat) in beats.iter().enumerate() {
-        // Header line: "[k] <date> (<kind marker>)".
-        let mut header = format!("\n[{}]", position + 1);
-        if let Some(date) = beat.date_label() {
-            header.push_str(&format!(" {date}"));
-        }
-        let shown = beat.media.len();
-        if beat.is_clip() {
-            header.push_str(" (a video clip)");
-        } else if shown > 1 {
-            header.push_str(&format!(" (a burst of {} photos)", shown));
-        }
-        header.push('\n');
-        prompt.push_str(&header);
-
-        // A whitespace-only summary counts as absent; the title is used as-is.
-        let title = beat.insight_title.as_ref();
-        let summary = beat
-            .insight_summary
-            .as_ref()
-            .filter(|text| !text.trim().is_empty());
-        let context = match (title, summary) {
-            (Some(t), Some(s)) => format!("  Known context: {t} — {s}\n"),
-            (Some(t), None) => format!("  Known context: {t}\n"),
-            (None, Some(s)) => format!("  Known context: {s}\n"),
-            (None, None) => {
-                "  (no extra context — narrate plainly from the date)\n".to_string()
-            }
-        };
-        prompt.push_str(&context);
-    }
-
-    prompt.push_str(&format!(
-        "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
-         {{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
-         \"<line for moment 2>\", ... ]}}\n\
-         The \"segments\" array MUST have exactly {} items, one per moment in order.",
-        moment_count
-    ));
-    (SYSTEM_PROMPT.to_string(), prompt)
-}
-
-/// Assemble the chat messages for the agentic scripter.
-///
-/// Returns a system message carrying `AGENTIC_SYSTEM_PROMPT` followed by one
-/// user message. The user text follows the same per-beat enumeration as
-/// `build_script_messages`, with one addition: when a beat has GPS
-/// coordinates they are printed (four decimals) on their own line so the model
-/// can feed them to a geocoding tool.
-pub fn build_agentic_script_messages(meta: &ReelMeta, beats: &[PlannedBeat]) -> Vec<ChatMessage> {
-    let moment_count = beats.len();
-    let mut prompt = format!(
-        "This reel has {} moments surfaced as memories {}.\n\n",
-        moment_count,
-        meta.span_phrase()
-    );
-
-    if !meta.years.is_empty() {
-        let listed = meta
-            .years
-            .iter()
-            .map(|year| year.to_string())
-            .collect::<Vec<String>>()
-            .join(", ");
-        prompt.push_str(&format!("They span the years: {}.\n\n", listed));
-    }
-
-    prompt.push_str("Moments, in the order they will appear:\n");
-    for (position, beat) in beats.iter().enumerate() {
-        // Header: "[k] <date> (<kind marker>)", then an optional GPS line.
-        let mut header = format!("\n[{}]", position + 1);
-        if let Some(date) = beat.date_label() {
-            header.push_str(&format!(" {date}"));
-        }
-        let shown = beat.media.len();
-        if beat.is_clip() {
-            header.push_str(" (a video clip)");
-        } else if shown > 1 {
-            header.push_str(&format!(" (a burst of {} photos)", shown));
-        }
-        if let Some((lat, lon)) = beat.gps {
-            header.push_str(&format!("\n  GPS: {:.4}, {:.4}", lat, lon));
-        }
-        header.push('\n');
-        prompt.push_str(&header);
-
-        // A whitespace-only summary counts as absent; the title is used as-is.
-        let title = beat.insight_title.as_ref();
-        let summary = beat
-            .insight_summary
-            .as_ref()
-            .filter(|text| !text.trim().is_empty());
-        let context = match (title, summary) {
-            (Some(t), Some(s)) => format!("  Known context: {t} — {s}\n"),
-            (Some(t), None) => format!("  Known context: {t}\n"),
-            (None, Some(s)) => format!("  Known context: {s}\n"),
-            (None, None) => {
-                "  (no extra context — narrate plainly from the date)\n".to_string()
-            }
-        };
-        prompt.push_str(&context);
-    }
-
-    prompt.push_str(&format!(
-        "\nReturn ONLY a JSON object, no prose or code fences, shaped exactly:\n\
-         {{\"title\": \"<short reel title>\", \"segments\": [\"<line for moment 1>\", \
-         \"<line for moment 2>\", ... ]}}\n\
-         The \"segments\" array MUST have exactly {} items, one per moment in order.",
-        moment_count
-    ));
-
-    let system_message = ChatMessage::system(AGENTIC_SYSTEM_PROMPT.to_string());
-    let user_message = ChatMessage::user(prompt);
-    vec![system_message, user_message]
-}
-
-/// Turn a raw model reply into a [`ReelScript`] with exactly `n` lines.
-///
-/// The reply may be wrapped in code fences or prose (see
-/// `extract_json_object`). Each entry of `segments` may be a bare string or an
-/// object with a `narration` string. Entries beyond `n` are dropped, missing
-/// or blank ones become a neutral fallback line, and a missing or blank title
-/// becomes `"Memories"`, so the result is always aligned with the beats.
-pub fn parse_script_response(raw: &str, n: usize) -> ReelScript {
-    const FALLBACK_LINE: &str = "A moment worth remembering.";
-    const FALLBACK_TITLE: &str = "Memories";
-
-    let parsed = extract_json_object(raw);
-    let root = parsed.as_ref();
-
-    let title = match root
-        .and_then(|v| v.get("title"))
-        .and_then(|t| t.as_str())
-        .map(clean_text)
-    {
-        Some(cleaned) if !cleaned.is_empty() => cleaned,
-        _ => FALLBACK_TITLE.to_string(),
-    };
-
-    let mut lines: Vec<String> = Vec::new();
-    let segments = root
-        .and_then(|v| v.get("segments"))
-        .and_then(|s| s.as_array());
-    if let Some(entries) = segments {
-        // Only the first n entries can ever be used; extras are ignored.
-        for entry in entries.iter().take(n) {
-            let text = entry
-                .as_str()
-                .or_else(|| entry.get("narration").and_then(|v| v.as_str()))
-                .unwrap_or("");
-            let cleaned = clean_text(text);
-            lines.push(if cleaned.is_empty() {
-                FALLBACK_LINE.to_string()
-            } else {
-                cleaned
-            });
-        }
-    }
-    // Pad shortfalls with a neutral line so every beat still gets spoken audio.
-    lines.resize(n, FALLBACK_LINE.to_string());
-
-    ReelScript { title, lines }
-}
-
-/// Pull the first parsable JSON object out of a possibly-noisy model response
-/// (code fences, leading prose). Returns `None` if nothing parses.
-///
-/// The whole trimmed input is tried first. Otherwise each `{` is tried in
-/// turn as the start of a brace-balanced span (braces inside string literals
-/// are ignored) and the first span that parses as JSON wins. Retrying matters
-/// when prose ahead of the payload contains braces of its own, e.g.
-/// `Use {curly} braces: {"title": ...}` — the first span is not JSON, but a
-/// later one is.
-fn extract_json_object(raw: &str) -> Option<serde_json::Value> {
-    // Fast path: the whole thing is valid JSON.
-    if let Ok(v) = serde_json::from_str::<serde_json::Value>(raw.trim()) {
-        return Some(v);
-    }
-
-    let bytes = raw.as_bytes();
-    let mut search_from = 0usize;
-    // `{` is ASCII, so `start` and `start + 1` are always char boundaries.
-    while let Some(offset) = raw[search_from..].find('{') {
-        let start = search_from + offset;
-        if let Some(end) = balanced_object_end(bytes, start) {
-            if let Ok(v) = serde_json::from_str::<serde_json::Value>(&raw[start..=end]) {
-                return Some(v);
-            }
-        }
-        search_from = start + 1;
-    }
-    None
-}
-
-/// Index of the `}` that closes the `{` at `bytes[start]`, skipping braces
-/// that sit inside double-quoted strings (backslash escapes honoured).
-/// Returns `None` when the span never balances.
-fn balanced_object_end(bytes: &[u8], start: usize) -> Option<usize> {
-    let mut depth = 0i32;
-    let mut in_str = false;
-    let mut escaped = false;
-    for (i, &b) in bytes.iter().enumerate().skip(start) {
-        if in_str {
-            if escaped {
-                escaped = false;
-            } else if b == b'\\' {
-                escaped = true;
-            } else if b == b'"' {
-                in_str = false;
-            }
-            continue;
-        }
-        match b {
-            b'"' => in_str = true,
-            b'{' => depth += 1,
-            b'}' => {
-                depth -= 1;
-                if depth == 0 {
-                    return Some(i);
-                }
-            }
-            _ => {}
-        }
-    }
-    None
-}
-
-/// Normalise one line of model output: trim it, peel any surrounding `"`
-/// characters, trim again, and squeeze every internal whitespace run down to a
-/// single space.
-fn clean_text(s: &str) -> String {
-    let core = s.trim().trim_matches('"').trim();
-    let mut out = String::new();
-    for word in core.split_whitespace() {
-        if !out.is_empty() {
-            out.push(' ');
-        }
-        out.push_str(word);
-    }
-    out
-}
-
-/// Single-shot script generation: build the prompt pair, ask the LLM once,
-/// and parse the reply into a [`ReelScript`] with one line per beat.
-///
-/// Text-only (no images) — the per-beat context comes from cached insights.
-/// Per the existing note on this function, the client takes the GPU read lease
-/// inside `LlamaCppClient::generate`; that code is not visible here.
-///
-/// # Errors
-/// Returns the client error, wrapped with "LLM script generation failed",
-/// when the LLM call fails. A malformed reply is not an error: the parser
-/// falls back to neutral lines.
-pub async fn generate_script(
-    client: &Arc<LlamaCppClient>,
-    meta: &ReelMeta,
-    beats: &[PlannedBeat],
-) -> Result<ReelScript> {
-    let (system, user) = build_script_messages(meta, beats);
-    let reply = client
-        .generate(&user, Some(&system), None)
-        .await
-        .context("LLM script generation failed")?;
-    let script = parse_script_response(&reply, beats.len());
-    Ok(script)
-}
-
-/// Agentic script generation: resolve the local backend through the
-/// `InsightGenerator`, hand the model a read-only tool set, run the tool
-/// loop, and parse the final reply into the same [`ReelScript`] shape as
-/// `generate_script`.
-///
-/// # Errors
-/// Fails when backend resolution or the tool loop fails; the caller may then
-/// fall back to `generate_script`.
-pub async fn generate_script_agentic(
-    generator: &InsightGenerator,
-    meta: &ReelMeta,
-    beats: &[PlannedBeat],
-) -> Result<ReelScript> {
-    // Whole-reel calls have no single photo and no authenticated user, so the
-    // loop runs execute_tool with empty file/image context and user_id=0. Only
-    // tools that work without that context are useful here — photo/user-bound
-    // tools (get_file_tags, get_faces_in_photo, recall_facts_for_photo,
-    // recall_facts_for_entity) would just no-op or error, burning iterations,
-    // so they're excluded.
-    const READ_ONLY_TOOLS: [&str; 9] = [
-        "search_rag",
-        "search_messages",
-        "get_sms_messages",
-        "get_calendar_events",
-        "get_location_history",
-        "reverse_geocode",
-        "get_personal_place_at",
-        "recall_entities",
-        "get_current_datetime",
-    ];
-
-    // 1. Resolve the local backend with no sampling overrides.
-    let overrides = SamplingOverrides {
-        model: None,
-        num_ctx: None,
-        temperature: None,
-        top_p: None,
-        top_k: None,
-        min_p: None,
-        enable_thinking: None,
-    };
-    let backend = generator
-        .resolve_backend(BackendKind::Local, &overrides)
-        .await
-        .context("resolving backend for agentic script")?;
-
-    // 2. Start from the persona gate (no persona context, vision off), then
-    //    keep only the allow-listed read-only tools.
-    let gate = generator.current_gate_opts_for_persona(false, None);
-    let tools: Vec<Tool> = InsightGenerator::build_tool_definitions(gate)
-        .into_iter()
-        .filter(|tool| READ_ONLY_TOOLS.contains(&tool.function.name.as_str()))
-        .collect();
-
-    // 3. Build the prompt and run the bounded tool loop.
-    let messages = build_agentic_script_messages(meta, beats);
-    let iteration_cap = reel_pregen_max_tool_iters();
-    let reply = generator
-        .run_readonly_tool_loop(&backend, messages, tools, iteration_cap)
-        .await
-        .context("agentic tool loop failed")?;
-
-    // 4. Strip any think-blocks the model may have emitted, then parse.
-    let visible = crate::ai::llm_client::strip_think_blocks(&reply);
-    Ok(parse_script_response(&visible, beats.len()))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::memories::MemoriesSpan;
-
-    /// Shorthand for a still photo in library 1.
-    fn photo(rel_path: &str) -> super::super::SegmentMedia {
-        super::super::SegmentMedia::Photo {
-            rel_path: rel_path.to_string(),
-            library_id: 1,
-        }
-    }
-
-    /// Reel metadata shared by the prompt tests: a day span over two years.
-    fn meta() -> ReelMeta {
-        ReelMeta {
-            span: MemoriesSpan::Day,
-            years: vec![2019, 2021],
-        }
-    }
-
-    /// `n` single-photo beats on consecutive days, with no insight or GPS.
-    fn planned(n: usize) -> Vec<PlannedBeat> {
-        let mut beats = Vec::with_capacity(n);
-        for i in 0..n {
-            beats.push(PlannedBeat {
-                media: vec![photo(&format!("p{i}.jpg"))],
-                date: Some(1_560_000_000 + i as i64 * 86_400),
-                insight_title: None,
-                insight_summary: None,
-                gps: None,
-            });
-        }
-        beats
-    }
-
-    #[test]
-    fn prompt_states_exact_moment_count_and_span() {
-        let (sys, user) = build_script_messages(&meta(), &planned(3));
-        assert!(sys.contains("memory reel"));
-        for needle in ["3 moments", "on this day", "exactly 3 items"] {
-            assert!(user.contains(needle));
-        }
-        // Each moment gets an indexed entry.
-        assert!(["[1]", "[2]", "[3]"].iter().all(|tag| user.contains(tag)));
-    }
-
-    #[test]
-    fn prompt_notes_burst_photo_count() {
-        let mut beats = planned(1);
-        beats[0].media = vec![photo("a.jpg"), photo("b.jpg"), photo("c.jpg")];
-        let (_sys, user) = build_script_messages(&meta(), &beats);
-        assert!(user.contains("a burst of 3 photos"));
-    }
-
-    #[test]
-    fn prompt_marks_clip_beats() {
-        let mut beats = planned(1);
-        beats[0].media = vec![super::super::SegmentMedia::Clip {
-            rel_path: "v.mp4".into(),
-            library_id: 1,
-        }];
-        let (_sys, user) = build_script_messages(&meta(), &beats);
-        assert!(user.contains("a video clip"));
-    }
-
-    #[test]
-    fn prompt_includes_insight_context_when_present() {
-        let mut beats = planned(1);
-        beats[0].insight_title = Some("Lake house weekend".into());
-        beats[0].insight_summary = Some("Swimming with the dogs.".into());
-        let (_sys, user) = build_script_messages(&meta(), &beats);
-        assert!(user.contains("Lake house weekend — Swimming with the dogs."));
-    }
-
-    #[test]
-    fn parse_plain_json_object() {
-        let reply = r#"{"title":"Summer Days","segments":["First line.","Second line."]}"#;
-        let script = parse_script_response(reply, 2);
-        assert_eq!(script.title, "Summer Days");
-        assert_eq!(script.lines, vec!["First line.", "Second line."]);
-    }
-
-    #[test]
-    fn parse_tolerates_code_fences_and_prose() {
-        let reply = "Sure! Here's your reel:\n```json\n{\"title\": \"Trip\", \"segments\": [\"A.\", \"B.\"]}\n```\nEnjoy!";
-        let script = parse_script_response(reply, 2);
-        assert_eq!(script.title, "Trip");
-        assert_eq!(script.lines, vec!["A.", "B."]);
-    }
-
-    #[test]
-    fn parse_accepts_object_segment_shape() {
-        let reply = r#"{"title":"T","segments":[{"narration":"One."},{"narration":"Two."}]}"#;
-        let script = parse_script_response(reply, 2);
-        assert_eq!(script.lines, vec!["One.", "Two."]);
-    }
-
-    #[test]
-    fn parse_pads_short_and_truncates_long_to_n() {
-        // Model returned 1 line but we have 3 segments → pad with neutral lines.
-        let padded = parse_script_response(r#"{"title":"T","segments":["Only one."]}"#, 3);
-        assert_eq!(padded.lines.len(), 3);
-        assert_eq!(padded.lines[0], "Only one.");
-        assert!(!padded.lines[1].is_empty());
-
-        // Model returned 3 but we have 2 → truncate.
-        let trimmed = parse_script_response(r#"{"title":"T","segments":["a","b","c"]}"#, 2);
-        assert_eq!(trimmed.lines, vec!["a", "b"]);
-    }
-
-    #[test]
-    fn parse_falls_back_on_garbage() {
-        let script = parse_script_response("the model said no", 2);
-        assert_eq!(script.title, "Memories");
-        assert_eq!(script.lines.len(), 2);
-        assert!(script.lines.iter().all(|line| !line.is_empty()));
-    }
-
-    #[test]
-    fn parse_blank_line_replaced_with_fallback() {
-        let script = parse_script_response(r#"{"title":"T","segments":["  ","Real."]}"#, 2);
-        assert!(!script.lines[0].is_empty());
-        assert_eq!(script.lines[1], "Real.");
-    }
-}
@@ -1,560 +0,0 @@
-//! Reel selectors: resolve "what goes in the reel" into an ordered media set
-//! plus the metadata the scripter needs. The renderer and scripter are
-//! selector-agnostic, so adding tag- or date-range-based reels later means
-//! adding a variant here, not touching the pipeline.
-//!
-//! Resolution is split in two so the handler can compute a cache key (and
-//! short-circuit on a cache hit) without the per-photo insight lookups:
-//! [`resolve`] is the cheap media-set pass; [`enrich`] adds cached insights and
-//! runs in the background job.
-
-use std::path::Path;
-use std::sync::Mutex;
-
-use chrono::{DateTime, Datelike, FixedOffset};
-
-use super::{PlannedBeat, ReelMeta, SegmentMedia};
-use crate::database::{ExifDao, InsightDao};
-use crate::file_types::{is_image_file, is_video_file};
-use crate::memories::{self, MemoriesSpan};
-use crate::state::AppState;
-
-/// Default and hard caps on how many photos a reel covers. The default is an
-/// upper bound on the request; the effective count is usually smaller, set by
-/// the duration budget (see [`budget_segments`]). The hard cap bounds work per
-/// reel regardless.
-pub const DEFAULT_MAX_SEGMENTS: usize = 40;
-/// Absolute ceiling on narrated segments per reel: [`budget_segments`] clamps
-/// its result to this value whatever the request or duration budget allows.
-pub const HARD_MAX_SEGMENTS: usize = 40;
-
-/// Target reel length, in seconds. Week and especially month spans can surface
-/// hundreds of photos; at a few seconds of narration each, a naive reel runs
-/// minutes. We cap the segment count to keep the reel near this length.
-/// Tunable via `REEL_TARGET_SECONDS`; this value is the fallback used by
-/// `target_reel_seconds` when the variable is unset or invalid.
-const DEFAULT_TARGET_REEL_SECONDS: f64 = 90.0;
-
-/// Rough average wall-time per photo segment, in seconds (a short narration
-/// line + the silent tail). Only used by `budget_segments` to turn the
-/// duration target into a segment count; the real per-segment time is the
-/// measured narration length.
-const EST_SECONDS_PER_SEGMENT: f64 = 5.0;
-
-/// Time gap, in seconds, that separates one "event/moment" from the next when
-/// clustering a span's photos (passed to `cluster_by_gap` by
-/// `form_photo_beats`). Photos within a few hours are treated as the same
-/// occasion (and across years/days the gaps are far larger, so each instance
-/// clusters on its own). 4 hours splits e.g. a morning hike from an evening
-/// dinner.
-const EVENT_GAP_SECONDS: i64 = 4 * 3600;
-
-/// Target reel length in seconds: the trimmed value of `REEL_TARGET_SECONDS`
-/// when it parses as a finite, strictly positive number, otherwise
-/// `DEFAULT_TARGET_REEL_SECONDS`.
-fn target_reel_seconds() -> f64 {
-    let configured = match std::env::var("REEL_TARGET_SECONDS") {
-        Ok(raw) => raw.trim().parse::<f64>().ok(),
-        Err(_) => None,
-    };
-    match configured {
-        Some(secs) if secs.is_finite() && secs > 0.0 => secs,
-        _ => DEFAULT_TARGET_REEL_SECONDS,
-    }
-}
-
-/// Number of narrated segments a reel may have.
-///
-/// Takes the count the duration target affords (target seconds divided by the
-/// estimated seconds per segment, rounded down), limits it to
-/// `requested_max`, and finally keeps the result within
-/// `1..=HARD_MAX_SEGMENTS`. This is what keeps week/month reels from running
-/// long.
-pub fn budget_segments(requested_max: usize) -> usize {
-    let affordable = (target_reel_seconds() / EST_SECONDS_PER_SEGMENT).floor() as usize;
-    let wanted = std::cmp::min(affordable, requested_max);
-    // Never fewer than one segment, never more than the hard cap.
-    wanted.max(1).min(HARD_MAX_SEGMENTS)
-}
-
-/// What a reel is built from. v1 ships the memories (on this day/week/month)
-/// selector; tag and date-range variants slot in here later.
-#[derive(Debug, Clone)]
-pub enum ReelSelector {
-    /// Media surfaced by the memories feature for a given span.
-    Memories {
-        /// Which memories window to gather.
-        span: MemoriesSpan,
-        /// Client UTC offset in minutes; `resolve` converts it to seconds to
-        /// build the `FixedOffset` used for gathering and year bucketing.
-        tz_offset_minutes: i32,
-        /// Optional library filter; `None` is rendered as `all` in the cache
-        /// descriptor.
-        library: Option<String>,
-        /// Requested upper bound on narrated segments, further limited by
-        /// `budget_segments`.
-        max_segments: usize,
-    },
-}
-
-impl ReelSelector {
-    /// Stable string identity used for the cache key.
-    ///
-    /// Encodes every field that changes *which* media is selected — span,
-    /// timezone offset, library (`all` when unset) and the segment cap. The
-    /// non-deterministic narration cannot be part of a pre-render key and is
-    /// deliberately left out.
-    pub fn descriptor(&self) -> String {
-        // Irrefutable while `Memories` is the only variant; adding a variant
-        // makes this a compile error, which is the prompt to extend the key.
-        let ReelSelector::Memories {
-            span,
-            tz_offset_minutes,
-            library,
-            max_segments,
-        } = self;
-        let lib = match library {
-            Some(name) => name.as_str(),
-            None => "all",
-        };
-        format!("memories:span={span:?}:tz={tz_offset_minutes}:lib={lib}:max={max_segments}")
-    }
-}
-
-/// Choose at most `max` items spread evenly over `items`.
-///
-/// * `max == 0` yields an empty vector.
-/// * Input that already fits is returned as a copy, unchanged.
-/// * `max == 1` keeps just the first item.
-/// * Otherwise picks `max` rounded, evenly spaced indices that always include
-///   the first and last item.
-pub fn sample_evenly<T: Clone>(items: &[T], max: usize) -> Vec<T> {
-    match max {
-        0 => return Vec::new(),
-        _ if items.len() <= max => return items.to_vec(),
-        1 => return vec![items[0].clone()],
-        _ => {}
-    }
-
-    let last = items.len() - 1;
-    let steps = max - 1;
-    let mut picked = Vec::with_capacity(max);
-    for i in 0..max {
-        // Spread indices 0..=last across max picks; `steps / 2` rounds the
-        // division to the nearest index instead of always flooring.
-        let idx = (i * last + steps / 2) / steps;
-        picked.push(items[idx.min(last)].clone());
-    }
-    picked
-}
-
-/// Split time-sorted items into events. A new event begins whenever an item's
-/// timestamp is more than `gap_seconds` after the most recent timestamped
-/// item. Order is preserved, and an item without a timestamp simply joins the
-/// event currently being built.
-fn cluster_by_gap(
-    items: &[memories::MemoryItem],
-    gap_seconds: i64,
-) -> Vec<Vec<memories::MemoryItem>> {
-    let mut events: Vec<Vec<memories::MemoryItem>> = Vec::new();
-    let mut last_seen: Option<i64> = None;
-    for item in items {
-        let gap_exceeded = matches!(
-            (last_seen, item.created),
-            (Some(prev), Some(cur)) if cur - prev > gap_seconds
-        );
-        match events.last_mut() {
-            Some(current) if !gap_exceeded => current.push(item.clone()),
-            // Either the very first item or a gap large enough to split on.
-            _ => events.push(vec![item.clone()]),
-        }
-        // Undated items leave the reference timestamp untouched.
-        last_seen = item.created.or(last_seen);
-    }
-    events
-}
-
-/// Most photos a single beat will flash through. Bounds the burst so one huge
-/// event doesn't dominate, and keeps each photo on screen long enough to
-/// register at the per-beat narration length (see render's beat timing).
-/// `resolve` passes this as the `max_burst` argument of `form_beats`.
-pub const MAX_BURST_PHOTOS: usize = 10;
-
-/// Fold time-ordered event clusters into `n` contiguous groups so a span with
-/// more events than the beat budget still covers the whole timeline: adjacent
-/// events merge into one beat instead of being dropped.
-///
-/// Callers are expected to pass `1 <= n <= clusters.len()`.
-fn partition_into_groups(
-    clusters: Vec<Vec<memories::MemoryItem>>,
-    n: usize,
-) -> Vec<Vec<memories::MemoryItem>> {
-    let total = clusters.len();
-    let mut remaining = clusters.into_iter();
-    let mut groups = Vec::with_capacity(n);
-    for j in 0..n {
-        // Even contiguous split of `total` clusters into `n` groups; every
-        // group consumes at least one cluster.
-        let lo = j * total / n;
-        let hi = (j + 1) * total / n;
-        let width = hi.saturating_sub(lo).max(1);
-
-        let mut merged = Vec::new();
-        for cluster in remaining.by_ref().take(width) {
-            merged.extend(cluster);
-        }
-        groups.push(merged);
-    }
-    groups
-}
-
-/// Build up to `n_beats` photo beats from `items`.
-///
-/// Photos are clustered into events by time gap. When there are more events
-/// than beats, adjacent events are merged so the whole span is still covered.
-/// Each beat then shows at most `max_burst` photos (an even spread of its
-/// group) under one narration line and is dated by its first shown photo —
-/// so a week/month reel *shows* all its moments without a narrated (and
-/// timed) segment per photo. Insight and GPS fields are left unset.
-fn form_photo_beats(
-    items: &[memories::MemoryItem],
-    n_beats: usize,
-    max_burst: usize,
-) -> Vec<PlannedBeat> {
-    if items.is_empty() || n_beats == 0 {
-        return Vec::new();
-    }
-
-    // One beat per event when they fit; otherwise fold adjacent events
-    // together into exactly n_beats groups.
-    let mut groups = cluster_by_gap(items, EVENT_GAP_SECONDS);
-    if groups.len() > n_beats {
-        groups = partition_into_groups(groups, n_beats);
-    }
-
-    let mut beats = Vec::new();
-    for group in groups {
-        if group.is_empty() {
-            continue;
-        }
-        let shown = sample_evenly(&group, max_burst);
-        let date = shown.first().and_then(|it| it.created);
-        let media = shown
-            .into_iter()
-            .map(|it| SegmentMedia::Photo {
-                rel_path: it.path,
-                library_id: it.library_id,
-            })
-            .collect();
-        beats.push(PlannedBeat {
-            media,
-            date,
-            insight_title: None,
-            insight_summary: None,
-            gps: None,
-        });
-    }
-    beats
-}
-
-/// Split the beat budget between photo beats and video-clip beats.
-///
-/// Returns `(photo_beats, clip_beats)`. Clips are individually valuable
-/// (motion + live audio) so they get up to half the budget — at least one if
-/// any exist and the budget is non-zero; photos take the rest. With only one
-/// kind present, it gets the whole budget (clips never exceed `n_videos`).
-///
-/// The two parts never add up to more than `n_beats`; in particular a zero
-/// budget yields `(0, 0)` even when both photos and videos are present.
-fn split_beat_budget(n_photos: usize, n_videos: usize, n_beats: usize) -> (usize, usize) {
-    if n_videos == 0 {
-        return (n_beats, 0);
-    }
-    if n_photos == 0 {
-        return (0, n_beats.min(n_videos));
-    }
-    // Half the budget, but at least one clip; the final `min` keeps the
-    // "at least one" floor from overshooting an empty budget.
-    let clip_beats = n_videos.min((n_beats / 2).max(1)).min(n_beats);
-    let photo_beats = n_beats - clip_beats;
-    (photo_beats, clip_beats)
-}
-
-/// Plan a reel's beats from a span's photos and videos under a beat budget.
-///
-/// The budget is split by `split_beat_budget`. Photos cluster into burst
-/// beats; each chosen video (an even sample when there are more than the clip
-/// budget) becomes a one-clip beat. The combined list is returned in
-/// chronological order, with undated beats last, so the reel reads as the
-/// span unfolded.
-pub fn form_beats(
-    photos: &[memories::MemoryItem],
-    videos: &[memories::MemoryItem],
-    n_beats: usize,
-    max_burst: usize,
-) -> Vec<PlannedBeat> {
-    if n_beats == 0 {
-        return Vec::new();
-    }
-    let (photo_budget, clip_budget) = split_beat_budget(photos.len(), videos.len(), n_beats);
-
-    let mut beats = form_photo_beats(photos, photo_budget, max_burst);
-
-    // One clip beat per chosen video, spread across the span's videos.
-    let clip_beats = sample_evenly(videos, clip_budget)
-        .into_iter()
-        .map(|clip| PlannedBeat {
-            media: vec![SegmentMedia::Clip {
-                rel_path: clip.path,
-                library_id: clip.library_id,
-            }],
-            date: clip.created,
-            insight_title: None,
-            insight_summary: None,
-            gps: None,
-        });
-    beats.extend(clip_beats);
-
-    // Stable sort: dated beats first in ascending order, undated beats keep
-    // their relative order at the end.
-    beats.sort_by_key(|beat| (beat.date.is_none(), beat.date));
-    beats
-}
-
-/// Cheap pass: resolve the selector into an ordered list of beats (no insight
-/// lookups yet) plus reel metadata.
-///
-/// # Errors
-/// Propagates the `String` error from `memories::gather_memory_items`
-/// (documented by the original author as an invalid library param).
-pub fn resolve(
-    app_state: &AppState,
-    exif_dao: &Mutex<Box<dyn ExifDao>>,
-    span_context: &opentelemetry::Context,
-    selector: &ReelSelector,
-) -> Result<(Vec<PlannedBeat>, ReelMeta), String> {
-    match selector {
-        ReelSelector::Memories {
-            span,
-            tz_offset_minutes,
-            library,
-            max_segments,
-        } => {
-            // The offset comes from the request, so convert minutes to seconds
-            // with a checked multiply: an absurd value must become "no
-            // timezone" (like any other out-of-range offset) rather than
-            // overflow into a panic or a wrapped, bogus offset.
-            let client_tz = tz_offset_minutes
-                .checked_mul(60)
-                .and_then(FixedOffset::east_opt);
-            let items = memories::gather_memory_items(
-                app_state,
-                exif_dao,
-                span_context,
-                *span,
-                *tz_offset_minutes,
-                client_tz,
-                library.as_deref(),
-            )?;
-
-            // Split into photos and video clips; anything that's neither is
-            // dropped. Years span both, computed before the budget narrows it.
-            let years = distinct_years(&items, client_tz);
-            let meta = ReelMeta { span: *span, years };
-
-            let (photos, videos): (Vec<_>, Vec<_>) = items
-                .into_iter()
-                .filter(|it| {
-                    is_image_file(Path::new(&it.path)) || is_video_file(Path::new(&it.path))
-                })
-                .partition(|it| is_image_file(Path::new(&it.path)));
-
-            // The budget caps the number of narrated beats (≈ reel length);
-            // photo beats then burst through several photos and video beats
-            // play a short clip, so the reel covers the span without running
-            // minutes long.
-            let n_beats = budget_segments(*max_segments);
-            let beats = form_beats(&photos, &videos, n_beats, MAX_BURST_PHOTOS);
-            Ok((beats, meta))
-        }
-    }
-}
-
-/// Distinct calendar years represented by the selected media, in the client's
-/// timezone, ascending. Used to tell the scripter how far back the reel reaches.
-fn distinct_years(items: &[memories::MemoryItem], tz: Option<FixedOffset>) -> Vec<i32> {
-    let mut years: Vec<i32> = items
-        .iter()
-        .filter_map(|it| it.created)
-        .filter_map(|ts| DateTime::from_timestamp(ts, 0))
-        .map(|dt| match tz {
-            Some(off) => dt.with_timezone(&off).year(),
-            None => dt.year(),
-        })
-        .collect();
-    years.sort_unstable();
-    years.dedup();
-    years
-}
-
-/// Background pass: fill each beat's cached insight (title + summary) and
-/// GPS coordinates from its lead photo, where one exists. Best-effort — a
-/// missing or errored lookup leaves the fields `None` and the scripter
-/// narrates from the date alone.
-pub fn enrich(
-    insight_dao: &Mutex<Box<dyn InsightDao>>,
-    exif_dao: &Mutex<Box<dyn ExifDao>>,
-    span_context: &opentelemetry::Context,
-    beats: &mut [PlannedBeat],
-) {
-    let Ok(mut insight_dao) = insight_dao.lock() else {
-        return;
-    };
-    let Ok(mut exif_dao) = exif_dao.lock() else {
-        return;
-    };
-    for beat in beats.iter_mut() {
-        let rel_path = match beat.media.first() {
-            Some(SegmentMedia::Photo { rel_path, .. } | SegmentMedia::Clip { rel_path, .. }) => {
-                rel_path.clone()
-            }
-            None => continue,
-        };
-        if let Ok(Some(insight)) = insight_dao.get_insight(span_context, &rel_path) {
-            beat.insight_title = Some(insight.title);
-            beat.insight_summary = Some(insight.summary);
-        }
-        // Enrich GPS from EXIF when the lead media is a photo.
-        if let Some(SegmentMedia::Photo { .. }) = beat.media.first()
-            && let Ok(Some(exif)) = exif_dao.get_exif(span_context, &rel_path)
-            && let (Some(lat), Some(lon)) = (exif.gps_latitude, exif.gps_longitude)
-        {
-            beat.gps = Some((lat as f64, lon as f64));
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sample_evenly_returns_all_when_under_cap() {
-        let v = vec![1, 2, 3];
-        assert_eq!(sample_evenly(&v, 5), vec![1, 2, 3]);
-        assert_eq!(sample_evenly(&v, 3), vec![1, 2, 3]);
-    }
-
-    #[test]
-    fn sample_evenly_keeps_endpoints_and_spreads() {
-        let v: Vec<i32> = (0..100).collect();
-        let picked = sample_evenly(&v, 5);
-        assert_eq!(picked.len(), 5);
-        assert_eq!(picked[0], 0); // first kept
-        assert_eq!(*picked.last().unwrap(), 99); // last kept
-        // Strictly increasing, no dupes.
-        assert!(picked.windows(2).all(|w| w[0] < w[1]));
-    }
-
-    #[test]
-    fn sample_evenly_handles_one_and_zero() {
-        let v: Vec<i32> = (0..10).collect();
-        assert_eq!(sample_evenly(&v, 1), vec![0]);
-        assert!(sample_evenly(&v, 0).is_empty());
-    }
-
-    #[test]
-    fn descriptor_is_stable_and_distinguishes_inputs() {
-        let a = ReelSelector::Memories {
-            span: MemoriesSpan::Day,
-            tz_offset_minutes: -480,
-            library: None,
-            max_segments: 24,
-        };
-        let b = ReelSelector::Memories {
-            span: MemoriesSpan::Week,
-            tz_offset_minutes: -480,
-            library: None,
-            max_segments: 24,
-        };
-        assert_eq!(a.descriptor(), a.clone().descriptor());
-        assert_ne!(a.descriptor(), b.descriptor());
-        assert!(a.descriptor().contains("lib=all"));
-    }
-
-    #[test]
-    fn distinct_years_dedupes_and_sorts() {
-        let items = vec![
-            memories::MemoryItem {
-                path: "a".into(),
-                created: Some(1_560_000_000), // 2019
-                modified: None,
-                library_id: 1,
-            },
-            memories::MemoryItem {
-                path: "b".into(),
-                created: Some(1_560_086_400), // 2019
-                modified: None,
-                library_id: 1,
-            },
-            memories::MemoryItem {
-                path: "c".into(),
-                created: Some(1_623_000_000), // 2021
-                modified: None,
-                library_id: 1,
-            },
-        ];
-        assert_eq!(distinct_years(&items, None), vec![2019, 2021]);
-    }
-
-    // Build an item at a given unix timestamp (seconds) with a chosen extension.
-    fn item_ext(ts: i64, name: &str, ext: &str) -> memories::MemoryItem {
-        memories::MemoryItem {
-            path: format!("{name}.{ext}"),
-            created: Some(ts),
-            modified: None,
-            library_id: 1,
-        }
-    }
-    fn item_at(ts: i64, name: &str) -> memories::MemoryItem {
-        item_ext(ts, name, "jpg")
-    }
-
-    #[test]
-    fn budget_segments_caps_to_duration_target() {
-        // 90s / 5s ≈ 18, bounded by the request max and hard cap.
-        assert_eq!(budget_segments(40), 18);
-        assert_eq!(budget_segments(5), 5); // request asked for fewer
-        assert_eq!(budget_segments(1000), 18); // hard cap / budget wins
-    }
-
-    #[test]
-    fn cluster_by_gap_splits_on_large_jumps() {
-        // Two photos minutes apart, then one a day later → two events.
-        let items = vec![
-            item_at(1_000_000, "a"),
-            item_at(1_000_300, "b"), // +5 min → same event
-            item_at(1_100_000, "c"), // +~27h → new event
-        ];
-        let clusters = cluster_by_gap(&items, EVENT_GAP_SECONDS);
-        assert_eq!(clusters.len(), 2);
-        assert_eq!(clusters[0].len(), 2);
-        assert_eq!(clusters[1].len(), 1);
-    }
-
-    #[test]
-    fn photo_beats_one_per_event_when_they_fit() {
-        // Three well-separated events, budget of 10 → three beats, each holding
-        // all of its (few) photos.
-        let items = vec![
-            item_at(0, "a"),
-            item_at(50, "b"), // same event as a
-            item_at(1_000_000, "c"),
-            item_at(2_000_000, "d"),
-        ];
-        let beats = form_photo_beats(&items, 10, MAX_BURST_PHOTOS);
-        assert_eq!(beats.len(), 3);
-        assert_eq!(beats[0].media.len(), 2); // burst of the first event
-        assert_eq!(beats[1].media.len(), 1);
-        assert_eq!(beats[2].media.len(), 1);
-    }
-
-    #[test]
-    fn photo_beats_merge_events_when_over_budget() {
-        // Six distinct events but only two beats → adjacent events fold in, and
-        // every event's photos still appear (capped by the burst max).
-        let items: Vec<memories::MemoryItem> = (0..6)
-            .map(|i| item_at(i as i64 * 1_000_000, &format!("e{i}")))
-            .collect();
-        let beats = form_photo_beats(&items, 2, MAX_BURST_PHOTOS);
-        assert_eq!(beats.len(), 2);
-        let shown: usize = beats.iter().map(|b| b.media.len()).sum();
-        assert_eq!(shown, 6); // all six moments still shown across two beats
-    }
-
-    #[test]
-    fn photo_beats_cap_burst_to_max() {
-        // One dense event of 30 photos, generous budget → a single beat that
-        // bursts at most MAX_BURST_PHOTOS, not all 30.
-        let items: Vec<memories::MemoryItem> = (0..30)
-            .map(|i| item_at(i as i64, &format!("p{i}")))
-            .collect();
-        let beats = form_photo_beats(&items, 18, MAX_BURST_PHOTOS);
-        assert_eq!(beats.len(), 1);
-        assert_eq!(beats[0].media.len(), MAX_BURST_PHOTOS);
-    }
-
-    #[test]
-    fn split_beat_budget_handles_each_mix() {
-        // Only photos / only videos → that kind gets the whole budget.
-        assert_eq!(split_beat_budget(10, 0, 18), (18, 0));
-        assert_eq!(split_beat_budget(0, 10, 18), (0, 10)); // capped at n_videos
-        assert_eq!(split_beat_budget(0, 30, 18), (0, 18)); // capped at budget
-        // Mixed → clips up to half (≥1), photos the rest.
-        assert_eq!(split_beat_budget(100, 100, 18), (9, 9));
-        assert_eq!(split_beat_budget(100, 1, 18), (17, 1)); // few videos
-    }
-
-    #[test]
-    fn form_beats_mixes_clip_and_photo_beats_in_time_order() {
-        let photos = vec![item_at(0, "p0"), item_at(2_000_000, "p1")];
-        // A video between the two photo events (in time).
-        let videos = vec![item_ext(1_000_000, "v0", "mp4")];
-        let beats = form_beats(&photos, &videos, 10, MAX_BURST_PHOTOS);
-        // Two photo events + one clip = three beats, chronological.
-        assert_eq!(beats.len(), 3);
-        assert!(!beats[0].is_clip()); // p0 @ t=0
-        assert!(beats[1].is_clip()); // v0 @ t=1e6
-        assert!(!beats[2].is_clip()); // p1 @ t=2e6
-        assert!(matches!(beats[1].media[0], SegmentMedia::Clip { .. }));
-    }
-
-    #[test]
-    fn form_beats_videos_only_become_clip_beats() {
-        let videos: Vec<memories::MemoryItem> = (0..3)
-            .map(|i| item_ext(i as i64 * 1_000_000, &format!("v{i}"), "mov"))
-            .collect();
-        let beats = form_beats(&[], &videos, 10, MAX_BURST_PHOTOS);
-        assert_eq!(beats.len(), 3);
-        assert!(beats.iter().all(|b| b.is_clip()));
-    }
-}
@@ -1,17 +1,13 @@
 use crate::ai::apollo_client::ApolloClient;
-use crate::ai::clip_client::ClipClient;
 use crate::ai::face_client::FaceClient;
 use crate::ai::insight_chat::{ChatLockMap, InsightChatService};
-use crate::ai::llamacpp::LlamaCppClient;
 use crate::ai::openrouter::OpenRouterClient;
-use crate::ai::turn_registry::TurnRegistry;
 use crate::ai::{InsightGenerator, OllamaClient, SmsApiClient};
 use crate::database::{
-    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, InsightGenerationJobDao, KnowledgeDao,
-    LocationHistoryDao, PrecomputedReelDao, SearchHistoryDao, SqliteCalendarEventDao,
-    SqliteDailySummaryDao, SqliteExifDao, SqliteInsightDao, SqliteInsightGenerationJobDao,
-    SqliteKnowledgeDao, SqliteLocationHistoryDao, SqlitePrecomputedReelDao, SqliteSearchHistoryDao,
-    SqliteUserAiPrefsDao, UserAiPrefsDao, connect,
+    CalendarEventDao, DailySummaryDao, ExifDao, InsightDao, KnowledgeDao, LocationHistoryDao,
+    SearchHistoryDao, SqliteCalendarEventDao, SqliteDailySummaryDao, SqliteExifDao,
+    SqliteInsightDao, SqliteKnowledgeDao, SqliteLocationHistoryDao, SqliteSearchHistoryDao,
+    connect,
 };
 use crate::database::{PreviewDao, SqlitePreviewDao};
 use crate::faces;
@@ -21,7 +17,6 @@ use crate::video::actors::{
    PlaylistGenerator, PreviewClipGenerator, StreamActor, VideoPlaylistManager,
 };
 use actix::{Actor, Addr};
-use std::collections::HashMap;
 use std::env;
 use std::sync::{Arc, Mutex, RwLock};

@@ -54,10 +49,6 @@ pub struct AppState {
    pub video_path: String,
    pub gif_path: String,
    pub preview_clips_path: String,
-    /// Directory for cached memory-reel MP4s (+ title sidecars). Derived from
-    /// `REELS_DIRECTORY`, defaulting to a `reels` dir beside the preview clips.
-    /// Created lazily by the reel pipeline on first render.
-    pub reels_path: String,
    pub excluded_dirs: Vec<String>,
    pub ollama: OllamaClient,
    /// `None` when `OPENROUTER_API_KEY` is not configured. Consulted only
@@ -70,33 +61,15 @@ pub struct AppState {
    /// Curated list of OpenRouter model ids exposed to clients. Sourced from
    /// `OPENROUTER_ALLOWED_MODELS` (comma-separated). Empty when unset.
    pub openrouter_allowed_models: Vec<String>,
-    /// `None` when `LLAMA_SWAP_URL` is not configured. Consulted only when a
-    /// request explicitly opts into `backend=llamacpp`. Same shape as the
-    /// `openrouter` slot — present here so handlers can route to it without
-    /// threading through the generator.
-    #[allow(dead_code)]
-    pub llamacpp: Option<Arc<LlamaCppClient>>,
-    /// Curated list of llama-swap model ids exposed to clients. Sourced from
-    /// `LLAMA_SWAP_ALLOWED_MODELS` (comma-separated). Empty when unset; the
-    /// server then falls back to `LLAMA_SWAP_PRIMARY_MODEL`.
-    pub llamacpp_allowed_models: Vec<String>,
    pub sms_client: SmsApiClient,
    pub insight_generator: InsightGenerator,
    /// Chat continuation service. Hold an Arc so handlers can clone cheaply.
    pub insight_chat: Arc<InsightChatService>,
-    pub turn_registry: Arc<TurnRegistry>,
+    /// Face inference client (calls Apollo's `/api/internal/faces/*`).
+    /// Disabled (`is_enabled() == false`) when neither `APOLLO_FACE_API_BASE_URL`
+    /// nor `APOLLO_API_BASE_URL` is set; the file-watch hook (Phase 3) and
+    /// manual-face-create handler short-circuit in that case.
    pub face_client: FaceClient,
-    pub clip_client: ClipClient,
-    pub insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
-    pub insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
-    /// Ledger for precomputed memory reels. Written by the nightly agentic
-    /// job (Section D); read by `GET /reels/precomputed` (Section C).
-    #[allow(dead_code)]
-    pub precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>>,
-    /// User AI preferences (voice, timezone, library). Mirrored by the
-    /// client; read by the nightly pre-generation scheduler.
-    #[allow(dead_code)]
-    pub user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>>,
 }

 impl AppState {
@@ -110,7 +83,6 @@ impl AppState {
        self.libraries.iter().find(|l| l.id == id)
    }

-    #[allow(dead_code)]
    pub fn library_by_name(&self, name: &str) -> Option<&Library> {
        self.libraries.iter().find(|l| l.name == name)
    }
@@ -128,26 +100,18 @@ impl AppState {
        ollama: OllamaClient,
        openrouter: Option<Arc<OpenRouterClient>>,
        openrouter_allowed_models: Vec<String>,
-        llamacpp: Option<Arc<LlamaCppClient>>,
-        llamacpp_allowed_models: Vec<String>,
        sms_client: SmsApiClient,
        insight_generator: InsightGenerator,
        insight_chat: Arc<InsightChatService>,
-        turn_registry: Arc<TurnRegistry>,
        preview_dao: Arc<Mutex<Box<dyn PreviewDao>>>,
        face_client: FaceClient,
-        clip_client: ClipClient,
-        insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>>,
-        insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>>,
-        precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>>,
-        user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>>,
    ) -> Self {
        assert!(
            !libraries_vec.is_empty(),
            "AppState::new requires at least one library"
        );
        let base_path = libraries_vec[0].root_path.clone();
-        let playlist_generator = PlaylistGenerator::new(video_path.clone());
+        let playlist_generator = PlaylistGenerator::new();
        let video_playlist_manager =
            VideoPlaylistManager::new(video_path.clone(), playlist_generator.start());

@@ -157,19 +121,6 @@ impl AppState {
            preview_dao,
        );

-        // Reels cache dir: explicit env, else a `reels` sibling of the preview
-        // clips dir (a known-writable, test-safe location). Not created here —
-        // the reel pipeline does `create_dir_all` before its first write, so
-        // construction (incl. tests) never touches the filesystem.
-        let reels_path = std::env::var("REELS_DIRECTORY").unwrap_or_else(|_| {
-            std::path::Path::new(&preview_clips_path)
-                .parent()
-                .map(|p| p.join("reels"))
-                .unwrap_or_else(|| std::path::PathBuf::from("reels"))
-                .to_string_lossy()
-                .to_string()
-        });
-
        let library_health = libraries::new_health_map(&libraries_vec);
        let live_libraries = Arc::new(RwLock::new(libraries_vec.clone()));
        Self {
@@ -184,23 +135,14 @@ impl AppState {
            video_path,
            gif_path,
            preview_clips_path,
-            reels_path,
            excluded_dirs,
            ollama,
            openrouter,
            openrouter_allowed_models,
-            llamacpp,
-            llamacpp_allowed_models,
            sms_client,
            insight_generator,
            insight_chat,
-            turn_registry,
            face_client,
-            clip_client,
-            insight_job_dao,
-            insight_job_handles,
-            precomputed_reel_dao,
-            user_ai_prefs_dao,
        }
    }

@@ -218,14 +160,25 @@ impl AppState {
 impl Default for AppState {
    fn default() -> Self {
        // Initialize AI clients
-        let ollama = build_ollama_from_env();
+        let ollama_primary_url = env::var("OLLAMA_PRIMARY_URL").unwrap_or_else(|_| {
+            env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
+        });
+        let ollama_fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
+        let ollama_primary_model = env::var("OLLAMA_PRIMARY_MODEL")
+            .or_else(|_| env::var("OLLAMA_MODEL"))
+            .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
+        let ollama_fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();
+
+        let ollama = OllamaClient::new(
+            ollama_primary_url,
+            ollama_fallback_url,
+            ollama_primary_model,
+            ollama_fallback_model,
+        );

        let openrouter = build_openrouter_from_env();
        let openrouter_allowed_models = parse_openrouter_allowed_models();

-        let llamacpp = build_llamacpp_from_env();
-        let llamacpp_allowed_models = parse_llamacpp_allowed_models();
-
        let sms_api_url =
            env::var("SMS_API_URL").unwrap_or_else(|_| "http://localhost:8000".to_string());
        let sms_api_token = env::var("SMS_API_TOKEN").ok();
@@ -245,9 +198,6 @@ impl Default for AppState {
            .or_else(|| env::var("APOLLO_API_BASE_URL").ok());
        let face_client = FaceClient::new(face_client_url);

-        // CLIP inference client. Same env var fallback as face_client.
-        let clip_client = ClipClient::from_env();
-
        // Initialize DAOs
        let insight_dao: Arc<Mutex<Box<dyn InsightDao>>> =
            Arc::new(Mutex::new(Box::new(SqliteInsightDao::new())));
@@ -275,20 +225,6 @@ impl Default for AppState {
        let face_dao: Arc<Mutex<Box<dyn faces::FaceDao>>> =
            Arc::new(Mutex::new(Box::new(faces::SqliteFaceDao::new())));

-        // Initialize insight generation job DAO (async generation tracking)
-        let insight_job_dao: Arc<Mutex<Box<dyn InsightGenerationJobDao>>> =
-            Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new())));
-        let insight_job_handles: Arc<Mutex<HashMap<i32, tokio::task::AbortHandle>>> =
-            Arc::new(Mutex::new(HashMap::new()));
-
-        // Initialize precomputed reel DAO (nightly pre-generation ledger)
-        let precomputed_reel_dao: Arc<Mutex<Box<dyn PrecomputedReelDao>>> =
-            Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new())));
-
-        // Initialize user AI preferences DAO (Section E)
-        let user_ai_prefs_dao: Arc<Mutex<Box<dyn UserAiPrefsDao>>> =
-            Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new())));
-
        // Load base path and ensure the primary library row reflects it.
        let base_path = env::var("BASE_PATH").expect("BASE_PATH was not set in the env");
        let mut seed_conn = connect();
@@ -304,7 +240,6 @@ impl Default for AppState {
        let insight_generator = InsightGenerator::new(
            ollama.clone(),
            openrouter.clone(),
-            llamacpp.clone(),
            sms_client.clone(),
            apollo_client.clone(),
            insight_dao.clone(),
@@ -326,18 +261,12 @@ impl Default for AppState {
            Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
        let insight_chat = Arc::new(InsightChatService::new(
            Arc::new(insight_generator.clone()),
+            ollama.clone(),
+            openrouter.clone(),
            insight_dao.clone(),
            chat_locks,
        ));

-        // Turn registry for reconnectable chat turns. 5-minute timeout for
-        // stale turns (background cleaner drops entries older than this).
-        let timeout_secs: u64 = env::var("INSIGHT_CHAT_TURN_TIMEOUT_SECS")
-            .ok()
-            .and_then(|v| v.parse().ok())
-            .unwrap_or(300);
-        let turn_registry = Arc::new(TurnRegistry::new(timeout_secs));
-
        // Ensure preview clips directory exists
        let preview_clips_path =
            env::var("PREVIEW_CLIPS_DIRECTORY").unwrap_or_else(|_| "preview_clips".to_string());
@@ -355,19 +284,11 @@ impl Default for AppState {
            ollama,
            openrouter,
            openrouter_allowed_models,
-            llamacpp,
-            llamacpp_allowed_models,
            sms_client,
            insight_generator,
            insight_chat,
-            turn_registry,
            preview_dao,
            face_client,
-            clip_client,
-            insight_job_dao,
-            insight_job_handles,
-            precomputed_reel_dao,
-            user_ai_prefs_dao,
        )
    }
 }
@@ -403,61 +324,10 @@ fn parse_openrouter_allowed_models() -> Vec<String> {
        .collect()
 }

-/// Build the `OllamaClient` from environment variables — the canonical
-/// `OLLAMA_*` wiring shared by the server (`AppState::default`) and the
-/// standalone binaries (which predate this helper and used to copy it).
-pub fn build_ollama_from_env() -> OllamaClient {
-    let primary_url = env::var("OLLAMA_PRIMARY_URL").unwrap_or_else(|_| {
-        env::var("OLLAMA_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
-    });
-    let fallback_url = env::var("OLLAMA_FALLBACK_URL").ok();
-    let primary_model = env::var("OLLAMA_PRIMARY_MODEL")
-        .or_else(|_| env::var("OLLAMA_MODEL"))
-        .unwrap_or_else(|_| "nemotron-3-nano:30b".to_string());
-    let fallback_model = env::var("OLLAMA_FALLBACK_MODEL").ok();
-
-    OllamaClient::new(primary_url, fallback_url, primary_model, fallback_model)
-}
-
-/// Build a `LlamaCppClient` from environment variables. Returns `None` when
-/// `LLAMA_SWAP_URL` is unset. The client is constructed unconditionally
-/// when the URL is set (so it's available even under `LLM_BACKEND=ollama`
-/// for ad-hoc tooling), but the agentic / chat paths only route through it
-/// when `LLM_BACKEND=llamacpp`. Slot ids default to the names the bundled
-/// `llama-swap/config.yaml` uses — `chat` / `vision` / `embed`.
-pub fn build_llamacpp_from_env() -> Option<Arc<LlamaCppClient>> {
-    let base_url = env::var("LLAMA_SWAP_URL").ok()?;
-    let primary_model = env::var("LLAMA_SWAP_PRIMARY_MODEL").ok();
-    let mut client = LlamaCppClient::new(Some(base_url), primary_model);
-    if let Ok(model) = env::var("LLAMA_SWAP_EMBEDDING_MODEL") {
-        client.set_embedding_model(model);
-    }
-    if let Ok(model) = env::var("LLAMA_SWAP_VISION_MODEL") {
-        client.set_vision_model(model);
-    }
-    if let Ok(model) = env::var("LLAMA_SWAP_TTS_MODEL") {
-        client.set_tts_model(model);
-    }
-    Some(Arc::new(client))
-}
-
-/// Parse `LLAMA_SWAP_ALLOWED_MODELS` (comma-separated) into a vec. Used to
-/// populate the model picker when `LLM_BACKEND=llamacpp` — `/insights/models`
-/// surfaces these slots with capabilities. Empty when unset.
-fn parse_llamacpp_allowed_models() -> Vec<String> {
-    env::var("LLAMA_SWAP_ALLOWED_MODELS")
-        .unwrap_or_default()
-        .split(',')
-        .map(|s| s.trim().to_string())
-        .filter(|s| !s.is_empty())
-        .collect()
-}
-
 #[cfg(test)]
 impl AppState {
    /// Creates an AppState instance for testing with temporary directories
    pub fn test_state() -> Self {
-        use crate::database::insight_generation_job_dao::SqliteInsightGenerationJobDao;
        use actix::Actor;
        // Create a base temporary directory
        let temp_dir = tempfile::tempdir().expect("Failed to create temp directory");
@@ -516,7 +386,6 @@ impl AppState {
        let insight_generator = InsightGenerator::new(
            ollama.clone(),
            None,
-            None,
            sms_client.clone(),
            apollo_client.clone(),
            insight_dao.clone(),
@@ -536,13 +405,12 @@ impl AppState {
            Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
        let insight_chat = Arc::new(InsightChatService::new(
            Arc::new(insight_generator.clone()),
+            ollama.clone(),
+            None,
            insight_dao.clone(),
            chat_locks,
        ));

-        // Turn registry for test state.
-        let turn_registry = Arc::new(TurnRegistry::new(300));
-
        // Initialize test preview DAO
        let preview_dao: Arc<Mutex<Box<dyn PreviewDao>>> =
            Arc::new(Mutex::new(Box::new(SqlitePreviewDao::new())));
@@ -566,19 +434,11 @@ impl AppState {
            ollama,
            None,
            Vec::new(),
-            None,
-            Vec::new(),
            sms_client,
            insight_generator,
            insight_chat,
-            turn_registry,
            preview_dao,
            FaceClient::new(None), // disabled in test
-            ClipClient::new(None), // disabled in test
-            Arc::new(Mutex::new(Box::new(SqliteInsightGenerationJobDao::new()))), // placeholder for test
-            Arc::new(Mutex::new(HashMap::new())), // placeholder for test
-            Arc::new(Mutex::new(Box::new(SqlitePrecomputedReelDao::new()))), // placeholder for test
-            Arc::new(Mutex::new(Box::new(SqliteUserAiPrefsDao::new()))), // placeholder for test
        )
    }
 }
@@ -168,7 +168,7 @@ async fn get_tags<D: TagDao>(
    // this file, so tags added under one library show up under the
    // others when they hold the same file. Falls back to direct rel_path
    // match when the file hasn't been hashed yet.
-    let library = libraries::resolve_library_param_state(&app_state, request.library.as_deref())
+    let library = libraries::resolve_library_param(&app_state, request.library.as_deref())
        .ok()
        .flatten()
        .unwrap_or_else(|| app_state.primary_library());
@@ -144,7 +144,6 @@ impl PreviewDao for TestPreviewDao {
        } else {
            Err(DbError {
                kind: DbErrorKind::UpdateError,
-                source: None,
            })
        }
    }
@@ -8,10 +8,7 @@
 //! skip them silently.

 use std::path::{Path, PathBuf};
-use std::process::Command;

-use image::GenericImageView;
-use image::codecs::jpeg::JpegEncoder;
 use lazy_static::lazy_static;
 use log::{debug, error, info, warn};
 use opentelemetry::{
@@ -29,26 +26,6 @@ use crate::libraries;
 use crate::otel::global_tracer;
 use crate::video::actors::{generate_image_thumbnail_ffmpeg, generate_video_thumbnail};

-/// Maximum long-edge size (px) for the large preview tier. Tuned to look
-/// crisp full-screen on a 3× phone (≈1290×2796 native) and to hold up
-/// through a few stops of pinch-zoom before the original streams in.
-/// Bigger doesn't help: callers that need true full resolution request
-/// `size=full` and the handler streams the original bytes.
-pub const LARGE_PREVIEW_MAX_DIM: u32 = 2048;
-
-/// JPEG quality for the large and xlarge preview tiers. 85 is the
-/// conventional "indistinguishable from source at viewing size" point —
-/// well above the `image` crate's default ~75, but well below quality-90+
-/// territory where file size doubles for no perceptible win.
-const LARGE_PREVIEW_JPEG_QUALITY: u8 = 85;
-
-/// Maximum long-edge size (px) for the xlarge preview tier. Bridges the
-/// gap between `large` (2048px, ~16MB decoded) and the original bytes
-/// (potentially 48+ MP / ~192MB decoded). At 4096px the decoded bitmap is
-/// ~64MB — enough for 2-3× pinch-zoom on any phone before the viewer
-/// needs to stream the true original.
-pub const XLARGE_PREVIEW_MAX_DIM: u32 = 4096;
-
 lazy_static! {
    pub static ref IMAGE_GAUGE: IntGauge = IntGauge::new(
        "imageserver_image_total",
@@ -112,186 +89,6 @@ pub fn generate_image_thumbnail(src: &Path, thumb_path: &Path) -> std::io::Resul
    Ok(())
 }

-/// Build the on-demand "large" preview: a JPEG whose long edge is capped at
-/// [`LARGE_PREVIEW_MAX_DIM`].
-///
-/// Decode sources are tried in this order:
-///  1. the JPEG preview embedded in `src`, when
-///     `exif::extract_embedded_jpeg_preview` returns one;
-///  2. ffmpeg, when `file_types::needs_ffmpeg_thumbnail` reports the file
-///     needs it (handled entirely by `generate_large_preview_ffmpeg`);
-///  3. the `image` crate's own decoder.
-///
-/// Branches 1 and 3 apply the EXIF orientation read from `src` (falling back
-/// to `1` when it cannot be read) and then hand the bitmap to
-/// `encode_large_jpeg`, which downscales only when the source exceeds the cap.
-/// This function does not create `dest`'s parent directory.
-///
-/// # Errors
-/// A decode failure in branch 1 or 3 is reported as `InvalidData`; errors
-/// from the ffmpeg helper and from encoding are returned unchanged.
-pub fn generate_large_preview(src: &Path, dest: &Path) -> std::io::Result<()> {
-    let invalid_data = |msg: String| std::io::Error::new(std::io::ErrorKind::InvalidData, msg);
-    let orientation = exif::read_orientation(src).unwrap_or(1);
-
-    let decoded = match exif::extract_embedded_jpeg_preview(src) {
-        // Embedded preview wins over every other decode path.
-        Some(preview) => match image::load_from_memory(&preview) {
-            Ok(img) => img,
-            Err(e) => {
-                return Err(invalid_data(format!(
-                    "decode embedded preview {:?}: {}",
-                    src, e
-                )));
-            }
-        },
-        None if file_types::needs_ffmpeg_thumbnail(src) => {
-            return generate_large_preview_ffmpeg(src, dest);
-        }
-        None => match image::open(src) {
-            Ok(img) => img,
-            Err(e) => return Err(invalid_data(format!("{:?}: {}", src, e))),
-        },
-    };
-
-    encode_large_jpeg(exif::apply_orientation(decoded, orientation), dest)
-}
-
-/// Resize-if-needed, then JPEG-encode `img` to `dest` at
-/// [`LARGE_PREVIEW_JPEG_QUALITY`]. Shared by the embedded-preview and
-/// image-crate branches of `generate_large_preview`.
-///
-/// The image is downscaled to fit within [`LARGE_PREVIEW_MAX_DIM`] on both
-/// axes only when its long edge exceeds that cap; smaller images are encoded
-/// at their native size.
-///
-/// # Errors
-/// Returns an `io::Error` when `dest` cannot be created, when encoding
-/// fails, or when the buffered bytes cannot be flushed to the file.
-fn encode_large_jpeg(img: image::DynamicImage, dest: &Path) -> std::io::Result<()> {
-    let (w, h) = img.dimensions();
-    // Explicit guard so small sources are never resized.
-    let scaled = if w.max(h) > LARGE_PREVIEW_MAX_DIM {
-        img.thumbnail(LARGE_PREVIEW_MAX_DIM, LARGE_PREVIEW_MAX_DIM)
-    } else {
-        img
-    };
-    let file = std::fs::File::create(dest)
-        .map_err(|e| std::io::Error::other(format!("create {:?}: {}", dest, e)))?;
-    let mut writer = std::io::BufWriter::new(file);
-    JpegEncoder::new_with_quality(&mut writer, LARGE_PREVIEW_JPEG_QUALITY)
-        .encode_image(&scaled)
-        .map_err(|e| std::io::Error::other(format!("encode {:?}: {}", dest, e)))?;
-    // Flush explicitly: dropping a BufWriter discards any error from the
-    // final write, which would leave a truncated preview on disk while this
-    // function reports success.
-    std::io::Write::flush(&mut writer)
-        .map_err(|e| std::io::Error::other(format!("flush {:?}: {}", dest, e)))
-}
-
-/// ffmpeg-backed large preview, used for sources the `image` crate is not
-/// asked to decode (see `file_types::needs_ffmpeg_thumbnail`). Same idea as
-/// [`crate::video::actors::generate_image_thumbnail_ffmpeg`], but scaled to
-/// [`LARGE_PREVIEW_MAX_DIM`].
-///
-/// # Errors
-/// Fails if `ffmpeg` cannot be launched, or with an error carrying ffmpeg's
-/// exit status and trimmed stderr when it exits unsuccessfully.
-fn generate_large_preview_ffmpeg(src: &Path, dest: &Path) -> std::io::Result<()> {
-    // The scale expression caps whichever edge is longer: for landscape
-    // input the width becomes min(iw, cap) and the height is -1; otherwise
-    // the height becomes min(ih, cap) and the width is -1. The min() keeps
-    // sources that are already below the cap from being enlarged.
-    let vf = format!(
-        "scale='if(gt(iw,ih),min(iw,{cap}),-1)':'if(gt(iw,ih),-1,min(ih,{cap}))'",
-        cap = LARGE_PREVIEW_MAX_DIM
-    );
-    let output = Command::new("ffmpeg")
-        .args(["-y", "-i"])
-        .arg(src)
-        .args(["-vframes", "1", "-vf"])
-        .arg(&vf)
-        // Per the original author's note, mjpeg qscale 5 was picked to
-        // roughly match the q85 target of the non-ffmpeg branch.
-        .args(["-q:v", "5", "-f", "image2", "-c:v", "mjpeg"])
-        .arg(dest)
-        .output()?;
-
-    if output.status.success() {
-        return Ok(());
-    }
-    let stderr = String::from_utf8_lossy(&output.stderr);
-    Err(std::io::Error::other(format!(
-        "ffmpeg failed ({}): {}",
-        output.status,
-        stderr.trim()
-    )))
-}
-
-/// Build the on-demand "xlarge" preview: a JPEG whose long edge is capped at
-/// [`XLARGE_PREVIEW_MAX_DIM`].
-///
-/// Follows the same decode order as [`generate_large_preview`]: embedded
-/// JPEG preview first, then ffmpeg when `file_types::needs_ffmpeg_thumbnail`
-/// says so, then the `image` crate. The embedded-preview and image-crate
-/// branches apply the EXIF orientation read from `src` (defaulting to `1`)
-/// before encoding through `encode_xlarge_jpeg`.
-///
-/// # Errors
-/// Decode failures are reported as `InvalidData`; errors from the ffmpeg
-/// helper and from encoding are returned unchanged.
-pub fn generate_xlarge_preview(src: &Path, dest: &Path) -> std::io::Result<()> {
-    let orientation = exif::read_orientation(src).unwrap_or(1);
-
-    let decoded = if let Some(preview) = exif::extract_embedded_jpeg_preview(src) {
-        image::load_from_memory(&preview).map_err(|e| {
-            std::io::Error::new(
-                std::io::ErrorKind::InvalidData,
-                format!("decode embedded preview {:?}: {}", src, e),
-            )
-        })?
-    } else if file_types::needs_ffmpeg_thumbnail(src) {
-        // ffmpeg writes `dest` itself; nothing left to do here.
-        return generate_xlarge_preview_ffmpeg(src, dest);
-    } else {
-        image::open(src).map_err(|e| {
-            std::io::Error::new(std::io::ErrorKind::InvalidData, format!("{:?}: {}", src, e))
-        })?
-    };
-
-    encode_xlarge_jpeg(exif::apply_orientation(decoded, orientation), dest)
-}
-
-/// Resize-if-needed, then JPEG-encode `img` to `dest` for the xlarge tier.
-///
-/// The image is downscaled to fit within [`XLARGE_PREVIEW_MAX_DIM`] on both
-/// axes only when its long edge exceeds that cap. Encoding uses
-/// [`LARGE_PREVIEW_JPEG_QUALITY`], the same quality as the large tier.
-///
-/// # Errors
-/// Returns an `io::Error` when `dest` cannot be created, when encoding
-/// fails, or when the buffered bytes cannot be flushed to the file.
-fn encode_xlarge_jpeg(img: image::DynamicImage, dest: &Path) -> std::io::Result<()> {
-    let (w, h) = img.dimensions();
-    // Explicit guard so sources below the cap are never resized.
-    let scaled = if w.max(h) > XLARGE_PREVIEW_MAX_DIM {
-        img.thumbnail(XLARGE_PREVIEW_MAX_DIM, XLARGE_PREVIEW_MAX_DIM)
-    } else {
-        img
-    };
-    let file = std::fs::File::create(dest)
-        .map_err(|e| std::io::Error::other(format!("create {:?}: {}", dest, e)))?;
-    let mut writer = std::io::BufWriter::new(file);
-    JpegEncoder::new_with_quality(&mut writer, LARGE_PREVIEW_JPEG_QUALITY)
-        .encode_image(&scaled)
-        .map_err(|e| std::io::Error::other(format!("encode {:?}: {}", dest, e)))?;
-    // Flush explicitly: dropping a BufWriter discards any error from the
-    // final write, which would leave a truncated preview on disk while this
-    // function reports success.
-    std::io::Write::flush(&mut writer)
-        .map_err(|e| std::io::Error::other(format!("flush {:?}: {}", dest, e)))
-}
-
-/// ffmpeg-backed xlarge preview: identical invocation to
-/// `generate_large_preview_ffmpeg` except that the long edge is capped at
-/// [`XLARGE_PREVIEW_MAX_DIM`].
-///
-/// # Errors
-/// Fails if `ffmpeg` cannot be launched, or with an error carrying ffmpeg's
-/// exit status and trimmed stderr when it exits unsuccessfully.
-fn generate_xlarge_preview_ffmpeg(src: &Path, dest: &Path) -> std::io::Result<()> {
-    // Cap the longer edge at `cap` (never enlarging, thanks to min()) and
-    // pass -1 for the other edge.
-    let vf = format!(
-        "scale='if(gt(iw,ih),min(iw,{cap}),-1)':'if(gt(iw,ih),-1,min(ih,{cap}))'",
-        cap = XLARGE_PREVIEW_MAX_DIM
-    );
-    let output = Command::new("ffmpeg")
-        .args(["-y", "-i"])
-        .arg(src)
-        .args(["-vframes", "1", "-vf"])
-        .arg(&vf)
-        .args(["-q:v", "5", "-f", "image2", "-c:v", "mjpeg"])
-        .arg(dest)
-        .output()?;
-
-    if output.status.success() {
-        return Ok(());
-    }
-    let stderr = String::from_utf8_lossy(&output.stderr);
-    Err(std::io::Error::other(format!(
-        "ffmpeg failed ({}): {}",
-        output.status,
-        stderr.trim()
-    )))
-}
-
 pub fn create_thumbnails(libs: &[libraries::Library], excluded_dirs: &[String]) {
    let tracer = global_tracer();
    let span = tracer.start("creating thumbnails");
@@ -1,521 +0,0 @@
-//! `/photos/search/unified?q=<natural language>` — unified NL photo search.
-//!
-//! One free-text box that composes the two existing engines instead of making
-//! the user pick between them:
-//!  1. A grounded local-LLM call ([`crate::ai::nl_query`]) translates the
-//!     query into a structured filter + a semantic term.
-//!  2. Structured filters (tags / EXIF / geo / date / media-type) define the
-//!     candidate set; the semantic term ranks within it via CLIP.
-//!
-//! Path A (orchestration): we reuse `clip_search`'s scoring core and the
-//! existing `ExifDao` / `TagDao` queries, joining on `content_hash`. EXIF rows
-//! are the universal candidate carrier — each has `(library_id, file_path,
-//! content_hash, date_taken)` — so the structured filter is just a predicate
-//! over them, and the CLIP hits (which key on `content_hash`) intersect by
-//! hash. No new schema, no surgery on `list_photos`.
-//!
-//! Degenerate cases collapse to the existing behavior: semantic-only → plain
-//! CLIP search; filters-only → a date-sorted filtered listing.
-//!
-//! Person filtering is intentionally deferred (no person→photos resolver yet).
-
-use crate::AppState;
-use crate::ai::backend::{BackendKind, SamplingOverrides};
-use crate::ai::nl_query::{StructuredQuery, translate_nl_query};
-use crate::clip_search::{
-    SearchHit, parse_library_scope, resolve_hits, score_error_response, score_photos,
-};
-use crate::data::Claims;
-use crate::database::ExifDao;
-use crate::file_types::{is_image_file, is_video_file};
-use crate::geo::{forward_geocode, gps_bounding_box, haversine_distance};
-use crate::tags::TagDao;
-use actix_web::HttpResponse;
-use actix_web::web::{Data, Query};
-use serde::{Deserialize, Serialize};
-use std::collections::HashSet;
-use std::path::Path;
-use std::sync::Mutex;
-
-/// Query-string parameters accepted by the unified search handler.
-#[derive(Debug, Deserialize)]
-pub struct UnifiedQuery {
-    /// Natural-language query. Required; empty triggers 400.
-    pub q: String,
-    /// Page size. Defaults to `default_limit()`; the handler clamps it to
-    /// the range 1..=200.
-    #[serde(default = "default_limit")]
-    pub limit: usize,
-    /// Number of matches to skip before the returned page. An offset past
-    /// the end of the result list yields an empty page, not an error.
-    #[serde(default)]
-    pub offset: usize,
-    /// CLIP cosine floor for the semantic ranking stage. Same default as the
-    /// plain search endpoint. The handler clamps it to [-1.0, 1.0] and
-    /// replaces it with -1.0 whenever a structured filter is active.
-    #[serde(default = "default_threshold")]
-    pub threshold: f32,
-    /// Legacy single-library scope (see clip_search).
-    pub library: Option<i32>,
-    /// Multi-library scope, comma-separated ids.
-    pub library_ids: Option<String>,
-    /// Optional model override. The client passes the user's currently-selected
-    /// local model so the translation step reuses a model that's already loaded
-    /// (avoids a llama-swap eviction / cold start). Falls back to the configured
-    /// default local model when absent. Local only — no hybrid here.
-    /// A non-empty `UNIFIED_SEARCH_MODEL` environment variable takes
-    /// precedence over this field.
-    pub model: Option<String>,
-}
-
-/// Serde default for `UnifiedQuery::limit`: 20 results per page.
-fn default_limit() -> usize {
-    20
-}
-/// Serde default for `UnifiedQuery::threshold`: a CLIP cosine floor of 0.20.
-fn default_threshold() -> f32 {
-    0.20
-}
-
-/// A geocoded place echoed back so the client can show / edit the location
-/// filter it actually searched.
-///
-/// Every field is copied verbatim from the value `forward_geocode` returned
-/// for the place name the model extracted.
-#[derive(Debug, Serialize)]
-struct ResolvedPlace {
-    /// Display name reported by the geocoder.
-    display_name: String,
-    /// Centre of the search circle (latitude).
-    lat: f64,
-    /// Centre of the search circle (longitude).
-    lon: f64,
-    /// Radius of the search circle; rows farther than this from the centre
-    /// (by `haversine_distance`) are dropped from the candidate set.
-    radius_km: f64,
-}
-
-/// How the server interpreted the NL query — echoed to the client to render
-/// editable filter chips. tag ids map to the client's existing tag list.
-#[derive(Debug, Serialize)]
-struct Interpreted {
-    /// The model's semantic term exactly as extracted. The term actually
-    /// sent to CLIP may be longer: unmatched tag words are appended to it
-    /// for ranking but are not merged into this field.
-    semantic: Option<String>,
-    /// Tags a photo may carry to match (any-of).
-    tag_ids: Vec<i32>,
-    /// Tags passed to the tag lookup as exclusions.
-    exclude_tag_ids: Vec<i32>,
-    /// Words the model treated as tags that don't exist in the vocab; folded
-    /// into the semantic term and surfaced here so the UI can explain it.
-    unmatched_tags: Vec<String>,
-    camera_make: Option<String>,
-    camera_model: Option<String>,
-    lens_model: Option<String>,
-    // NOTE(review): presumably Unix timestamps matching `date_taken` —
-    // confirm against `StructuredQuery` / `query_by_exif`.
-    date_from: Option<i64>,
-    date_to: Option<i64>,
-    /// `"video"` selects video files; any other value selects image files.
-    media_type: Option<String>,
-    /// Present only when the extracted place name was geocoded successfully.
-    place: Option<ResolvedPlace>,
-}
-
-/// JSON body returned on a successful unified search.
-#[derive(Debug, Serialize)]
-struct UnifiedResponse {
-    /// The trimmed natural-language query that was searched.
-    query: String,
-    /// Structured interpretation of `query`, for the client's filter chips.
-    interpreted: Interpreted,
-    /// CLIP model version used for ranking; `None` when the query had no
-    /// semantic term (filters-only).
-    model_version: Option<String>,
-    /// Embeddings scored by CLIP (0 when filters-only).
-    considered: usize,
-    /// Matches before pagination.
-    total_matching: usize,
-    /// The offset the request asked for, echoed back unchanged.
-    offset: usize,
-    /// The requested page of hits. Filters-only results carry a score of 0.0.
-    results: Vec<SearchHit>,
-}
-
-/// JSON body used for this endpoint's error responses: `{"error": "..."}`.
-#[derive(Debug, Serialize)]
-struct ErrorBody {
-    /// Human-readable description of what went wrong.
-    error: String,
-}
-
-/// Build a 400 Bad Request response whose JSON body is an [`ErrorBody`]
-/// carrying `msg`.
-fn bad_request(msg: impl Into<String>) -> HttpResponse {
-    let body = ErrorBody { error: msg.into() };
-    HttpResponse::BadRequest().json(body)
-}
-
-/// Build the text that is actually sent to CLIP: the model's semantic term
-/// (when present) followed by every tag word that missed the vocabulary,
-/// joined with single spaces. Folding the unmatched words in keeps a
-/// hallucinated or non-vocab tag as a soft semantic signal instead of
-/// dropping it.
-///
-/// Returns `None` only when there is neither a semantic term nor any
-/// unmatched tag.
-fn effective_semantic(sq: &StructuredQuery) -> Option<String> {
-    let parts: Vec<&str> = sq
-        .semantic
-        .as_deref()
-        .into_iter()
-        .chain(sq.unmatched_tags.iter().map(String::as_str))
-        .collect();
-    // Emptiness is judged on the part list, not on the joined text, so an
-    // empty-string semantic term still produces `Some`.
-    (!parts.is_empty()).then(|| parts.join(" "))
-}
-
-/// Handler for the unified natural-language photo search.
-///
-/// Pipeline:
-///  1. Translate `q` into a `StructuredQuery` with the local LLM backend,
-///     grounded on the tag vocabulary read from `tag_dao`.
-///  2. Forward-geocode the extracted place name (if any) into a GPS circle.
-///  3. When any structured filter is active, build the candidate set of
-///     EXIF rows that pass every filter.
-///  4. Rank: with a semantic term, score with CLIP and (if filters are
-///     active) keep only hits whose content hash is in the candidate set;
-///     without one, return the candidates newest-first.
-///
-/// Responses: 400 for an empty `q`, an invalid library scope, or a query
-/// with neither a semantic term nor a filter; 503 when the LLM backend
-/// cannot be resolved; 502 when translation fails; CLIP scoring errors are
-/// mapped by `score_error_response`; otherwise 200 with a `UnifiedResponse`.
-///
-/// NOTE(review): `exclude_tag_ids` is only consulted when `tag_ids` is
-/// non-empty, and it does not count toward `has_struct` — an exclude-only
-/// query therefore applies no tag exclusion. Confirm whether that is intended.
-pub async fn unified_search<TagD: TagDao>(
-    _: Claims,
-    state: Data<AppState>,
-    exif_dao: Data<Mutex<Box<dyn ExifDao>>>,
-    tag_dao: Data<Mutex<TagD>>,
-    query: Query<UnifiedQuery>,
-) -> HttpResponse {
-    let nl = query.q.trim().to_string();
-    if nl.is_empty() {
-        return bad_request("query parameter `q` is required");
-    }
-
-    let limit = query.limit.clamp(1, 200);
-    let offset = query.offset;
-    let threshold = query.threshold.clamp(-1.0, 1.0);
-
-    let library_ids = match parse_library_scope(query.library_ids.as_deref(), query.library) {
-        Ok(ids) => ids,
-        Err(msg) => return bad_request(msg),
-    };
-
-    let ctx = opentelemetry::Context::current();
-
-    // ── 1. Translate the NL query, grounded on the real tag vocabulary ──
-    // A vocabulary read failure is logged and degrades to an empty vocab
-    // rather than failing the request.
-    let tag_vocab: Vec<(i32, String)> = {
-        let mut dao = tag_dao.lock().expect("tag dao");
-        match dao.get_all_tags(&ctx, None) {
-            Ok(tags) => tags.into_iter().map(|(_, t)| (t.id, t.name)).collect(),
-            Err(e) => {
-                log::warn!("unified_search: get_all_tags failed: {e:?}");
-                Vec::new()
-            }
-        }
-    };
-
-    // Respect env/config for the LLM backend (LLM_BACKEND → ollama or
-    // llama-swap); local only, no hybrid, per the feature's design.
-    //
-    // Translation-model precedence:
-    //   1. UNIFIED_SEARCH_MODEL env — pin a small, fast model that can stay
-    //      co-resident with CLIP (and the chat model) so translation never
-    //      evicts them. This is the recommended setup on a tight VRAM budget.
-    //   2. the client-selected model — routes translation to whatever the user
-    //      already has loaded (no swap) when no dedicated model is pinned.
-    //   3. None → resolve_backend uses the configured default local model.
-    let translation_model = std::env::var("UNIFIED_SEARCH_MODEL")
-        .ok()
-        .filter(|m| !m.trim().is_empty())
-        .or_else(|| query.model.clone())
-        .filter(|m| !m.trim().is_empty());
-    let overrides = SamplingOverrides {
-        model: translation_model,
-        num_ctx: None,
-        temperature: None,
-        top_p: None,
-        top_k: None,
-        min_p: None,
-        enable_thinking: None,
-    };
-    let backend = match state
-        .insight_generator
-        .resolve_backend(BackendKind::Local, &overrides)
-        .await
-    {
-        Ok(b) => b,
-        Err(e) => {
-            log::warn!("unified_search: resolve_backend failed: {e:?}");
-            return HttpResponse::ServiceUnavailable().json(ErrorBody {
-                error: "LLM backend unavailable".into(),
-            });
-        }
-    };
-    log::info!("unified_search: translating with model={}", backend.model());
-
-    let today = chrono::Utc::now().date_naive();
-    let sq = match translate_nl_query(backend.chat(), &nl, &tag_vocab, today).await {
-        Ok(sq) => sq,
-        Err(e) => {
-            log::warn!("unified_search: translate_nl_query failed: {e:?}");
-            return HttpResponse::BadGateway().json(ErrorBody {
-                error: "could not interpret the query".into(),
-            });
-        }
-    };
-
-    // ── 2. Forward-geocode the place name into a gps circle ──
-    // A place that fails to geocode simply contributes no location filter.
-    let resolved_place = match sq.place.as_deref() {
-        Some(p) => forward_geocode(p).await.map(|g| ResolvedPlace {
-            display_name: g.display_name,
-            lat: g.lat,
-            lon: g.lon,
-            radius_km: g.radius_km,
-        }),
-        None => None,
-    };
-    let gps = resolved_place.as_ref().map(|p| (p.lat, p.lon, p.radius_km));
-
-    let semantic = effective_semantic(&sq);
-
-    let has_exif_filter = sq.camera_make.is_some()
-        || sq.camera_model.is_some()
-        || sq.lens_model.is_some()
-        || sq.date_from.is_some()
-        || sq.date_to.is_some();
-    let has_struct =
-        has_exif_filter || gps.is_some() || !sq.tag_ids.is_empty() || sq.media_type.is_some();
-
-    // Stage trace: what the model extracted + whether a structured filter is
-    // active. The chips show this to the user too, but logging it makes the
-    // "why no results" path debuggable from the server side.
-    log::info!(
-        "unified_search: q={nl:?} semantic={:?} tag_ids={:?} exclude={:?} place={:?} gps={:?} date=({:?},{:?}) media={:?} unmatched={:?} has_struct={has_struct}",
-        sq.semantic,
-        sq.tag_ids,
-        sq.exclude_tag_ids,
-        resolved_place.as_ref().map(|p| p.display_name.as_str()),
-        gps,
-        sq.date_from,
-        sq.date_to,
-        sq.media_type,
-        sq.unmatched_tags,
-    );
-
-    // ── 3. Build the structured candidate set (EXIF rows passing every
-    // filter). Skipped entirely for a pure-semantic query. ──
-    let mut candidate: Vec<crate::database::models::ImageExif> = Vec::new();
-    let mut allowed_hashes: HashSet<String> = HashSet::new();
-    if has_struct {
-        // Tag membership set (rel_path only — same cross-library imprecision
-        // as the existing /photos tag listing). ANY-mode: a photo matches if
-        // it carries any of the named tags. ALL-mode over-constrains NL
-        // queries (the model maps several words to tags and few photos carry
-        // them all); the semantic term does the precision work instead.
-        // A DAO failure yields an empty set, i.e. no row passes the tag test.
-        let tag_set: Option<HashSet<String>> = if sq.tag_ids.is_empty() {
-            None
-        } else {
-            let mut dao = tag_dao.lock().expect("tag dao");
-            match dao.get_files_with_any_tag_ids(
-                sq.tag_ids.clone(),
-                sq.exclude_tag_ids.clone(),
-                &ctx,
-            ) {
-                Ok(files) => Some(files.into_iter().map(|f| f.file_name).collect()),
-                Err(e) => {
-                    log::warn!("unified_search: tag filter failed: {e:?}");
-                    Some(HashSet::new())
-                }
-            }
-        };
-        log::info!(
-            "unified_search: tag_ids={:?} -> tag_set_files={:?}",
-            sq.tag_ids,
-            tag_set.as_ref().map(|s| s.len())
-        );
-
-        // EXIF query handles camera/lens/gps-box/date. With no EXIF filters
-        // it returns the whole table, which we then narrow by the predicates
-        // below (tags / media / scope). Fine at personal-library scale.
-        let gps_bounds = gps.map(|(lat, lon, r)| gps_bounding_box(lat, lon, r));
-        let rows = {
-            let mut dao = exif_dao.lock().expect("exif dao");
-            dao.query_by_exif(
-                &ctx,
-                None, // scope filtered in-Rust to support multi-library
-                sq.camera_make.as_deref(),
-                sq.camera_model.as_deref(),
-                sq.lens_model.as_deref(),
-                gps_bounds,
-                sq.date_from,
-                sq.date_to,
-            )
-            .unwrap_or_else(|e| {
-                log::warn!("unified_search: query_by_exif failed: {e:?}");
-                Vec::new()
-            })
-        };
-
-        candidate = rows
-            .into_iter()
-            .filter(|row| {
-                // Library scope.
-                if !library_ids.is_empty() && !library_ids.contains(&row.library_id) {
-                    return false;
-                }
-                // Precise GPS distance (the EXIF query only did a coarse box).
-                // Rows without coordinates never match a location filter.
-                if let Some((lat, lon, radius_km)) = gps {
-                    match (row.gps_latitude, row.gps_longitude) {
-                        (Some(plat), Some(plon)) => {
-                            if haversine_distance(lat, lon, plat as f64, plon as f64) > radius_km {
-                                return false;
-                            }
-                        }
-                        _ => return false,
-                    }
-                }
-                // Media type.
-                if let Some(mt) = sq.media_type.as_deref() {
-                    let p = Path::new(&row.file_path);
-                    let ok = if mt == "video" {
-                        is_video_file(p)
-                    } else {
-                        is_image_file(p)
-                    };
-                    if !ok {
-                        return false;
-                    }
-                }
-                // Tag membership.
-                if let Some(ts) = &tag_set
-                    && !ts.contains(&row.file_path)
-                {
-                    return false;
-                }
-                true
-            })
-            .collect();
-
-        allowed_hashes = candidate
-            .iter()
-            .filter_map(|r| r.content_hash.clone())
-            .collect();
-        log::info!(
-            "unified_search: candidate_rows={} allowed_hashes={}",
-            candidate.len(),
-            allowed_hashes.len()
-        );
-    }
-
-    // ── 4. Rank ──
-    match semantic {
-        Some(ref sem) => {
-            // When structured filters are present they ARE the constraint —
-            // CLIP only ranks within the candidate set. So drop the global
-            // similarity threshold (it's tuned for whole-library search and
-            // would pre-discard filter-matching photos that scored just under
-            // it — e.g. a 2022 beach photo at 0.18 — before the intersection
-            // ever runs). With no filters, keep the user's threshold for the
-            // plain semantic case.
-            let clip_threshold = if has_struct { -1.0 } else { threshold };
-            let scored = match score_photos(
-                &state,
-                &exif_dao,
-                sem,
-                &library_ids,
-                clip_threshold,
-                None,
-            )
-            .await
-            {
-                Ok(s) => s,
-                Err(e) => return score_error_response(e),
-            };
-            let considered = scored.considered;
-            let clip_hits = scored.hits.len();
-            let hits: Vec<(f32, String)> = if has_struct {
-                scored
-                    .hits
-                    .into_iter()
-                    .filter(|(_, h)| allowed_hashes.contains(h))
-                    .collect()
-            } else {
-                scored.hits
-            };
-            log::info!(
-                "unified_search: clip considered={considered} hits={clip_hits} after_struct_filter={}",
-                hits.len()
-            );
-            let total_matching = hits.len();
-            let page = paginate(&hits, offset, limit);
-            let results = resolve_hits(&exif_dao, &page);
-            HttpResponse::Ok().json(UnifiedResponse {
-                query: nl,
-                interpreted: interpreted(&sq, resolved_place),
-                model_version: Some(scored.model_version),
-                considered: scored.considered,
-                total_matching,
-                offset,
-                results,
-            })
-        }
-        None => {
-            // Filters-only: no semantic term. Require at least one filter,
-            // then return the candidate set newest-first.
-            if !has_struct {
-                return bad_request("query had no searchable terms");
-            }
-            candidate.sort_by(|a, b| b.date_taken.cmp(&a.date_taken));
-            let total_matching = candidate.len();
-            log::info!("unified_search: filters-only matches={total_matching}");
-            // `offset` comes straight from the query string, so the sum is
-            // saturated: a huge offset must yield an empty page rather than
-            // an arithmetic-overflow panic.
-            let end = offset.saturating_add(limit).min(total_matching);
-            let results: Vec<SearchHit> = if offset >= total_matching {
-                Vec::new()
-            } else {
-                candidate[offset..end]
-                    .iter()
-                    .map(|r| SearchHit {
-                        library_id: r.library_id,
-                        rel_path: r.file_path.clone(),
-                        content_hash: r.content_hash.clone().unwrap_or_default(),
-                        score: 0.0,
-                    })
-                    .collect()
-            };
-            HttpResponse::Ok().json(UnifiedResponse {
-                query: nl,
-                interpreted: interpreted(&sq, resolved_place),
-                model_version: None,
-                considered: 0,
-                total_matching,
-                offset,
-                results,
-            })
-        }
-    }
-}
-
-/// Return the page `[offset, offset + limit)` of an already-sorted hit list
-/// as owned `(score, content_hash)` pairs.
-///
-/// Out-of-range requests are tolerated: an `offset` at or past the end, or a
-/// `limit` of 0, yields an empty page, and a `limit` that runs past the end
-/// is truncated. `skip`/`take` are used instead of computing
-/// `offset + limit`, so no combination of arguments can overflow.
-fn paginate(hits: &[(f32, String)], offset: usize, limit: usize) -> Vec<(f32, String)> {
-    hits.iter().skip(offset).take(limit).cloned().collect()
-}
-
-/// Assemble the [`Interpreted`] echo for the response: every filter field is
-/// copied from the translated query as-is, and `place` is the geocoded
-/// location (if any) supplied by the caller.
-fn interpreted(sq: &StructuredQuery, place: Option<ResolvedPlace>) -> Interpreted {
-    let StructuredQuery {
-        semantic,
-        tag_ids,
-        exclude_tag_ids,
-        unmatched_tags,
-        camera_make,
-        camera_model,
-        lens_model,
-        date_from,
-        date_to,
-        media_type,
-        ..
-    } = sq;
-    Interpreted {
-        semantic: semantic.clone(),
-        tag_ids: tag_ids.clone(),
-        exclude_tag_ids: exclude_tag_ids.clone(),
-        unmatched_tags: unmatched_tags.clone(),
-        camera_make: camera_make.clone(),
-        camera_model: camera_model.clone(),
-        lens_model: lens_model.clone(),
-        date_from: *date_from,
-        date_to: *date_to,
-        media_type: media_type.clone(),
-        place,
-    }
-}
-
-// Unit tests for the pure helpers in this module (`effective_semantic`,
-// `paginate`); the handler itself is not exercised here.
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::ai::nl_query::StructuredQuery;
-
-    // Semantic term comes first, unmatched tag words follow, space-joined.
-    #[test]
-    fn effective_semantic_combines_semantic_and_unmatched() {
-        let sq = StructuredQuery {
-            semantic: Some("sunset".into()),
-            unmatched_tags: vec!["golden hour".into()],
-            ..Default::default()
-        };
-        assert_eq!(
-            effective_semantic(&sq).as_deref(),
-            Some("sunset golden hour")
-        );
-    }
-
-    // No semantic term and no unmatched tags: nothing to send to CLIP.
-    #[test]
-    fn effective_semantic_none_when_empty() {
-        let sq = StructuredQuery::default();
-        assert_eq!(effective_semantic(&sq), None);
-    }
-
-    // Unmatched tag words alone are enough to form a semantic term.
-    #[test]
-    fn effective_semantic_unmatched_only() {
-        let sq = StructuredQuery {
-            unmatched_tags: vec!["disco".into()],
-            ..Default::default()
-        };
-        assert_eq!(effective_semantic(&sq).as_deref(), Some("disco"));
-    }
-
-    // Offset past the end gives an empty page; a limit that overruns the
-    // list is truncated to what remains.
-    #[test]
-    fn paginate_handles_out_of_range_offset() {
-        let hits = vec![(0.9, "a".to_string()), (0.8, "b".to_string())];
-        assert_eq!(paginate(&hits, 5, 10).len(), 0);
-        assert_eq!(paginate(&hits, 0, 1).len(), 1);
-        assert_eq!(paginate(&hits, 1, 10).len(), 1);
-    }
-}
@@ -1,18 +1,18 @@
-use crate::content_hash;
 use crate::database::PreviewDao;
 use crate::libraries::Library;
 use crate::otel::global_tracer;
+use crate::thumbnails::is_video;
 use crate::video::ffmpeg::{generate_preview_clip, get_duration_seconds_blocking};
-use crate::video::hls_paths;
 use actix::prelude::*;
-use log::{debug, error, info, warn};
+use log::{debug, error, info, trace, warn};
 use opentelemetry::KeyValue;
 use opentelemetry::trace::{Span, Status, Tracer};
 use std::io::Result;
 use std::path::{Path, PathBuf};
-use std::process::{Command, Stdio};
+use std::process::{Child, Command, ExitStatus, Stdio};
 use std::sync::{Arc, Mutex};
 use tokio::sync::Semaphore;
+use walkdir::{DirEntry, WalkDir};
 // ffmpeg -i test.mp4 -c:v h264 -flags +cgop -g 30 -hls_time 3 out.m3u8
 // ffmpeg -i "filename.mp4" -preset veryfast -c:v libx264 -f hls -hls_list_size 100 -hls_time 2 -crf 24 -vf scale=1080:-2,setsar=1:1 attempt/vid_out.m3u8

@@ -22,14 +22,89 @@ impl Actor for StreamActor {
    type Context = Context<Self>;
 }

-/// A video paired with its content hash, ready to be queued for HLS
-/// playlist generation. Hash is required because all output paths are
-/// keyed on it; callers that lack a hash (rows mid-backfill) must skip
-/// the video rather than fabricate one.
-#[derive(Debug, Clone)]
-pub struct VideoToQueue {
-    pub video_path: PathBuf,
-    pub content_hash: String,
+/// Actor message that hands a spawned child process to [`StreamActor`] so
+/// the actor can wait for it to exit.
+///
+/// Field 0 is a label that the handler only uses in its debug log line;
+/// field 1 is the child process whose exit is awaited.
+pub struct ProcessMessage(pub String, pub Child);
+
+/// The reply to a [`ProcessMessage`] is the outcome of waiting on the child:
+/// its `ExitStatus`, or the I/O error the wait produced.
+impl Message for ProcessMessage {
+    type Result = Result<ExitStatus>;
+}
+
+/// Waits for the child process carried by a [`ProcessMessage`] and replies
+/// with the outcome of that wait.
+impl Handler<ProcessMessage> for StreamActor {
+    type Result = Result<ExitStatus>;
+
+    /// Calls `wait()` on the child synchronously inside the handler, logs the
+    /// label together with the exit code at debug level, and returns the
+    /// wait result unchanged. The logged code is `-1` when the wait failed
+    /// or the status carries no exit code.
+    fn handle(&mut self, msg: ProcessMessage, _ctx: &mut Self::Context) -> Self::Result {
+        trace!("Message received");
+        let ProcessMessage(label, mut child) = msg;
+        let outcome = child.wait();
+
+        let exit_code = match &outcome {
+            Ok(status) => status.code().unwrap_or(-1),
+            Err(_) => -1,
+        };
+        debug!("Finished waiting for: {:?}. Code: {:?}", label, exit_code);
+        outcome
+    }
+}
+
+/// Compute the playlist path for a video: `<playlist_dir>/<file name>.m3u8`.
+///
+/// Only the final component of `video_path` is used, so videos that share a
+/// file name in different directories map to the same playlist path. When
+/// `video_path` has no final component, or it is not valid UTF-8, the name
+/// `unknown` is used instead.
+pub fn playlist_file_for(playlist_dir: &str, video_path: &Path) -> PathBuf {
+    let name = match video_path.file_name().and_then(|component| component.to_str()) {
+        Some(name) => name,
+        None => "unknown",
+    };
+    format!("{}/{}.m3u8", playlist_dir, name).into()
+}
+
+/// Path of the "unsupported" marker for a playlist: the playlist path with
+/// `.unsupported` appended to it (the existing extension is kept, not
+/// replaced).
+///
+/// Per the original author's note, the marker is written when ffmpeg cannot
+/// transcode the source (e.g. a truncated mp4 with no moov atom) so that
+/// later scans skip the file instead of re-running ffmpeg; deleting the
+/// marker forces a retry. That behaviour lives in the callers, not here —
+/// this function only derives the path.
+pub fn playlist_unsupported_sentinel(playlist_file: &Path) -> PathBuf {
+    let mut sentinel = playlist_file.to_path_buf().into_os_string();
+    sentinel.push(".unsupported");
+    sentinel.into()
+}
+
+/// Start an ffmpeg process that transcodes `video_path` into an HLS playlist
+/// at `playlist_file`, and return the handle to that process.
+///
+/// After spawning, the function polls once per second — for a little over
+/// five seconds at most — until the playlist file exists, then returns. The
+/// child is NOT waited on here; the caller owns the returned [`Child`].
+/// ffmpeg's stdout and stderr are discarded.
+///
+/// # Errors
+/// * `ErrorKind::AlreadyExists` when `playlist_file` is already present.
+/// * The spawn error when `ffmpeg` cannot be launched. This is returned
+///   immediately rather than after the polling loop, since no playlist can
+///   appear if the process never started.
+pub async fn create_playlist(video_path: &str, playlist_file: &str) -> Result<Child> {
+    if Path::new(playlist_file).exists() {
+        debug!("Playlist already exists: {}", playlist_file);
+        return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
+    }
+
+    let child = Command::new("ffmpeg")
+        .arg("-i")
+        .arg(video_path)
+        .arg("-c:v")
+        .arg("h264")
+        .arg("-crf")
+        .arg("21")
+        .arg("-preset")
+        .arg("veryfast")
+        .arg("-hls_time")
+        .arg("3")
+        .arg("-hls_list_size")
+        .arg("0")
+        .arg("-hls_playlist_type")
+        .arg("vod")
+        .arg("-vf")
+        .arg("scale='min(1080,iw)':-2,setsar=1:1")
+        .arg(playlist_file)
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .spawn()?;
+
+    // Give ffmpeg a moment to write the playlist before returning, so a
+    // caller that serves the file right away is more likely to find it.
+    let start_time = std::time::Instant::now();
+    loop {
+        actix::clock::sleep(std::time::Duration::from_secs(1)).await;
+
+        if Path::new(playlist_file).exists()
+            || start_time.elapsed() > std::time::Duration::from_secs(5)
+        {
+            break;
+        }
+    }
+
+    Ok(child)
 }

 pub fn generate_video_thumbnail(path: &Path, destination: &Path) -> std::io::Result<()> {
@@ -122,36 +197,16 @@ pub fn generate_image_thumbnail_ffmpeg(path: &Path, destination: &Path) -> std::
 /// Video stream metadata needed to pick HLS encode settings. Populated by
 /// a single ffprobe call to avoid spawning multiple subprocesses per video.
 #[derive(Debug, Default)]
-pub struct VideoStreamMeta {
-    pub is_h264: bool,
+struct VideoStreamMeta {
+    /// Logged by the probe as `codec_h264`.
+    /// NOTE(review): presumably true when ffprobe reports `codec_name` as
+    /// h264 — the assignment is outside this hunk; confirm.
+    is_h264: bool,
    /// Rotation in degrees (0/90/180/270). Checks both the legacy `rotate`
    /// stream tag and the modern display-matrix side data.
-    pub rotation: i32,
-    /// Frames per second. Prefers `avg_frame_rate` (handles VFR better than
-    /// `r_frame_rate`, which lies on variable-framerate sources). `None`
-    /// when ffprobe couldn't parse either field — caller picks a fallback.
-    pub frame_rate: Option<f32>,
-}
-
-/// Parse ffprobe's rational frame-rate strings (`"30000/1001"`,
-/// `"60/1"`, `"0/0"`). Rejects 0/0 (ffprobe's "unknown" sentinel),
-/// non-positive results, and anything wildly out of range so a malformed
-/// probe can't poison the scrubber's step size.
-fn parse_ffprobe_rational(s: &str) -> Option<f32> {
-    let (num, den) = s.split_once('/')?;
-    let num: f32 = num.parse().ok()?;
-    let den: f32 = den.parse().ok()?;
-    if den.abs() < f32::EPSILON {
-        return None;
-    }
-    let v = num / den;
-    (v.is_finite() && v > 0.0 && v < 1000.0).then_some(v)
+    rotation: i32,
 }

 /// Probe video stream metadata in one ffprobe call. Returns default (codec
-/// unknown, rotation 0, fps None) on any failure — callers fall back to
-/// transcoding / a default framerate.
-pub async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
+/// unknown, rotation 0) on any failure — callers fall back to transcoding.
+async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
    let output = tokio::process::Command::new("ffprobe")
        .arg("-v")
        .arg("error")
@@ -159,16 +214,8 @@ pub async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
        .arg("v:0")
        .arg("-print_format")
        .arg("json")
-        // NOTE: request `stream_side_data_list` (stream-level side data, read
-        // from the moov atom), NOT the bare `side_data_list` section. On modern
-        // ffprobe the latter is the *frame* side-data section, which forces
-        // ffprobe to enumerate every frame — reading the entire mdat over the
-        // network. For non-faststart phone clips on an SMB mount that turned a
-        // metadata probe into a full-file read (tens of seconds per open). The
-        // Display Matrix rotation we need is present at stream level, so this
-        // keeps codec/fps/rotation while reading only the header.
        .arg("-show_entries")
-        .arg("stream=codec_name,r_frame_rate,avg_frame_rate:stream_tags=rotate:stream_side_data_list")
+        .arg("stream=codec_name:stream_tags=rotate:side_data_list")
        .arg(video_path)
        .output()
        .await;
@@ -219,29 +266,12 @@ pub async fn probe_video_stream_meta(video_path: &str) -> VideoStreamMeta {
        })
        .unwrap_or(0);

-    // ffprobe reports frame rates as rational strings like "30000/1001".
-    // Prefer avg_frame_rate (handles VFR) and fall back to r_frame_rate.
-    let frame_rate = stream
-        .get("avg_frame_rate")
-        .and_then(|v| v.as_str())
-        .and_then(parse_ffprobe_rational)
-        .or_else(|| {
-            stream
-                .get("r_frame_rate")
-                .and_then(|v| v.as_str())
-                .and_then(parse_ffprobe_rational)
-        });
-
    debug!(
-        "Probed {}: codec_h264={}, rotation={}°, fps={:?}",
-        video_path, is_h264, rotation, frame_rate
+        "Probed {}: codec_h264={}, rotation={}°",
+        video_path, is_h264, rotation
    );

-    VideoStreamMeta {
-        is_h264,
-        rotation,
-        frame_rate,
-    }
+    VideoStreamMeta { is_h264, rotation }
 }

 /// Probe the max keyframe interval (GOP) in the first ~30s of a video.
@@ -301,17 +331,17 @@ async fn get_max_gop_seconds(video_path: &str) -> Option<f64> {
 }

+/// Actor that works out which videos still need an HLS playlist and hands
+/// them to the [`PlaylistGenerator`] actor.
 pub struct VideoPlaylistManager {
-    video_dir: PathBuf,
+    /// Directory checked for existing `<file name>.m3u8` playlists and their
+    /// `.unsupported` sentinels; also passed to the generator as the output
+    /// directory.
+    playlist_dir: PathBuf,
    playlist_generator: Addr<PlaylistGenerator>,
 }

 impl VideoPlaylistManager {
+    /// Create a manager that looks for playlists in `playlist_dir` and sends
+    /// generation requests to `playlist_generator`.
    pub fn new<P: Into<PathBuf>>(
-        video_dir: P,
+        playlist_dir: P,
        playlist_generator: Addr<PlaylistGenerator>,
    ) -> Self {
        Self {
-            video_dir: video_dir.into(),
+            playlist_dir: playlist_dir.into(),
            playlist_generator,
        }
    }
@@ -321,68 +351,144 @@ impl Actor for VideoPlaylistManager {
    type Context = Context<Self>;
 }

+impl Handler<ScanDirectoryMessage> for VideoPlaylistManager {
+    type Result = ResponseFuture<()>;
+
+    /// Walk `msg.directory`, collect the video files that have neither a
+    /// playlist nor an `.unsupported` sentinel in the playlist directory,
+    /// and send them to the generator one at a time, awaiting each result.
+    ///
+    /// The directory walk runs synchronously inside `handle`; only the
+    /// generation requests run in the returned future. Problems are logged
+    /// rather than panicking: a non-UTF-8 playlist directory aborts the
+    /// scan, a non-UTF-8 video path is skipped, and a generator mailbox
+    /// error stops the scan early.
+    fn handle(&mut self, msg: ScanDirectoryMessage, _ctx: &mut Self::Context) -> Self::Result {
+        let tracer = global_tracer();
+        let mut span = tracer.start("videoplaylistmanager.scan_directory");
+
+        let start = std::time::Instant::now();
+        info!(
+            "Starting scan directory for video playlist generation: {}",
+            msg.directory
+        );
+
+        // Playlist paths are assembled as strings downstream, so the output
+        // directory has to be valid UTF-8.
+        let Some(playlist_dir_str) = self.playlist_dir.to_str().map(str::to_owned) else {
+            error!(
+                "Playlist directory {:?} is not valid UTF-8; skipping scan of '{}'",
+                self.playlist_dir, msg.directory
+            );
+            return Box::pin(async {});
+        };
+
+        let video_files = WalkDir::new(&msg.directory)
+            .into_iter()
+            .filter_map(|e| e.ok())
+            .filter(|e| e.file_type().is_file())
+            .filter(is_video)
+            .filter(|e| {
+                let playlist = playlist_file_for(&playlist_dir_str, e.path());
+                !playlist.exists() && !playlist_unsupported_sentinel(&playlist).exists()
+            })
+            .collect::<Vec<DirEntry>>();
+
+        let scan_dir_name = msg.directory;
+        let playlist_generator = self.playlist_generator.clone();
+
+        Box::pin(async move {
+            for entry in video_files {
+                let path = entry.path();
+                // The generator converts the video path to a string with
+                // `unwrap`, so a non-UTF-8 path would panic inside that
+                // actor. Skip it here instead.
+                let Some(path_as_str) = path.to_str() else {
+                    warn!("Skipping video with non-UTF-8 path: {:?}", path);
+                    continue;
+                };
+                debug!(
+                    "Sending generate playlist message for path: {}",
+                    path_as_str
+                );
+
+                let outcome = playlist_generator
+                    .send(GeneratePlaylistMessage {
+                        playlist_path: playlist_dir_str.clone(),
+                        video_path: path.to_path_buf(),
+                    })
+                    .await;
+
+                match outcome {
+                    Ok(Ok(_)) => {
+                        span.add_event(
+                            "Playlist generated",
+                            vec![KeyValue::new("video_path", path_as_str.to_string())],
+                        );
+
+                        debug!(
+                            "Successfully generated playlist for file: '{}'",
+                            path_as_str
+                        );
+                    }
+                    Ok(Err(e)) if e.kind() == std::io::ErrorKind::AlreadyExists => {
+                        debug!("Playlist already exists for '{:?}', skipping", path);
+                    }
+                    Ok(Err(e)) => {
+                        warn!("Failed to generate playlist for path '{:?}'. {:?}", path, e);
+                    }
+                    Err(mailbox_err) => {
+                        // The generator did not deliver a reply; stop here
+                        // rather than log the same failure for every
+                        // remaining file.
+                        error!(
+                            "Failed to send generate playlist message for '{:?}': {}; aborting scan of '{}'",
+                            path, mailbox_err, scan_dir_name
+                        );
+                        return;
+                    }
+                }
+            }
+
+            span.add_event(
+                "Finished directory scan",
+                vec![KeyValue::new("directory", scan_dir_name.clone())],
+            );
+            info!(
+                "Finished directory scan of '{}' in {:?}",
+                scan_dir_name,
+                start.elapsed()
+            );
+        })
+    }
+}
+
 impl Handler<QueueVideosMessage> for VideoPlaylistManager {
    type Result = ();

+    /// Queue playlist generation for every path in `msg.video_paths` that has
+    /// neither a playlist nor an `.unsupported` sentinel in the playlist
+    /// directory. Requests are sent with `do_send`, so the result of each
+    /// generation is not observed here.
    fn handle(&mut self, msg: QueueVideosMessage, _ctx: &mut Self::Context) -> Self::Result {
-        if msg.videos.is_empty() {
+        if msg.video_paths.is_empty() {
            return;
        }

-        let video_dir = self.video_dir.clone();
+        // This count is the size of the incoming batch; entries that already
+        // have a playlist or sentinel are still skipped in the loop below.
+        info!(
+            "Queueing {} videos for HLS playlist generation",
+            msg.video_paths.len()
+        );
+
+        let playlist_output_dir = self.playlist_dir.clone();
+        // NOTE(review): panics if the playlist directory is not valid UTF-8.
+        let playlist_dir_str = playlist_output_dir.to_str().unwrap().to_string();
        let playlist_generator = self.playlist_generator.clone();

-        let mut queued = 0usize;
-        let mut already_present = 0usize;
-        for VideoToQueue {
-            video_path,
-            content_hash,
-        } in msg.videos
-        {
-            let playlist = hls_paths::playlist_for_hash(&video_dir, &content_hash);
-            let sentinel = hls_paths::sentinel_for_hash(&video_dir, &content_hash);
-            if playlist.exists() || sentinel.exists() {
-                already_present += 1;
+        for video_path in msg.video_paths {
+            let playlist = playlist_file_for(&playlist_dir_str, &video_path);
+            if playlist.exists() || playlist_unsupported_sentinel(&playlist).exists() {
                continue;
            }
-            debug!(
-                "Queueing playlist generation for {} (hash={})",
-                video_path.display(),
-                short_hash(&content_hash)
-            );
+            let path_str = video_path.to_string_lossy().to_string();
+            debug!("Queueing playlist generation for: {}", path_str);
+
            playlist_generator.do_send(GeneratePlaylistMessage {
+                playlist_path: playlist_dir_str.clone(),
                video_path,
-                content_hash,
            });
-            queued += 1;
        }
-        info!(
-            "Queue tick: {} queued, {} skipped (playlist or sentinel already on disk)",
-            queued, already_present
-        );
    }
 }

+/// Ask [`VideoPlaylistManager`] to walk a directory and generate HLS
+/// playlists for the videos in it that do not have one yet.
+#[derive(Message)]
+#[rtype(result = "()")]
+pub struct ScanDirectoryMessage {
+    /// Root directory handed to `WalkDir` for the scan.
+    pub(crate) directory: String,
+}
+
+/// Batch of videos for [`VideoPlaylistManager`] to queue for playlist
+/// generation; no per-video result is reported back.
 #[derive(Message)]
 #[rtype(result = "()")]
 pub struct QueueVideosMessage {
-    pub videos: Vec<VideoToQueue>,
+    /// Paths of the source videos. Entries that already have a playlist or
+    /// an `.unsupported` sentinel are skipped by the handler.
+    pub video_paths: Vec<PathBuf>,
 }

+/// Request for [`PlaylistGenerator`] to produce the HLS playlist for one
+/// video.
 #[derive(Message)]
 #[rtype(result = "Result<()>")]
 pub struct GeneratePlaylistMessage {
+    /// Source video to transcode.
    pub video_path: PathBuf,
-    pub content_hash: String,
+    /// Output directory; the playlist is written as
+    /// `<playlist_path>/<video file name>.m3u8`.
+    pub playlist_path: String,
 }

+/// Actor that runs the ffmpeg HLS encode for each [`GeneratePlaylistMessage`].
 pub struct PlaylistGenerator {
+    /// Created in `new` with `concurrency` permits (tunable via
+    /// `HLS_CONCURRENCY`).
+    /// NOTE(review): presumably acquired around each encode to cap parallel
+    /// ffmpeg runs — the acquisition is outside this view; confirm.
    semaphore: Arc<Semaphore>,
-    video_dir: PathBuf,
 }

 impl PlaylistGenerator {
-    pub(crate) fn new<P: Into<PathBuf>>(video_dir: P) -> Self {
+    pub(crate) fn new() -> Self {
        // Concurrency is tunable via HLS_CONCURRENCY so operators can dial
        // it to their hardware: 1 on weak Synology boxes to avoid thermal
        // throttling, higher on desktops with spare cores.
@@ -394,7 +500,6 @@ impl PlaylistGenerator {
        info!("PlaylistGenerator: concurrency={}", concurrency);
        PlaylistGenerator {
            semaphore: Arc::new(Semaphore::new(concurrency)),
-            video_dir: video_dir.into(),
        }
    }
 }
@@ -408,23 +513,20 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {

    fn handle(&mut self, msg: GeneratePlaylistMessage, _ctx: &mut Self::Context) -> Self::Result {
        let video_file = msg.video_path.to_str().unwrap().to_owned();
-        let content_hash_str = msg.content_hash.clone();
+        let playlist_path = msg.playlist_path.as_str().to_owned();
        let semaphore = self.semaphore.clone();
-        let video_dir = self.video_dir.clone();

-        let hash_dir = content_hash::hls_dir(&video_dir, &content_hash_str);
-        let playlist_path = hls_paths::playlist_for_hash(&video_dir, &content_hash_str);
-        let sentinel_path = hls_paths::sentinel_for_hash(&video_dir, &content_hash_str);
-        let segment_template = hls_paths::segment_template_for_hash(&video_dir, &content_hash_str);
-        let playlist_file = playlist_path.to_string_lossy().to_string();
-        let segment_pattern = segment_template.to_string_lossy().to_string();
+        let playlist_file = format!(
+            "{}/{}.m3u8",
+            playlist_path,
+            msg.video_path.file_name().unwrap().to_str().unwrap()
+        );

        let tracer = global_tracer();
        let mut span = tracer
            .span_builder("playlistgenerator.generate_playlist")
            .with_attributes(vec![
                KeyValue::new("video_file", video_file.clone()),
-                KeyValue::new("content_hash", content_hash_str.clone()),
                KeyValue::new("playlist_file", playlist_file.clone()),
            ])
            .start(&tracer);
@@ -448,7 +550,7 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                )],
            );

-            if playlist_path.exists() {
+            if Path::new(&playlist_file).exists() {
                debug!("Playlist already exists: {}", playlist_file);
                span.set_status(Status::error(format!(
                    "Playlist already exists: {}",
@@ -457,19 +559,6 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                return Err(std::io::Error::from(std::io::ErrorKind::AlreadyExists));
            }

-            // Ensure the shard + hash directory exist. Idempotent — the
-            // dir may already be present from a prior attempt that wrote
-            // a sentinel before being cleared for retry.
-            if let Err(e) = tokio::fs::create_dir_all(&hash_dir).await {
-                error!(
-                    "Failed to create HLS hash dir {}: {}",
-                    hash_dir.display(),
-                    e
-                );
-                span.set_status(Status::error(format!("mkdir failed: {}", e)));
-                return Err(e);
-            }
-
            // One ffprobe call for codec + rotation metadata.
            let stream_meta = probe_video_stream_meta(&video_file).await;
            let is_h264 = stream_meta.is_h264;
@@ -530,11 +619,16 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                span.add_event("Transcoding to h264", vec![]);
            }

-            // Encode to a .tmp playlist alongside the final inside the
-            // hash dir, so a concurrent scan never sees a half-written
-            // .m3u8 as "done". Segments use the hash-keyed template;
-            // ffmpeg writes them next to the playlist (relative refs).
+            // Encode to a .tmp playlist and explicit segment names so a failed
+            // encode leaves predictable artifacts we can clean up — and so a
+            // concurrent scan doesn't see a half-written .m3u8 as "done".
            let playlist_tmp = format!("{}.tmp", playlist_file);
+            let video_stem = msg
+                .video_path
+                .file_name()
+                .and_then(|n| n.to_str())
+                .unwrap_or("video");
+            let segment_pattern = format!("{}/{}_%03d.ts", playlist_path, video_stem);

            let mut cmd = tokio::process::Command::new("ffmpeg");
            cmd.arg("-y").arg("-i").arg(&video_file);
@@ -623,12 +717,12 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
            let success = matches!(&ffmpeg_result, Ok(out) if out.status.success());

            if success {
-                if let Err(e) = tokio::fs::rename(&playlist_tmp, &playlist_path).await {
+                if let Err(e) = tokio::fs::rename(&playlist_tmp, &playlist_file).await {
                    error!(
                        "ffmpeg succeeded but rename {} -> {} failed: {}",
                        playlist_tmp, playlist_file, e
                    );
-                    cleanup_partial_hls(&hash_dir).await;
+                    cleanup_partial_hls(&playlist_tmp, playlist_path.as_str(), video_stem).await;
                    span.set_status(Status::error(format!("rename failed: {}", e)));
                    return Err(e);
                }
@@ -645,17 +739,18 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
                    Err(e) => format!("ffmpeg failed: {}", e),
                };
                error!("ffmpeg failed for {}: {}", video_file, detail);
-                cleanup_partial_hls(&hash_dir).await;
-                if let Err(se) = tokio::fs::write(&sentinel_path, b"").await {
+                cleanup_partial_hls(&playlist_tmp, playlist_path.as_str(), video_stem).await;
+                let sentinel = playlist_unsupported_sentinel(Path::new(&playlist_file));
+                if let Err(se) = tokio::fs::write(&sentinel, b"").await {
                    warn!(
                        "Failed to write playlist sentinel {}: {}",
-                        sentinel_path.display(),
+                        sentinel.display(),
                        se
                    );
                } else {
                    info!(
                        "Wrote playlist sentinel {} so future scans skip {}",
-                        sentinel_path.display(),
+                        sentinel.display(),
                        video_file
                    );
                }
@@ -666,47 +761,29 @@ impl Handler<GeneratePlaylistMessage> for PlaylistGenerator {
    }
 }

-/// Delete the partial playlist (.tmp) and any segment files left behind by
-/// a failed ffmpeg run. Wipes every non-sentinel file in the hash dir;
-/// retains the sentinel if one has already been written by an earlier
-/// caller in the same path (today there is none, but kept defensively so
-/// the function is safe to call after sentinel write too).
-async fn cleanup_partial_hls(hash_dir: &Path) {
-    let Ok(mut entries) = tokio::fs::read_dir(hash_dir).await else {
+/// Delete the temp playlist and any segment files that ffmpeg may have written
+/// before failing. Called both on ffmpeg error and on rename failure so a
+/// retry on the next scan starts from a clean slate.
+///
+/// Only files in `playlist_dir` named exactly `<video_stem>_<digits>.ts` are
+/// treated as this video's segments. The playlist directory is shared by
+/// all videos, so a looser prefix match would also delete the segments of
+/// any other video whose file name happens to start with `<video_stem>_`.
+/// Every step is best-effort: failures are logged or ignored, never returned.
+async fn cleanup_partial_hls(playlist_tmp: &str, playlist_dir: &str, video_stem: &str) {
+    // The temp playlist may never have been created; a failed delete is fine.
+    let _ = tokio::fs::remove_file(playlist_tmp).await;
+
+    let segment_prefix = format!("{}_", video_stem);
+    let Ok(mut entries) = tokio::fs::read_dir(playlist_dir).await else {
        return;
    };
    while let Ok(Some(entry)) = entries.next_entry().await {
-        let path = entry.path();
-        let is_sentinel = path
-            .file_name()
-            .and_then(|n| n.to_str())
-            .map(|n| n == hls_paths::UNSUPPORTED_SENTINEL_FILENAME)
-            .unwrap_or(false);
-        if is_sentinel {
+        let Some(name) = entry.file_name().to_str().map(str::to_owned) else {
            continue;
-        }
-        if let Err(e) = tokio::fs::remove_file(&path).await {
-            warn!(
-                "Failed to remove partial HLS file {}: {}",
-                path.display(),
-                e
-            );
+        };
+        // Segments are written as `<stem>_%03d.ts`, so what sits between the
+        // prefix and the extension must be a non-empty run of digits.
+        let is_own_segment = name
+            .strip_prefix(&segment_prefix)
+            .and_then(|rest| rest.strip_suffix(".ts"))
+            .is_some_and(|index| !index.is_empty() && index.bytes().all(|b| b.is_ascii_digit()));
+        if is_own_segment
+            && let Err(e) = tokio::fs::remove_file(entry.path()).await
+        {
+            warn!("Failed to remove partial segment {}: {}", name, e);
        }
    }
 }

-/// First 16 chars of a content hash for log lines. Short enough to keep
-/// log volume sane, long enough that distinct hashes don't collide in
-/// practice.
-fn short_hash(hash: &str) -> &str {
-    let end = hash
-        .char_indices()
-        .nth(16)
-        .map(|(i, _)| i)
-        .unwrap_or(hash.len());
-    &hash[..end]
-}
-
 #[derive(Message)]
 #[rtype(result = "()")]
 pub struct GeneratePreviewClipMessage {
@@ -831,50 +908,3 @@ impl Handler<GeneratePreviewClipMessage> for PreviewClipGenerator {
        })
    }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::parse_ffprobe_rational;
-
-    #[test]
-    fn parses_common_rational_framerates() {
-        // NTSC 29.97 fps
-        assert!((parse_ffprobe_rational("30000/1001").unwrap() - 29.970_03).abs() < 1e-3);
-        // Plain integer fps
-        assert!((parse_ffprobe_rational("30/1").unwrap() - 30.0).abs() < 1e-6);
-        assert!((parse_ffprobe_rational("60/1").unwrap() - 60.0).abs() < 1e-6);
-        // iPhone slow-mo
-        assert!((parse_ffprobe_rational("240/1").unwrap() - 240.0).abs() < 1e-6);
-    }
-
-    #[test]
-    fn rejects_ffprobe_unknown_sentinel() {
-        // 0/0 is ffprobe's way of saying "I don't know" — must not be
-        // interpreted as 0 fps.
-        assert_eq!(parse_ffprobe_rational("0/0"), None);
-    }
-
-    #[test]
-    fn rejects_malformed_input() {
-        assert_eq!(parse_ffprobe_rational(""), None);
-        assert_eq!(parse_ffprobe_rational("30"), None);
-        assert_eq!(parse_ffprobe_rational("/1"), None);
-        assert_eq!(parse_ffprobe_rational("30/"), None);
-        assert_eq!(parse_ffprobe_rational("abc/def"), None);
-    }
-
-    #[test]
-    fn rejects_non_positive_results() {
-        // Negative numerator -> negative fps; meaningless.
-        assert_eq!(parse_ffprobe_rational("-30/1"), None);
-        // Zero numerator -> zero fps; also meaningless for frame stepping.
-        assert_eq!(parse_ffprobe_rational("0/1"), None);
-    }
-
-    #[test]
-    fn rejects_out_of_range() {
-        // Anything > 1000 fps is almost certainly garbage probe output,
-        // not a real source. (Real high-speed capture maxes near 1 kHz.)
-        assert_eq!(parse_ffprobe_rational("999999/1"), None);
-    }
-}
@@ -231,7 +231,7 @@ impl Ffmpeg {
 /// a hard failure — previously the `parse::<f64>` on empty stdout produced
 /// "cannot parse float from empty string" and poisoned the preview-clip row
 /// with status=failed, which the watcher would re-queue every full scan.
-pub async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
+async fn get_duration_seconds(input_file: &str) -> Result<Option<f64>> {
    if let Some(d) = probe_duration(input_file, "format=duration").await? {
        return Ok(Some(d));
    }
@@ -1,84 +0,0 @@
-//! Path layout for hash-keyed HLS output.
-//!
-//! Source-of-truth is [`crate::content_hash::hls_dir`], which gives
-//! `<video_dir>/<hash[..2]>/<hash>/`. The playlist, the per-segment files,
-//! and the "ffmpeg refused" sentinel all live inside that directory so a
-//! `.m3u8` written with relative segment references resolves correctly
-//! at serve time without any URL rewriting.
-
-use std::path::{Path, PathBuf};
-
-use crate::content_hash;
-
-/// Standard filename for the HLS playlist inside a hash dir. Fixed so
-/// the URL contract is `playlist.m3u8` regardless of the source video's
-/// original basename.
-pub const PLAYLIST_FILENAME: &str = "playlist.m3u8";
-
-/// Sentinel filename written when ffmpeg refused to transcode the
-/// source. Presence in the hash dir tells future scans to skip the file
-/// instead of re-running ffmpeg every tick. Delete to force a retry.
-pub const UNSUPPORTED_SENTINEL_FILENAME: &str = "playlist.unsupported";
-
-/// Segment-name template passed to ffmpeg via `-hls_segment_filename`.
-/// Segments live inside the hash dir; the playlist's relative refs
-/// resolve to siblings automatically.
-pub const SEGMENT_TEMPLATE: &str = "segment_%03d.ts";
-
-/// Path to the HLS playlist for a video identified by content hash.
-pub fn playlist_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
-    content_hash::hls_dir(video_dir, hash).join(PLAYLIST_FILENAME)
-}
-
-/// Path to the unsupported-source sentinel for a hash.
-pub fn sentinel_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
-    content_hash::hls_dir(video_dir, hash).join(UNSUPPORTED_SENTINEL_FILENAME)
-}
-
-/// Absolute path used as ffmpeg's `-hls_segment_filename` value.
-pub fn segment_template_for_hash(video_dir: &Path, hash: &str) -> PathBuf {
-    content_hash::hls_dir(video_dir, hash).join(SEGMENT_TEMPLATE)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn playlist_path_lives_inside_sharded_hash_dir() {
-        let video = Path::new("/var/video");
-        let p = playlist_for_hash(video, "abcdef0123456789");
-        assert_eq!(
-            p,
-            PathBuf::from("/var/video/ab/abcdef0123456789/playlist.m3u8")
-        );
-    }
-
-    #[test]
-    fn sentinel_path_lives_alongside_playlist() {
-        let video = Path::new("/var/video");
-        let s = sentinel_for_hash(video, "abcdef0123456789");
-        assert_eq!(
-            s,
-            PathBuf::from("/var/video/ab/abcdef0123456789/playlist.unsupported")
-        );
-    }
-
-    #[test]
-    fn segment_template_lives_alongside_playlist() {
-        let video = Path::new("/var/video");
-        let t = segment_template_for_hash(video, "abcdef0123456789");
-        assert_eq!(
-            t,
-            PathBuf::from("/var/video/ab/abcdef0123456789/segment_%03d.ts")
-        );
-    }
-
-    #[test]
-    fn distinct_hashes_yield_distinct_dirs() {
-        let video = Path::new("/var/video");
-        let a = playlist_for_hash(video, "1111aaaa");
-        let b = playlist_for_hash(video, "2222bbbb");
-        assert_ne!(a.parent(), b.parent());
-    }
-}
@@ -1,243 +0,0 @@
-//! One-shot retirement of the pre-content-hash HLS output layout.
-//!
-//! Before the hash-keyed layout landed, the actor pipeline wrote every
-//! playlist as `$VIDEO_PATH/<source-basename>.m3u8` with sibling
-//! `<source-basename>_NNN.ts` segments and a `<source-basename>.m3u8.unsupported`
-//! sentinel on ffmpeg failure. The new pipeline (see
-//! [`crate::video::hls_paths`]) puts everything inside a hash-keyed
-//! subdirectory, so the legacy flat files are orphaned the moment the
-//! upgraded binary boots — they're not served, not refreshed, and not
-//! GC'd by the new orphan cleanup (which deliberately ignores anything
-//! that doesn't sit inside a `<shard>/<hash>/` dir).
-//!
-//! This migration runs once on startup. It walks `$VIDEO_PATH` at depth
-//! 1, deletes every `.m3u8` / `.m3u8.tmp` / `.m3u8.unsupported` / `.ts`
-//! file, and reports a single info line. It is idempotent — a second
-//! run finds nothing and reports zero deletions, so it's safe to leave
-//! wired in across releases until the codebase finally drops the
-//! module.
-//!
-//! Sub-directories under `$VIDEO_PATH` are intentionally left alone:
-//! every legitimate child of `$VIDEO_PATH` in the new layout is a
-//! 2-char shard directory holding hash subdirs, and those are managed
-//! by `cleanup_orphaned_playlists`.
-
-use std::path::Path;
-
-use log::{info, warn};
-
-/// Counters for what the migration did this run.
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
-pub struct RetireStats {
-    pub deleted_playlists: usize,
-    pub deleted_segments: usize,
-    pub deleted_sentinels: usize,
-    pub deleted_tmp: usize,
-    pub errors: usize,
-}
-
-impl RetireStats {
-    pub fn total_deleted(&self) -> usize {
-        self.deleted_playlists + self.deleted_segments + self.deleted_sentinels + self.deleted_tmp
-    }
-}
-
-/// Delete every legacy basename-keyed HLS artifact at the root of
-/// `video_dir`. Hash dirs (children that are directories) are skipped.
-/// Returns counts so the caller can log a single line summary.
-pub fn retire_legacy_hls_output(video_dir: &Path) -> RetireStats {
-    let mut stats = RetireStats::default();
-
-    let read = match std::fs::read_dir(video_dir) {
-        Ok(r) => r,
-        Err(e) => {
-            warn!(
-                "Legacy HLS migration: cannot read {} ({}); skipping",
-                video_dir.display(),
-                e
-            );
-            return stats;
-        }
-    };
-
-    for entry in read.flatten() {
-        let file_type = match entry.file_type() {
-            Ok(t) => t,
-            Err(_) => continue,
-        };
-        if !file_type.is_file() {
-            // Hash shard directories live here in the new layout.
-            continue;
-        }
-        let path = entry.path();
-        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
-            continue;
-        };
-
-        let bucket = classify(name);
-        let Some(bucket) = bucket else {
-            continue;
-        };
-
-        match std::fs::remove_file(&path) {
-            Ok(()) => match bucket {
-                LegacyKind::Playlist => stats.deleted_playlists += 1,
-                LegacyKind::Segment => stats.deleted_segments += 1,
-                LegacyKind::Sentinel => stats.deleted_sentinels += 1,
-                LegacyKind::Tmp => stats.deleted_tmp += 1,
-            },
-            Err(e) => {
-                warn!(
-                    "Legacy HLS migration: failed to remove {}: {}",
-                    path.display(),
-                    e
-                );
-                stats.errors += 1;
-            }
-        }
-    }
-
-    if stats.total_deleted() > 0 || stats.errors > 0 {
-        info!(
-            "Legacy HLS migration: deleted {} playlist(s), {} segment(s), {} sentinel(s), {} tmp; {} error(s)",
-            stats.deleted_playlists,
-            stats.deleted_segments,
-            stats.deleted_sentinels,
-            stats.deleted_tmp,
-            stats.errors,
-        );
-    } else {
-        info!(
-            "Legacy HLS migration: nothing to do under {}",
-            video_dir.display()
-        );
-    }
-
-    stats
-}
-
-#[derive(Debug, Clone, Copy)]
-enum LegacyKind {
-    Playlist,
-    Segment,
-    Sentinel,
-    Tmp,
-}
-
-/// Decide whether a flat file at `$VIDEO_PATH` root is legacy HLS
-/// output. Returns `None` for anything else — operator-stashed files,
-/// new-layout files (which don't live here), etc. — so we don't rm them.
-fn classify(name: &str) -> Option<LegacyKind> {
-    // Order matters: sentinel and tmp are more specific suffixes that
-    // sit on top of the .m3u8 / .ts extensions, so check them first.
-    if name.ends_with(".m3u8.unsupported") {
-        return Some(LegacyKind::Sentinel);
-    }
-    if name.ends_with(".m3u8.tmp") {
-        return Some(LegacyKind::Tmp);
-    }
-    if name.ends_with(".m3u8") {
-        return Some(LegacyKind::Playlist);
-    }
-    if name.ends_with(".ts") {
-        return Some(LegacyKind::Segment);
-    }
-    None
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::fs;
-    use tempfile::tempdir;
-
-    #[test]
-    fn classify_recognises_each_legacy_artifact() {
-        assert!(matches!(
-            classify("IMG_0341.MOV.m3u8"),
-            Some(LegacyKind::Playlist)
-        ));
-        assert!(matches!(
-            classify("IMG_0341.MOV_000.ts"),
-            Some(LegacyKind::Segment)
-        ));
-        assert!(matches!(
-            classify("IMG_0341.MOV.m3u8.unsupported"),
-            Some(LegacyKind::Sentinel)
-        ));
-        assert!(matches!(
-            classify("IMG_0341.MOV.m3u8.tmp"),
-            Some(LegacyKind::Tmp)
-        ));
-
-        assert!(classify("README.md").is_none());
-        assert!(classify("ab").is_none()); // shard dir name
-        assert!(classify(".keep").is_none());
-    }
-
-    #[test]
-    fn retire_deletes_legacy_and_leaves_hash_dirs() {
-        let tmp = tempdir().unwrap();
-        let root = tmp.path();
-
-        // Legacy artifacts at root.
-        fs::write(root.join("IMG_0341.MOV.m3u8"), b"#EXTM3U").unwrap();
-        fs::write(root.join("IMG_0341.MOV_000.ts"), b"\x00").unwrap();
-        fs::write(root.join("IMG_0341.MOV_001.ts"), b"\x00").unwrap();
-        fs::write(root.join("clip.MP4.m3u8.unsupported"), b"").unwrap();
-        fs::write(root.join("partial.m3u8.tmp"), b"").unwrap();
-
-        // New-layout hash dir we must NOT touch.
-        let hash_dir = root.join("ab").join("a".repeat(64));
-        fs::create_dir_all(&hash_dir).unwrap();
-        fs::write(hash_dir.join("playlist.m3u8"), b"#EXTM3U").unwrap();
-        fs::write(hash_dir.join("segment_000.ts"), b"\x00").unwrap();
-
-        // Unrelated file we must NOT touch.
-        fs::write(root.join("README.md"), b"don't touch me").unwrap();
-
-        let stats = retire_legacy_hls_output(root);
-        assert_eq!(stats.deleted_playlists, 1);
-        assert_eq!(stats.deleted_segments, 2);
-        assert_eq!(stats.deleted_sentinels, 1);
-        assert_eq!(stats.deleted_tmp, 1);
-        assert_eq!(stats.errors, 0);
-
-        // Legacy artifacts gone.
-        assert!(!root.join("IMG_0341.MOV.m3u8").exists());
-        assert!(!root.join("IMG_0341.MOV_000.ts").exists());
-        assert!(!root.join("clip.MP4.m3u8.unsupported").exists());
-        assert!(!root.join("partial.m3u8.tmp").exists());
-        // Hash dir untouched.
-        assert!(hash_dir.join("playlist.m3u8").exists());
-        assert!(hash_dir.join("segment_000.ts").exists());
-        // Unrelated file untouched.
-        assert!(root.join("README.md").exists());
-    }
-
-    #[test]
-    fn retire_is_idempotent() {
-        let tmp = tempdir().unwrap();
-        let root = tmp.path();
-
-        fs::write(root.join("video.mp4.m3u8"), b"#EXTM3U").unwrap();
-        fs::write(root.join("video.mp4_000.ts"), b"\x00").unwrap();
-
-        let first = retire_legacy_hls_output(root);
-        assert_eq!(first.deleted_playlists + first.deleted_segments, 2);
-
-        let second = retire_legacy_hls_output(root);
-        assert_eq!(second.total_deleted(), 0);
-        assert_eq!(second.errors, 0);
-    }
-
-    #[test]
-    fn retire_handles_missing_dir() {
-        // No panic, no error count blowing up — just a warn + zero stats.
-        let tmp = tempdir().unwrap();
-        let missing = tmp.path().join("does_not_exist");
-        let stats = retire_legacy_hls_output(&missing);
-        assert_eq!(stats.total_deleted(), 0);
-        assert_eq!(stats.errors, 0);
-    }
-}
@@ -9,8 +9,6 @@ use walkdir::WalkDir;

 pub mod actors;
 pub mod ffmpeg;
-pub mod hls_paths;
-pub mod legacy_migration;

 #[allow(dead_code)]
 pub async fn generate_video_gifs() {
@@ -22,6 +22,7 @@ use std::time::{Duration, SystemTime};
 use actix::Addr;
 use chrono::Utc;
 use log::{debug, error, info, warn};
+use walkdir::WalkDir;

 use crate::backfill;
 use crate::content_hash;
@@ -32,7 +33,6 @@ use crate::exif;
 use crate::face_watch;
 use crate::faces;
 use crate::file_types;
-use crate::hls_stats;
 use crate::libraries;
 use crate::library_maintenance;
 use crate::perceptual_hash;
@@ -40,34 +40,20 @@ use crate::tags;
 use crate::tags::SqliteTagDao;
 use crate::thumbnails;
 use crate::video;
-use crate::video::actors::{
-    GeneratePreviewClipMessage, QueueVideosMessage, VideoPlaylistManager, VideoToQueue,
-};
-use crate::video::hls_paths;
+use crate::video::actors::{GeneratePreviewClipMessage, QueueVideosMessage, VideoPlaylistManager};

-/// Clean up orphaned HLS hash directories under `$VIDEO_PATH` whose
-/// content_hash no longer appears in `image_exif`.
-///
-/// Walks `<video_path>/<shard>/<hash>/` — the layout written by the
-/// hash-keyed `PlaylistGenerator` — and deletes any hash directory whose
-/// hash isn't in the current DISTINCT set of `image_exif.content_hash`
-/// values. Empty shard parents are reaped on the same pass.
-///
-/// Legacy basename-keyed files at `$VIDEO_PATH` root (from the
-/// pre-content-hash layout) are left alone here; the one-shot startup
-/// migration is responsible for retiring those.
+/// Clean up orphaned HLS playlists and segments whose source videos no longer exist.
 ///
 /// `libs_lock` is the shared live view of the libraries table — read at the
 /// top of each cleanup pass so a PATCH /libraries/{id} that disables or
 /// re-mounts a library is picked up without a restart.
 pub fn cleanup_orphaned_playlists(
    libs_lock: Arc<RwLock<Vec<libraries::Library>>>,
-    _excluded_dirs: Vec<String>,
+    excluded_dirs: Vec<String>,
    library_health: libraries::LibraryHealthMap,
 ) {
    std::thread::spawn(move || {
-        let video_path_str = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
-        let video_path = PathBuf::from(&video_path_str);
+        let video_path = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");

        // Get cleanup interval from environment (default: 24 hours)
        let cleanup_interval_secs = dotenv::var("PLAYLIST_CLEANUP_INTERVAL_SECONDS")
@@ -75,14 +61,18 @@ pub fn cleanup_orphaned_playlists(
            .and_then(|s| s.parse::<u64>().ok())
            .unwrap_or(86400); // 24 hours

-        info!("Starting orphaned HLS cleanup job");
+        info!("Starting orphaned playlist cleanup job");
        info!("  Cleanup interval: {} seconds", cleanup_interval_secs);
-        info!("  HLS directory: {}", video_path.display());
-
-        let exif_dao: Arc<Mutex<Box<dyn ExifDao>>> = Arc::new(Mutex::new(Box::new(
-            SqliteExifDao::new(),
-        )
-            as Box<dyn ExifDao>));
+        info!("  Playlist directory: {}", video_path);
+        {
+            let libs = libs_lock.read().unwrap_or_else(|e| e.into_inner());
+            for lib in libs.iter() {
+                info!(
+                    "  Checking sources under '{}' at {}",
+                    lib.name, lib.root_path
+                );
+            }
+        }

        loop {
            std::thread::sleep(Duration::from_secs(cleanup_interval_secs));
@@ -93,27 +83,22 @@ pub fn cleanup_orphaned_playlists(
            let libs: Vec<libraries::Library> =
                libs_lock.read().unwrap_or_else(|e| e.into_inner()).clone();

-            // Safety gate: skip the cleanup cycle if any (enabled)
-            // library is stale. With hash-keyed layout the orphan
-            // decision is a pure DB query, but the upstream
-            // missing-file scan that *removes* image_exif rows already
-            // pauses for stale libraries — so a stale tick can hold
-            // hashes alive that would otherwise have been GC'd. The
-            // safety is then mostly belt-and-suspenders: a hash that
-            // should have been retired is just kept one tick longer.
-            // We'd rather leak a few hash dirs for 24h than wipe a
-            // hash dir whose source was briefly unreachable.
+            // Safety gate: skip the cleanup cycle if any library is
+            // stale. A missing source video on a stale library is
+            // indistinguishable from a transient unmount, and the
+            // cleanup is destructive — we'd rather leak a few playlist
+            // files for a tick than delete one whose source is briefly
+            // unreachable. The cycle re-runs on the next interval.
            {
                let guard = library_health.read().unwrap_or_else(|e| e.into_inner());
                let stale: Vec<String> = libs
                    .iter()
-                    .filter(|lib| lib.enabled)
                    .filter(|lib| guard.get(&lib.id).map(|h| !h.is_online()).unwrap_or(false))
                    .map(|lib| lib.name.clone())
                    .collect();
                if !stale.is_empty() {
                    warn!(
-                        "Skipping orphaned-HLS cleanup: {} library(ies) stale: [{}]",
+                        "Skipping orphaned-playlist cleanup: {} library(ies) stale: [{}]",
                        stale.len(),
                        stale.join(", ")
                    );
@@ -121,129 +106,116 @@ pub fn cleanup_orphaned_playlists(
                }
            }

-            info!("Running orphaned HLS cleanup");
+            info!("Running orphaned playlist cleanup");
            let start = std::time::Instant::now();
+            let mut deleted_count = 0;
+            let mut error_count = 0;

-            // Snapshot every live content_hash currently in image_exif.
-            // We intentionally don't filter by library here — a hash that
-            // lives in any library is alive, even if the library a given
-            // download attributed it to has since been disabled.
-            let alive_hashes: HashSet<String> = {
-                let context = opentelemetry::Context::new();
-                let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
-                match dao.list_distinct_content_hashes(&context) {
-                    Ok(hashes) => hashes.into_iter().collect(),
-                    Err(e) => {
-                        error!(
-                            "Failed to load distinct content hashes; skipping HLS cleanup: {:?}",
-                            e
+            // Find all .m3u8 files in VIDEO_PATH
+            let playlists: Vec<PathBuf> = WalkDir::new(&video_path)
+                .into_iter()
+                .filter_map(|e| e.ok())
+                .filter(|e| e.file_type().is_file())
+                .filter(|e| {
+                    e.path()
+                        .extension()
+                        .and_then(|s| s.to_str())
+                        .map(|ext| ext.eq_ignore_ascii_case("m3u8"))
+                        .unwrap_or(false)
+                })
+                .map(|e| e.path().to_path_buf())
+                .collect();
+
+            info!("Found {} playlist files to check", playlists.len());
+
+            for playlist_path in playlists {
+                // Extract the original video filename from playlist name
+                // Playlist format: {VIDEO_PATH}/{original_filename}.m3u8
+                if let Some(filename) = playlist_path.file_stem() {
+                    let video_filename = filename.to_string_lossy();
+
+                    // Search for this video file across every configured
+                    // library, respecting EXCLUDED_DIRS so we don't
+                    // false-resurrect playlists for videos that only
+                    // exist inside an excluded subtree. As soon as one
+                    // library has a matching source, we're done — the
+                    // playlist isn't orphaned.
+                    let mut video_exists = false;
+                    'libs: for lib in &libs {
+                        let effective = lib.effective_excluded_dirs(&excluded_dirs);
+                        for entry in image_api::file_scan::walk_library_files(
+                            Path::new(&lib.root_path),
+                            &effective,
+                        ) {
+                            if let Some(entry_stem) = entry.path().file_stem()
+                                && entry_stem == filename
+                                && file_types::is_video_file(entry.path())
+                            {
+                                video_exists = true;
+                                break 'libs;
+                            }
+                        }
+                    }
+
+                    if !video_exists {
+                        debug!(
+                            "Source video for playlist {} no longer exists, deleting",
+                            playlist_path.display()
                        );
-                        continue;
-                    }
-                }
-            };

-            let mut deleted_count = 0usize;
-            let mut error_count = 0usize;
-            let mut inspected = 0usize;
-
-            // Walk top-level entries of VIDEO_PATH. Each is either a
-            // legacy basename-keyed `.m3u8` / `.ts` (skip — migration
-            // owns those) or a 2-char shard directory.
-            let read_root = match std::fs::read_dir(&video_path) {
-                Ok(r) => r,
-                Err(e) => {
-                    error!(
-                        "HLS cleanup: failed to read VIDEO_PATH {}: {}",
-                        video_path.display(),
-                        e
-                    );
-                    continue;
-                }
-            };
-
-            for shard_entry in read_root.flatten() {
-                let shard_path = shard_entry.path();
-                if !shard_entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
-                    continue;
-                }
-                let shard_name = match shard_path.file_name().and_then(|n| n.to_str()) {
-                    Some(n) => n.to_owned(),
-                    None => continue,
-                };
-                if !is_hash_shard(&shard_name) {
-                    continue;
-                }
-
-                // Hash dirs inside this shard.
-                let read_shard = match std::fs::read_dir(&shard_path) {
-                    Ok(r) => r,
-                    Err(e) => {
-                        warn!(
-                            "HLS cleanup: failed to read shard {}: {}",
-                            shard_path.display(),
-                            e
-                        );
-                        continue;
-                    }
-                };
-
-                let mut shard_emptied = true;
-                for hash_entry in read_shard.flatten() {
-                    let hash_path = hash_entry.path();
-                    if !hash_entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
-                        shard_emptied = false;
-                        continue;
-                    }
-                    let Some(hash_name) = hash_path
-                        .file_name()
-                        .and_then(|n| n.to_str())
-                        .map(|n| n.to_owned())
-                    else {
-                        shard_emptied = false;
-                        continue;
-                    };
-                    if !is_full_hash(&hash_name) {
-                        shard_emptied = false;
-                        continue;
-                    }
-                    inspected += 1;
-
-                    if alive_hashes.contains(&hash_name) {
-                        shard_emptied = false;
-                        continue;
-                    }
-
-                    debug!(
-                        "HLS cleanup: removing orphan hash dir {}",
-                        hash_path.display()
-                    );
-                    match std::fs::remove_dir_all(&hash_path) {
-                        Ok(()) => deleted_count += 1,
-                        Err(e) => {
+                        // Delete the playlist file
+                        if let Err(e) = std::fs::remove_file(&playlist_path) {
                            warn!(
-                                "Failed to delete orphan hash dir {}: {}",
-                                hash_path.display(),
+                                "Failed to delete playlist {}: {}",
+                                playlist_path.display(),
                                e
                            );
                            error_count += 1;
-                            shard_emptied = false;
+                        } else {
+                            deleted_count += 1;
+
+                            // Also try to delete associated .ts segment files
+                            // They are typically named {filename}N.ts in the same directory
+                            if let Some(parent_dir) = playlist_path.parent() {
+                                for entry in WalkDir::new(parent_dir)
+                                    .max_depth(1)
+                                    .into_iter()
+                                    .filter_map(|e| e.ok())
+                                    .filter(|e| e.file_type().is_file())
+                                {
+                                    let entry_path = entry.path();
+                                    if let Some(ext) = entry_path.extension()
+                                        && ext.eq_ignore_ascii_case("ts")
+                                    {
+                                        // Check if this .ts file belongs to our playlist
+                                        if let Some(ts_stem) = entry_path.file_stem() {
+                                            let ts_name = ts_stem.to_string_lossy();
+                                            if ts_name.starts_with(&*video_filename) {
+                                                if let Err(e) = std::fs::remove_file(entry_path) {
+                                                    debug!(
+                                                        "Failed to delete segment {}: {}",
+                                                        entry_path.display(),
+                                                        e
+                                                    );
+                                                } else {
+                                                    debug!(
+                                                        "Deleted segment: {}",
+                                                        entry_path.display()
+                                                    );
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
                        }
                    }
                }
-
-                // If this shard now has no surviving hash dirs, reap
-                // the (empty) shard dir too. remove_dir fails if non-
-                // empty, which is the guard.
-                if shard_emptied {
-                    let _ = std::fs::remove_dir(&shard_path);
-                }
            }

            info!(
-                "Orphaned HLS cleanup completed in {:?}: inspected {} hash dirs, deleted {} orphans, {} errors",
+                "Orphaned playlist cleanup completed in {:?}: deleted {} playlists, {} errors",
                start.elapsed(),
-                inspected,
                deleted_count,
                error_count
            );
@@ -251,24 +223,11 @@ pub fn cleanup_orphaned_playlists(
    });
 }

-/// True iff `s` is a two-character lowercase-hex shard prefix.
-fn is_hash_shard(s: &str) -> bool {
-    s.len() == 2 && s.bytes().all(|b| b.is_ascii_hexdigit())
-}
-
-/// True iff `s` looks like a full blake3 hex digest (64 hex chars).
-/// Be strict so we don't accidentally rm a non-HLS directory operators
-/// have stashed under VIDEO_PATH.
-fn is_full_hash(s: &str) -> bool {
-    s.len() == 64 && s.bytes().all(|b| b.is_ascii_hexdigit())
-}
-
 pub fn watch_files(
    libs_lock: Arc<RwLock<Vec<libraries::Library>>>,
    playlist_manager: Addr<VideoPlaylistManager>,
    preview_generator: Addr<video::actors::PreviewClipGenerator>,
    face_client: crate::ai::face_client::FaceClient,
-    clip_client: crate::ai::clip_client::ClipClient,
    excluded_dirs: Vec<String>,
    library_health: libraries::LibraryHealthMap,
 ) {
@@ -301,14 +260,6 @@ pub fn watch_files(
                 or APOLLO_API_BASE_URL to enable)"
            );
        }
-        if clip_client.is_enabled() {
-            info!("  CLIP semantic search: ENABLED");
-        } else {
-            info!(
-                "  CLIP semantic search: DISABLED (set APOLLO_CLIP_API_BASE_URL \
-                 or APOLLO_API_BASE_URL to enable)"
-            );
-        }
        {
            let libs = libs_lock.read().unwrap_or_else(|e| e.into_inner());
            for lib in libs.iter() {
@@ -337,12 +288,7 @@ pub fn watch_files(
        ));

        let mut last_quick_scan = SystemTime::now();
-        // Initialize to UNIX_EPOCH so the *first* tick is treated as a
-        // full scan. That replaces the legacy startup ScanDirectoryMessage
-        // walk for HLS playlists: every library's existing media gets
-        // checked once at watcher boot, instead of waiting up to
-        // full_interval_secs (1h default) for the first natural full scan.
-        let mut last_full_scan = SystemTime::UNIX_EPOCH;
+        let mut last_full_scan = SystemTime::now();
        let mut scan_count = 0u64;

        // Per-library cursor for the missing-file scan. Each tick reads
@@ -472,21 +418,6 @@ pub fn watch_files(
                    );
                }

-                // CLIP embedding backlog. Independent of face detection —
-                // drain runs whenever CLIP is enabled, even on deploys
-                // that don't have the face engine wired up. Mirrors the
-                // face drain shape (capped per tick, no-op when disabled).
-                if clip_client.is_enabled() {
-                    let context = opentelemetry::Context::new();
-                    backfill::process_clip_backlog(
-                        &context,
-                        lib,
-                        &clip_client,
-                        &exif_dao,
-                        &effective_excludes,
-                    );
-                }
-
                // Date-taken backfill: drain rows whose canonical date is
                // either unresolved or only fs_time-sourced. Independent
                // of face detection — runs even on deploys that don't
@@ -600,16 +531,6 @@ pub fn watch_files(
            }

            if is_full_scan {
-                // End-of-full-scan HLS readiness summary: log a single
-                // info line + refresh the Prometheus gauges. Skipped on
-                // quick scans because the cost is non-trivial on big
-                // libraries and the data only meaningfully changes on
-                // full passes.
-                let video_dir_str = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
-                let stats =
-                    hls_stats::compute_and_publish(&libs, &exif_dao, Path::new(&video_dir_str));
-                hls_stats::log_summary(&stats);
-
                last_full_scan = now;
            }
            last_quick_scan = now;
@@ -679,18 +600,14 @@ pub fn process_new_files(
    // Batch query: Get all EXIF data for these files in one query
    let file_paths: Vec<String> = files.iter().map(|(_, rel_path)| rel_path.clone()).collect();

-    // Map of rel_path -> Option<content_hash>. The presence of the key
-    // tells us "row exists"; the Option value carries the hash for the
-    // HLS pipeline so video files without a hash (mid-backfill) skip
-    // this tick rather than fall back to a basename-colliding playlist.
-    let existing_exif: HashMap<String, Option<String>> = {
+    let existing_exif_paths: HashMap<String, bool> = {
        let mut dao = exif_dao.lock().expect("Unable to lock ExifDao");
        // Walk is per-library, so scope the lookup so a same-named file
        // in another library doesn't make this one look already-indexed.
        match dao.get_exif_batch(&context, Some(library.id), &file_paths) {
            Ok(exif_records) => exif_records
                .into_iter()
-                .map(|record| (record.file_path, record.content_hash))
+                .map(|record| (record.file_path, true))
                .collect(),
            Err(e) => {
                error!("Error batch querying EXIF data: {:?}", e);
@@ -720,7 +637,7 @@ pub fn process_new_files(
            && !bare_legacy_thumb_path.exists()
            && !thumbnails::unsupported_thumbnail_sentinel(&scoped_thumb_path).exists()
            && !thumbnails::unsupported_thumbnail_sentinel(&bare_legacy_thumb_path).exists();
-        let needs_row = !existing_exif.contains_key(relative_path);
+        let needs_row = !existing_exif_paths.contains_key(relative_path);

        if needs_thumbnail || needs_row {
            new_files_found = true;
@@ -879,45 +796,28 @@ pub fn process_new_files(
        }
    }

-    // Check for videos that need HLS playlists. All output is keyed on
-    // `content_hash` (see `crate::video::hls_paths`), so files whose
-    // `image_exif.content_hash` is still NULL — typically mid-backfill —
-    // are skipped this tick and picked up after the unhashed backlog
-    // drain populates the hash on a subsequent tick. Skipping is the
-    // correct call: queuing without a hash would either fall back to
-    // basename keying (the bug this refactor fixes) or fabricate one.
+    // Check for videos that need HLS playlists
    let video_path_base = dotenv::var("VIDEO_PATH").expect("VIDEO_PATH must be set");
-    let video_dir = Path::new(&video_path_base);
-    let mut videos_needing_playlists: Vec<VideoToQueue> = Vec::new();
-    let mut hashless_video_count = 0usize;
+    let mut videos_needing_playlists = Vec::new();

-    for (file_path, relative_path) in &files {
-        if !file_types::is_video_file(file_path) {
-            continue;
-        }
-        let Some(hash) = existing_exif.get(relative_path).and_then(|h| h.clone()) else {
-            hashless_video_count += 1;
-            continue;
-        };
-        let playlist_path = hls_paths::playlist_for_hash(video_dir, &hash);
-        if playlist_needs_generation(file_path, &playlist_path) {
-            videos_needing_playlists.push(VideoToQueue {
-                video_path: file_path.clone(),
-                content_hash: hash,
-            });
+    for (file_path, _relative_path) in &files {
+        if file_types::is_video_file(file_path) {
+            // Construct expected playlist path
+            let playlist_filename =
+                format!("{}.m3u8", file_path.file_name().unwrap().to_string_lossy());
+            let playlist_path = Path::new(&video_path_base).join(&playlist_filename);
+
+            // Check if playlist needs (re)generation
+            if playlist_needs_generation(file_path, &playlist_path) {
+                videos_needing_playlists.push(file_path.clone());
+            }
        }
    }

-    if hashless_video_count > 0 {
-        debug!(
-            "Watcher tick for '{}': skipped {} video(s) with NULL content_hash (will retry after backfill)",
-            library.name, hashless_video_count
-        );
-    }
-
+    // Send queue request to playlist manager
    if !videos_needing_playlists.is_empty() {
        playlist_manager.do_send(QueueVideosMessage {
-            videos: videos_needing_playlists,
+            video_paths: videos_needing_playlists,
        });
    }

@@ -1062,33 +962,6 @@ mod tests {
        assert!(playlist_needs_generation(&video, &playlist));
    }

-    #[test]
-    fn is_hash_shard_accepts_only_two_hex_chars() {
-        assert!(is_hash_shard("ab"));
-        assert!(is_hash_shard("00"));
-        assert!(is_hash_shard("FF")); // ASCII hexdigit covers upper-case too
-        assert!(!is_hash_shard("a"));
-        assert!(!is_hash_shard("abc"));
-        assert!(!is_hash_shard("zz"));
-        assert!(!is_hash_shard(""));
-        assert!(!is_hash_shard("a/"));
-    }
-
-    #[test]
-    fn is_full_hash_accepts_only_64_hex_chars() {
-        let h64 = "a".repeat(64);
-        assert!(is_full_hash(&h64));
-        let mixed = format!("ab{}", "0".repeat(62));
-        assert!(is_full_hash(&mixed));
-        assert!(!is_full_hash(&"a".repeat(63)));
-        assert!(!is_full_hash(&"a".repeat(65)));
-        assert!(!is_full_hash(&format!("z{}", "a".repeat(63))));
-        // Defends against operator stashing e.g. ".tmp" or "Plex" under
-        // VIDEO_PATH — neither passes the full-hash gate.
-        assert!(!is_full_hash(".tmp"));
-        assert!(!is_full_hash("Plex"));
-    }
-
    #[test]
    fn playlist_needs_generation_true_when_video_missing_metadata() {
        // Video doesn't exist; metadata fails for it. Falls through to the
@@ -1,13 +0,0 @@
-{
-  "Worcester": "Wuster",
-  "Spokane": "Spo can",
-  "wsl": "W S L",
-  "sql": "sequel",
-  "api": "A P I",
-  "US": "U S",
-  "Dr.": "Doctor",
-  "St.": "Saint",
-  "blvd": "boulevard",
-  "vs.": "versus",
-  "etc.": "et cetera"
-}